From 9ef2b8fe1f3a13784c12f8fbdcbbc1b0154ca689 Mon Sep 17 00:00:00 2001 From: Elias Fleckenstein Date: Fri, 12 Nov 2021 15:30:49 +0100 Subject: [PATCH] Japanese waifu names --- Japanese-Lipsum.txt | 39 ++ hiragana | 1 + init.lua | 6 +- utf8.lua | 1049 +++++++++++++++++++++++++++++++++++++++++++ waifu.lua | 62 ++- 5 files changed, 1133 insertions(+), 24 deletions(-) create mode 100644 Japanese-Lipsum.txt create mode 100644 hiragana create mode 100644 utf8.lua diff --git a/Japanese-Lipsum.txt b/Japanese-Lipsum.txt new file mode 100644 index 0000000..7460cb3 --- /dev/null +++ b/Japanese-Lipsum.txt @@ -0,0 +1,39 @@ +供だひ修更にもょ語事利ユワキ資中にばんべ指服けやば貨図分ヨ身効測出メクチ条3川ぐゆで転転接籍華おふづ。除ノ択歩スいど禁京藤ス多白ね縄平ぞなこ犯扱変選フハ子族スむらじ州試るつへ線9入メ扉員卸呪でび。成マセユエ重証明ア坪姿ニエヌ味鈴スつを聞変の訃疵キヲヱ長代ばずるぱ牡葉べちさイ道内ぎまおへ添5売きドめり説件ぐ来記ヒク毎協わちつス。 + +極ウノニヌ戒濯アクツ書供やもねみ登塩ぐ天近スへせ命集くレ読中えを京入技だかょ捨情キヲワ市鉱こえめ域江76代コ探3告らえれゆ中昨やぞど校皇島内痛聴遊んゅり。72断テウコト碁上ウヱテ報暮テ著仮際タ購暮人ぽそゅれ復貨タヨセ見移セソルタ秋央ル王覧ヤ借披りでざ。稿接フ埼接ぜほぽス格従フン違終提ぼがざ隊育ネケヱ際84法アネロレ半子めべさン明都ーぶ訴研つ度供悪り。 + +有張航オタモ棺策ネワリ産使や徴著ホレネヌ問少ふッ舞量けん啓演ヤスヌ本山ぞど送用44竜ラルモホ商宣獲強ぶ。並ょむ経注れめ載司ツ種同ヤホメ固救がルん毎16脳抑1家ぞ黒先せう覧位カヨタヲ年酒けだト因業川ぜつも的人レヱホ形残接南了がえぼな。相ノサ副山前どよぎ岡1初トでるは献供スリヨキ彼子とにご果井ドん桃放堀アイヘ事問づやにめ造足ぜをじぎ活季染秋づけが。 + +面ニ教睦たけ変側マユ授8意ヨワマリ浪手らゃ固終訴かくびべ図外クじぜを岡写たぴぼ史制ドー内図まンな逃久演オ界異音セタ稿辺そイ盛記けのゅ。改ッ話油めは康中はわラう気系まラ墜事覧数シルリテ副震ユロ任29池リア迫学ヱ稼42宝私41島ユ性頼間わ文符季省おそ。著ど育救ノオスラ幸書スヌクケ関著ウサ帯道ンお会逆コホ工誌ナ和有政とクろて回断たかと止凍ラい中見片透割記くずぶ。 + +1明肥ヤツ必敬薮際フ領耐ゃ衛演ド去位ゆごする職3頭ラヤ出司ゅ作島セツシ源三ッぴべ症載ともレじ作説ヨキミ脅原ンそど男患ぶ進描構満勘ク。抱ゃゅフ投挙ラ走体入クウ目活ヤミヨ玲賀ラネ権真事災ぜあ観92三摘び幕笑米ロマキ宅闘ぼッか終焦茂据灯とぴや。2加ラ世盗タヨ問女ーぽリら知充よましき力再題ちめま時彰べを内明ばこ声4徒トサチ掲存ハ事情マフソ引歳べ玲且佃佞侶すめかフ。 + +果ニナ元7康ぱ広集るの瞳美とやもの細18強とぶ共成りせゃ読供薄モ単乱スめさ決改抱診露ひむびあ。文げのト賞36玉仙ア小記ルマ校料ろが着賞リこ界面写さ業活へンぼは伊境梗ざすん小歳トツ書殺ッ一馬購存訪推ぴ。次ひげ集5道メヨ際要レフ共医じこでも高新ナ坊回をリ正高ヨス建順載レ覧開ノワオ電画テヱワ始盤央紹づルあ。 + +行オラ邸火6反くりフっ効小イオ高貢べばい職16移ハヲソエ況安なづ更下てぼけ興人セキミ上般ノレワミ句付イ増写エ界深レょ情門木森らてッみ。棺明やか統岩シ泊町わ堀際読ツワコ側負ハサシノ勢習ルラ許映へづ裕作速ーゃへで間展ヒニエ聞業初廟ミアメル健忘礎れ。開再こたぜ載5負ねたま写取レ拉激ょ無産ぼあ刑適セキミ別2長権リ聞無山ちうこら挙効イユ設芸ヨラリ件同ヒシ新宿ーとト。 + 
+福がン禁者テ料画うあね籍34人ロウメ市組ぜびぴ動点すけぶふ春載ぴルぎが前権るレりみ献属ヱコレ備総ハカエタ景止ルゃと企稿吉タミ当島ヨ楽子済ユ褒究キ告図ば旅接ずひ。非まばごト校54行させ込攻よこば央豊の樹来質げこもね者両レルマ用掲制キトタ状適サタヲ市問てト王17農閣之押ぶ。展ずル議作キナコ求質裁うばて今変ヘタ害要ユコネ芸45魅8絶石そぐらむ湿残トケイ億理覧委みすけ視買やど。 + +方ネ谷己き択能キシヤト公寮もンぶふ記供にざぽあ持転ユトヤ食何むとれ情球シセク細古コネモセ支見8棋録れ毎活レてずな能崎車博及トち。日くれ原声ト赤竹ヨフレリ返南意テキ供握ニモ穫方オコシ川確ノ味加なぜ転流ずちぽ打却ゃ公関下かれらぜ。提られざぎ保言人速進ハ分貿ケ相員ン堀6欧だべや航男レ応理づぎせが田報ぴ手10頭がるク治15社キ質9節示ぼ宝分ロ円些仄伽凹ゆ。 + +合わドいイ際悪ノ調導むまぐば莉度7需好ま廟2宴ハシミ略9大やはめ芸生っ渡衆ミソケト子59要62保たべぜ。暮ムワケ政構合本オシス使受ム本記賞チ教生ラふう家内だ遺煮認せげごそ画要渡ラオ越速ぎーぜす球光ニナヱ参直未努わざび。意フぜ興科ニヘ定岐いび必護じつ供蜂ぶゆげぴ宮高ラタ規経ミ界場ほめンさ論給ーる井銀ケハトソ囲央進んレめぽ織少ヘ美横タミナ美会じト属味き発宮消すっはお。 + +全ルツイコ探98溝ぽ人聞景エ円約くごさ禁著ニタソ息枝皇たゆひ時25兄経レやのわ須落ず毎禁ルテヒ見菅ンべゅ庫来らレるば正月アヲヨ倒任きろら面戒庭棄銃らたと。東ホ映野あ対決ょよ問無わまが暮射ハ専原キノヌ気必ぽつ入月クケロル化添対版メレウモ築携ロラホ番飲攻後伯スルぎ。欲任ちぽっつ時3聞ウテ治25携訪ス展一を図記ス遠報づ余21安済戦やなリり済康ネロテ費録ロマメワ定申ぐ要修級色る。 + +多した事元レかも紀流ニハナ問属覚染ろれ着住セヌヱ要県く後喝ぞえ約一ルヌヘヲ昭準芸ヨクラケ写事んが強試公そ攻史エソ年上ーとび背著ょじ健細固レーイと。憂ソ百八べお平1禁古み故温シモノ旅42化ヒマヱ数制ホチヱ副度へすわ行報まンだ演北テ後活スム紹混遠隠巡よじフゆ。就ヤワアル古入対ラシ五著ン土村ニロツア形宿ヱ作中かだめふ応8間際リな害8輸のにスり入講昌ぶねめッ記健如たざ。 + +人こぽ込約71都レカロヲ量実展ぴび録4議ぐ堅登ノワハユ動潮いなぎぽ収択ル北庁ニ心係宇油でーあ。郎悪エムユ少群オリ現先営申ソス民能ラわる速稿かしへフ対索な天3戦ぱなさイ製島ロヌセ買出エテユ事転こど将決チテキモ事容刻ぴ。需き官止ゅぶほく毎年く田向べりばや勝離めド降96直サルアネ国導クホ版毎ク闘8持ごんげ試就ヨ就社コ左文がイぞき糖生覧速車工ぱらルに会囲ヲトホ掲外捕ずぶぐッ。 + +止と街彩達るば習力月ずならー窃決ソウ敏読へドく況動ウセニ断済ち締名視ヱチ細織約ルヨサ含日煙畿ぶろ。率ね懸木ごフぼ者数ウラヨ際3支ヲコ権石わでッ蛯予ゅみラ向前れんのは医67重ネク族71案ツル治本はわる化臓ぜ削左はク陽然奮ヤナムネ習代碁也ッれた。出産ウノク中理むふっゆ証社ハイ型59全ホニワ回視ご変負ロ読題ち物示暮どク山群クロトマ応切ケウム始欧す日升忍曇椎ラなせ。 + +定ラ代試ワエキ勧放かげべ闘以ぱトてレ必図ネ意真波ソ五裕へぞフた追康ケ低転経ずきむほ国作ぜッぽの礎著あ来梨がわぜご群故柱浪でびイち。焼びお家手みまびで入58返成ぼやはト枚戻告ワ記細求ケロヤモ点記もふま何済4次イクごす記割トテ魅受こ日教ロエウル実70義へをでゅ望歌ハウ続井計ゅ。42文ウ結市ッ是負ニクカヱ昇七カヒ当涙海ろはお風説ぽろ生規ヨノ記日ヘモメ死7責セ広見府憂昌ゆスぼて。 + +版ぽつうた無人サ任覧とべごそ調重にうク書兄責リゃな口証テナ副37座賞カハ長飾同ソキレア度埋を。作ぽ非反せみ想町や払連ぶ帳県ぼほリ来雑クき豊司モミウ脳最明マ夫三エ堀億苗ヘヱホル必元け作問そうぐち済務年ぼまクち彰供宏勲悔殿くひン。68呼ハセ原31飛けらい考料ぐとる磨9請景タ荷歩ケ惣後ニチ法役ざ格作はし競立ロ転号幌トゃがル。 + +点ヲメ通拉ねびンめ勢象テカ電秒ょや写葬ロタノ民養題しねてむ各縄買ヒトソ門41好せトてば湊争にリとょ十生ヤノヘ時事キ玉任ハヒク査文大ッき然画ふレが生裕どクぐげ付古家若園るめ。89分佐ゃほ他健開をもせろ視事せゃん局堀サヲ果面き表42行だも高作ぱ掘厳リおぽ完69入りひ神語方レトヲ改回西コヘ必無ソユメヒ声供け。 + +開オホウコ転強ぐ女止だ設旬74岐モソヤマ方6長コヒ分現じをぞさ量仕ユノケウ炎京エユト西断ラやぱく得笑マ携米リ全夢ツヘリチ育来時分セコ連連ランわ主外てにりえ走直近じぜ。国メタ負作リずフ会高県メハモ迷転セイ新百武ちふは保81付枕ば幅37裏レぴ紹析甘サ治党ト禁購コエ中観8様場宇油らょ。 + 
+困すく団芸セ選大オ通3園ニス設化どさてむ平京レタホ半早ヱム基豊35仮稼暮択3負マクナヤ京秀ヘタリサ細的導が委野ょちゃつ稿号ゆ宇党れらば魚役エキラク無断ンど会焦茂影ッづり。学ホハフ版術ふ彩幹冬ずぐちラ域東ー書現打成カ小上度ホカ国将か小紙会勢土工っむだ。込ど官事やくだね要猟ツ駒月持うるこち集北がめむれ上媒っ写世レ費北6条チテノ外盗クオ示面みぞ名園返ご。 + +96人チノ曲49療チミ属日ケムユ家是ぽスらい衛写シコ意転ヨワメ心房谷けゅ訪会腸ぱさドス報伏かッこ逆傍剰哀悼リお。作ヘコ余加社登サタマヨ動内生変ノリ織票リハ田国を幅9時をぜ提答す配1荒衝貨苦ゃてちあ。防ざく容1優て質港びドゅべ語特ーび素父リぶ付続レ納探ホヤ育包げおレ島児ゅラぴ強当ぎ貢早名ソカチ災能セヌ著養ほ。 \ No newline at end of file diff --git a/hiragana b/hiragana new file mode 100644 index 0000000..ed9236f --- /dev/null +++ b/hiragana @@ -0,0 +1 @@ +return {["ぎょ"] = "gyo", ["と"] = "to", ["ば"] = "ba", ["ぼ"] = "bo", ["じょ"] = "jo", ["う"] = "u", ["へ"] = "he", ["ね"] = "ne", ["ぎゅ"] = "gyu", ["ひ"] = "hi", ["ま"] = "ma", ["にゃ"] = "nya", ["ふ"] = "fu", ["びゅ"] = "byu", ["ひょ"] = "hyo", ["ぺ"] = "pe", ["ぎ"] = "gi", ["い"] = "i", ["りょ"] = "ryo", ["ちゃ"] = "tya", ["ほ"] = "ho", ["ちゅ"] = "tyu", ["ゆ"] = "yu", ["び"] = "bi", ["た"] = "ta", ["びょ"] = "byo", ["の"] = "no", ["きゃ"] = "kya", ["ぞ"] = "zo", ["ちょ"] = "tyo", ["しゃ"] = "sya", ["ぎゃ"] = "gya", ["て"] = "te", ["そ"] = "so", ["ぐ"] = "gu", ["や"] = "ya", ["びゃ"] = "bya", ["ち"] = "chi", ["しゅ"] = "syu", ["げ"] = "ge", ["ぴ"] = "pi", ["か"] = "ka", ["ぶ"] = "bu", ["りゃ"] = "rya", ["く"] = "ku", ["じゅ"] = "ju", ["あ"] = "a", ["ず"] = "zu", ["ぬ"] = "nu", ["ど"] = "do", ["れ"] = "re", ["でぃ"] = "di", ["どぅ"] = "du", ["が"] = "ga", ["きゅ"] = "kyu", ["ひゅ"] = "hyu", ["せ"] = "se", ["みゃ"] = "mya", ["き"] = "ki", ["ぷ"] = "pu", ["べ"] = "be", ["け"] = "ke", ["ぜ"] = "ze", ["にゅ"] = "nyu", ["こ"] = "ko", ["みゅ"] = "myu", ["お"] = "o", ["しょ"] = "sho", ["きょ"] = "kyo", ["みょ"] = "myo", ["を"] = "wo", ["つ"] = "tu", ["み"] = "mi", ["ぱ"] = "pa", ["り"] = "ri", ["わ"] = "wa", ["りゅ"] = "ryu", ["な"] = "na", ["す"] = "su", ["も"] = "mo", ["に"] = "ni", ["る"] = "ru", ["ぢゃ"] = "dya", ["ざ"] = "za", ["え"] = "e", ["じゃ"] = "ja", ["め"] = "me", ["で"] = "de", ["し"] = "shi", ["ろ"] = "ro", ["だ"] = "da", ["む"] = "mu", ["よ"] = "yo", ["じ"] = "ji", ["さ"] = "sa", ["にょ"] = "nyo", ["ぽ"] = "po", ["ひゃ"] = "hya", ["は"] = "ha", ["ら"] = "ra", ["ご"] = "go"} diff --git a/init.lua b/init.lua 
index 7579384..76397be 100644 --- a/init.lua +++ b/init.lua @@ -1,5 +1,9 @@ furrybot = {} +local path = minetest.get_modpath("furrybot") + +utf8 = dofile(path .. "/utf8.lua") + local http = minetest.request_http_api() local env = minetest.request_insecure_environment() local storage = minetest.get_mod_storage() @@ -8,4 +12,4 @@ libclamity.register_on_chat_message(function(...) furrybot.parse_message(...) end) -loadfile(minetest.get_modpath("furrybot") .. "/bot.lua")()(http, env, storage) +loadfile(path .. "/bot.lua")()(http, env, storage) diff --git a/utf8.lua b/utf8.lua new file mode 100644 index 0000000..db78306 --- /dev/null +++ b/utf8.lua @@ -0,0 +1,1049 @@ +-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $ +-- +-- Provides UTF-8 aware string functions implemented in pure lua: +-- * utf8len(s) +-- * utf8sub(s, i, j) +-- * utf8reverse(s) +-- * utf8char(unicode) +-- * utf8unicode(s, i, j) +-- * utf8gensub(s, sub_len) +-- * utf8find(str, regex, init, plain) +-- * utf8match(str, regex, init) +-- * utf8gmatch(str, regex, all) +-- * utf8gsub(str, regex, repl, limit) +-- +-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these +-- additional functions are available: +-- * utf8upper(s) +-- * utf8lower(s) +-- +-- All functions behave as their non UTF-8 aware counterparts with the exception +-- that UTF-8 characters are used instead of bytes for all units. + +--[[ +Copyright (c) 2006-2007, Kyle Smith +All rights reserved. + +Contributors: + Alimov Stepan + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
-- Returns the number of bytes used by the UTF-8 character that starts at byte
-- `i` of `s`. Also doubles as a UTF-8 character validator.
--
-- Validation follows the RFC 3629 ABNF quoted at the top of this file:
-- the lead byte selects the sequence length, the second byte has a narrowed
-- range for the lead bytes 0xE0, 0xED, 0xF0 and 0xF4, and every remaining
-- byte must be a continuation byte (0x80-0xBF).
--
-- s: string to inspect
-- i: byte offset of the character's first byte (defaults to 1)
-- returns: 1, 2, 3 or 4
-- raises: an error on a wrong argument type, when `i` does not address a byte
--         of `s`, when the string ends inside the sequence, or when the bytes
--         do not form a valid UTF-8 character
local function utf8charbytes (s, i)
  -- argument defaults
  i = i or 1

  -- argument checking
  if type(s) ~= "string" then
    error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
  end
  if type(i) ~= "number" then
    error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
  end

  local c = byte(s, i)
  if not c then
    -- string.byte returns nothing for an offset outside the string; fail with
    -- a readable message instead of a nil comparison further down
    error("bad argument #2 to 'utf8charbytes' (position out of range)")
  end

  -- UTF8-1 = %x00-7F (NUL included, as in the ABNF)
  if c <= 127 then
    return 1
  end

  -- sequence length, plus the allowed range of the second byte
  local count
  local lo, hi = 128, 191
  if c >= 194 and c <= 223 then
    -- UTF8-2
    count = 2
  elseif c >= 224 and c <= 239 then
    -- UTF8-3
    count = 3
    if c == 224 then
      lo = 160
    elseif c == 237 then
      hi = 159
    end
  elseif c >= 240 and c <= 244 then
    -- UTF8-4
    count = 4
    if c == 240 then
      lo = 144
    elseif c == 244 then
      hi = 143
    end
  else
    error("Invalid UTF-8 character")
  end

  -- all trailing bytes must exist before any of them is range-checked
  for k = 1, count - 1 do
    if not byte(s, i + k) then
      error("UTF-8 string terminated early")
    end
  end

  -- validate byte 2 (range depends on the lead byte)
  local c2 = byte(s, i + 1)
  if c2 < lo or c2 > hi then
    error("Invalid UTF-8 character")
  end

  -- validate bytes 3 and 4 (plain continuation bytes)
  for k = 2, count - 1 do
    local ck = byte(s, i + k)
    if ck < 128 or ck > 191 then
      error("Invalid UTF-8 character")
    end
  end

  return count
end
-- Returns the number of characters in a UTF-8 string.
--
-- s: string to measure
-- raises: an error if `s` is not a string, or (via utf8charbytes) if it is
--         not valid UTF-8
local function utf8len (s)
  -- argument checking
  if type(s) ~= "string" then
    error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
  end

  local pos = 1
  local bytes = len(s)
  local count = 0

  while pos <= bytes do
    count = count + 1
    pos = pos + utf8charbytes(s, pos)
  end

  return count
end

-- Functions identically to string.sub except that i and j are UTF-8 characters
-- instead of bytes.
--
-- s: source string
-- i: index of the first character (negative values count from the end)
-- j: index of the last character (defaults to -1, the last character)
-- returns: the selected substring, or "" when the range is empty
local function utf8sub (s, i, j)
  -- argument defaults
  j = j or -1

  local bytes = len(s)

  -- the character count is only needed to resolve a negative index
  local total = (i >= 0 and j >= 0) or utf8len(s)
  local first = (i >= 0) and i or total + i + 1
  local last = (j >= 0) and j or total + j + 1

  -- can't have start before end!
  if first > last then
    return ""
  end

  -- byte offsets to pass to string.sub
  local start_byte, end_byte = 1, bytes

  local pos = 1
  local count = 0
  while pos <= bytes do
    count = count + 1

    if count == first then
      start_byte = pos
    end

    pos = pos + utf8charbytes(s, pos)

    if count == last then
      end_byte = pos - 1
      break
    end
  end

  -- start past the last character, or end before the first one: empty result
  if first > count then start_byte = bytes + 1 end
  if last < 1 then end_byte = 0 end

  return sub(s, start_byte, end_byte)
end


-- Replaces UTF-8 characters based on a mapping table.
--
-- s: source string
-- mapping: table keyed by single UTF-8 characters; characters without an
--          entry are copied unchanged
-- returns: the converted string
local function utf8replace (s, mapping)
  -- argument checking
  if type(s) ~= "string" then
    error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
  end
  if type(mapping) ~= "table" then
    error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
  end

  local pos = 1
  local bytes = len(s)
  -- pieces are collected and joined once, instead of growing a string
  local parts = {}

  while pos <= bytes do
    local charbytes = utf8charbytes(s, pos)
    local c = sub(s, pos, pos + charbytes - 1)

    parts[#parts + 1] = mapping[c] or c

    pos = pos + charbytes
  end

  return table.concat(parts)
end
-- Identical to string.upper except it knows about unicode simple case
-- conversions. Reads the global table `utf8_lc_uc`, which this file does not
-- define (the file header attributes the case mappings to utf8data.lua);
-- utf8replace raises a "table expected" error while that table is missing.
local function utf8upper (s)
  return utf8replace(s, utf8_lc_uc)
end

-- Identical to string.lower except it knows about unicode simple case
-- conversions. Reads the global table `utf8_uc_lc`; see utf8upper.
local function utf8lower (s)
  return utf8replace(s, utf8_uc_lc)
end

-- Identical to string.reverse except that it supports UTF-8: whole characters
-- are reversed, the bytes inside each character keep their order.
--
-- s: string to reverse
-- raises: an error if `s` is not a string or is not valid UTF-8
local function utf8reverse (s)
  -- argument checking
  if type(s) ~= "string" then
    error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
  end

  local pos = len(s)
  local parts = {}

  while pos > 0 do
    -- walk back over continuation bytes (0x80-0xBF) to the lead byte
    local c = byte(s, pos)
    while c >= 128 and c <= 191 do
      pos = pos - 1
      if pos < 1 then
        -- continuation bytes with no lead byte in front of them
        error("Invalid UTF-8 character")
      end
      c = byte(s, pos)
    end

    local charbytes = utf8charbytes(s, pos)
    parts[#parts + 1] = sub(s, pos, pos + charbytes - 1)

    pos = pos - 1
  end

  return table.concat(parts)
end

-- http://en.wikipedia.org/wiki/Utf8
-- http://developer.coronalabs.com/code/utf-8-conversion-utility
--
-- Encodes one code point as a UTF-8 string.
--
-- unicode: code point, at most 0x10FFFF
-- returns: a string of 1 to 4 bytes
-- raises: an error for values above U+10FFFF
local function utf8char(unicode)
  local floor = math.floor

  if unicode <= 0x7F then
    return char(unicode)
  end

  if unicode <= 0x7FF then
    return char(
      0xC0 + floor(unicode / 0x40),
      0x80 + unicode % 0x40
    )
  end

  if unicode <= 0xFFFF then
    return char(
      0xE0 + floor(unicode / 0x1000),
      0x80 + floor(unicode / 0x40) % 0x40,
      0x80 + unicode % 0x40
    )
  end

  if unicode <= 0x10FFFF then
    return char(
      0xF0 + floor(unicode / 0x40000),
      0x80 + floor(unicode / 0x1000) % 0x40,
      0x80 + floor(unicode / 0x40) % 0x40,
      0x80 + unicode % 0x40
    )
  end

  error 'Unicode cannot be greater than U+10FFFF!'
end
+end + +local shift_6 = 2^6 +local shift_12 = 2^12 +local shift_18 = 2^18 + +local utf8unicode +utf8unicode = function(str, i, j, byte_pos) + i = i or 1 + j = j or i + + if i > j then return end + + local char,bytes + + if byte_pos then + bytes = utf8charbytes(str,byte_pos) + char = sub(str,byte_pos,byte_pos-1+bytes) + else + char,byte_pos = utf8sub(str,i,i), 0 + bytes = #char + end + + local unicode + + if bytes == 1 then unicode = byte(char) end + if bytes == 2 then + local byte0,byte1 = byte(char,1,2) + local code0,code1 = byte0-0xC0,byte1-0x80 + unicode = code0*shift_6 + code1 + end + if bytes == 3 then + local byte0,byte1,byte2 = byte(char,1,3) + local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80 + unicode = code0*shift_12 + code1*shift_6 + code2 + end + if bytes == 4 then + local byte0,byte1,byte2,byte3 = byte(char,1,4) + local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80 + unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3 + end + + return unicode,utf8unicode(str, i+1, j, byte_pos+bytes) +end + +-- Returns an iterator which returns the next substring and its byte interval +local function utf8gensub(str, sub_len) + sub_len = sub_len or 1 + local byte_pos = 1 + local len = #str + return function(skip) + if skip then byte_pos = byte_pos + skip end + local char_count = 0 + local start = byte_pos + repeat + if byte_pos > len then return end + char_count = char_count + 1 + local bytes = utf8charbytes(str,byte_pos) + byte_pos = byte_pos+bytes + + until char_count == sub_len + + local last = byte_pos-1 + local sub = sub(str,start,last) + return sub, start, last + end +end + +local function binsearch(sortedTable, item, comp) + local head, tail = 1, #sortedTable + local mid = math.floor((head + tail)/2) + if not comp then + while (tail - head) > 1 do + if sortedTable[tonumber(mid)] > item then + tail = mid + else + head = mid + end + mid = math.floor((head + tail)/2) + end + else + end + if sortedTable[tonumber(head)] == 
item then + return true, tonumber(head) + elseif sortedTable[tonumber(tail)] == item then + return true, tonumber(tail) + else + return false + end +end +local function classMatchGenerator(class, plain) + local codes = {} + local ranges = {} + local ignore = false + local range = false + local firstletter = true + local unmatch = false + + local it = utf8gensub(class) + + local skip + for c,bs,be in it do + skip = be + if not ignore and not plain then + if c == "%" then + ignore = true + elseif c == "-" then + table.insert(codes, utf8unicode(c)) + range = true + elseif c == "^" then + if not firstletter then + error('!!!') + else + unmatch = true + end + elseif c == ']' then + break + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + end + elseif ignore and not plain then + if c == 'a' then -- %a: represents all letters. (ONLY ASCII) + table.insert(ranges, {65, 90}) -- A - Z + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'c' then -- %c: represents all control characters. + table.insert(ranges, {0, 31}) + table.insert(codes, 127) + elseif c == 'd' then -- %d: represents all digits. + table.insert(ranges, {48, 57}) -- 0 - 9 + elseif c == 'g' then -- %g: represents all printable characters except space. + table.insert(ranges, {1, 8}) + table.insert(ranges, {14, 31}) + table.insert(ranges, {33, 132}) + table.insert(ranges, {134, 159}) + table.insert(ranges, {161, 5759}) + table.insert(ranges, {5761, 8191}) + table.insert(ranges, {8203, 8231}) + table.insert(ranges, {8234, 8238}) + table.insert(ranges, {8240, 8286}) + table.insert(ranges, {8288, 12287}) + elseif c == 'l' then -- %l: represents all lowercase letters. (ONLY ASCII) + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'p' then -- %p: represents all punctuation characters. 
(ONLY ASCII) + table.insert(ranges, {33, 47}) + table.insert(ranges, {58, 64}) + table.insert(ranges, {91, 96}) + table.insert(ranges, {123, 126}) + elseif c == 's' then -- %s: represents all space characters. + table.insert(ranges, {9, 13}) + table.insert(codes, 32) + table.insert(codes, 133) + table.insert(codes, 160) + table.insert(codes, 5760) + table.insert(ranges, {8192, 8202}) + table.insert(codes, 8232) + table.insert(codes, 8233) + table.insert(codes, 8239) + table.insert(codes, 8287) + table.insert(codes, 12288) + elseif c == 'u' then -- %u: represents all uppercase letters. (ONLY ASCII) + table.insert(ranges, {65, 90}) -- A - Z + elseif c == 'w' then -- %w: represents all alphanumeric characters. (ONLY ASCII) + table.insert(ranges, {48, 57}) -- 0 - 9 + table.insert(ranges, {65, 90}) -- A - Z + table.insert(ranges, {97, 122}) -- a - z + elseif c == 'x' then -- %x: represents all hexadecimal digits. + table.insert(ranges, {48, 57}) -- 0 - 9 + table.insert(ranges, {65, 70}) -- A - F + table.insert(ranges, {97, 102}) -- a - f + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + end + ignore = false + else + if not range then + table.insert(codes, utf8unicode(c)) + else + table.remove(codes) -- removing '-' + table.insert(ranges, {table.remove(codes), utf8unicode(c)}) + range = false + end + ignore = false + end + + firstletter = false + end + + table.sort(codes) + + local function inRanges(charCode) + for _,r in ipairs(ranges) do + if r[1] <= charCode and charCode <= r[2] then + return true + end + end + return false + end + if not unmatch then + return function(charCode) + return binsearch(codes, charCode) or inRanges(charCode) + end, skip + else + return function(charCode) + return charCode ~= -1 and not (binsearch(codes, charCode) or inRanges(charCode)) + end, skip + end +end + +-- utf8sub with extra argument, and 
extra result value +local function utf8subWithBytes (s, i, j, sb) + -- argument defaults + j = j or -1 + + local pos = sb or 1 + local bytes = len(s) + local len = 0 + + -- only set l if i or j is negative + local l = (i >= 0 and j >= 0) or utf8len(s) + local startChar = (i >= 0) and i or l + i + 1 + local endChar = (j >= 0) and j or l + j + 1 + + -- can't have start before end! + if startChar > endChar then + return "" + end + + -- byte offsets to pass to string.sub + local startByte,endByte = 1,bytes + + while pos <= bytes do + len = len + 1 + + if len == startChar then + startByte = pos + end + + pos = pos + utf8charbytes(s, pos) + + if len == endChar then + endByte = pos - 1 + break + end + end + + if startChar > len then startByte = bytes+1 end + if endChar < 1 then endByte = 0 end + + return sub(s, startByte, endByte), endByte + 1 +end + +local cache = setmetatable({},{ + __mode = 'kv' +}) +local cachePlain = setmetatable({},{ + __mode = 'kv' +}) +local function matcherGenerator(regex, plain) + local matcher = { + functions = {}, + captures = {} + } + if not plain then + cache[regex] = matcher + else + cachePlain[regex] = matcher + end + local function simple(func) + return function(cC) + if func(cC) then + matcher:nextFunc() + matcher:nextStr() + else + matcher:reset() + end + end + end + local function star(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextFunc() + matcher:nextStr() + else + matcher:nextFunc() + end + end + end + local function minus(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextStr() + end + matcher:nextFunc() + end + end + local function question(func) + return function(cC) + if func(cC) then + matcher:fullResetOnNextFunc() + matcher:nextStr() + end + matcher:nextFunc() + end + end + + local function capture(id) + return function(cC) + local l = matcher.captures[id][2] - matcher.captures[id][1] + local captured = utf8sub(matcher.string, matcher.captures[id][1], matcher.captures[id][2]) + 
local check = utf8sub(matcher.string, matcher.str, matcher.str + l) + if captured == check then + for i = 0, l do + matcher:nextStr() + end + matcher:nextFunc() + else + matcher:reset() + end + end + end + local function captureStart(id) + return function(cC) + matcher.captures[id][1] = matcher.str + matcher:nextFunc() + end + end + local function captureStop(id) + return function(cC) + matcher.captures[id][2] = matcher.str - 1 + matcher:nextFunc() + end + end + + local function balancer(str) + local sum = 0 + local bc, ec = utf8sub(str, 1, 1), utf8sub(str, 2, 2) + local skip = len(bc) + len(ec) + bc, ec = utf8unicode(bc), utf8unicode(ec) + return function(cC) + if cC == ec and sum > 0 then + sum = sum - 1 + if sum == 0 then + matcher:nextFunc() + end + matcher:nextStr() + elseif cC == bc then + sum = sum + 1 + matcher:nextStr() + else + if sum == 0 or cC == -1 then + sum = 0 + matcher:reset() + else + matcher:nextStr() + end + end + end, skip + end + + matcher.functions[1] = function(cC) + matcher:fullResetOnNextStr() + matcher.seqStart = matcher.str + matcher:nextFunc() + if (matcher.str > matcher.startStr and matcher.fromStart) or matcher.str >= matcher.stringLen then + matcher.stop = true + matcher.seqStart = nil + end + end + + local lastFunc + local ignore = false + local skip = nil + local it = (function() + local gen = utf8gensub(regex) + return function() + return gen(skip) + end + end)() + local cs = {} + for c, bs, be in it do + skip = nil + if plain then + table.insert(matcher.functions, simple(classMatchGenerator(c, plain))) + else + if ignore then + if find('123456789', c, 1, true) then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + table.insert(matcher.functions, capture(tonumber(c))) + elseif c == 'b' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + local b + b, skip = balancer(sub(regex, be + 1, be + 9)) + table.insert(matcher.functions, b) + else + 
lastFunc = classMatchGenerator('%' .. c) + end + ignore = false + else + if c == '*' then + if lastFunc then + table.insert(matcher.functions, star(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '+' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + table.insert(matcher.functions, star(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '-' then + if lastFunc then + table.insert(matcher.functions, minus(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '?' then + if lastFunc then + table.insert(matcher.functions, question(lastFunc)) + lastFunc = nil + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '^' then + if bs == 1 then + matcher.fromStart = true + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '$' then + if be == len(regex) then + matcher.toEnd = true + else + error('invalid regex after ' .. sub(regex, 1, bs)) + end + elseif c == '[' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc, skip = classMatchGenerator(sub(regex, be + 1)) + elseif c == '(' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + table.insert(matcher.captures, {}) + table.insert(cs, #matcher.captures) + table.insert(matcher.functions, captureStart(cs[#cs])) + if sub(regex, be + 1, be + 1) == ')' then matcher.captures[#matcher.captures].empty = true end + elseif c == ')' then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + lastFunc = nil + end + local cap = table.remove(cs) + if not cap then + error('invalid capture: "(" missing') + end + table.insert(matcher.functions, captureStop(cap)) + elseif c == '.' 
then + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = function(cC) return cC ~= -1 end + elseif c == '%' then + ignore = true + else + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = classMatchGenerator(c) + end + end + end + end + if #cs > 0 then + error('invalid capture: ")" missing') + end + if lastFunc then + table.insert(matcher.functions, simple(lastFunc)) + end + lastFunc = nil + ignore = nil + + table.insert(matcher.functions, function() + if matcher.toEnd and matcher.str ~= matcher.stringLen then + matcher:reset() + else + matcher.stop = true + end + end) + + matcher.nextFunc = function(self) + self.func = self.func + 1 + end + matcher.nextStr = function(self) + self.str = self.str + 1 + end + matcher.strReset = function(self) + local oldReset = self.reset + local str = self.str + self.reset = function(s) + s.str = str + s.reset = oldReset + end + end + matcher.fullResetOnNextFunc = function(self) + local oldReset = self.reset + local func = self.func +1 + local str = self.str + self.reset = function(s) + s.func = func + s.str = str + s.reset = oldReset + end + end + matcher.fullResetOnNextStr = function(self) + local oldReset = self.reset + local str = self.str + 1 + local func = self.func + self.reset = function(s) + s.func = func + s.str = str + s.reset = oldReset + end + end + + matcher.process = function(self, str, start) + + self.func = 1 + start = start or 1 + self.startStr = (start >= 0) and start or utf8len(str) + start + 1 + self.seqStart = self.startStr + self.str = self.startStr + self.stringLen = utf8len(str) + 1 + self.string = str + self.stop = false + + self.reset = function(s) + s.func = 1 + end + + local lastPos = self.str + local lastByte + local char + while not self.stop do + if self.str < self.stringLen then + --[[ if lastPos < self.str then + print('last byte', lastByte) + char, lastByte = utf8subWithBytes(str, 1, self.str - lastPos - 1, lastByte) + 
-- string.find
--
-- str: string to search
-- regex: pattern, or literal text when `plain` is set
-- init: character index to start at (forwarded to matcher:process)
-- plain: when truthy, `regex` is matched literally
-- returns: whatever matcher:process returns for the match
local function utf8find(str, regex, init, plain)
  -- matcherGenerator files plain and pattern matchers in separate caches;
  -- look in the one that corresponds to this call so that the same text used
  -- once as a pattern and once literally never shares a matcher
  local compiled = plain and cachePlain or cache
  local matcher = compiled[regex] or matcherGenerator(regex, plain)
  return matcher:process(str, init)
end

-- string.match
--
-- returns: the captures if the pattern has any, otherwise the whole match;
--          nothing when there is no match
local function utf8match(str, regex, init)
  init = init or 1
  local found = {utf8find(str, regex, init)}
  if not found[1] then
    return
  end
  if found[3] then
    return unpack(found, 3)
  end
  return utf8sub(str, found[1], found[2])
end

-- string.gmatch
--
-- all: when truthy each step also yields the match's start and end indices
--      in front of the captures (used by utf8gsub)
-- returns: an iterator over successive matches
local function utf8gmatch(str, regex, all)
  -- a leading '^' is escaped so it is matched as a literal character
  if utf8sub(regex, 1, 1) == '^' then
    regex = '%' .. regex
  end
  local first_result = all and 1 or 3
  local next_char = 1
  return function()
    local found = {utf8find(str, regex, next_char)}
    if not found[1] then
      return
    end
    next_char = found[2] + 1
    if found[first_result] then
      return unpack(found, first_result)
    end
    return utf8sub(str, found[1], found[2])
  end
end
c + end + ignore = false + end + end + elseif type(repl) == 'table' then + ret = repl[args[1] or args[0]] or '' + elseif type(repl) == 'function' then + if #args > 0 then + ret = repl(unpack(args, 1)) or '' + else + ret = repl(args[0]) or '' + end + end + return ret +end +-- string.gsub +local function utf8gsub(str, regex, repl, limit) + limit = limit or -1 + local ret = '' + local prevEnd = 1 + local it = utf8gmatch(str, regex, true) + local found = {it()} + local n = 0 + while #found > 0 and limit ~= n do + local args = {[0] = utf8sub(str, found[1], found[2]), unpack(found, 3)} + ret = ret .. utf8sub(str, prevEnd, found[1] - 1) + .. replace(repl, args) + prevEnd = found[2] + 1 + n = n + 1 + found = {it()} + end + return ret .. utf8sub(str, prevEnd), n +end + +local utf8 = {} +utf8.len = utf8len +utf8.sub = utf8sub +utf8.reverse = utf8reverse +utf8.char = utf8char +utf8.unicode = utf8unicode +utf8.gensub = utf8gensub +utf8.byte = utf8unicode +utf8.find = utf8find +utf8.match = utf8match +utf8.gmatch = utf8gmatch +utf8.gsub = utf8gsub +utf8.dump = dump +utf8.format = format +utf8.lower = lower +utf8.upper = upper +utf8.rep = rep +return utf8 \ No newline at end of file diff --git a/waifu.lua b/waifu.lua index 4f78ba9..a92b46d 100644 --- a/waifu.lua +++ b/waifu.lua @@ -1,24 +1,40 @@ local http, env, storage local C = minetest.get_color_escape_sequence -furrybot.alphabeth = { - vowels = {}, - consonants = {}, +furrybot.hiragana = { + map = {}, + probability = {}, } function furrybot.get_waifu_name() - local state = math.random() < 0.5 - local r = math.random(3, 8) - local str = "" + local r = math.floor(1 + + math.random() + + math.random() + + math.random() + + math.random() + + math.random() + ) + + local jp = "" + + for i = 1, r do + jp = jp .. furrybot.hiragana.list[math.random(#furrybot.hiragana.list)] + end + + local en = "" for i = 1, r do - local tbl = state and furrybot.alphabeth.vowels or furrybot.alphabeth.consonants - str = str .. 
tbl[math.random(#tbl)] + local combo = furrybot.hiragana.map[utf8.sub(jp, i, i + 1)] - state = not state + if combo then + en = en .. combo + i = i + 1 + else + en = en .. furrybot.hiragana.map[utf8.sub(jp, i, i)] + end end - return furrybot.uppercase(str) + return jp .. " (" .. furrybot.uppercase(en) .. ")" end function furrybot.random_distribution(tbl) @@ -129,24 +145,24 @@ furrybot.commands.waifu = { return function(_http, _env, _storage) http, env, storage = _http, _env, _storage - local is_vowel = { - a = true, - e = true, - i = true, - o = true, - u = true, - } + local function read_file(path) + local f = env.io.open("clientmods/furrybot/" .. path, "r") + local data = f:read("*a") + f:close() + + return data + end - local bounds = "az" + furrybot.hiragana.map = minetest.deserialize(read_file("hiragana")) + furrybot.hiragana.list = {} - local f = env.io.open("clientmods/furrybot/LICENSE", "r") - local src = f:read("*a") + local src = read_file("Japanese-Lipsum.txt") for i = 1, #src do - local c = src:sub(i, i):lower() + local c = utf8.sub(src, i, i) - if c:byte(1) >= bounds:byte(1) and c:byte(1) <= bounds:byte(2) then - table.insert(is_vowel[c] and furrybot.alphabeth.vowels or furrybot.alphabeth.consonants, c) + if furrybot.hiragana.map[c] then + table.insert(furrybot.hiragana.list, c) end end end -- 2.44.0