5 "github.com/Kovensky/go-anidb/titles"
// db is the shared titles database that the tests and benchmarks in this file query.
10 var db = &titles.TitlesDatabase{}
// Try to open "anime-titles.dat.gz" first.
13 if fh, err := os.Open("anime-titles.dat.gz"); err != nil {
// If that failed, fall back to opening "anime-titles.dat".
14 if fh, err = os.Open("anime-titles.dat"); err != nil {
// TestVector is one search test case; the tests in this file populate its
// Input, Limit and AIDs fields.
22 type TestVector struct {
// TestFuzzySearch runs db.FuzzySearch on every vector's Input and compares the AIDs of
// the ResultsByAID() results, position by position, against the vector's expected AIDs.
28 func TestFuzzySearch(T *testing.T) {
29 // Each vector goes one step deeper in the fuzzy search stack
32 TestVector{Input: "\x00", Limit: -1, AIDs: []int{}},
34 TestVector{Input: "SAC2", Limit: 1, AIDs: []int{1176}},
35 // exact, but in Hungarian!
36 TestVector{Input: "Varázslatos álmok", Limit: -1, AIDs: []int{235}},
38 TestVector{Input: "Varázslatos", Limit: 3, AIDs: []int{235, 2152, 2538}},
40 TestVector{Input: "A rózsa ígérete", Limit: -1, AIDs: []int{2152}},
42 TestVector{Input: "Stand Alone", Limit: 1, AIDs: []int{247}},
44 TestVector{Input: "Ghost in t", Limit: 1, AIDs: []int{61}},
46 TestVector{Input: "flowne", Limit: 1, AIDs: []int{184}},
47 // words, first word first in name
48 TestVector{Input: "Kumo Mukou", Limit: -1, AIDs: []int{469}},
49 // words, last word last in name
50 TestVector{Input: "A titka", Limit: 1, AIDs: []int{303}},
51 // words, infix but not contiguous
52 TestVector{Input: "Kidoutai 2nd", Limit: 1, AIDs: []int{1176}},
53 // strings, first string first in name
54 TestVector{Input: "Kouka Kidou", Limit: 1, AIDs: []int{61}},
55 // strings, last string last in name
56 TestVector{Input: "app Princess", Limit: 1, AIDs: []int{640}},
57 // strings, anywhere in this order
58 TestVector{Input: "ouka douta", Limit: 2, AIDs: []int{61, 247}},
60 TestVector{Input: "", Limit: 1, AIDs: []int{1}},
63 for i, v := range vec {
64 res := db.FuzzySearch(v.Input).ResultsByAID()
// Taken only for a positive Limit that the result count exceeds; the Limit: -1
// vectors never enter this branch.
65 if v.Limit > 0 && len(res) > v.Limit {
// Compare the expected and actual result counts first, then the AIDs one by one.
70 if len(v.AIDs) != len(res) {
73 for j, r := range res {
74 if v.AIDs[j] != r.AID {
// Render the actual results as "AID (primary title)" strings for the failure message.
81 list := make([]string, 0, len(res))
82 for _, r := range res {
83 list = append(list, fmt.Sprintf("%d (%s)", r.AID, r.PrimaryTitle))
// Vectors are reported 1-based (i+1).
85 T.Errorf("Vector #%d: Expected AID list %v, got AID list %v", i+1, v.AIDs, list)
// TestFuzzySearchFold runs db.FuzzySearchFold on every vector's Input and compares the
// AIDs of the ResultsByAID() results, position by position, against the expected AIDs.
90 func TestFuzzySearchFold(T *testing.T) {
91 // Same vector as the previous one, but with disturbed word cases
94 TestVector{Input: "sac2", Limit: 1, AIDs: []int{1176}},
95 // exact, but in Hungarian!
96 TestVector{Input: "VarÁzslatos Álmok", Limit: -1, AIDs: []int{235}},
98 TestVector{Input: "varázslatos", Limit: 3, AIDs: []int{235, 2152, 2538}},
100 TestVector{Input: "a rÓzsa ígérete", Limit: -1, AIDs: []int{2152}},
102 TestVector{Input: "Stand Alone", Limit: 1, AIDs: []int{247}},
104 TestVector{Input: "ghost in t", Limit: 1, AIDs: []int{61}},
106 TestVector{Input: "FlownE", Limit: 1, AIDs: []int{184}},
107 // words, first word first in name
108 TestVector{Input: "kumo mukou", Limit: -1, AIDs: []int{469}},
109 // words, last word last in name
110 TestVector{Input: "a titka", Limit: -1, AIDs: []int{303}},
111 // words, infix but not contiguous
112 TestVector{Input: "kidoutai 2nd", Limit: 1, AIDs: []int{1176}},
113 // strings, first string first in name
114 TestVector{Input: "Kouka kidou", Limit: 1, AIDs: []int{61}},
115 // strings, last string last in name
116 TestVector{Input: "app princess", Limit: 1, AIDs: []int{640}},
117 // strings, anywhere in this order
118 TestVector{Input: "Ouka Douta", Limit: 2, AIDs: []int{61, 247}},
120 TestVector{Input: "\x00", Limit: -1, AIDs: []int{}},
123 for i, v := range vec {
124 res := db.FuzzySearchFold(v.Input).ResultsByAID()
// Taken only for a positive Limit that the result count exceeds; the Limit: -1
// vectors never enter this branch.
125 if v.Limit > 0 && len(res) > v.Limit {
// Compare the expected and actual result counts first, then the AIDs one by one.
130 if len(v.AIDs) != len(res) {
133 for j, r := range res {
134 if v.AIDs[j] != r.AID {
// Render the actual results as "AID (primary title)" strings for the failure message.
141 list := make([]string, 0, len(res))
142 for _, r := range res {
143 list = append(list, fmt.Sprintf("%d (%s)", r.AID, r.PrimaryTitle))
// Vectors are reported 1-based (i+1).
145 T.Errorf("Vector #%d: Expected AID list %v, got AID list %v", i+1, v.AIDs, list)
// BenchmarkFuzzySearch_bestCase benchmarks db.FuzzySearch on a fixed list of sample
// queries of the following kind:
150 // exact match of primary title
151 func BenchmarkFuzzySearch_bestCase(B *testing.B) {
// Shell pipeline used to generate the sample list below:
152 // grep '|1|' anime-titles.dat | cut -d'|' -f4 | sort -R | sed 's/\(.*\)/"\1",/' | \
155 "Shin Tennis no Ouji-sama", "Shimai Ningyou", "Aniyome",
156 "Dragon Ball Z: Kyokugen Battle!! Sandai Super Saiyajin", "Uchuu Kuubo Blue Noah",
157 "Hotaru no Haka", "First Kiss Story: Kiss Kara Hajimaru Monogatari", "Seikai no Senki III",
158 "Ikkitousen: Xtreme Xecutor", "Houkago Ren`ai Club: Koi no Etude",
159 "DNA2: Dokoka de Nakushita Aitsu no Aitsu (1995)", "Bamboo Blade", "Accelerando",
160 "Soukyuu no Fafner: Dead Aggressor", "Eiga Futari wa Precure Max Heart",
161 "Kyoufu no Kyou-chan", "Shin Taketori Monogatari: 1000-nen Joou", "Fresh Precure!",
162 "Grope: Yami no Naka no Kotori-tachi", "Seitokai Yakuindomo", "Chikyuu Shoujo Arjuna",
163 "Choukou Tenshi Escalayer", "Dragon Ball Kai", "Dragon League", "Hatsukoi Limited",
164 "Sexfriend", "Ao no Exorcist", "Futatsu no Spica", "Adesugata Mahou no Sannin Musume",
165 "Yawara! A Fashionable Judo Girl",
// One search per iteration, cycling through the sample list via i%len(vec).
169 for i := 0; i < B.N; i++ {
170 db.FuzzySearch(vec[i%len(vec)])
174 // // exact match of x-jat, en or ja non-primary title
175 // func BenchmarkFuzzySearch_secondBestCase(B *testing.B) {
176 // // grep -E '\|3\|(x-jat|en|ja)\|' anime-titles.dat | cut -d'|' -f4 | sort -R | \
177 // // sed 's/\(.*\)/"\1",/' | head -n 30
179 // "yosusora", "heartcatch", "chuunibyou", "Stringendo", "おれいも", "yamato 2199",
180 // "mai otome zwei", "cg r1", "harem", "Dorvack", "Natsume 1", "SMJA", "SM", "J2",
181 // "amstv2", "BJ Movie (2005)", "munto2", "nyc", "MT", "DBZ Movie 2",
182 // "Zatch Bell Movie 2", "Armitage", "J0ker", "CH", "sugar", "vga", "Nadesico",
183 // "dgc nyo", "setv", "D.g", "マジプリ", "myyour", "Haruhi 2009", "bantorra", "yamato2",
184 // "bakuhan", "vk2", "BBB", "5-2", "GSD SE III", "akasaka", "GS SE II", "F3", "おれつば",
185 // "sencolle", "wellber", "SailorMoon", "ay", "HCPC", "kxstv", "Shana III",
189 // for i := 0; i < B.N; i++ {
190 // db.FuzzySearch(vec[i%len(vec)])
194 // // exact match of non-primary title in any other language
195 // func BenchmarkFuzzySearch_thirdBestCase(B *testing.B) {
196 // // grep '|2|' anime-titles.dat | grep -Ev '(x-jat|en|ja)' | cut -d'|' -f4 | \
197 // // sort -R | sed 's/\(.*\)/"\1",/' | head -n 30
199 // "Зірка☆Щастя", "La ilusión de triunfar", "La scomparsa di Haruhi Suzumiya",
200 // "Код Геас: Бунтът на Люлюш 2", "我的女神 剧场版", "Lamu - Un rêve sans fin",
201 // "Lupin III: La cospirazione dei Fuma", "Адовая Девочка дубль 2", "夏娃的时间",
202 // "Дівчинка, що стрибала крізь всесвіт", "Мій сусід Тоторо", "机巧魔神",
203 // "City Hunter - Flash spécial !? La mort de Ryo Saeba", "Ateştopu", "مسدس×سيف",
204 // "Gli amici animali", "沉默的未知", "忧伤大人二之宫", "Пита-Тен", "Глава-гора", "高校龍中龍",
205 // "Яблочное зернышко (фильм второй)", "پروکسی مابعد", "青之花", "Heidi, la fille des Alpes",
206 // "银盘万花筒", "Temi d`amore tra i banchi di scuola", "Съюзът на Среброкрилите", "Аякаши",
207 // "Дух в оболонці: комплекс окремості", "贫乏姊妹物语", "La rose de Versailles",
208 // "แฮปปี้ เลสซั่น", "Juodasis Dievas", "Ерата Сенгоку: Последното парти",
209 // "Белина: Чезнеща в тъмнината", "Пламенный лабиринт", "Капризный Робот", "Kovboy Bebop: Film",
210 // "Bavel`in Kitabı", "东京魔人学院剑风帖 龙龙", "سكول رمبل الفصل الثاني", "青之驱魔师", "سايكانو",
211 // "神的记事本", "死神的歌谣", "Angel e a Flor de Sete Cores", "ماگی: هزارتوی جادو", "Spirală",
212 // "Chié la petite peste",
216 // for i := 0; i < B.N; i++ {
217 // db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_initialWords benchmarks db.FuzzySearch on a fixed list of sample
// queries of the following kind:
221 // match of initial words
222 func BenchmarkFuzzySearch_initialWords(B *testing.B) {
// Shell pipeline used to generate the sample list below: titles containing at least
// three space-separated words, reduced to their first two words (cut -f1,2).
223 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '[^ ]+ [^ ]+ [^ ]+' | \
224 // sort -R | cut -d' ' -f1,2 | sed 's/\(.*\)/"\1",/' | head -n 30
226 "To Love", "Utawarerumono -", "Eden of", "D.C.if ~ダ・カーポ", "Вечност над",
227 "Rupan Sansei:", "Los Caballeros", "Neko Hiki", "LoGH: A", "Arcadia of",
228 "Pokémon 4Ever:", "Lenda Lunar", "Transformers: Master", "Tάρο, ο", "El Puño",
229 "El taxi", "Lupin the", "Ah! My", "Le journal", "Odin: Koushi", "Amazing-man: The",
230 "Legend of", "Youka no", "Я люблю", "Abe George", "Sisters of", "Ouran High",
231 "Batman: Gotham", "Dantalian no", "Koi to", "Night Shift",
// One search per iteration, cycling through the sample list via i%len(vec).
235 for i := 0; i < B.N; i++ {
236 db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_finalWords benchmarks db.FuzzySearch on a fixed list of sample
// queries of the following kind:
240 // match of final words
241 func BenchmarkFuzzySearch_finalWords(B *testing.B) {
// Shell pipeline used to generate the sample list below: titles of exactly four
// space-separated words, reduced to their last two words (cut -f3,4).
242 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
243 // sort -R | cut -d' ' -f3,4 | sed 's/\(.*\)/"\1",/' | head -n 30
245 "do Zodíaco", "Formula 91", "Shuto Houkai", "Deadly Sins", "gui lai",
246 "muistoja tulevaisuudesta", "Mission 1-3", "スペシャルエディションII それぞれの剣", "Một Giây",
247 "Meia-Lua Acima", "Mighty: Decode", "To Screw", "do Tênis", "(Duke Fleed)", "Olympic Taikai",
248 "Драма ангелов", "Shihosha Judge", "демонов Йоко", "Shoujo Club", "Family (2)", "do Tesouro",
249 "Witte Leeuw", "von Mandraguar", "Jin Xia", "Tabi Movie", "Symphonia 2", "no Tenkousei",
250 "Movie (2011)", "Guardian Signs", "Você 2",
// One search per iteration, cycling through the sample list via i%len(vec).
254 for i := 0; i < B.N; i++ {
255 db.FuzzySearch(vec[i%len(vec)])
259 // XXX: This is somehow the most time-consuming case, despite terminating several
260 // regular expressions earlier than the next two benchmarks.
262 // All regular expressions checked here (besides the .*-peppered one for initial candidate search)
263 // have no metacharacters at all besides the trivial \A and \z; while the ones for the following
264 // cases include more complicated grouped expressions...
265 func BenchmarkFuzzySearch_infixWords(B *testing.B) {
// Shell pipeline used to generate the sample list below: titles of exactly four
// space-separated words, reduced to their two middle words (cut -f2,3).
266 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
267 // sort -R | cut -d' ' -f2,3 | sed 's/\(.*\)/"\1",/' | head -n 30
269 "Yes! プリキュア5GoGo!", "Grime X-Rated", "Diễn Ngàn", "Super-Refined Ninja",
270 "o Haita", "Conan: 14.", "the Seagulls", "009 Kaijuu", "Monogatari Daini-hen:",
271 "no Haha", "по Ловец", "Centimeters per", "wang gui", "the Wandering", "Saru Kani",
272 "Dark Red", "Pair: Project", "Охотник на", "trois petits", "of Teacher", "wa Suitai",
273 "Lolita Fantasy", "εκατοστά το", "Eri-sama Katsudou", "希望の学園と絶望の高校生 The",
274 "Comet SPT", "HUNTER スペシャル", "no Makemono", "Kızı: İkinci", "Pirate Captain",
// One search per iteration, cycling through the sample list via i%len(vec).
278 for i := 0; i < B.N; i++ {
279 db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_alternatingWords benchmarks db.FuzzySearch on a fixed list of
// two-word sample queries built from non-adjacent words of a title.
283 func BenchmarkFuzzySearch_alternatingWords(B *testing.B) {
// Shell pipeline used to generate the sample list below: titles of exactly five
// space-separated words, reduced to their second and fourth words (cut -f2,4).
284 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
285 // sort -R | cut -d' ' -f2,4 | sed 's/\(.*\)/"\1",/' | head -n 30
287 "of Millennium", "Kreuz: und", "для Літнє", "Saikyou Deshi", "Hearts: no", "Roh Wolf",
288 "III: Columbus", "Shin-chan Film", "Ball Superandroid", "恋のステージ=HEART FIRE!",
289 "Disease Moon", "Corps Mecha", "BLOOD-C Last", "- trésor", "Lover a", "dievčati, preskočilo",
290 "Star: Szomorú", "Ai Marchen", "Kishin &", "Seiya: Goddess", "Orange Shiroi", "Punch Sekai:",
291 "No.1: no", "ο του", "プリキュアオールスターズ Stage", "Ankoku Hakai", "8-ма по", "II Ultimate",
292 "Tenma Kuro", "Grade Kakusei",
// One search per iteration, cycling through the sample list via i%len(vec).
296 for i := 0; i < B.N; i++ {
297 db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_worstCase benchmarks db.FuzzySearch on a fixed list of sample
// queries that were generated by the pipeline quoted below and then perturbed by hand.
301 func BenchmarkFuzzySearch_worstCase(B *testing.B) {
302 // cat anime-titles.dat | cut -d'|' -f4 | \
304 // -pe'chomp; $_ = encode_utf8(substr(decode_utf8($_), 1, -1) . "\n")' | \
305 // sort -R | sed 's/\(.*\)/"\1",/' | head -n 30
306 // further perturbed by hand
308 "ig ray S in han: Den tsu o Yob Amig",
309 "ar Ben th Sea: 20.00 Mil for Lov",
315 "aji no ppo: pion Roa",
320 "aint : Ο Χαμέ μβάς - Μυθολογία Άδ",
321 "as Camarer s Mágica",
323 "RAG BALL SODE of BAR",
324 "ero eroppi no ken: Pink no",
325 "acre east chin Cyg",
332 "2 sk sbrutna pojkar äventyrens",
// One search per iteration, cycling through the sample list via i%len(vec).
340 for i := 0; i < B.N; i++ {
341 db.FuzzySearch(vec[i%len(vec)])