5 "github.com/Kovensky/go-anidb/titles"
// db is the package-level titles database that the tests and benchmarks
// visible in this file query.
10 var db = &titles.TitlesDatabase{}
// Try the gzip-compressed titles dump first.
// NOTE(review): the enclosing function and what is done with fh are not
// visible in this excerpt — presumably fh is loaded into db; confirm.
13 if fh, err := os.Open("anime-titles.dat.gz"); err == nil {
// Fall back to the uncompressed dump when the .gz file could not be opened.
15 } else if fh, err = os.Open("anime-titles.dat"); err == nil {
// TestVector is one search test case. The field declarations are not visible
// in this excerpt; the literals in this file populate Input (the query
// string), Limit and AIDs (the expected []titles.AID).
22 type TestVector struct {
// TestDB reports a test error when db.UpdateTime is still the zero time,
// which this test treats as a failure to read the anime-titles database.
28 func TestDB(T *testing.T) {
29 if db.UpdateTime.IsZero() {
30 T.Error("Error reading anime-titles database")
// TestFuzzySearch runs every test vector through db.FuzzySearch, takes
// ResultsByAID() and compares the returned AIDs against the expected list,
// reporting mismatches with T.Errorf.
34 func TestFuzzySearch(T *testing.T) {
35 // Each vector goes one step deeper in the fuzzy search stack
38 TestVector{Input: "\x00", Limit: -1, AIDs: []titles.AID{}},
40 TestVector{Input: "SAC2", Limit: 1, AIDs: []titles.AID{1176}},
41 // exact, but in hungarian!
42 TestVector{Input: "Varázslatos álmok", Limit: -1, AIDs: []titles.AID{235}},
44 TestVector{Input: "Varázslatos", Limit: 3, AIDs: []titles.AID{235, 2152, 2538}},
46 TestVector{Input: "A rózsa ígérete", Limit: -1, AIDs: []titles.AID{2152}},
48 TestVector{Input: "Stand Alone", Limit: 1, AIDs: []titles.AID{247}},
50 TestVector{Input: "Ghost in t", Limit: 1, AIDs: []titles.AID{61}},
52 TestVector{Input: "flowne", Limit: 1, AIDs: []titles.AID{184}},
53 // words, first word first in name
54 TestVector{Input: "Kumo Mukou", Limit: -1, AIDs: []titles.AID{469}},
55 // words, last word last in name
56 TestVector{Input: "A titka", Limit: 1, AIDs: []titles.AID{303}},
57 // words, infix but not contiguous
58 TestVector{Input: "Kidoutai 2nd", Limit: 1, AIDs: []titles.AID{1176}},
59 // strings, first string first in name
60 TestVector{Input: "Kouka Kidou", Limit: 1, AIDs: []titles.AID{61}},
61 // strings, last string last in name
62 TestVector{Input: "app Princess", Limit: 1, AIDs: []titles.AID{640}},
63 // strings, anywhere in this order
64 TestVector{Input: "ouka douta", Limit: 2, AIDs: []titles.AID{61, 247}},
66 TestVector{Input: "", Limit: 1, AIDs: []titles.AID{1}},
69 for i, v := range vec {
70 res := db.FuzzySearch(v.Input).ResultsByAID()
// Only a positive Limit triggers this check; a Limit of -1 skips it.
// NOTE(review): the body is not visible here — presumably it caps res at
// v.Limit entries; confirm.
71 if v.Limit > 0 && len(res) > v.Limit {
// Compare the result count against the expected AIDs first, then each AID
// position by position.
76 if len(v.AIDs) != len(res) {
79 for j, r := range res {
80 if v.AIDs[j] != r.AID {
// Build a readable "AID (PrimaryTitle)" list for the failure message.
87 list := make([]string, 0, len(res))
88 for _, r := range res {
89 list = append(list, fmt.Sprintf("%d (%s)", r.AID, r.PrimaryTitle))
// Vectors are reported 1-based (i+1).
91 T.Errorf("Vector #%d: Expected AID list %v, got AID list %v", i+1, v.AIDs, list)
// TestFuzzySearchFold runs every test vector through db.FuzzySearchFold,
// takes ResultsByAID() and compares the returned AIDs against the expected
// list, reporting mismatches with T.Errorf. The inputs differ from their
// expected titles in letter case.
96 func TestFuzzySearchFold(T *testing.T) {
97 // Same vector as the previous one, but with disturbed word cases
100 TestVector{Input: "sac2", Limit: 1, AIDs: []titles.AID{1176}},
101 // exact, but in hungarian!
102 TestVector{Input: "VarÁzslatos Álmok", Limit: -1, AIDs: []titles.AID{235}},
104 TestVector{Input: "varázslatos", Limit: 3, AIDs: []titles.AID{235, 2152, 2538}},
106 TestVector{Input: "a rÓzsa ígérete", Limit: -1, AIDs: []titles.AID{2152}},
108 TestVector{Input: "Stand Alone", Limit: 1, AIDs: []titles.AID{247}},
110 TestVector{Input: "ghost in t", Limit: 1, AIDs: []titles.AID{61}},
112 TestVector{Input: "FlownE", Limit: 1, AIDs: []titles.AID{184}},
113 // words, first word first in name
114 TestVector{Input: "kumo mukou", Limit: -1, AIDs: []titles.AID{469}},
115 // words, last word last in name
116 TestVector{Input: "a titka", Limit: -1, AIDs: []titles.AID{303}},
117 // words, infix but not contiguous
118 TestVector{Input: "kidoutai 2nd", Limit: 1, AIDs: []titles.AID{1176}},
119 // strings, first string first in name
120 TestVector{Input: "Kouka kidou", Limit: 1, AIDs: []titles.AID{61}},
121 // strings, last string last in name
122 TestVector{Input: "app princess", Limit: 1, AIDs: []titles.AID{640}},
123 // strings, anywhere in this order
124 TestVector{Input: "Ouka Douta", Limit: 2, AIDs: []titles.AID{61, 247}},
126 TestVector{Input: "\x00", Limit: -1, AIDs: []titles.AID{}},
129 for i, v := range vec {
130 res := db.FuzzySearchFold(v.Input).ResultsByAID()
// Only a positive Limit triggers this check; a Limit of -1 skips it.
// NOTE(review): the body is not visible here — presumably it caps res at
// v.Limit entries; confirm.
131 if v.Limit > 0 && len(res) > v.Limit {
// Compare the result count against the expected AIDs first, then each AID
// position by position.
136 if len(v.AIDs) != len(res) {
139 for j, r := range res {
140 if v.AIDs[j] != r.AID {
// Build a readable "AID (PrimaryTitle)" list for the failure message.
147 list := make([]string, 0, len(res))
148 for _, r := range res {
149 list = append(list, fmt.Sprintf("%d (%s)", r.AID, r.PrimaryTitle))
// Vectors are reported 1-based (i+1).
151 T.Errorf("Vector #%d: Expected AID list %v, got AID list %v", i+1, v.AIDs, list)
156 // exact match of primary title
//
// BenchmarkFuzzySearch_bestCase calls db.FuzzySearch B.N times, cycling
// through the sample queries in vec.
// NOTE(review): the vec declaration is not visible in this excerpt; the
// string literals below presumably populate it — confirm.
157 func BenchmarkFuzzySearch_bestCase(B *testing.B) {
// Shell pipeline over anime-titles.dat, presumably the one used to draw the
// sample titles below (its continuation line is not visible here).
158 // grep '|1|' anime-titles.dat | cut -d'|' -f4 | sort -R | sed 's/\(.*\)/"\1",/' | \
161 "Shin Tennis no Ouji-sama", "Shimai Ningyou", "Aniyome",
162 "Dragon Ball Z: Kyokugen Battle!! Sandai Super Saiyajin", "Uchuu Kuubo Blue Noah",
163 "Hotaru no Haka", "First Kiss Story: Kiss Kara Hajimaru Monogatari", "Seikai no Senki III",
164 "Ikkitousen: Xtreme Xecutor", "Houkago Ren`ai Club: Koi no Etude",
165 "DNA2: Dokoka de Nakushita Aitsu no Aitsu (1995)", "Bamboo Blade", "Accelerando",
166 "Soukyuu no Fafner: Dead Aggressor", "Eiga Futari wa Precure Max Heart",
167 "Kyoufu no Kyou-chan", "Shin Taketori Monogatari: 1000-nen Joou", "Fresh Precure!",
168 "Grope: Yami no Naka no Kotori-tachi", "Seitokai Yakuindomo", "Chikyuu Shoujo Arjuna",
169 "Choukou Tenshi Escalayer", "Dragon Ball Kai", "Dragon League", "Hatsukoi Limited",
170 "Sexfriend", "Ao no Exorcist", "Futatsu no Spica", "Adesugata Mahou no Sannin Musume",
171 "Yawara! A Fashionable Judo Girl",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
175 for i := 0; i < B.N; i++ {
176 db.FuzzySearch(vec[i%len(vec)])
180 // // exact match of x-jat, en or ja non-primary title
181 // func BenchmarkFuzzySearch_secondBestCase(B *testing.B) {
182 // // grep -E '\|3\|(x-jat|en|ja)\|' anime-titles.dat | cut -d'|' -f4 | sort -R | \
183 // // sed 's/\(.*\)/"\1",/' | head -n 30
185 // "yosusora", "heartcatch", "chuunibyou", "Stringendo", "おれいも", "yamato 2199",
186 // "mai otome zwei", "cg r1", "harem", "Dorvack", "Natsume 1", "SMJA", "SM", "J2",
187 // "amstv2", "BJ Movie (2005)", "munto2", "nyc", "MT", "DBZ Movie 2",
188 // "Zatch Bell Movie 2", "Armitage", "J0ker", "CH", "sugar", "vga", "Nadesico",
189 // "dgc nyo", "setv", "D.g", "マジプリ", "myyour", "Haruhi 2009", "bantorra", "yamato2",
190 // "bakuhan", "vk2", "BBB", "5-2", "GSD SE III", "akasaka", "GS SE II", "F3", "おれつば",
191 // "sencolle", "wellber", "SailorMoon", "ay", "HCPC", "kxstv", "Shana III",
195 // for i := 0; i < B.N; i++ {
196 // db.FuzzySearch(vec[i%len(vec)])
200 // // exact match of non-primary title in any other language
201 // func BenchmarkFuzzySearch_thirdBestCase(B *testing.B) {
202 // // grep '|2|' anime-titles.dat | grep -Ev '(x-jat|en|ja)' | cut -d'|' -f4 | \
203 // // sort -R | sed 's/\(.*\)/"\1",/' | head -n 30
205 // "Зірка☆Щастя", "La ilusión de triunfar", "La scomparsa di Haruhi Suzumiya",
206 // "Код Геас: Бунтът на Люлюш 2", "我的女神 剧场版", "Lamu - Un rêve sans fin",
207 // "Lupin III: La cospirazione dei Fuma", "Адовая Девочка дубль 2", "夏娃的时间",
208 // "Дівчинка, що стрибала крізь всесвіт", "Мій сусід Тоторо", "机巧魔神",
209 // "City Hunter - Flash spécial !? La mort de Ryo Saeba", "Ateştopu", "مسدس×سيف",
210 // "Gli amici animali", "沉默的未知", "忧伤大人二之宫", "Пита-Тен", "Глава-гора", "高校龍中龍",
211 // "Яблочное зернышко (фильм второй)", "پروکسی مابعد", "青之花", "Heidi, la fille des Alpes",
212 // "银盘万花筒", "Temi d`amore tra i banchi di scuola", "Съюзът на Среброкрилите", "Аякаши",
213 // "Дух в оболонці: комплекс окремості", "贫乏姊妹物语", "La rose de Versailles",
214 // "แฮปปี้ เลสซั่น", "Juodasis Dievas", "Ерата Сенгоку: Последното парти",
215 // "Белина: Чезнеща в тъмнината", "Пламенный лабиринт", "Капризный Робот", "Kovboy Bebop: Film",
216 // "Bavel`in Kitabı", "东京魔人学院剑风帖 龙龙", "سكول رمبل الفصل الثاني", "青之驱魔师", "سايكانو",
217 // "神的记事本", "死神的歌谣", "Angel e a Flor de Sete Cores", "ماگی: هزارتوی جادو", "Spirală",
218 // "Chié la petite peste",
222 // for i := 0; i < B.N; i++ {
223 // db.FuzzySearch(vec[i%len(vec)])
227 // match of initial words
//
// BenchmarkFuzzySearch_initialWords calls db.FuzzySearch B.N times, cycling
// through the sample queries in vec.
// NOTE(review): the vec declaration is not visible in this excerpt; the
// string literals below presumably populate it — confirm.
228 func BenchmarkFuzzySearch_initialWords(B *testing.B) {
// Shell pipeline over anime-titles.dat: keeps titles with at least three
// space-separated words and cuts out the first two (cut -f1,2).
229 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '[^ ]+ [^ ]+ [^ ]+' | \
230 // sort -R | cut -d' ' -f1,2 | sed 's/\(.*\)/"\1",/' | head -n 30
232 "To Love", "Utawarerumono -", "Eden of", "D.C.if ~ダ・カーポ", "Вечност над",
233 "Rupan Sansei:", "Los Caballeros", "Neko Hiki", "LoGH: A", "Arcadia of",
234 "Pokémon 4Ever:", "Lenda Lunar", "Transformers: Master", "Tάρο, ο", "El Puño",
235 "El taxi", "Lupin the", "Ah! My", "Le journal", "Odin: Koushi", "Amazing-man: The",
236 "Legend of", "Youka no", "Я люблю", "Abe George", "Sisters of", "Ouran High",
237 "Batman: Gotham", "Dantalian no", "Koi to", "Night Shift",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
241 for i := 0; i < B.N; i++ {
242 db.FuzzySearch(vec[i%len(vec)])
246 // match of final words
//
// BenchmarkFuzzySearch_finalWords calls db.FuzzySearch B.N times, cycling
// through the sample queries in vec.
// NOTE(review): the vec declaration is not visible in this excerpt; the
// string literals below presumably populate it — confirm.
247 func BenchmarkFuzzySearch_finalWords(B *testing.B) {
// Shell pipeline over anime-titles.dat: keeps titles of exactly four
// space-separated words and cuts out the last two (cut -f3,4).
248 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
249 // sort -R | cut -d' ' -f3,4 | sed 's/\(.*\)/"\1",/' | head -n 30
251 "do Zodíaco", "Formula 91", "Shuto Houkai", "Deadly Sins", "gui lai",
252 "muistoja tulevaisuudesta", "Mission 1-3", "スペシャルエディションII それぞれの剣", "Một Giây",
253 "Meia-Lua Acima", "Mighty: Decode", "To Screw", "do Tênis", "(Duke Fleed)", "Olympic Taikai",
254 "Драма ангелов", "Shihosha Judge", "демонов Йоко", "Shoujo Club", "Family (2)", "do Tesouro",
255 "Witte Leeuw", "von Mandraguar", "Jin Xia", "Tabi Movie", "Symphonia 2", "no Tenkousei",
256 "Movie (2011)", "Guardian Signs", "Você 2",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
260 for i := 0; i < B.N; i++ {
261 db.FuzzySearch(vec[i%len(vec)])
265 // XXX: This is somehow the most time-consuming case, despite terminating several
266 // regular expressions earlier than the next two benchmarks.
268 // All regular expressions checked here (besides the .*-peppered one for initial candidate search)
269 // have no metacharacters at all besides the trivial \A and \z; while the ones for the following
270 // cases include more complicated grouped expressions...
//
// BenchmarkFuzzySearch_infixWords calls db.FuzzySearch B.N times, cycling
// through the sample queries in vec.
// NOTE(review): the vec declaration is not visible in this excerpt; the
// string literals below presumably populate it — confirm.
271 func BenchmarkFuzzySearch_infixWords(B *testing.B) {
// Shell pipeline over anime-titles.dat: keeps titles of exactly four
// space-separated words and cuts out the middle two (cut -f2,3).
272 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
273 // sort -R | cut -d' ' -f2,3 | sed 's/\(.*\)/"\1",/' | head -n 30
275 "Yes! プリキュア5GoGo!", "Grime X-Rated", "Diễn Ngàn", "Super-Refined Ninja",
276 "o Haita", "Conan: 14.", "the Seagulls", "009 Kaijuu", "Monogatari Daini-hen:",
277 "no Haha", "по Ловец", "Centimeters per", "wang gui", "the Wandering", "Saru Kani",
278 "Dark Red", "Pair: Project", "Охотник на", "trois petits", "of Teacher", "wa Suitai",
279 "Lolita Fantasy", "εκατοστά το", "Eri-sama Katsudou", "希望の学園と絶望の高校生 The",
280 "Comet SPT", "HUNTER スペシャル", "no Makemono", "Kızı: İkinci", "Pirate Captain",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
284 for i := 0; i < B.N; i++ {
285 db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_alternatingWords calls db.FuzzySearch B.N times,
// cycling through the sample queries in vec.
// NOTE(review): the vec declaration is not visible in this excerpt; the
// string literals below presumably populate it — confirm.
289 func BenchmarkFuzzySearch_alternatingWords(B *testing.B) {
// Shell pipeline over anime-titles.dat: keeps titles of exactly five
// space-separated words and cuts out the second and fourth (cut -f2,4).
290 // cat anime-titles.dat | cut -d'|' -f4 | grep -E '^[^ ]+ [^ ]+ [^ ]+ [^ ]+ [^ ]+$' | \
291 // sort -R | cut -d' ' -f2,4 | sed 's/\(.*\)/"\1",/' | head -n 30
293 "of Millennium", "Kreuz: und", "для Літнє", "Saikyou Deshi", "Hearts: no", "Roh Wolf",
294 "III: Columbus", "Shin-chan Film", "Ball Superandroid", "恋のステージ=HEART FIRE!",
295 "Disease Moon", "Corps Mecha", "BLOOD-C Last", "- trésor", "Lover a", "dievčati, preskočilo",
296 "Star: Szomorú", "Ai Marchen", "Kishin &", "Seiya: Goddess", "Orange Shiroi", "Punch Sekai:",
297 "No.1: no", "ο του", "プリキュアオールスターズ Stage", "Ankoku Hakai", "8-ма по", "II Ultimate",
298 "Tenma Kuro", "Grade Kakusei",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
302 for i := 0; i < B.N; i++ {
303 db.FuzzySearch(vec[i%len(vec)])
// BenchmarkFuzzySearch_worstCase calls db.FuzzySearch B.N times, cycling
// through the sample queries in vec. Per the comments below, the samples
// were derived from titles in anime-titles.dat and then perturbed by hand.
// NOTE(review): the vec declaration and part of the generating pipeline are
// not visible in this excerpt; the string literals below presumably populate
// vec — confirm.
307 func BenchmarkFuzzySearch_worstCase(B *testing.B) {
308 // cat anime-titles.dat | cut -d'|' -f4 | \
310 // -pe'chomp; $_ = encode_utf8(substr(decode_utf8($_), 1, -1) . "\n")' | \
311 // sort -R | sed 's/\(.*\)/"\1",/' | head -n 30
312 // further perturbed by hand
314 "ig ray S in han: Den tsu o Yob Amig",
315 "ar Ben th Sea: 20.00 Mil for Lov",
321 "aji no ppo: pion Roa",
326 "aint : Ο Χαμέ μβάς - Μυθολογία Άδ",
327 "as Camarer s Mágica",
329 "RAG BALL SODE of BAR",
330 "ero eroppi no ken: Pink no",
331 "acre east chin Cyg",
338 "2 sk sbrutna pojkar äventyrens",
// i%len(vec) wraps around so every iteration has a query even when B.N
// exceeds the number of samples.
346 for i := 0; i < B.N; i++ {
347 db.FuzzySearch(vec[i%len(vec)])