]> git.lizzy.rs Git - rust.git/blob - library/core/tests/pattern.rs
Auto merge of #98914 - fee1-dead-contrib:min-deref-patterns, r=compiler-errors
[rust.git] / library / core / tests / pattern.rs
1 use std::str::pattern::*;
2
3 // This macro makes it easier to write
4 // tests that do a series of iterations
5 macro_rules! search_asserts {
6     ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
7         let mut searcher = $needle.into_searcher($haystack);
8         let arr = [$( Step::from(searcher.$func()) ),*];
9         assert_eq!(&arr[..], &$result, $testname);
10     }
11 }
12
/// Single result type covering both `next()`-style steps and the
/// `next_match()` / `next_reject()` style of optional ranges.
///
/// The three range-carrying variants deliberately have names of equal length
/// so that tables of expected steps line up.
#[derive(Debug, Eq, PartialEq)]
enum Step {
    /// Converted from `SearchStep::Match(a, b)`.
    Matches(usize, usize),
    /// Converted from `SearchStep::Reject(a, b)`.
    Rejects(usize, usize),
    /// Converted from `Some((a, b))`.
    InRange(usize, usize),
    /// Converted from `SearchStep::Done` or from `None`.
    Done,
}
23
24 use self::Step::*;
25
26 impl From<SearchStep> for Step {
27     fn from(x: SearchStep) -> Self {
28         match x {
29             SearchStep::Match(a, b) => Matches(a, b),
30             SearchStep::Reject(a, b) => Rejects(a, b),
31             SearchStep::Done => Done,
32         }
33     }
34 }
35
36 impl From<Option<(usize, usize)>> for Step {
37     fn from(x: Option<(usize, usize)>) -> Self {
38         match x {
39             Some((a, b)) => InRange(a, b),
40             None => Done,
41         }
42     }
43 }
44
45 // FIXME(Manishearth) these tests focus on single-character searching  (CharSearcher)
46 // and on next()/next_match(), not next_reject(). This is because
47 // the memchr changes make next_match() for single chars complex, but next_reject()
48 // continues to use next() under the hood. We should add more test cases for all
49 // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
50
/// Steps through short haystacks one character at a time with `next()` and
/// `next_back()`, checking the byte range and match/reject verdict of every
/// step. The trailing comment on each expected step names the character it
/// covers.
#[test]
fn test_simple_iteration() {
    // ASCII, front to back.
    search_asserts!(
        "abcdeabcd",
        'a',
        "forward iteration for ASCII string",
        [next, next, next, next, next, next, next, next, next, next],
        [
            Matches(0, 1), // a
            Rejects(1, 2), // b
            Rejects(2, 3), // c
            Rejects(3, 4), // d
            Rejects(4, 5), // e
            Matches(5, 6), // a
            Rejects(6, 7), // b
            Rejects(7, 8), // c
            Rejects(8, 9), // d
            Done           // EOF
        ]
    );

    // ASCII, back to front.
    search_asserts!(
        "abcdeabcd",
        'a',
        "reverse iteration for ASCII string",
        [
            next_back, next_back, next_back, next_back, next_back,
            next_back, next_back, next_back, next_back, next_back
        ],
        [
            Rejects(8, 9), // d
            Rejects(7, 8), // c
            Rejects(6, 7), // b
            Matches(5, 6), // a
            Rejects(4, 5), // e
            Rejects(3, 4), // d
            Rejects(2, 3), // c
            Rejects(1, 2), // b
            Matches(0, 1), // a
            Done           // EOF
        ]
    );

    // Three-byte characters only, front to back.
    search_asserts!(
        "我爱我的猫",
        '我',
        "forward iteration for Chinese string",
        [next, next, next, next, next, next],
        [
            Matches(0, 3),   // 我
            Rejects(3, 6),   // 爱
            Matches(6, 9),   // 我
            Rejects(9, 12),  // 的
            Rejects(12, 15), // 猫
            Done             // EOF
        ]
    );

    // Three-byte and one-byte characters mixed, front to back.
    search_asserts!(
        "我的猫说meow",
        'm',
        "forward iteration for mixed string",
        [next, next, next, next, next, next, next, next, next],
        [
            Rejects(0, 3),   // 我
            Rejects(3, 6),   // 的
            Rejects(6, 9),   // 猫
            Rejects(9, 12),  // 说
            Matches(12, 13), // m
            Rejects(13, 14), // e
            Rejects(14, 15), // o
            Rejects(15, 16), // w
            Done             // EOF
        ]
    );

    // Three-byte and one-byte characters mixed, back to front.
    search_asserts!(
        "我的猫说meow",
        '猫',
        "reverse iteration for mixed string",
        [
            next_back, next_back, next_back, next_back, next_back,
            next_back, next_back, next_back, next_back
        ],
        [
            Rejects(15, 16), // w
            Rejects(14, 15), // o
            Rejects(13, 14), // e
            Rejects(12, 13), // m
            Rejects(9, 12),  // 说
            Matches(6, 9),   // 猫
            Rejects(3, 6),   // 的
            Rejects(0, 3),   // 我
            Done             // EOF
        ]
    );
}
146
/// Exercises `next_match()`, `next_reject()` and their `_back` counterparts
/// for the needle `'a'` in ASCII haystacks.
#[test]
fn test_simple_search() {
    // Every 'a', front to back, then exhaustion.
    search_asserts!(
        "abcdeabcdeabcde",
        'a',
        "next_match for ASCII string",
        [next_match, next_match, next_match, next_match],
        [
            InRange(0, 1),
            InRange(5, 6),
            InRange(10, 11),
            Done
        ]
    );

    // Every 'a', back to front, then exhaustion.
    search_asserts!(
        "abcdeabcdeabcde",
        'a',
        "next_match_back for ASCII string",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [
            InRange(10, 11),
            InRange(5, 6),
            InRange(0, 1),
            Done
        ]
    );

    // Rejections from the front, with one next_match() in the middle.
    search_asserts!(
        "abcdeab",
        'a',
        "next_reject for ASCII string",
        [next_reject, next_reject, next_match, next_reject, next_reject],
        [
            InRange(1, 2), // b
            InRange(2, 3), // c
            InRange(5, 6), // a (match)
            InRange(6, 7), // b
            Done
        ]
    );

    // Rejections from the back, with one next_match_back() in the middle.
    // This sequence stops before the searcher is exhausted.
    search_asserts!(
        "abcdeabcdeabcde",
        'a',
        "next_reject_back for ASCII string",
        [
            next_reject_back, next_reject_back, next_match_back,
            next_reject_back, next_reject_back, next_reject_back
        ],
        [
            InRange(14, 15), // e
            InRange(13, 14), // d
            InRange(10, 11), // a (match)
            InRange(9, 10),  // e
            InRange(8, 9),   // d
            InRange(7, 8)    // c
        ]
    );
}
195
// Haystack for the shared-byte stress tests.
//
// Á, 각, ก, 😁 all end in 0x81.
// 🁀, ᘀ do not end in 0x81 but contain the byte.
// NOTE(review): ᘀ looks like it encodes without any 0x81 byte - confirm.
// ꁁ has 0x81 as its second and third bytes.
//
// The memchr-using implementation of next_match and next_match_back
// temporarily violates the property that the search is always on a
// unicode boundary, which is fine as long as this never reaches next()
// or next_back(). So we also test that next() is correct after each
// next_match().
const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
206
/// Documents the byte range of every character in `STRESS`.
///
/// The needle `'x'` never occurs in the haystack, so each `next()` call
/// rejects exactly one character and the expected list doubles as an index
/// table for the other stress tests.
#[test]
fn test_stress_indices() {
    search_asserts!(
        STRESS,
        'x',
        "Indices of characters in stress test",
        [
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next
        ],
        [
            Rejects(0, 2),   // Á
            Rejects(2, 3),   // a
            Rejects(3, 7),   // 🁀
            Rejects(7, 8),   // b
            Rejects(8, 10),  // Á
            Rejects(10, 13), // ꁁ
            Rejects(13, 14), // f
            Rejects(14, 15), // g
            Rejects(15, 19), // 😁
            Rejects(19, 22), // 각
            Rejects(22, 25), // ก
            Rejects(25, 28), // ᘀ
            Rejects(28, 31), // 각
            Rejects(31, 32), // a
            Rejects(32, 34), // Á
            Rejects(34, 37), // 각
            Rejects(37, 40), // ꁁ
            Rejects(40, 43), // ก
            Rejects(43, 47), // 😁
            Rejects(47, 48), // a
            Done             // EOF
        ]
    );
}
244
/// Forward searches over `STRESS` for needles whose encodings share bytes
/// with other characters in the haystack.
///
/// Each needle gets two cases:
/// * a plain search, and
/// * a "check if next() still works" case that interleaves `next()` after
///   each `next_match()` to confirm the searcher is left on a character
///   boundary.
///
/// The plain Thai, emoji and Yi cases call only `next_match()`. They used to
/// be exact copies of their interleaved twins, which left back-to-back
/// `next_match()` calls untested for those needles.
#[test]
fn test_forward_search_shared_bytes() {
    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character",
        [next_match, next_match, next_match, next_match],
        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(0, 2),
            Rejects(2, 3),
            InRange(8, 10),
            Rejects(10, 13),
            InRange(32, 34),
            Rejects(34, 37),
            Done
        ]
    );

    // Kept as written: one next() followed by consecutive next_match() calls,
    // a sequence the interleaved case below does not cover.
    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character",
        [next_match, next, next_match, next_match, next_match],
        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(19, 22),
            Rejects(22, 25),
            InRange(28, 31),
            Rejects(31, 32),
            InRange(34, 37),
            Rejects(37, 40),
            Done
        ]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character",
        [next_match, next_match, next_match],
        [InRange(22, 25), InRange(40, 43), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji",
        [next_match, next_match, next_match],
        [InRange(15, 19), InRange(43, 47), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes",
        [next_match, next_match, next_match],
        [InRange(10, 13), InRange(37, 40), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
    );
}
343
/// Reverse searches over `STRESS` for needles whose encodings share bytes
/// with other characters in the haystack.
///
/// Each needle gets two cases:
/// * a plain search, and
/// * a "check if next_back() still works" case that interleaves `next_back()`
///   after each `next_match_back()` to confirm the searcher is left on a
///   character boundary.
///
/// The plain Thai, emoji and Yi cases call only `next_match_back()`. They
/// used to be exact copies of their interleaved twins, which left
/// back-to-back `next_match_back()` calls untested for those needles.
#[test]
fn test_reverse_search_shared_bytes() {
    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
        [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
    );

    // Kept as written: one next_back() followed by consecutive
    // next_match_back() calls, a sequence the interleaved case below does not
    // cover.
    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character",
        [next_match_back, next_back, next_match_back, next_match_back, next_match_back],
        [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character; check if next_back() still works",
        [
            next_match_back,
            next_back,
            next_match_back,
            next_back,
            next_match_back,
            next_back,
            next_match_back
        ],
        [
            InRange(34, 37),
            Rejects(32, 34),
            InRange(28, 31),
            Rejects(25, 28),
            InRange(19, 22),
            Rejects(15, 19),
            Done
        ]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character",
        [next_match_back, next_match_back, next_match_back],
        [InRange(40, 43), InRange(22, 25), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji",
        [next_match_back, next_match_back, next_match_back],
        [InRange(43, 47), InRange(15, 19), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes",
        [next_match_back, next_match_back, next_match_back],
        [InRange(37, 40), InRange(10, 13), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
    );
}
442
/// Regression test for https://github.com/rust-lang/rust/issues/47175.
///
/// Drives one searcher from both ends and checks that the front and back
/// cursors converge: once they meet, the following call reports `Done`.
#[test]
fn double_ended_regression_test() {
    // ASCII haystack, match-only calls.
    search_asserts!(
        "abcdeabcdeabcde",
        'a',
        "alternating double ended search",
        [next_match, next_match_back, next_match, next_match_back],
        [
            InRange(0, 1),   // front
            InRange(10, 11), // back
            InRange(5, 6),   // front
            Done
        ]
    );
    search_asserts!(
        "abcdeabcdeabcde",
        'a',
        "triple double ended search for a",
        [next_match, next_match_back, next_match_back, next_match_back],
        [
            InRange(0, 1),   // front
            InRange(10, 11), // back
            InRange(5, 6),   // back
            Done
        ]
    );
    search_asserts!(
        "abcdeabcdeabcde",
        'd',
        "triple double ended search for d",
        [next_match, next_match_back, next_match_back, next_match_back],
        [
            InRange(3, 4),   // front
            InRange(13, 14), // back
            InRange(8, 9),   // back
            Done
        ]
    );

    // Multi-byte needles in the stress haystack, mixing match calls with
    // single steps from either end.
    search_asserts!(
        STRESS,
        'Á',
        "Double ended search for two-byte Latin character",
        [next_match, next_match_back, next_match, next_match_back],
        [
            InRange(0, 2),   // front
            InRange(32, 34), // back
            InRange(8, 10),  // front
            Done
        ]
    );
    search_asserts!(
        STRESS,
        '각',
        "Reverse double ended search for three-byte Hangul character",
        [next_match_back, next_back, next_match, next, next_match_back, next_match],
        [
            InRange(34, 37), // back match
            Rejects(32, 34), // back step
            InRange(19, 22), // front match
            Rejects(22, 25), // front step
            InRange(28, 31), // back match
            Done
        ]
    );
    search_asserts!(
        STRESS,
        'ก',
        "Double ended search for three-byte Thai character",
        [next_match, next_back, next, next_match_back, next_match],
        [
            InRange(22, 25), // front match
            Rejects(47, 48), // back step
            Rejects(25, 28), // front step
            InRange(40, 43), // back match
            Done
        ]
    );
    search_asserts!(
        STRESS,
        '😁',
        "Double ended search for four-byte emoji",
        [next_match_back, next, next_match, next_back, next_match],
        [
            InRange(43, 47), // back match
            Rejects(0, 2),   // front step
            InRange(15, 19), // front match
            Rejects(40, 43), // back step
            Done
        ]
    );
    search_asserts!(
        STRESS,
        'ꁁ',
        "Double ended search for three-byte Yi character with repeated bytes",
        [next_match, next, next_match_back, next_back, next_match],
        [
            InRange(10, 13), // front match
            Rejects(13, 14), // front step
            InRange(37, 40), // back match
            Rejects(34, 37), // back step
            Done
        ]
    );
}
503 }