]> git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #8135
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::{
4     borrow::Cow,
5     convert::{TryFrom, TryInto},
6 };
7
8 use rustc_lexer::unescape::{unescape_literal, Mode};
9
10 use crate::{
11     ast::{self, AstToken},
12     TextRange, TextSize,
13 };
14
15 impl ast::Comment {
16     pub fn kind(&self) -> CommentKind {
17         CommentKind::from_text(self.text())
18     }
19
20     pub fn is_inner(&self) -> bool {
21         self.kind().doc == Some(CommentPlacement::Inner)
22     }
23
24     pub fn is_outer(&self) -> bool {
25         self.kind().doc == Some(CommentPlacement::Outer)
26     }
27
28     pub fn prefix(&self) -> &'static str {
29         let &(prefix, _kind) = CommentKind::BY_PREFIX
30             .iter()
31             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
32             .unwrap();
33         prefix
34     }
35
36     /// Returns the textual content of a doc comment node as a single string with prefix and suffix
37     /// removed.
38     pub fn doc_comment(&self) -> Option<&str> {
39         let kind = self.kind();
40         match kind {
41             CommentKind { shape, doc: Some(_) } => {
42                 let prefix = kind.prefix();
43                 let text = &self.text()[prefix.len()..];
44                 let text = if shape == CommentShape::Block {
45                     text.strip_suffix("*/").unwrap_or(text)
46                 } else {
47                     text
48                 };
49                 Some(text)
50             }
51             _ => None,
52         }
53     }
54 }
55
/// Classification of a comment: its shape and, for doc comments, where the documentation
/// is attached.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Whether the comment is a `//` line comment or a `/* */` block comment.
    pub shape: CommentShape,
    /// `Some(..)` for doc comments, `None` for ordinary comments.
    pub doc: Option<CommentPlacement>,
}

/// Syntactic shape of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A comment introduced by `//`.
    Line,
    /// A comment introduced by `/*`.
    Block,
}

impl CommentShape {
    /// Returns `true` for `CommentShape::Line`.
    pub fn is_line(self) -> bool {
        self == CommentShape::Line
    }

    /// Returns `true` for `CommentShape::Block`.
    pub fn is_block(self) -> bool {
        self == CommentShape::Block
    }
}

/// Placement of a doc comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// Doc comments written as `//!` or `/*!`.
    Inner,
    /// Doc comments written as `///` or `/**`.
    Outer,
}

impl CommentKind {
    /// Known comment prefixes and the kind each one denotes.
    ///
    /// The order matters: longer, more specific prefixes come before the shorter prefixes
    /// they extend, and the bare `//` and `/*` come last, so that `from_text` can simply take
    /// the first match.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = [
        ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
        ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
        ("////", CommentKind { shape: CommentShape::Line, doc: None }),
        ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
        ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
        ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
        ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
        ("//", CommentKind { shape: CommentShape::Line, doc: None }),
        ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
    ];

    /// Determines the kind of a comment from the first table prefix its text starts with.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with neither `//` nor `/*`.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        let &(_prefix, kind) = CommentKind::BY_PREFIX
            .iter()
            .find(|&&(prefix, _)| text.starts_with(prefix))
            .expect("comment text must start with `//` or `/*`");
        kind
    }

    /// Returns the canonical (shortest) prefix that introduces a comment of this kind.
    ///
    /// The table is scanned back to front: several entries share the non-doc kinds
    /// (`/**/`, `/***` and `/*`; `////` and `//`), and a forward scan would hand back
    /// `/**/` or `////`, which are special cases rather than the prefix of an ordinary
    /// comment. Doc kinds appear once in the table, so they are unaffected by the direction.
    fn prefix(&self) -> &'static str {
        let &(prefix, _) = CommentKind::BY_PREFIX
            .iter()
            .rev()
            .find(|(_, kind)| kind == self)
            .expect("every shape/placement combination is listed in BY_PREFIX");
        prefix
    }
}
110
111 impl ast::Whitespace {
112     pub fn spans_multiple_lines(&self) -> bool {
113         let text = self.text();
114         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
115     }
116 }
117
118 pub struct QuoteOffsets {
119     pub quotes: (TextRange, TextRange),
120     pub contents: TextRange,
121 }
122
123 impl QuoteOffsets {
124     fn new(literal: &str) -> Option<QuoteOffsets> {
125         let left_quote = literal.find('"')?;
126         let right_quote = literal.rfind('"')?;
127         if left_quote == right_quote {
128             // `literal` only contains one quote
129             return None;
130         }
131
132         let start = TextSize::from(0);
133         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
134         let right_quote = TextSize::try_from(right_quote).unwrap();
135         let end = TextSize::of(literal);
136
137         let res = QuoteOffsets {
138             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
139             contents: TextRange::new(left_quote, right_quote),
140         };
141         Some(res)
142     }
143 }
144
145 impl ast::String {
146     pub fn is_raw(&self) -> bool {
147         self.text().starts_with('r')
148     }
149     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
150         let contents_range = self.text_range_between_quotes()?;
151         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
152         Some(range + contents_range.start())
153     }
154
155     pub fn value(&self) -> Option<Cow<'_, str>> {
156         if self.is_raw() {
157             let text = self.text();
158             let text =
159                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
160             return Some(Cow::Borrowed(text));
161         }
162
163         let text = self.text();
164         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
165
166         let mut buf = String::new();
167         let mut text_iter = text.chars();
168         let mut has_error = false;
169         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
170             unescaped_char,
171             buf.capacity() == 0,
172         ) {
173             (Ok(c), false) => buf.push(c),
174             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
175             (Ok(c), true) => {
176                 buf.reserve_exact(text.len());
177                 buf.push_str(&text[..char_range.start]);
178                 buf.push(c);
179             }
180             (Err(_), _) => has_error = true,
181         });
182
183         match (has_error, buf.capacity() == 0) {
184             (true, _) => None,
185             (false, true) => Some(Cow::Borrowed(text)),
186             (false, false) => Some(Cow::Owned(buf)),
187         }
188     }
189
190     pub fn quote_offsets(&self) -> Option<QuoteOffsets> {
191         let text = self.text();
192         let offsets = QuoteOffsets::new(text)?;
193         let o = self.syntax().text_range().start();
194         let offsets = QuoteOffsets {
195             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
196             contents: offsets.contents + o,
197         };
198         Some(offsets)
199     }
200     pub fn text_range_between_quotes(&self) -> Option<TextRange> {
201         self.quote_offsets().map(|it| it.contents)
202     }
203     pub fn open_quote_text_range(&self) -> Option<TextRange> {
204         self.quote_offsets().map(|it| it.quotes.0)
205     }
206     pub fn close_quote_text_range(&self) -> Option<TextRange> {
207         self.quote_offsets().map(|it| it.quotes.1)
208     }
209 }
210
211 impl ast::ByteString {
212     pub fn is_raw(&self) -> bool {
213         self.text().starts_with("br")
214     }
215 }
216
/// Kinds of syntactic pieces reported by `HasFormatSpecifier::lex_format_specifier` for a
/// `{...}` placeholder of a format string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The `{` that opens a placeholder.
    Open,
    /// The `}` that closes a placeholder.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// A name: `_` or an alphabetic character followed by `_`, digits or alphabetic characters.
    Identifier,
    /// The `:` separating the argument from the format spec.
    Colon,
    /// The fill character preceding an alignment character.
    Fill,
    /// An alignment character: `<`, `^` or `>`.
    Align,
    /// A `+` or `-` sign flag.
    Sign,
    /// The `#` flag.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// A `$` following an integer or identifier.
    DollarSign,
    /// The `.` introducing the precision.
    Dot,
    /// A `*` used as the precision.
    Asterisk,
    /// A `?`.
    QuestionMark,
}
234
235 pub trait HasFormatSpecifier: AstToken {
236     fn char_ranges(
237         &self,
238     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
239
240     fn lex_format_specifier<F>(&self, mut callback: F)
241     where
242         F: FnMut(TextRange, FormatSpecifier),
243     {
244         let char_ranges = if let Some(char_ranges) = self.char_ranges() {
245             char_ranges
246         } else {
247             return;
248         };
249         let mut chars = char_ranges.iter().peekable();
250
251         while let Some((range, first_char)) = chars.next() {
252             match first_char {
253                 Ok('{') => {
254                     // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
255                     if let Some((_, Ok('{'))) = chars.peek() {
256                         // Escaped format specifier, `{{`
257                         chars.next();
258                         continue;
259                     }
260
261                     callback(*range, FormatSpecifier::Open);
262
263                     // check for integer/identifier
264                     match chars
265                         .peek()
266                         .and_then(|next| next.1.as_ref().ok())
267                         .copied()
268                         .unwrap_or_default()
269                     {
270                         '0'..='9' => {
271                             // integer
272                             read_integer(&mut chars, &mut callback);
273                         }
274                         c if c == '_' || c.is_alphabetic() => {
275                             // identifier
276                             read_identifier(&mut chars, &mut callback);
277                         }
278                         _ => {}
279                     }
280
281                     if let Some((_, Ok(':'))) = chars.peek() {
282                         skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
283
284                         // check for fill/align
285                         let mut cloned = chars.clone().take(2);
286                         let first = cloned
287                             .next()
288                             .and_then(|next| next.1.as_ref().ok())
289                             .copied()
290                             .unwrap_or_default();
291                         let second = cloned
292                             .next()
293                             .and_then(|next| next.1.as_ref().ok())
294                             .copied()
295                             .unwrap_or_default();
296                         match second {
297                             '<' | '^' | '>' => {
298                                 // alignment specifier, first char specifies fillment
299                                 skip_char_and_emit(
300                                     &mut chars,
301                                     FormatSpecifier::Fill,
302                                     &mut callback,
303                                 );
304                                 skip_char_and_emit(
305                                     &mut chars,
306                                     FormatSpecifier::Align,
307                                     &mut callback,
308                                 );
309                             }
310                             _ => match first {
311                                 '<' | '^' | '>' => {
312                                     skip_char_and_emit(
313                                         &mut chars,
314                                         FormatSpecifier::Align,
315                                         &mut callback,
316                                     );
317                                 }
318                                 _ => {}
319                             },
320                         }
321
322                         // check for sign
323                         match chars
324                             .peek()
325                             .and_then(|next| next.1.as_ref().ok())
326                             .copied()
327                             .unwrap_or_default()
328                         {
329                             '+' | '-' => {
330                                 skip_char_and_emit(
331                                     &mut chars,
332                                     FormatSpecifier::Sign,
333                                     &mut callback,
334                                 );
335                             }
336                             _ => {}
337                         }
338
339                         // check for `#`
340                         if let Some((_, Ok('#'))) = chars.peek() {
341                             skip_char_and_emit(
342                                 &mut chars,
343                                 FormatSpecifier::NumberSign,
344                                 &mut callback,
345                             );
346                         }
347
348                         // check for `0`
349                         let mut cloned = chars.clone().take(2);
350                         let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
351                         let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
352
353                         if first == Some('0') && second != Some('$') {
354                             skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
355                         }
356
357                         // width
358                         match chars
359                             .peek()
360                             .and_then(|next| next.1.as_ref().ok())
361                             .copied()
362                             .unwrap_or_default()
363                         {
364                             '0'..='9' => {
365                                 read_integer(&mut chars, &mut callback);
366                                 if let Some((_, Ok('$'))) = chars.peek() {
367                                     skip_char_and_emit(
368                                         &mut chars,
369                                         FormatSpecifier::DollarSign,
370                                         &mut callback,
371                                     );
372                                 }
373                             }
374                             c if c == '_' || c.is_alphabetic() => {
375                                 read_identifier(&mut chars, &mut callback);
376
377                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
378                                     == Some('?')
379                                 {
380                                     skip_char_and_emit(
381                                         &mut chars,
382                                         FormatSpecifier::QuestionMark,
383                                         &mut callback,
384                                     );
385                                 }
386
387                                 // can be either width (indicated by dollar sign, or type in which case
388                                 // the next sign has to be `}`)
389                                 let next =
390                                     chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
391
392                                 match next {
393                                     Some('$') => skip_char_and_emit(
394                                         &mut chars,
395                                         FormatSpecifier::DollarSign,
396                                         &mut callback,
397                                     ),
398                                     Some('}') => {
399                                         skip_char_and_emit(
400                                             &mut chars,
401                                             FormatSpecifier::Close,
402                                             &mut callback,
403                                         );
404                                         continue;
405                                     }
406                                     _ => continue,
407                                 };
408                             }
409                             _ => {}
410                         }
411
412                         // precision
413                         if let Some((_, Ok('.'))) = chars.peek() {
414                             skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
415
416                             match chars
417                                 .peek()
418                                 .and_then(|next| next.1.as_ref().ok())
419                                 .copied()
420                                 .unwrap_or_default()
421                             {
422                                 '*' => {
423                                     skip_char_and_emit(
424                                         &mut chars,
425                                         FormatSpecifier::Asterisk,
426                                         &mut callback,
427                                     );
428                                 }
429                                 '0'..='9' => {
430                                     read_integer(&mut chars, &mut callback);
431                                     if let Some((_, Ok('$'))) = chars.peek() {
432                                         skip_char_and_emit(
433                                             &mut chars,
434                                             FormatSpecifier::DollarSign,
435                                             &mut callback,
436                                         );
437                                     }
438                                 }
439                                 c if c == '_' || c.is_alphabetic() => {
440                                     read_identifier(&mut chars, &mut callback);
441                                     if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
442                                         != Some('$')
443                                     {
444                                         continue;
445                                     }
446                                     skip_char_and_emit(
447                                         &mut chars,
448                                         FormatSpecifier::DollarSign,
449                                         &mut callback,
450                                     );
451                                 }
452                                 _ => {
453                                     continue;
454                                 }
455                             }
456                         }
457
458                         // type
459                         match chars
460                             .peek()
461                             .and_then(|next| next.1.as_ref().ok())
462                             .copied()
463                             .unwrap_or_default()
464                         {
465                             '?' => {
466                                 skip_char_and_emit(
467                                     &mut chars,
468                                     FormatSpecifier::QuestionMark,
469                                     &mut callback,
470                                 );
471                             }
472                             c if c == '_' || c.is_alphabetic() => {
473                                 read_identifier(&mut chars, &mut callback);
474
475                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
476                                     == Some('?')
477                                 {
478                                     skip_char_and_emit(
479                                         &mut chars,
480                                         FormatSpecifier::QuestionMark,
481                                         &mut callback,
482                                     );
483                                 }
484                             }
485                             _ => {}
486                         }
487                     }
488
489                     if let Some((_, Ok('}'))) = chars.peek() {
490                         skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
491                     } else {
492                         continue;
493                     }
494                 }
495                 _ => {
496                     while let Some((_, Ok(next_char))) = chars.peek() {
497                         if next_char == &'{' {
498                             break;
499                         }
500                         chars.next();
501                     }
502                 }
503             };
504         }
505
506         fn skip_char_and_emit<'a, I, F>(
507             chars: &mut std::iter::Peekable<I>,
508             emit: FormatSpecifier,
509             callback: &mut F,
510         ) where
511             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
512             F: FnMut(TextRange, FormatSpecifier),
513         {
514             let (range, _) = chars.next().unwrap();
515             callback(*range, emit);
516         }
517
518         fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
519         where
520             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
521             F: FnMut(TextRange, FormatSpecifier),
522         {
523             let (mut range, c) = chars.next().unwrap();
524             assert!(c.as_ref().unwrap().is_ascii_digit());
525             while let Some((r, Ok(next_char))) = chars.peek() {
526                 if next_char.is_ascii_digit() {
527                     chars.next();
528                     range = range.cover(*r);
529                 } else {
530                     break;
531                 }
532             }
533             callback(range, FormatSpecifier::Integer);
534         }
535
536         fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
537         where
538             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
539             F: FnMut(TextRange, FormatSpecifier),
540         {
541             let (mut range, c) = chars.next().unwrap();
542             assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
543             while let Some((r, Ok(next_char))) = chars.peek() {
544                 if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
545                     chars.next();
546                     range = range.cover(*r);
547                 } else {
548                     break;
549                 }
550             }
551             callback(range, FormatSpecifier::Identifier);
552         }
553     }
554 }
555
556 impl HasFormatSpecifier for ast::String {
557     fn char_ranges(
558         &self,
559     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
560         let text = self.text();
561         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
562         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
563
564         let mut res = Vec::with_capacity(text.len());
565         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
566             res.push((
567                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
568                     + offset,
569                 unescaped_char,
570             ))
571         });
572
573         Some(res)
574     }
575 }
576
577 impl ast::IntNumber {
578     pub fn radix(&self) -> Radix {
579         match self.text().get(..2).unwrap_or_default() {
580             "0b" => Radix::Binary,
581             "0o" => Radix::Octal,
582             "0x" => Radix::Hexadecimal,
583             _ => Radix::Decimal,
584         }
585     }
586
587     pub fn value(&self) -> Option<u128> {
588         let token = self.syntax();
589
590         let mut text = token.text();
591         if let Some(suffix) = self.suffix() {
592             text = &text[..text.len() - suffix.len()]
593         }
594
595         let radix = self.radix();
596         text = &text[radix.prefix_len()..];
597
598         let buf;
599         if text.contains('_') {
600             buf = text.replace('_', "");
601             text = buf.as_str();
602         };
603
604         let value = u128::from_str_radix(text, radix as u32).ok()?;
605         Some(value)
606     }
607
608     pub fn suffix(&self) -> Option<&str> {
609         let text = self.text();
610         let radix = self.radix();
611         let mut indices = text.char_indices();
612         if radix != Radix::Decimal {
613             indices.next()?;
614             indices.next()?;
615         }
616         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
617             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
618             _ => |(_, c)| c.is_ascii_alphabetic(),
619         };
620         let (suffix_start, _) = indices.find(is_suffix_start)?;
621         Some(&text[suffix_start..])
622     }
623 }
624
625 impl ast::FloatNumber {
626     pub fn suffix(&self) -> Option<&str> {
627         let text = self.text();
628         let mut indices = text.char_indices();
629         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
630         if c == 'e' || c == 'E' {
631             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
632         }
633         Some(&text[suffix_start..])
634     }
635 }
636
/// Radix of an integer literal; the discriminant of each variant is the numeric base.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in order of increasing base.
    pub const ALL: &'static [Radix] =
        &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];

    /// Length of the literal prefix for this radix: two for every radix except decimal,
    /// which has none.
    const fn prefix_len(&self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
656
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that the float literal `lit` has the suffix `expected`.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the integer literal `lit` has the suffix `expected`.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts that the resulting string literal evaluates
    /// to `expected`.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = ast::String { syntax: make::tokens::literal(&format!("\"{}\"", lit)) };
        assert_eq!(token.value().as_deref(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        let cases: &[(&str, Option<&str>)] = &[
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases {
            check_string_value(lit, expected);
        }
    }
}