]> git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #7837
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::{
4     borrow::Cow,
5     convert::{TryFrom, TryInto},
6 };
7
8 use rustc_lexer::unescape::{unescape_literal, Mode};
9
10 use crate::{
11     ast::{self, AstToken},
12     TextRange, TextSize,
13 };
14
15 impl ast::Comment {
16     pub fn kind(&self) -> CommentKind {
17         CommentKind::from_text(self.text())
18     }
19
20     pub fn is_inner(&self) -> bool {
21         self.kind().doc == Some(CommentPlacement::Inner)
22     }
23
24     pub fn is_outer(&self) -> bool {
25         self.kind().doc == Some(CommentPlacement::Outer)
26     }
27
28     pub fn prefix(&self) -> &'static str {
29         let &(prefix, _kind) = CommentKind::BY_PREFIX
30             .iter()
31             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
32             .unwrap();
33         prefix
34     }
35
36     /// Returns the textual content of a doc comment block as a single string.
37     /// That is, strips leading `///` (+ optional 1 character of whitespace),
38     /// trailing `*/`, trailing whitespace and then joins the lines.
39     pub fn doc_comment(&self) -> Option<&str> {
40         let kind = self.kind();
41         match kind {
42             CommentKind { shape, doc: Some(_) } => {
43                 let prefix = kind.prefix();
44                 let text = &self.text()[prefix.len()..];
45                 let ws = text.chars().next().filter(|c| c.is_whitespace());
46                 let text = ws.map_or(text, |ws| &text[ws.len_utf8()..]);
47                 match shape {
48                     CommentShape::Block if text.ends_with("*/") => {
49                         Some(&text[..text.len() - "*/".len()])
50                     }
51                     _ => Some(text),
52                 }
53             }
54             _ => None,
55         }
56     }
57 }
58
/// What sort of comment a token is: its shape plus, for doc comments, its
/// placement.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Line (`//…`) or block (`/*…`) comment.
    pub shape: CommentShape,
    /// `Some(_)` for doc comments, `None` for ordinary ones.
    pub doc: Option<CommentPlacement>,
}

/// Syntactic shape of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A comment introduced by `//`.
    Line,
    /// A comment introduced by `/*`.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}

/// Placement of a doc comment, as encoded by its marker.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// `//!` and `/*!` comments.
    Inner,
    /// `///` and `/**` comments.
    Outer,
}

impl CommentKind {
    /// Known comment prefixes with the kind each one denotes.
    ///
    /// Order matters: lookups take the first matching entry, so longer, more
    /// specific prefixes (`/**/`, `/***`, `////`) come before the shorter
    /// ones they would otherwise be mistaken for.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = {
        use CommentPlacement::{Inner, Outer};
        use CommentShape::{Block, Line};

        [
            ("/**/", CommentKind { shape: Block, doc: None }),
            ("/***", CommentKind { shape: Block, doc: None }),
            ("////", CommentKind { shape: Line, doc: None }),
            ("///", CommentKind { shape: Line, doc: Some(Outer) }),
            ("//!", CommentKind { shape: Line, doc: Some(Inner) }),
            ("/**", CommentKind { shape: Block, doc: Some(Outer) }),
            ("/*!", CommentKind { shape: Block, doc: Some(Inner) }),
            ("//", CommentKind { shape: Line, doc: None }),
            ("/*", CommentKind { shape: Block, doc: None }),
        ]
    };

    /// Determines the kind of the comment whose full text is `text`.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with none of the known prefixes.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        let matching_entry =
            CommentKind::BY_PREFIX.iter().position(|&(prefix, _)| text.starts_with(prefix));
        CommentKind::BY_PREFIX[matching_entry.unwrap()].1
    }

    /// Returns the prefix of the first table entry that has this kind.
    fn prefix(&self) -> &'static str {
        let same_kind = CommentKind::BY_PREFIX.iter().position(|(_, kind)| kind == self);
        CommentKind::BY_PREFIX[same_kind.unwrap()].0
    }
}
113
114 impl ast::Whitespace {
115     pub fn spans_multiple_lines(&self) -> bool {
116         let text = self.text();
117         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
118     }
119 }
120
121 pub struct QuoteOffsets {
122     pub quotes: (TextRange, TextRange),
123     pub contents: TextRange,
124 }
125
126 impl QuoteOffsets {
127     fn new(literal: &str) -> Option<QuoteOffsets> {
128         let left_quote = literal.find('"')?;
129         let right_quote = literal.rfind('"')?;
130         if left_quote == right_quote {
131             // `literal` only contains one quote
132             return None;
133         }
134
135         let start = TextSize::from(0);
136         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
137         let right_quote = TextSize::try_from(right_quote).unwrap();
138         let end = TextSize::of(literal);
139
140         let res = QuoteOffsets {
141             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
142             contents: TextRange::new(left_quote, right_quote),
143         };
144         Some(res)
145     }
146 }
147
148 impl ast::String {
149     pub fn is_raw(&self) -> bool {
150         self.text().starts_with('r')
151     }
152     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
153         let contents_range = self.text_range_between_quotes()?;
154         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
155         Some(range + contents_range.start())
156     }
157
158     pub fn value(&self) -> Option<Cow<'_, str>> {
159         if self.is_raw() {
160             let text = self.text();
161             let text =
162                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
163             return Some(Cow::Borrowed(text));
164         }
165
166         let text = self.text();
167         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
168
169         let mut buf = String::new();
170         let mut text_iter = text.chars();
171         let mut has_error = false;
172         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
173             unescaped_char,
174             buf.capacity() == 0,
175         ) {
176             (Ok(c), false) => buf.push(c),
177             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
178             (Ok(c), true) => {
179                 buf.reserve_exact(text.len());
180                 buf.push_str(&text[..char_range.start]);
181                 buf.push(c);
182             }
183             (Err(_), _) => has_error = true,
184         });
185
186         match (has_error, buf.capacity() == 0) {
187             (true, _) => None,
188             (false, true) => Some(Cow::Borrowed(text)),
189             (false, false) => Some(Cow::Owned(buf)),
190         }
191     }
192
193     pub fn quote_offsets(&self) -> Option<QuoteOffsets> {
194         let text = self.text();
195         let offsets = QuoteOffsets::new(text)?;
196         let o = self.syntax().text_range().start();
197         let offsets = QuoteOffsets {
198             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
199             contents: offsets.contents + o,
200         };
201         Some(offsets)
202     }
203     pub fn text_range_between_quotes(&self) -> Option<TextRange> {
204         self.quote_offsets().map(|it| it.contents)
205     }
206     pub fn open_quote_text_range(&self) -> Option<TextRange> {
207         self.quote_offsets().map(|it| it.quotes.0)
208     }
209     pub fn close_quote_text_range(&self) -> Option<TextRange> {
210         self.quote_offsets().map(|it| it.quotes.1)
211     }
212 }
213
214 impl ast::ByteString {
215     pub fn is_raw(&self) -> bool {
216         self.text().starts_with("br")
217     }
218 }
219
/// Kind of a lexical piece inside a `{...}` format placeholder, as reported
/// by `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug)]
pub enum FormatSpecifier {
    /// The opening `{`.
    Open,
    /// The closing `}`.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// A run of alphabetic characters, digits and `_` that starts with an
    /// alphabetic character or `_`.
    Identifier,
    /// The `:` that follows the argument position/name.
    Colon,
    /// The character that precedes an alignment character.
    Fill,
    /// One of `<`, `^` or `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// A `0` after the optional sign and `#` that is not followed by `$`.
    Zero,
    /// `$` following an integer or identifier.
    DollarSign,
    /// The `.` that introduces the precision.
    Dot,
    /// `*` directly after the `.`.
    Asterisk,
    /// `?`.
    QuestionMark,
}
237
/// Tokens whose contents can be lexed as a format string.
pub trait HasFormatSpecifier: AstToken {
    /// Returns the characters of the token contents, each paired with the
    /// text range it was read from. An entry holds `Err` when unescaping
    /// failed at that position.
    ///
    /// When this returns `None`, `lex_format_specifier` reports nothing.
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;

    /// Lexes the `{...}` placeholders in the token contents and calls
    /// `callback` with the range and kind of every piece that is recognised,
    /// in source order.
    ///
    /// An opening `{{` is skipped without reporting anything. When a
    /// placeholder does not continue the way this lexer expects, lexing of
    /// that placeholder stops early; the pieces reported up to that point are
    /// kept and scanning resumes with the following characters.
    fn lex_format_specifier<F>(&self, mut callback: F)
    where
        F: FnMut(TextRange, FormatSpecifier),
    {
        let char_ranges = if let Some(char_ranges) = self.char_ranges() {
            char_ranges
        } else {
            return;
        };
        let mut chars = char_ranges.iter().peekable();

        while let Some((range, first_char)) = chars.next() {
            match first_char {
                Ok('{') => {
                    // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
                    if let Some((_, Ok('{'))) = chars.peek() {
                        // Escaped format specifier, `{{`
                        chars.next();
                        continue;
                    }

                    callback(*range, FormatSpecifier::Open);

                    // check for integer/identifier
                    // (a missing or erroneous next char becomes `'\0'` via
                    // `unwrap_or_default` and matches neither arm)
                    match chars
                        .peek()
                        .and_then(|next| next.1.as_ref().ok())
                        .copied()
                        .unwrap_or_default()
                    {
                        '0'..='9' => {
                            // integer
                            read_integer(&mut chars, &mut callback);
                        }
                        c if c == '_' || c.is_alphabetic() => {
                            // identifier
                            read_identifier(&mut chars, &mut callback);
                        }
                        _ => {}
                    }

                    if let Some((_, Ok(':'))) = chars.peek() {
                        skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);

                        // check for fill/align
                        // (look two characters ahead on a clone so that
                        // nothing is consumed yet)
                        let mut cloned = chars.clone().take(2);
                        let first = cloned
                            .next()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default();
                        let second = cloned
                            .next()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default();
                        match second {
                            '<' | '^' | '>' => {
                                // alignment specifier, first char specifies fillment
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Fill,
                                    &mut callback,
                                );
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Align,
                                    &mut callback,
                                );
                            }
                            _ => match first {
                                // alignment without a fill character
                                '<' | '^' | '>' => {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::Align,
                                        &mut callback,
                                    );
                                }
                                _ => {}
                            },
                        }

                        // check for sign
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '+' | '-' => {
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Sign,
                                    &mut callback,
                                );
                            }
                            _ => {}
                        }

                        // check for `#`
                        if let Some((_, Ok('#'))) = chars.peek() {
                            skip_char_and_emit(
                                &mut chars,
                                FormatSpecifier::NumberSign,
                                &mut callback,
                            );
                        }

                        // check for `0`
                        let mut cloned = chars.clone().take(2);
                        let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
                        let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();

                        // A `0` directly followed by `$` is left for the width
                        // step below, which reads it as an integer.
                        if first == Some('0') && second != Some('$') {
                            skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
                        }

                        // width
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '0'..='9' => {
                                read_integer(&mut chars, &mut callback);
                                if let Some((_, Ok('$'))) = chars.peek() {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    );
                                }
                            }
                            c if c == '_' || c.is_alphabetic() => {
                                read_identifier(&mut chars, &mut callback);

                                if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
                                    == Some('?')
                                {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::QuestionMark,
                                        &mut callback,
                                    );
                                }

                                // can be either width (indicated by dollar sign, or type in which case
                                // the next sign has to be `}`)
                                let next =
                                    chars.peek().and_then(|next| next.1.as_ref().ok()).copied();

                                match next {
                                    Some('$') => skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    ),
                                    Some('}') => {
                                        skip_char_and_emit(
                                            &mut chars,
                                            FormatSpecifier::Close,
                                            &mut callback,
                                        );
                                        continue;
                                    }
                                    // anything else: stop lexing this placeholder
                                    _ => continue,
                                };
                            }
                            _ => {}
                        }

                        // precision
                        if let Some((_, Ok('.'))) = chars.peek() {
                            skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);

                            match chars
                                .peek()
                                .and_then(|next| next.1.as_ref().ok())
                                .copied()
                                .unwrap_or_default()
                            {
                                '*' => {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::Asterisk,
                                        &mut callback,
                                    );
                                }
                                '0'..='9' => {
                                    read_integer(&mut chars, &mut callback);
                                    if let Some((_, Ok('$'))) = chars.peek() {
                                        skip_char_and_emit(
                                            &mut chars,
                                            FormatSpecifier::DollarSign,
                                            &mut callback,
                                        );
                                    }
                                }
                                c if c == '_' || c.is_alphabetic() => {
                                    read_identifier(&mut chars, &mut callback);
                                    // an identifier here must be followed by `$`;
                                    // otherwise stop lexing this placeholder
                                    if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
                                        != Some('$')
                                    {
                                        continue;
                                    }
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    );
                                }
                                _ => {
                                    // `.` followed by something unexpected:
                                    // stop lexing this placeholder
                                    continue;
                                }
                            }
                        }

                        // type
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '?' => {
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::QuestionMark,
                                    &mut callback,
                                );
                            }
                            c if c == '_' || c.is_alphabetic() => {
                                read_identifier(&mut chars, &mut callback);

                                if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
                                    == Some('?')
                                {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::QuestionMark,
                                        &mut callback,
                                    );
                                }
                            }
                            _ => {}
                        }
                    }

                    if let Some((_, Ok('}'))) = chars.peek() {
                        skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
                    } else {
                        continue;
                    }
                }
                _ => {
                    // Not the start of a placeholder: skip ahead until the
                    // next `{` (left for the outer loop), an erroneous
                    // character, or the end of input.
                    while let Some((_, Ok(next_char))) = chars.peek() {
                        match next_char {
                            '{' => break,
                            _ => {}
                        }
                        chars.next();
                    }
                }
            };
        }

        /// Consumes the next item and reports its range as `emit`.
        ///
        /// Panics if `chars` is exhausted.
        fn skip_char_and_emit<'a, I, F>(
            chars: &mut std::iter::Peekable<I>,
            emit: FormatSpecifier,
            callback: &mut F,
        ) where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (range, _) = chars.next().unwrap();
            callback(*range, emit);
        }

        /// Consumes a run of ASCII digits and reports the range covering all
        /// of them as `FormatSpecifier::Integer`.
        ///
        /// Panics unless the next item exists and is an ASCII digit.
        fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
        where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (mut range, c) = chars.next().unwrap();
            assert!(c.as_ref().unwrap().is_ascii_digit());
            while let Some((r, Ok(next_char))) = chars.peek() {
                if next_char.is_ascii_digit() {
                    chars.next();
                    range = range.cover(*r);
                } else {
                    break;
                }
            }
            callback(range, FormatSpecifier::Integer);
        }

        /// Consumes an identifier (`_`, ASCII digits and alphabetic
        /// characters) and reports the range covering it as
        /// `FormatSpecifier::Identifier`.
        ///
        /// Panics unless the next item exists and is `_` or alphabetic.
        fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
        where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (mut range, c) = chars.next().unwrap();
            assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
            while let Some((r, Ok(next_char))) = chars.peek() {
                if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
                    chars.next();
                    range = range.cover(*r);
                } else {
                    break;
                }
            }
            callback(range, FormatSpecifier::Identifier);
        }
    }
}
559
560 impl HasFormatSpecifier for ast::String {
561     fn char_ranges(
562         &self,
563     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
564         let text = self.text();
565         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
566         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
567
568         let mut res = Vec::with_capacity(text.len());
569         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
570             res.push((
571                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
572                     + offset,
573                 unescaped_char,
574             ))
575         });
576
577         Some(res)
578     }
579 }
580
581 impl ast::IntNumber {
582     pub fn radix(&self) -> Radix {
583         match self.text().get(..2).unwrap_or_default() {
584             "0b" => Radix::Binary,
585             "0o" => Radix::Octal,
586             "0x" => Radix::Hexadecimal,
587             _ => Radix::Decimal,
588         }
589     }
590
591     pub fn value(&self) -> Option<u128> {
592         let token = self.syntax();
593
594         let mut text = token.text();
595         if let Some(suffix) = self.suffix() {
596             text = &text[..text.len() - suffix.len()]
597         }
598
599         let radix = self.radix();
600         text = &text[radix.prefix_len()..];
601
602         let buf;
603         if text.contains('_') {
604             buf = text.replace('_', "");
605             text = buf.as_str();
606         };
607
608         let value = u128::from_str_radix(text, radix as u32).ok()?;
609         Some(value)
610     }
611
612     pub fn suffix(&self) -> Option<&str> {
613         let text = self.text();
614         let radix = self.radix();
615         let mut indices = text.char_indices();
616         if radix != Radix::Decimal {
617             indices.next()?;
618             indices.next()?;
619         }
620         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
621             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
622             _ => |(_, c)| c.is_ascii_alphabetic(),
623         };
624         let (suffix_start, _) = indices.find(is_suffix_start)?;
625         Some(&text[suffix_start..])
626     }
627 }
628
629 impl ast::FloatNumber {
630     pub fn suffix(&self) -> Option<&str> {
631         let text = self.text();
632         let mut indices = text.char_indices();
633         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
634         if c == 'e' || c == 'E' {
635             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
636         }
637         Some(&text[suffix_start..])
638     }
639 }
640
/// Base of an integer literal. Each discriminant equals the numeric base.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, ordered by increasing base.
    pub const ALL: &'static [Radix] =
        &[Self::Binary, Self::Octal, Self::Decimal, Self::Hexadecimal];

    /// Length in bytes of the literal prefix for this radix: none for
    /// decimal, two for every other radix.
    const fn prefix_len(&self) -> usize {
        if matches!(self, Self::Decimal) {
            0
        } else {
            2
        }
    }
}
660
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that the float literal `lit` has the `expected` suffix.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the integer literal `lit` has the `expected` suffix.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts that the resulting string
    /// literal has the `expected` value.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = ast::String { syntax: make::tokens::literal(&format!("\"{}\"", lit)) };
        let value = token.value();
        assert_eq!(value.as_deref(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        let cases: &[(&str, Option<&str>)] = &[
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases {
            check_string_value(lit, expected);
        }
    }
}