]> git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #7907
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::{
4     borrow::Cow,
5     convert::{TryFrom, TryInto},
6 };
7
8 use rustc_lexer::unescape::{unescape_literal, Mode};
9
10 use crate::{
11     ast::{self, AstToken},
12     TextRange, TextSize,
13 };
14
15 impl ast::Comment {
16     pub fn kind(&self) -> CommentKind {
17         CommentKind::from_text(self.text())
18     }
19
20     pub fn is_inner(&self) -> bool {
21         self.kind().doc == Some(CommentPlacement::Inner)
22     }
23
24     pub fn is_outer(&self) -> bool {
25         self.kind().doc == Some(CommentPlacement::Outer)
26     }
27
28     pub fn prefix(&self) -> &'static str {
29         let &(prefix, _kind) = CommentKind::BY_PREFIX
30             .iter()
31             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
32             .unwrap();
33         prefix
34     }
35
36     /// Returns the textual content of a doc comment node as a single string with prefix and suffix
37     /// removed.
38     pub fn doc_comment(&self) -> Option<&str> {
39         let kind = self.kind();
40         match kind {
41             CommentKind { shape, doc: Some(_) } => {
42                 let prefix = kind.prefix();
43                 let text = &self.text()[prefix.len()..];
44                 let text = if shape == CommentShape::Block {
45                     text.strip_suffix("*/").unwrap_or(text)
46                 } else {
47                     text
48                 };
49                 Some(text)
50             }
51             _ => None,
52         }
53     }
54 }
55
56 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
57 pub struct CommentKind {
58     pub shape: CommentShape,
59     pub doc: Option<CommentPlacement>,
60 }
61
/// Syntactic form of a comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentShape {
    /// A `//`-style comment.
    Line,
    /// A `/* */`-style comment.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}
77
/// Placement of a doc comment, as encoded by its prefix.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentPlacement {
    /// `//!` or `/*!` doc comments.
    Inner,
    /// `///` or `/**` doc comments.
    Outer,
}
83
84 impl CommentKind {
85     const BY_PREFIX: [(&'static str, CommentKind); 9] = [
86         ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
87         ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
88         ("////", CommentKind { shape: CommentShape::Line, doc: None }),
89         ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
90         ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
91         ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
92         ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
93         ("//", CommentKind { shape: CommentShape::Line, doc: None }),
94         ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
95     ];
96
97     pub(crate) fn from_text(text: &str) -> CommentKind {
98         let &(_prefix, kind) = CommentKind::BY_PREFIX
99             .iter()
100             .find(|&(prefix, _kind)| text.starts_with(prefix))
101             .unwrap();
102         kind
103     }
104
105     pub fn prefix(&self) -> &'static str {
106         let &(prefix, _) =
107             CommentKind::BY_PREFIX.iter().rev().find(|(_, kind)| kind == self).unwrap();
108         prefix
109     }
110 }
111
112 impl ast::Whitespace {
113     pub fn spans_multiple_lines(&self) -> bool {
114         let text = self.text();
115         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
116     }
117 }
118
119 pub struct QuoteOffsets {
120     pub quotes: (TextRange, TextRange),
121     pub contents: TextRange,
122 }
123
124 impl QuoteOffsets {
125     fn new(literal: &str) -> Option<QuoteOffsets> {
126         let left_quote = literal.find('"')?;
127         let right_quote = literal.rfind('"')?;
128         if left_quote == right_quote {
129             // `literal` only contains one quote
130             return None;
131         }
132
133         let start = TextSize::from(0);
134         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
135         let right_quote = TextSize::try_from(right_quote).unwrap();
136         let end = TextSize::of(literal);
137
138         let res = QuoteOffsets {
139             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
140             contents: TextRange::new(left_quote, right_quote),
141         };
142         Some(res)
143     }
144 }
145
146 impl ast::String {
147     pub fn is_raw(&self) -> bool {
148         self.text().starts_with('r')
149     }
150     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
151         let contents_range = self.text_range_between_quotes()?;
152         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
153         Some(range + contents_range.start())
154     }
155
156     pub fn value(&self) -> Option<Cow<'_, str>> {
157         if self.is_raw() {
158             let text = self.text();
159             let text =
160                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
161             return Some(Cow::Borrowed(text));
162         }
163
164         let text = self.text();
165         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
166
167         let mut buf = String::new();
168         let mut text_iter = text.chars();
169         let mut has_error = false;
170         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
171             unescaped_char,
172             buf.capacity() == 0,
173         ) {
174             (Ok(c), false) => buf.push(c),
175             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
176             (Ok(c), true) => {
177                 buf.reserve_exact(text.len());
178                 buf.push_str(&text[..char_range.start]);
179                 buf.push(c);
180             }
181             (Err(_), _) => has_error = true,
182         });
183
184         match (has_error, buf.capacity() == 0) {
185             (true, _) => None,
186             (false, true) => Some(Cow::Borrowed(text)),
187             (false, false) => Some(Cow::Owned(buf)),
188         }
189     }
190
191     pub fn quote_offsets(&self) -> Option<QuoteOffsets> {
192         let text = self.text();
193         let offsets = QuoteOffsets::new(text)?;
194         let o = self.syntax().text_range().start();
195         let offsets = QuoteOffsets {
196             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
197             contents: offsets.contents + o,
198         };
199         Some(offsets)
200     }
201     pub fn text_range_between_quotes(&self) -> Option<TextRange> {
202         self.quote_offsets().map(|it| it.contents)
203     }
204     pub fn open_quote_text_range(&self) -> Option<TextRange> {
205         self.quote_offsets().map(|it| it.quotes.0)
206     }
207     pub fn close_quote_text_range(&self) -> Option<TextRange> {
208         self.quote_offsets().map(|it| it.quotes.1)
209     }
210 }
211
212 impl ast::ByteString {
213     pub fn is_raw(&self) -> bool {
214         self.text().starts_with("br")
215     }
216 }
217
/// Pieces of a format-string placeholder reported by
/// `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug)]
pub enum FormatSpecifier {
    /// The opening `{`.
    Open,
    /// The closing `}`.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// A run of alphabetic characters, digits and `_` that does not start with a digit.
    Identifier,
    /// The `:` that introduces the format spec.
    Colon,
    /// The fill character preceding an alignment character.
    Fill,
    /// One of `<`, `^`, `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// `$`.
    DollarSign,
    /// The `.` that introduces the precision.
    Dot,
    /// `*` used as precision.
    Asterisk,
    /// `?`.
    QuestionMark,
}
235
236 pub trait HasFormatSpecifier: AstToken {
237     fn char_ranges(
238         &self,
239     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
240
241     fn lex_format_specifier<F>(&self, mut callback: F)
242     where
243         F: FnMut(TextRange, FormatSpecifier),
244     {
245         let char_ranges = if let Some(char_ranges) = self.char_ranges() {
246             char_ranges
247         } else {
248             return;
249         };
250         let mut chars = char_ranges.iter().peekable();
251
252         while let Some((range, first_char)) = chars.next() {
253             match first_char {
254                 Ok('{') => {
255                     // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
256                     if let Some((_, Ok('{'))) = chars.peek() {
257                         // Escaped format specifier, `{{`
258                         chars.next();
259                         continue;
260                     }
261
262                     callback(*range, FormatSpecifier::Open);
263
264                     // check for integer/identifier
265                     match chars
266                         .peek()
267                         .and_then(|next| next.1.as_ref().ok())
268                         .copied()
269                         .unwrap_or_default()
270                     {
271                         '0'..='9' => {
272                             // integer
273                             read_integer(&mut chars, &mut callback);
274                         }
275                         c if c == '_' || c.is_alphabetic() => {
276                             // identifier
277                             read_identifier(&mut chars, &mut callback);
278                         }
279                         _ => {}
280                     }
281
282                     if let Some((_, Ok(':'))) = chars.peek() {
283                         skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
284
285                         // check for fill/align
286                         let mut cloned = chars.clone().take(2);
287                         let first = cloned
288                             .next()
289                             .and_then(|next| next.1.as_ref().ok())
290                             .copied()
291                             .unwrap_or_default();
292                         let second = cloned
293                             .next()
294                             .and_then(|next| next.1.as_ref().ok())
295                             .copied()
296                             .unwrap_or_default();
297                         match second {
298                             '<' | '^' | '>' => {
299                                 // alignment specifier, first char specifies fillment
300                                 skip_char_and_emit(
301                                     &mut chars,
302                                     FormatSpecifier::Fill,
303                                     &mut callback,
304                                 );
305                                 skip_char_and_emit(
306                                     &mut chars,
307                                     FormatSpecifier::Align,
308                                     &mut callback,
309                                 );
310                             }
311                             _ => match first {
312                                 '<' | '^' | '>' => {
313                                     skip_char_and_emit(
314                                         &mut chars,
315                                         FormatSpecifier::Align,
316                                         &mut callback,
317                                     );
318                                 }
319                                 _ => {}
320                             },
321                         }
322
323                         // check for sign
324                         match chars
325                             .peek()
326                             .and_then(|next| next.1.as_ref().ok())
327                             .copied()
328                             .unwrap_or_default()
329                         {
330                             '+' | '-' => {
331                                 skip_char_and_emit(
332                                     &mut chars,
333                                     FormatSpecifier::Sign,
334                                     &mut callback,
335                                 );
336                             }
337                             _ => {}
338                         }
339
340                         // check for `#`
341                         if let Some((_, Ok('#'))) = chars.peek() {
342                             skip_char_and_emit(
343                                 &mut chars,
344                                 FormatSpecifier::NumberSign,
345                                 &mut callback,
346                             );
347                         }
348
349                         // check for `0`
350                         let mut cloned = chars.clone().take(2);
351                         let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
352                         let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
353
354                         if first == Some('0') && second != Some('$') {
355                             skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
356                         }
357
358                         // width
359                         match chars
360                             .peek()
361                             .and_then(|next| next.1.as_ref().ok())
362                             .copied()
363                             .unwrap_or_default()
364                         {
365                             '0'..='9' => {
366                                 read_integer(&mut chars, &mut callback);
367                                 if let Some((_, Ok('$'))) = chars.peek() {
368                                     skip_char_and_emit(
369                                         &mut chars,
370                                         FormatSpecifier::DollarSign,
371                                         &mut callback,
372                                     );
373                                 }
374                             }
375                             c if c == '_' || c.is_alphabetic() => {
376                                 read_identifier(&mut chars, &mut callback);
377
378                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
379                                     == Some('?')
380                                 {
381                                     skip_char_and_emit(
382                                         &mut chars,
383                                         FormatSpecifier::QuestionMark,
384                                         &mut callback,
385                                     );
386                                 }
387
388                                 // can be either width (indicated by dollar sign, or type in which case
389                                 // the next sign has to be `}`)
390                                 let next =
391                                     chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
392
393                                 match next {
394                                     Some('$') => skip_char_and_emit(
395                                         &mut chars,
396                                         FormatSpecifier::DollarSign,
397                                         &mut callback,
398                                     ),
399                                     Some('}') => {
400                                         skip_char_and_emit(
401                                             &mut chars,
402                                             FormatSpecifier::Close,
403                                             &mut callback,
404                                         );
405                                         continue;
406                                     }
407                                     _ => continue,
408                                 };
409                             }
410                             _ => {}
411                         }
412
413                         // precision
414                         if let Some((_, Ok('.'))) = chars.peek() {
415                             skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
416
417                             match chars
418                                 .peek()
419                                 .and_then(|next| next.1.as_ref().ok())
420                                 .copied()
421                                 .unwrap_or_default()
422                             {
423                                 '*' => {
424                                     skip_char_and_emit(
425                                         &mut chars,
426                                         FormatSpecifier::Asterisk,
427                                         &mut callback,
428                                     );
429                                 }
430                                 '0'..='9' => {
431                                     read_integer(&mut chars, &mut callback);
432                                     if let Some((_, Ok('$'))) = chars.peek() {
433                                         skip_char_and_emit(
434                                             &mut chars,
435                                             FormatSpecifier::DollarSign,
436                                             &mut callback,
437                                         );
438                                     }
439                                 }
440                                 c if c == '_' || c.is_alphabetic() => {
441                                     read_identifier(&mut chars, &mut callback);
442                                     if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
443                                         != Some('$')
444                                     {
445                                         continue;
446                                     }
447                                     skip_char_and_emit(
448                                         &mut chars,
449                                         FormatSpecifier::DollarSign,
450                                         &mut callback,
451                                     );
452                                 }
453                                 _ => {
454                                     continue;
455                                 }
456                             }
457                         }
458
459                         // type
460                         match chars
461                             .peek()
462                             .and_then(|next| next.1.as_ref().ok())
463                             .copied()
464                             .unwrap_or_default()
465                         {
466                             '?' => {
467                                 skip_char_and_emit(
468                                     &mut chars,
469                                     FormatSpecifier::QuestionMark,
470                                     &mut callback,
471                                 );
472                             }
473                             c if c == '_' || c.is_alphabetic() => {
474                                 read_identifier(&mut chars, &mut callback);
475
476                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
477                                     == Some('?')
478                                 {
479                                     skip_char_and_emit(
480                                         &mut chars,
481                                         FormatSpecifier::QuestionMark,
482                                         &mut callback,
483                                     );
484                                 }
485                             }
486                             _ => {}
487                         }
488                     }
489
490                     if let Some((_, Ok('}'))) = chars.peek() {
491                         skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
492                     } else {
493                         continue;
494                     }
495                 }
496                 _ => {
497                     while let Some((_, Ok(next_char))) = chars.peek() {
498                         if next_char == &'{' {
499                             break;
500                         }
501                         chars.next();
502                     }
503                 }
504             };
505         }
506
507         fn skip_char_and_emit<'a, I, F>(
508             chars: &mut std::iter::Peekable<I>,
509             emit: FormatSpecifier,
510             callback: &mut F,
511         ) where
512             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
513             F: FnMut(TextRange, FormatSpecifier),
514         {
515             let (range, _) = chars.next().unwrap();
516             callback(*range, emit);
517         }
518
519         fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
520         where
521             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
522             F: FnMut(TextRange, FormatSpecifier),
523         {
524             let (mut range, c) = chars.next().unwrap();
525             assert!(c.as_ref().unwrap().is_ascii_digit());
526             while let Some((r, Ok(next_char))) = chars.peek() {
527                 if next_char.is_ascii_digit() {
528                     chars.next();
529                     range = range.cover(*r);
530                 } else {
531                     break;
532                 }
533             }
534             callback(range, FormatSpecifier::Integer);
535         }
536
537         fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
538         where
539             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
540             F: FnMut(TextRange, FormatSpecifier),
541         {
542             let (mut range, c) = chars.next().unwrap();
543             assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
544             while let Some((r, Ok(next_char))) = chars.peek() {
545                 if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
546                     chars.next();
547                     range = range.cover(*r);
548                 } else {
549                     break;
550                 }
551             }
552             callback(range, FormatSpecifier::Identifier);
553         }
554     }
555 }
556
557 impl HasFormatSpecifier for ast::String {
558     fn char_ranges(
559         &self,
560     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
561         let text = self.text();
562         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
563         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
564
565         let mut res = Vec::with_capacity(text.len());
566         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
567             res.push((
568                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
569                     + offset,
570                 unescaped_char,
571             ))
572         });
573
574         Some(res)
575     }
576 }
577
578 impl ast::IntNumber {
579     pub fn radix(&self) -> Radix {
580         match self.text().get(..2).unwrap_or_default() {
581             "0b" => Radix::Binary,
582             "0o" => Radix::Octal,
583             "0x" => Radix::Hexadecimal,
584             _ => Radix::Decimal,
585         }
586     }
587
588     pub fn value(&self) -> Option<u128> {
589         let token = self.syntax();
590
591         let mut text = token.text();
592         if let Some(suffix) = self.suffix() {
593             text = &text[..text.len() - suffix.len()]
594         }
595
596         let radix = self.radix();
597         text = &text[radix.prefix_len()..];
598
599         let buf;
600         if text.contains('_') {
601             buf = text.replace('_', "");
602             text = buf.as_str();
603         };
604
605         let value = u128::from_str_radix(text, radix as u32).ok()?;
606         Some(value)
607     }
608
609     pub fn suffix(&self) -> Option<&str> {
610         let text = self.text();
611         let radix = self.radix();
612         let mut indices = text.char_indices();
613         if radix != Radix::Decimal {
614             indices.next()?;
615             indices.next()?;
616         }
617         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
618             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
619             _ => |(_, c)| c.is_ascii_alphabetic(),
620         };
621         let (suffix_start, _) = indices.find(is_suffix_start)?;
622         Some(&text[suffix_start..])
623     }
624 }
625
626 impl ast::FloatNumber {
627     pub fn suffix(&self) -> Option<&str> {
628         let text = self.text();
629         let mut indices = text.char_indices();
630         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
631         if c == 'e' || c == 'E' {
632             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
633         }
634         Some(&text[suffix_start..])
635     }
636 }
637
/// Radix of an integer literal; the discriminant is the numeric base.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];

    /// Length in bytes of the literal prefix (`0b`, `0o`, `0x`); decimal has none.
    const fn prefix_len(&self) -> usize {
        match self {
            Radix::Binary | Radix::Octal | Radix::Hexadecimal => 2,
            Radix::Decimal => 0,
        }
    }
}
657
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that `lit`, taken as a float literal token, has the `expected` suffix.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that `lit`, taken as an integer literal token, has the `expected` suffix.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts the unescaped value of the resulting string
    /// literal token.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let quoted = format!("\"{}\"", lit);
        let token = ast::String { syntax: make::tokens::literal(&quoted) };
        assert_eq!(token.value().as_deref(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        let cases: &[(&str, Option<&str>)] = &[
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases {
            check_string_value(lit, expected);
        }
    }
}