git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #8082
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::{
4     borrow::Cow,
5     convert::{TryFrom, TryInto},
6 };
7
8 use rustc_lexer::unescape::{unescape_literal, Mode};
9
10 use crate::{
11     ast::{self, AstToken},
12     TextRange, TextSize,
13 };
14
15 impl ast::Comment {
16     pub fn kind(&self) -> CommentKind {
17         CommentKind::from_text(self.text())
18     }
19
20     pub fn is_inner(&self) -> bool {
21         self.kind().doc == Some(CommentPlacement::Inner)
22     }
23
24     pub fn is_outer(&self) -> bool {
25         self.kind().doc == Some(CommentPlacement::Outer)
26     }
27
28     pub fn prefix(&self) -> &'static str {
29         let &(prefix, _kind) = CommentKind::BY_PREFIX
30             .iter()
31             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
32             .unwrap();
33         prefix
34     }
35
36     /// Returns the textual content of a doc comment node as a single string with prefix and suffix
37     /// removed.
38     pub fn doc_comment(&self) -> Option<&str> {
39         let kind = self.kind();
40         match kind {
41             CommentKind { shape, doc: Some(_) } => {
42                 let prefix = kind.prefix();
43                 let text = &self.text()[prefix.len()..];
44                 let text = if shape == CommentShape::Block {
45                     text.strip_suffix("*/").unwrap_or(text)
46                 } else {
47                     text
48                 };
49                 Some(text)
50             }
51             _ => None,
52         }
53     }
54 }
55
56 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
57 pub struct CommentKind {
58     pub shape: CommentShape,
59     pub doc: Option<CommentPlacement>,
60 }
61
/// Syntactic form of a comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentShape {
    /// A `//`-style comment.
    Line,
    /// A `/* ... */`-style comment.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}
77
/// Distinguishes the two flavours of doc comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentPlacement {
    /// Inner doc comment (`//!` or `/*!`).
    Inner,
    /// Outer doc comment (`///` or `/**`).
    Outer,
}
83
84 impl CommentKind {
85     const BY_PREFIX: [(&'static str, CommentKind); 9] = [
86         ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
87         ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
88         ("////", CommentKind { shape: CommentShape::Line, doc: None }),
89         ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
90         ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
91         ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
92         ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
93         ("//", CommentKind { shape: CommentShape::Line, doc: None }),
94         ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
95     ];
96
97     pub(crate) fn from_text(text: &str) -> CommentKind {
98         let &(_prefix, kind) = CommentKind::BY_PREFIX
99             .iter()
100             .find(|&(prefix, _kind)| text.starts_with(prefix))
101             .unwrap();
102         kind
103     }
104
105     fn prefix(&self) -> &'static str {
106         let &(prefix, _) = CommentKind::BY_PREFIX.iter().find(|(_, kind)| kind == self).unwrap();
107         prefix
108     }
109 }
110
111 impl ast::Whitespace {
112     pub fn spans_multiple_lines(&self) -> bool {
113         let text = self.text();
114         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
115     }
116 }
117
118 pub struct QuoteOffsets {
119     pub quotes: (TextRange, TextRange),
120     pub contents: TextRange,
121 }
122
123 impl QuoteOffsets {
124     fn new(literal: &str) -> Option<QuoteOffsets> {
125         let left_quote = literal.find('"')?;
126         let right_quote = literal.rfind('"')?;
127         if left_quote == right_quote {
128             // `literal` only contains one quote
129             return None;
130         }
131
132         let start = TextSize::from(0);
133         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
134         let right_quote = TextSize::try_from(right_quote).unwrap();
135         let end = TextSize::of(literal);
136
137         let res = QuoteOffsets {
138             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
139             contents: TextRange::new(left_quote, right_quote),
140         };
141         Some(res)
142     }
143 }
144
145 impl ast::String {
146     pub fn is_raw(&self) -> bool {
147         self.text().starts_with('r')
148     }
149     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
150         let contents_range = self.text_range_between_quotes()?;
151         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
152         Some(range + contents_range.start())
153     }
154
155     pub fn value(&self) -> Option<Cow<'_, str>> {
156         if self.is_raw() {
157             let text = self.text();
158             let text =
159                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
160             return Some(Cow::Borrowed(text));
161         }
162
163         let text = self.text();
164         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
165
166         let mut buf = String::new();
167         let mut text_iter = text.chars();
168         let mut has_error = false;
169         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
170             unescaped_char,
171             buf.capacity() == 0,
172         ) {
173             (Ok(c), false) => buf.push(c),
174             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
175             (Ok(c), true) => {
176                 buf.reserve_exact(text.len());
177                 buf.push_str(&text[..char_range.start]);
178                 buf.push(c);
179             }
180             (Err(_), _) => has_error = true,
181         });
182
183         match (has_error, buf.capacity() == 0) {
184             (true, _) => None,
185             (false, true) => Some(Cow::Borrowed(text)),
186             (false, false) => Some(Cow::Owned(buf)),
187         }
188     }
189
190     pub fn quote_offsets(&self) -> Option<QuoteOffsets> {
191         let text = self.text();
192         let offsets = QuoteOffsets::new(text)?;
193         let o = self.syntax().text_range().start();
194         let offsets = QuoteOffsets {
195             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
196             contents: offsets.contents + o,
197         };
198         Some(offsets)
199     }
200     pub fn text_range_between_quotes(&self) -> Option<TextRange> {
201         self.quote_offsets().map(|it| it.contents)
202     }
203     pub fn open_quote_text_range(&self) -> Option<TextRange> {
204         self.quote_offsets().map(|it| it.quotes.0)
205     }
206     pub fn close_quote_text_range(&self) -> Option<TextRange> {
207         self.quote_offsets().map(|it| it.quotes.1)
208     }
209 }
210
211 impl ast::ByteString {
212     pub fn is_raw(&self) -> bool {
213         self.text().starts_with("br")
214     }
215 }
216
/// Kinds of tokens reported while lexing a format string.
#[derive(Debug)]
pub enum FormatSpecifier {
    /// The `{` that starts a format specifier.
    Open,
    /// The `}` that ends a format specifier.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// An identifier.
    Identifier,
    /// The `:` after the argument.
    Colon,
    /// The fill character preceding an alignment.
    Fill,
    /// One of `<`, `^` or `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// The `$` that follows a width or precision argument.
    DollarSign,
    /// The `.` that introduces the precision.
    Dot,
    /// `*` used as the precision.
    Asterisk,
    /// `?`.
    QuestionMark,
}
234
235 pub trait HasFormatSpecifier: AstToken {
236     fn char_ranges(
237         &self,
238     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
239
240     fn lex_format_specifier<F>(&self, mut callback: F)
241     where
242         F: FnMut(TextRange, FormatSpecifier),
243     {
244         let char_ranges = if let Some(char_ranges) = self.char_ranges() {
245             char_ranges
246         } else {
247             return;
248         };
249         let mut chars = char_ranges.iter().peekable();
250
251         while let Some((range, first_char)) = chars.next() {
252             match first_char {
253                 Ok('{') => {
254                     // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
255                     if let Some((_, Ok('{'))) = chars.peek() {
256                         // Escaped format specifier, `{{`
257                         chars.next();
258                         continue;
259                     }
260
261                     callback(*range, FormatSpecifier::Open);
262
263                     // check for integer/identifier
264                     match chars
265                         .peek()
266                         .and_then(|next| next.1.as_ref().ok())
267                         .copied()
268                         .unwrap_or_default()
269                     {
270                         '0'..='9' => {
271                             // integer
272                             read_integer(&mut chars, &mut callback);
273                         }
274                         c if c == '_' || c.is_alphabetic() => {
275                             // identifier
276                             read_identifier(&mut chars, &mut callback);
277                         }
278                         _ => {}
279                     }
280
281                     if let Some((_, Ok(':'))) = chars.peek() {
282                         skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
283
284                         // check for fill/align
285                         let mut cloned = chars.clone().take(2);
286                         let first = cloned
287                             .next()
288                             .and_then(|next| next.1.as_ref().ok())
289                             .copied()
290                             .unwrap_or_default();
291                         let second = cloned
292                             .next()
293                             .and_then(|next| next.1.as_ref().ok())
294                             .copied()
295                             .unwrap_or_default();
296                         match second {
297                             '<' | '^' | '>' => {
298                                 // alignment specifier, first char specifies fillment
299                                 skip_char_and_emit(
300                                     &mut chars,
301                                     FormatSpecifier::Fill,
302                                     &mut callback,
303                                 );
304                                 skip_char_and_emit(
305                                     &mut chars,
306                                     FormatSpecifier::Align,
307                                     &mut callback,
308                                 );
309                             }
310                             _ => match first {
311                                 '<' | '^' | '>' => {
312                                     skip_char_and_emit(
313                                         &mut chars,
314                                         FormatSpecifier::Align,
315                                         &mut callback,
316                                     );
317                                 }
318                                 _ => {}
319                             },
320                         }
321
322                         // check for sign
323                         match chars
324                             .peek()
325                             .and_then(|next| next.1.as_ref().ok())
326                             .copied()
327                             .unwrap_or_default()
328                         {
329                             '+' | '-' => {
330                                 skip_char_and_emit(
331                                     &mut chars,
332                                     FormatSpecifier::Sign,
333                                     &mut callback,
334                                 );
335                             }
336                             _ => {}
337                         }
338
339                         // check for `#`
340                         if let Some((_, Ok('#'))) = chars.peek() {
341                             skip_char_and_emit(
342                                 &mut chars,
343                                 FormatSpecifier::NumberSign,
344                                 &mut callback,
345                             );
346                         }
347
348                         // check for `0`
349                         let mut cloned = chars.clone().take(2);
350                         let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
351                         let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
352
353                         if first == Some('0') && second != Some('$') {
354                             skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
355                         }
356
357                         // width
358                         match chars
359                             .peek()
360                             .and_then(|next| next.1.as_ref().ok())
361                             .copied()
362                             .unwrap_or_default()
363                         {
364                             '0'..='9' => {
365                                 read_integer(&mut chars, &mut callback);
366                                 if let Some((_, Ok('$'))) = chars.peek() {
367                                     skip_char_and_emit(
368                                         &mut chars,
369                                         FormatSpecifier::DollarSign,
370                                         &mut callback,
371                                     );
372                                 }
373                             }
374                             c if c == '_' || c.is_alphabetic() => {
375                                 read_identifier(&mut chars, &mut callback);
376
377                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
378                                     == Some('?')
379                                 {
380                                     skip_char_and_emit(
381                                         &mut chars,
382                                         FormatSpecifier::QuestionMark,
383                                         &mut callback,
384                                     );
385                                 }
386
387                                 // can be either width (indicated by dollar sign, or type in which case
388                                 // the next sign has to be `}`)
389                                 let next =
390                                     chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
391
392                                 match next {
393                                     Some('$') => skip_char_and_emit(
394                                         &mut chars,
395                                         FormatSpecifier::DollarSign,
396                                         &mut callback,
397                                     ),
398                                     Some('}') => {
399                                         skip_char_and_emit(
400                                             &mut chars,
401                                             FormatSpecifier::Close,
402                                             &mut callback,
403                                         );
404                                         continue;
405                                     }
406                                     _ => continue,
407                                 };
408                             }
409                             _ => {}
410                         }
411
412                         // precision
413                         if let Some((_, Ok('.'))) = chars.peek() {
414                             skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
415
416                             match chars
417                                 .peek()
418                                 .and_then(|next| next.1.as_ref().ok())
419                                 .copied()
420                                 .unwrap_or_default()
421                             {
422                                 '*' => {
423                                     skip_char_and_emit(
424                                         &mut chars,
425                                         FormatSpecifier::Asterisk,
426                                         &mut callback,
427                                     );
428                                 }
429                                 '0'..='9' => {
430                                     read_integer(&mut chars, &mut callback);
431                                     if let Some((_, Ok('$'))) = chars.peek() {
432                                         skip_char_and_emit(
433                                             &mut chars,
434                                             FormatSpecifier::DollarSign,
435                                             &mut callback,
436                                         );
437                                     }
438                                 }
439                                 c if c == '_' || c.is_alphabetic() => {
440                                     read_identifier(&mut chars, &mut callback);
441                                     if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
442                                         != Some('$')
443                                     {
444                                         continue;
445                                     }
446                                     skip_char_and_emit(
447                                         &mut chars,
448                                         FormatSpecifier::DollarSign,
449                                         &mut callback,
450                                     );
451                                 }
452                                 _ => {
453                                     continue;
454                                 }
455                             }
456                         }
457
458                         // type
459                         match chars
460                             .peek()
461                             .and_then(|next| next.1.as_ref().ok())
462                             .copied()
463                             .unwrap_or_default()
464                         {
465                             '?' => {
466                                 skip_char_and_emit(
467                                     &mut chars,
468                                     FormatSpecifier::QuestionMark,
469                                     &mut callback,
470                                 );
471                             }
472                             c if c == '_' || c.is_alphabetic() => {
473                                 read_identifier(&mut chars, &mut callback);
474
475                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
476                                     == Some('?')
477                                 {
478                                     skip_char_and_emit(
479                                         &mut chars,
480                                         FormatSpecifier::QuestionMark,
481                                         &mut callback,
482                                     );
483                                 }
484                             }
485                             _ => {}
486                         }
487                     }
488
489                     if let Some((_, Ok('}'))) = chars.peek() {
490                         skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
491                     } else {
492                         continue;
493                     }
494                 }
495                 _ => {
496                     while let Some((_, Ok(next_char))) = chars.peek() {
497                         match next_char {
498                             '{' => break,
499                             _ => {}
500                         }
501                         chars.next();
502                     }
503                 }
504             };
505         }
506
507         fn skip_char_and_emit<'a, I, F>(
508             chars: &mut std::iter::Peekable<I>,
509             emit: FormatSpecifier,
510             callback: &mut F,
511         ) where
512             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
513             F: FnMut(TextRange, FormatSpecifier),
514         {
515             let (range, _) = chars.next().unwrap();
516             callback(*range, emit);
517         }
518
519         fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
520         where
521             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
522             F: FnMut(TextRange, FormatSpecifier),
523         {
524             let (mut range, c) = chars.next().unwrap();
525             assert!(c.as_ref().unwrap().is_ascii_digit());
526             while let Some((r, Ok(next_char))) = chars.peek() {
527                 if next_char.is_ascii_digit() {
528                     chars.next();
529                     range = range.cover(*r);
530                 } else {
531                     break;
532                 }
533             }
534             callback(range, FormatSpecifier::Integer);
535         }
536
537         fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
538         where
539             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
540             F: FnMut(TextRange, FormatSpecifier),
541         {
542             let (mut range, c) = chars.next().unwrap();
543             assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
544             while let Some((r, Ok(next_char))) = chars.peek() {
545                 if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
546                     chars.next();
547                     range = range.cover(*r);
548                 } else {
549                     break;
550                 }
551             }
552             callback(range, FormatSpecifier::Identifier);
553         }
554     }
555 }
556
557 impl HasFormatSpecifier for ast::String {
558     fn char_ranges(
559         &self,
560     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
561         let text = self.text();
562         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
563         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
564
565         let mut res = Vec::with_capacity(text.len());
566         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
567             res.push((
568                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
569                     + offset,
570                 unescaped_char,
571             ))
572         });
573
574         Some(res)
575     }
576 }
577
578 impl ast::IntNumber {
579     pub fn radix(&self) -> Radix {
580         match self.text().get(..2).unwrap_or_default() {
581             "0b" => Radix::Binary,
582             "0o" => Radix::Octal,
583             "0x" => Radix::Hexadecimal,
584             _ => Radix::Decimal,
585         }
586     }
587
588     pub fn value(&self) -> Option<u128> {
589         let token = self.syntax();
590
591         let mut text = token.text();
592         if let Some(suffix) = self.suffix() {
593             text = &text[..text.len() - suffix.len()]
594         }
595
596         let radix = self.radix();
597         text = &text[radix.prefix_len()..];
598
599         let buf;
600         if text.contains('_') {
601             buf = text.replace('_', "");
602             text = buf.as_str();
603         };
604
605         let value = u128::from_str_radix(text, radix as u32).ok()?;
606         Some(value)
607     }
608
609     pub fn suffix(&self) -> Option<&str> {
610         let text = self.text();
611         let radix = self.radix();
612         let mut indices = text.char_indices();
613         if radix != Radix::Decimal {
614             indices.next()?;
615             indices.next()?;
616         }
617         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
618             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
619             _ => |(_, c)| c.is_ascii_alphabetic(),
620         };
621         let (suffix_start, _) = indices.find(is_suffix_start)?;
622         Some(&text[suffix_start..])
623     }
624 }
625
626 impl ast::FloatNumber {
627     pub fn suffix(&self) -> Option<&str> {
628         let text = self.text();
629         let mut indices = text.char_indices();
630         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
631         if c == 'e' || c == 'E' {
632             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
633         }
634         Some(&text[suffix_start..])
635     }
636 }
637
/// Radix of an integer literal; each discriminant is the numeric base.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];

    /// Length in bytes of the literal prefix that selects this radix.
    const fn prefix_len(&self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
657
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that the float literal `lit` has the suffix `expected`.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the integer literal `lit` has the suffix `expected`.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Wraps `lit` in double quotes and asserts that the resulting string
    /// literal has the value `expected`.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let quoted = format!("\"{}\"", lit);
        let token = ast::String { syntax: make::tokens::literal(&quoted) };
        assert_eq!(token.value().as_deref(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: [(&str, Option<&str>); 7] = [
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases.iter() {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: [(&str, Option<&str>); 9] = [
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases.iter() {
            check_int_suffix(lit, expected);
        }
    }

    #[test]
    fn test_string_escape() {
        let cases: [(&str, Option<&str>); 4] = [
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases.iter() {
            check_string_value(lit, expected);
        }
    }
}