]> git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #11293
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::borrow::Cow;
4
5 use rustc_lexer::unescape::{unescape_literal, Mode};
6
7 use crate::{
8     ast::{self, AstToken},
9     TextRange, TextSize,
10 };
11
12 impl ast::Comment {
13     pub fn kind(&self) -> CommentKind {
14         CommentKind::from_text(self.text())
15     }
16
17     pub fn is_doc(&self) -> bool {
18         self.kind().doc.is_some()
19     }
20
21     pub fn is_inner(&self) -> bool {
22         self.kind().doc == Some(CommentPlacement::Inner)
23     }
24
25     pub fn is_outer(&self) -> bool {
26         self.kind().doc == Some(CommentPlacement::Outer)
27     }
28
29     pub fn prefix(&self) -> &'static str {
30         let &(prefix, _kind) = CommentKind::BY_PREFIX
31             .iter()
32             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
33             .unwrap();
34         prefix
35     }
36
37     /// Returns the textual content of a doc comment node as a single string with prefix and suffix
38     /// removed.
39     pub fn doc_comment(&self) -> Option<&str> {
40         let kind = self.kind();
41         match kind {
42             CommentKind { shape, doc: Some(_) } => {
43                 let prefix = kind.prefix();
44                 let text = &self.text()[prefix.len()..];
45                 let text = if shape == CommentShape::Block {
46                     text.strip_suffix("*/").unwrap_or(text)
47                 } else {
48                     text
49                 };
50                 Some(text)
51             }
52             _ => None,
53         }
54     }
55 }
56
57 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
58 pub struct CommentKind {
59     pub shape: CommentShape,
60     pub doc: Option<CommentPlacement>,
61 }
62
/// Syntactic shape of a comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentShape {
    /// A comment introduced by `//`.
    Line,
    /// A comment introduced by `/*`.
    Block,
}
68
69 impl CommentShape {
70     pub fn is_line(self) -> bool {
71         self == CommentShape::Line
72     }
73
74     pub fn is_block(self) -> bool {
75         self == CommentShape::Block
76     }
77 }
78
/// Placement of a doc comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentPlacement {
    /// Inner doc comment (`//!` or `/*!`).
    Inner,
    /// Outer doc comment (`///` or `/**`).
    Outer,
}
84
85 impl CommentKind {
86     const BY_PREFIX: [(&'static str, CommentKind); 9] = [
87         ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
88         ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
89         ("////", CommentKind { shape: CommentShape::Line, doc: None }),
90         ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
91         ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
92         ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
93         ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
94         ("//", CommentKind { shape: CommentShape::Line, doc: None }),
95         ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
96     ];
97
98     pub(crate) fn from_text(text: &str) -> CommentKind {
99         let &(_prefix, kind) = CommentKind::BY_PREFIX
100             .iter()
101             .find(|&(prefix, _kind)| text.starts_with(prefix))
102             .unwrap();
103         kind
104     }
105
106     pub fn prefix(&self) -> &'static str {
107         let &(prefix, _) =
108             CommentKind::BY_PREFIX.iter().rev().find(|(_, kind)| kind == self).unwrap();
109         prefix
110     }
111 }
112
113 impl ast::Whitespace {
114     pub fn spans_multiple_lines(&self) -> bool {
115         let text = self.text();
116         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
117     }
118 }
119
120 pub struct QuoteOffsets {
121     pub quotes: (TextRange, TextRange),
122     pub contents: TextRange,
123 }
124
125 impl QuoteOffsets {
126     fn new(literal: &str) -> Option<QuoteOffsets> {
127         let left_quote = literal.find('"')?;
128         let right_quote = literal.rfind('"')?;
129         if left_quote == right_quote {
130             // `literal` only contains one quote
131             return None;
132         }
133
134         let start = TextSize::from(0);
135         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
136         let right_quote = TextSize::try_from(right_quote).unwrap();
137         let end = TextSize::of(literal);
138
139         let res = QuoteOffsets {
140             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
141             contents: TextRange::new(left_quote, right_quote),
142         };
143         Some(res)
144     }
145 }
146
147 pub trait IsString: AstToken {
148     fn quote_offsets(&self) -> Option<QuoteOffsets> {
149         let text = self.text();
150         let offsets = QuoteOffsets::new(text)?;
151         let o = self.syntax().text_range().start();
152         let offsets = QuoteOffsets {
153             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
154             contents: offsets.contents + o,
155         };
156         Some(offsets)
157     }
158     fn text_range_between_quotes(&self) -> Option<TextRange> {
159         self.quote_offsets().map(|it| it.contents)
160     }
161     fn open_quote_text_range(&self) -> Option<TextRange> {
162         self.quote_offsets().map(|it| it.quotes.0)
163     }
164     fn close_quote_text_range(&self) -> Option<TextRange> {
165         self.quote_offsets().map(|it| it.quotes.1)
166     }
167 }
168
169 impl IsString for ast::String {}
170
171 impl ast::String {
172     pub fn is_raw(&self) -> bool {
173         self.text().starts_with('r')
174     }
175     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
176         let contents_range = self.text_range_between_quotes()?;
177         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
178         Some(range + contents_range.start())
179     }
180
181     pub fn value(&self) -> Option<Cow<'_, str>> {
182         if self.is_raw() {
183             let text = self.text();
184             let text =
185                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
186             return Some(Cow::Borrowed(text));
187         }
188
189         let text = self.text();
190         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
191
192         let mut buf = String::new();
193         let mut text_iter = text.chars();
194         let mut has_error = false;
195         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
196             unescaped_char,
197             buf.capacity() == 0,
198         ) {
199             (Ok(c), false) => buf.push(c),
200             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
201             (Ok(c), true) => {
202                 buf.reserve_exact(text.len());
203                 buf.push_str(&text[..char_range.start]);
204                 buf.push(c);
205             }
206             (Err(_), _) => has_error = true,
207         });
208
209         match (has_error, buf.capacity() == 0) {
210             (true, _) => None,
211             (false, true) => Some(Cow::Borrowed(text)),
212             (false, false) => Some(Cow::Owned(buf)),
213         }
214     }
215 }
216
217 impl IsString for ast::ByteString {}
218
219 impl ast::ByteString {
220     pub fn is_raw(&self) -> bool {
221         self.text().starts_with("br")
222     }
223
224     pub fn value(&self) -> Option<Cow<'_, [u8]>> {
225         if self.is_raw() {
226             let text = self.text();
227             let text =
228                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
229             return Some(Cow::Borrowed(text.as_bytes()));
230         }
231
232         let text = self.text();
233         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
234
235         let mut buf: Vec<u8> = Vec::new();
236         let mut text_iter = text.chars();
237         let mut has_error = false;
238         unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match (
239             unescaped_char,
240             buf.capacity() == 0,
241         ) {
242             (Ok(c), false) => buf.push(c as u8),
243             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
244             (Ok(c), true) => {
245                 buf.reserve_exact(text.len());
246                 buf.extend_from_slice(text[..char_range.start].as_bytes());
247                 buf.push(c as u8);
248             }
249             (Err(_), _) => has_error = true,
250         });
251
252         match (has_error, buf.capacity() == 0) {
253             (true, _) => None,
254             (false, true) => Some(Cow::Borrowed(text.as_bytes())),
255             (false, false) => Some(Cow::Owned(buf)),
256         }
257     }
258 }
259
/// Category of a piece of a format specifier, as reported by
/// `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The opening `{`.
    Open,
    /// The closing `}`.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// An identifier.
    Identifier,
    /// The `:` that introduces the format spec.
    Colon,
    /// The fill character that precedes an alignment character.
    Fill,
    /// An alignment character: `<`, `^` or `>`.
    Align,
    /// A sign flag: `+` or `-`.
    Sign,
    /// The `#` flag.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// A `$` following a width or precision argument.
    DollarSign,
    /// The `.` that introduces the precision.
    Dot,
    /// The `*` precision.
    Asterisk,
    /// A `?` in the type position.
    QuestionMark,
}
277
278 pub trait HasFormatSpecifier: AstToken {
279     fn char_ranges(
280         &self,
281     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
282
283     fn lex_format_specifier<F>(&self, mut callback: F)
284     where
285         F: FnMut(TextRange, FormatSpecifier),
286     {
287         let char_ranges = match self.char_ranges() {
288             Some(char_ranges) => char_ranges,
289             None => return,
290         };
291         let mut chars = char_ranges
292             .iter()
293             .filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?)))
294             .peekable();
295
296         while let Some((range, first_char)) = chars.next() {
297             if let '{' = first_char {
298                 // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
299                 if let Some((_, '{')) = chars.peek() {
300                     // Escaped format specifier, `{{`
301                     chars.next();
302                     continue;
303                 }
304
305                 callback(range, FormatSpecifier::Open);
306
307                 // check for integer/identifier
308                 let (_, int_char) = chars.peek().copied().unwrap_or_default();
309                 match int_char {
310                     // integer
311                     '0'..='9' => read_integer(&mut chars, &mut callback),
312                     // identifier
313                     c if c == '_' || c.is_alphabetic() => {
314                         read_identifier(&mut chars, &mut callback)
315                     }
316                     _ => {}
317                 }
318
319                 if let Some((_, ':')) = chars.peek() {
320                     skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
321
322                     // check for fill/align
323                     let mut cloned = chars.clone().take(2);
324                     let (_, first) = cloned.next().unwrap_or_default();
325                     let (_, second) = cloned.next().unwrap_or_default();
326                     match second {
327                         '<' | '^' | '>' => {
328                             // alignment specifier, first char specifies fillment
329                             skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
330                             skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
331                         }
332                         _ => {
333                             if let '<' | '^' | '>' = first {
334                                 skip_char_and_emit(
335                                     &mut chars,
336                                     FormatSpecifier::Align,
337                                     &mut callback,
338                                 );
339                             }
340                         }
341                     }
342
343                     // check for sign
344                     match chars.peek().copied().unwrap_or_default().1 {
345                         '+' | '-' => {
346                             skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
347                         }
348                         _ => {}
349                     }
350
351                     // check for `#`
352                     if let Some((_, '#')) = chars.peek() {
353                         skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
354                     }
355
356                     // check for `0`
357                     let mut cloned = chars.clone().take(2);
358                     let first = cloned.next().map(|next| next.1);
359                     let second = cloned.next().map(|next| next.1);
360
361                     if first == Some('0') && second != Some('$') {
362                         skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
363                     }
364
365                     // width
366                     match chars.peek().copied().unwrap_or_default().1 {
367                         '0'..='9' => {
368                             read_integer(&mut chars, &mut callback);
369                             if let Some((_, '$')) = chars.peek() {
370                                 skip_char_and_emit(
371                                     &mut chars,
372                                     FormatSpecifier::DollarSign,
373                                     &mut callback,
374                                 );
375                             }
376                         }
377                         c if c == '_' || c.is_alphabetic() => {
378                             read_identifier(&mut chars, &mut callback);
379
380                             if chars.peek().map(|&(_, c)| c) == Some('?') {
381                                 skip_char_and_emit(
382                                     &mut chars,
383                                     FormatSpecifier::QuestionMark,
384                                     &mut callback,
385                                 );
386                             }
387
388                             // can be either width (indicated by dollar sign, or type in which case
389                             // the next sign has to be `}`)
390                             let next = chars.peek().map(|&(_, c)| c);
391
392                             match next {
393                                 Some('$') => skip_char_and_emit(
394                                     &mut chars,
395                                     FormatSpecifier::DollarSign,
396                                     &mut callback,
397                                 ),
398                                 Some('}') => {
399                                     skip_char_and_emit(
400                                         &mut chars,
401                                         FormatSpecifier::Close,
402                                         &mut callback,
403                                     );
404                                     continue;
405                                 }
406                                 _ => continue,
407                             };
408                         }
409                         _ => {}
410                     }
411
412                     // precision
413                     if let Some((_, '.')) = chars.peek() {
414                         skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
415
416                         match chars.peek().copied().unwrap_or_default().1 {
417                             '*' => {
418                                 skip_char_and_emit(
419                                     &mut chars,
420                                     FormatSpecifier::Asterisk,
421                                     &mut callback,
422                                 );
423                             }
424                             '0'..='9' => {
425                                 read_integer(&mut chars, &mut callback);
426                                 if let Some((_, '$')) = chars.peek() {
427                                     skip_char_and_emit(
428                                         &mut chars,
429                                         FormatSpecifier::DollarSign,
430                                         &mut callback,
431                                     );
432                                 }
433                             }
434                             c if c == '_' || c.is_alphabetic() => {
435                                 read_identifier(&mut chars, &mut callback);
436                                 if chars.peek().map(|&(_, c)| c) != Some('$') {
437                                     continue;
438                                 }
439                                 skip_char_and_emit(
440                                     &mut chars,
441                                     FormatSpecifier::DollarSign,
442                                     &mut callback,
443                                 );
444                             }
445                             _ => {
446                                 continue;
447                             }
448                         }
449                     }
450
451                     // type
452                     match chars.peek().copied().unwrap_or_default().1 {
453                         '?' => {
454                             skip_char_and_emit(
455                                 &mut chars,
456                                 FormatSpecifier::QuestionMark,
457                                 &mut callback,
458                             );
459                         }
460                         c if c == '_' || c.is_alphabetic() => {
461                             read_identifier(&mut chars, &mut callback);
462
463                             if chars.peek().map(|&(_, c)| c) == Some('?') {
464                                 skip_char_and_emit(
465                                     &mut chars,
466                                     FormatSpecifier::QuestionMark,
467                                     &mut callback,
468                                 );
469                             }
470                         }
471                         _ => {}
472                     }
473                 }
474
475                 if let Some((_, '}')) = chars.peek() {
476                     skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
477                 }
478                 continue;
479             }
480         }
481
482         fn skip_char_and_emit<I, F>(
483             chars: &mut std::iter::Peekable<I>,
484             emit: FormatSpecifier,
485             callback: &mut F,
486         ) where
487             I: Iterator<Item = (TextRange, char)>,
488             F: FnMut(TextRange, FormatSpecifier),
489         {
490             let (range, _) = chars.next().unwrap();
491             callback(range, emit);
492         }
493
494         fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
495         where
496             I: Iterator<Item = (TextRange, char)>,
497             F: FnMut(TextRange, FormatSpecifier),
498         {
499             let (mut range, c) = chars.next().unwrap();
500             assert!(c.is_ascii_digit());
501             while let Some(&(r, next_char)) = chars.peek() {
502                 if next_char.is_ascii_digit() {
503                     chars.next();
504                     range = range.cover(r);
505                 } else {
506                     break;
507                 }
508             }
509             callback(range, FormatSpecifier::Integer);
510         }
511
512         fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
513         where
514             I: Iterator<Item = (TextRange, char)>,
515             F: FnMut(TextRange, FormatSpecifier),
516         {
517             let (mut range, c) = chars.next().unwrap();
518             assert!(c.is_alphabetic() || c == '_');
519             while let Some(&(r, next_char)) = chars.peek() {
520                 if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
521                     chars.next();
522                     range = range.cover(r);
523                 } else {
524                     break;
525                 }
526             }
527             callback(range, FormatSpecifier::Identifier);
528         }
529     }
530 }
531
532 impl HasFormatSpecifier for ast::String {
533     fn char_ranges(
534         &self,
535     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
536         let text = self.text();
537         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
538         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
539
540         let mut res = Vec::with_capacity(text.len());
541         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
542             res.push((
543                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
544                     + offset,
545                 unescaped_char,
546             ));
547         });
548
549         Some(res)
550     }
551 }
552
553 impl ast::IntNumber {
554     pub fn radix(&self) -> Radix {
555         match self.text().get(..2).unwrap_or_default() {
556             "0b" => Radix::Binary,
557             "0o" => Radix::Octal,
558             "0x" => Radix::Hexadecimal,
559             _ => Radix::Decimal,
560         }
561     }
562
563     pub fn split_into_parts(&self) -> (&str, &str, &str) {
564         let radix = self.radix();
565         let (prefix, mut text) = self.text().split_at(radix.prefix_len());
566
567         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
568             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
569             _ => |(_, c)| c.is_ascii_alphabetic(),
570         };
571
572         let mut suffix = "";
573         if let Some((suffix_start, _)) = text.char_indices().find(is_suffix_start) {
574             let (text2, suffix2) = text.split_at(suffix_start);
575             text = text2;
576             suffix = suffix2;
577         };
578
579         (prefix, text, suffix)
580     }
581
582     pub fn value(&self) -> Option<u128> {
583         let (_, text, _) = self.split_into_parts();
584         let value = u128::from_str_radix(&text.replace("_", ""), self.radix() as u32).ok()?;
585         Some(value)
586     }
587
588     pub fn suffix(&self) -> Option<&str> {
589         let (_, _, suffix) = self.split_into_parts();
590         if suffix.is_empty() {
591             None
592         } else {
593             Some(suffix)
594         }
595     }
596 }
597
598 impl ast::FloatNumber {
599     pub fn suffix(&self) -> Option<&str> {
600         let text = self.text();
601         let mut indices = text.char_indices();
602         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
603         if c == 'e' || c == 'E' {
604             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
605         }
606         Some(&text[suffix_start..])
607     }
608 }
609
/// Radix of an integer literal. Each discriminant is the numeric base itself.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Radix {
    /// Base 2.
    Binary = 2,
    /// Base 8.
    Octal = 8,
    /// Base 10.
    Decimal = 10,
    /// Base 16.
    Hexadecimal = 16,
}
617
618 impl Radix {
619     pub const ALL: &'static [Radix] =
620         &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];
621
622     const fn prefix_len(self) -> usize {
623         match self {
624             Self::Decimal => 0,
625             _ => 2,
626         }
627     }
628 }
629
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that the float literal `lit` has the suffix `expected`.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let expected: Option<&str> = expected.into();
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected);
    }

    /// Asserts that the integer literal `lit` has the suffix `expected`.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let expected: Option<&str> = expected.into();
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected);
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts that the resulting string literal has the value
    /// `expected`.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let expected: Option<&str> = expected.into();
        let token = ast::String { syntax: make::tokens::literal(&format!("\"{}\"", lit)) };
        let value = token.value();
        assert_eq!(value.as_deref(), expected);
    }

    #[test]
    fn test_string_escape() {
        let cases: &[(&str, Option<&str>)] = &[
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases {
            check_string_value(lit, expected);
        }
    }
}