]> git.lizzy.rs Git - rust.git/blob - crates/syntax/src/ast/token_ext.rs
Merge #7500
[rust.git] / crates / syntax / src / ast / token_ext.rs
1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3 use std::{
4     borrow::Cow,
5     convert::{TryFrom, TryInto},
6 };
7
8 use rustc_lexer::unescape::{unescape_literal, Mode};
9
10 use crate::{
11     ast::{self, AstToken},
12     TextRange, TextSize,
13 };
14
15 impl ast::Comment {
16     pub fn kind(&self) -> CommentKind {
17         CommentKind::from_text(self.text())
18     }
19
20     pub fn is_inner(&self) -> bool {
21         self.kind().doc == Some(CommentPlacement::Inner)
22     }
23
24     pub fn is_outer(&self) -> bool {
25         self.kind().doc == Some(CommentPlacement::Outer)
26     }
27
28     pub fn prefix(&self) -> &'static str {
29         let &(prefix, _kind) = CommentKind::BY_PREFIX
30             .iter()
31             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
32             .unwrap();
33         prefix
34     }
35
36     /// Returns the textual content of a doc comment block as a single string.
37     /// That is, strips leading `///` (+ optional 1 character of whitespace),
38     /// trailing `*/`, trailing whitespace and then joins the lines.
39     pub fn doc_comment(&self) -> Option<&str> {
40         let kind = self.kind();
41         match kind {
42             CommentKind { shape, doc: Some(_) } => {
43                 let prefix = kind.prefix();
44                 let text = &self.text()[prefix.len()..];
45                 let ws = text.chars().next().filter(|c| c.is_whitespace());
46                 let text = ws.map_or(text, |ws| &text[ws.len_utf8()..]);
47                 match shape {
48                     CommentShape::Block if text.ends_with("*/") => {
49                         Some(&text[..text.len() - "*/".len()])
50                     }
51                     _ => Some(text),
52                 }
53             }
54             _ => None,
55         }
56     }
57 }
58
/// Classification of a comment: its syntactic shape plus, for doc comments,
/// the placement of the documentation.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Line (`//`) or block (`/* */`) comment.
    pub shape: CommentShape,
    /// `Some(..)` for doc comments, `None` for regular comments.
    pub doc: Option<CommentPlacement>,
}

/// Syntactic shape of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A `//`-style comment.
    Line,
    /// A `/* */`-style comment.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}

/// Placement of a doc comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// `//!` and `/*!` comments.
    Inner,
    /// `///` and `/**` comments.
    Outer,
}

impl CommentKind {
    /// Prefix table used for classification. Order matters: `from_text` picks
    /// the first matching entry, so longer, more specific prefixes must come
    /// before the shorter prefixes they extend.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = [
        ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
        // Three or more stars open a regular block comment, not a doc comment.
        ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
        ("////", CommentKind { shape: CommentShape::Line, doc: None }),
        ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
        ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
        ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
        ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
        ("//", CommentKind { shape: CommentShape::Line, doc: None }),
        ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
    ];

    /// Classifies a comment from its full text.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with neither `//` nor `/*`.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        let &(_prefix, kind) = CommentKind::BY_PREFIX
            .iter()
            .find(|&(prefix, _kind)| text.starts_with(prefix))
            .expect("comment text must start with `//` or `/*`");
        kind
    }

    /// Returns the canonical prefix for this kind of comment.
    fn prefix(&self) -> &'static str {
        // Search from the back so that regular comments map to the plain
        // `//` / `/*` rather than to the special-cased `////` / `/**/`
        // entries. Doc kinds have exactly one entry, so the search direction
        // does not matter for them.
        let &(prefix, _) =
            CommentKind::BY_PREFIX.iter().rev().find(|(_, kind)| kind == self).unwrap();
        prefix
    }
}
112
113 impl ast::Whitespace {
114     pub fn spans_multiple_lines(&self) -> bool {
115         let text = self.text();
116         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
117     }
118 }
119
120 pub struct QuoteOffsets {
121     pub quotes: (TextRange, TextRange),
122     pub contents: TextRange,
123 }
124
125 impl QuoteOffsets {
126     fn new(literal: &str) -> Option<QuoteOffsets> {
127         let left_quote = literal.find('"')?;
128         let right_quote = literal.rfind('"')?;
129         if left_quote == right_quote {
130             // `literal` only contains one quote
131             return None;
132         }
133
134         let start = TextSize::from(0);
135         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
136         let right_quote = TextSize::try_from(right_quote).unwrap();
137         let end = TextSize::of(literal);
138
139         let res = QuoteOffsets {
140             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
141             contents: TextRange::new(left_quote, right_quote),
142         };
143         Some(res)
144     }
145 }
146
147 impl ast::String {
148     pub fn is_raw(&self) -> bool {
149         self.text().starts_with('r')
150     }
151     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
152         let contents_range = self.text_range_between_quotes()?;
153         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
154         Some(range + contents_range.start())
155     }
156
157     pub fn value(&self) -> Option<Cow<'_, str>> {
158         if self.is_raw() {
159             let text = self.text();
160             let text =
161                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
162             return Some(Cow::Borrowed(text));
163         }
164
165         let text = self.text();
166         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
167
168         let mut buf = String::new();
169         let mut text_iter = text.chars();
170         let mut has_error = false;
171         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
172             unescaped_char,
173             buf.capacity() == 0,
174         ) {
175             (Ok(c), false) => buf.push(c),
176             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
177             (Ok(c), true) => {
178                 buf.reserve_exact(text.len());
179                 buf.push_str(&text[..char_range.start]);
180                 buf.push(c);
181             }
182             (Err(_), _) => has_error = true,
183         });
184
185         match (has_error, buf.capacity() == 0) {
186             (true, _) => None,
187             (false, true) => Some(Cow::Borrowed(text)),
188             (false, false) => Some(Cow::Owned(buf)),
189         }
190     }
191
192     pub fn quote_offsets(&self) -> Option<QuoteOffsets> {
193         let text = self.text();
194         let offsets = QuoteOffsets::new(text)?;
195         let o = self.syntax().text_range().start();
196         let offsets = QuoteOffsets {
197             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
198             contents: offsets.contents + o,
199         };
200         Some(offsets)
201     }
202     pub fn text_range_between_quotes(&self) -> Option<TextRange> {
203         self.quote_offsets().map(|it| it.contents)
204     }
205     pub fn open_quote_text_range(&self) -> Option<TextRange> {
206         self.quote_offsets().map(|it| it.quotes.0)
207     }
208     pub fn close_quote_text_range(&self) -> Option<TextRange> {
209         self.quote_offsets().map(|it| it.quotes.1)
210     }
211 }
212
213 impl ast::ByteString {
214     pub fn is_raw(&self) -> bool {
215         self.text().starts_with("br")
216     }
217 }
218
/// A piece of a format-string placeholder, as reported by
/// `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The opening `{`.
    Open,
    /// The closing `}`.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// An identifier.
    Identifier,
    /// The `:` separating the argument from the format spec.
    Colon,
    /// The fill character preceding an alignment.
    Fill,
    /// One of `<`, `^`, `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// `$`.
    DollarSign,
    /// `.`.
    Dot,
    /// `*`.
    Asterisk,
    /// `?`.
    QuestionMark,
}
236
237 pub trait HasFormatSpecifier: AstToken {
238     fn char_ranges(
239         &self,
240     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
241
242     fn lex_format_specifier<F>(&self, mut callback: F)
243     where
244         F: FnMut(TextRange, FormatSpecifier),
245     {
246         let char_ranges = if let Some(char_ranges) = self.char_ranges() {
247             char_ranges
248         } else {
249             return;
250         };
251         let mut chars = char_ranges.iter().peekable();
252
253         while let Some((range, first_char)) = chars.next() {
254             match first_char {
255                 Ok('{') => {
256                     // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
257                     if let Some((_, Ok('{'))) = chars.peek() {
258                         // Escaped format specifier, `{{`
259                         chars.next();
260                         continue;
261                     }
262
263                     callback(*range, FormatSpecifier::Open);
264
265                     // check for integer/identifier
266                     match chars
267                         .peek()
268                         .and_then(|next| next.1.as_ref().ok())
269                         .copied()
270                         .unwrap_or_default()
271                     {
272                         '0'..='9' => {
273                             // integer
274                             read_integer(&mut chars, &mut callback);
275                         }
276                         c if c == '_' || c.is_alphabetic() => {
277                             // identifier
278                             read_identifier(&mut chars, &mut callback);
279                         }
280                         _ => {}
281                     }
282
283                     if let Some((_, Ok(':'))) = chars.peek() {
284                         skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
285
286                         // check for fill/align
287                         let mut cloned = chars.clone().take(2);
288                         let first = cloned
289                             .next()
290                             .and_then(|next| next.1.as_ref().ok())
291                             .copied()
292                             .unwrap_or_default();
293                         let second = cloned
294                             .next()
295                             .and_then(|next| next.1.as_ref().ok())
296                             .copied()
297                             .unwrap_or_default();
298                         match second {
299                             '<' | '^' | '>' => {
300                                 // alignment specifier, first char specifies fillment
301                                 skip_char_and_emit(
302                                     &mut chars,
303                                     FormatSpecifier::Fill,
304                                     &mut callback,
305                                 );
306                                 skip_char_and_emit(
307                                     &mut chars,
308                                     FormatSpecifier::Align,
309                                     &mut callback,
310                                 );
311                             }
312                             _ => match first {
313                                 '<' | '^' | '>' => {
314                                     skip_char_and_emit(
315                                         &mut chars,
316                                         FormatSpecifier::Align,
317                                         &mut callback,
318                                     );
319                                 }
320                                 _ => {}
321                             },
322                         }
323
324                         // check for sign
325                         match chars
326                             .peek()
327                             .and_then(|next| next.1.as_ref().ok())
328                             .copied()
329                             .unwrap_or_default()
330                         {
331                             '+' | '-' => {
332                                 skip_char_and_emit(
333                                     &mut chars,
334                                     FormatSpecifier::Sign,
335                                     &mut callback,
336                                 );
337                             }
338                             _ => {}
339                         }
340
341                         // check for `#`
342                         if let Some((_, Ok('#'))) = chars.peek() {
343                             skip_char_and_emit(
344                                 &mut chars,
345                                 FormatSpecifier::NumberSign,
346                                 &mut callback,
347                             );
348                         }
349
350                         // check for `0`
351                         let mut cloned = chars.clone().take(2);
352                         let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
353                         let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
354
355                         if first == Some('0') && second != Some('$') {
356                             skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
357                         }
358
359                         // width
360                         match chars
361                             .peek()
362                             .and_then(|next| next.1.as_ref().ok())
363                             .copied()
364                             .unwrap_or_default()
365                         {
366                             '0'..='9' => {
367                                 read_integer(&mut chars, &mut callback);
368                                 if let Some((_, Ok('$'))) = chars.peek() {
369                                     skip_char_and_emit(
370                                         &mut chars,
371                                         FormatSpecifier::DollarSign,
372                                         &mut callback,
373                                     );
374                                 }
375                             }
376                             c if c == '_' || c.is_alphabetic() => {
377                                 read_identifier(&mut chars, &mut callback);
378
379                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
380                                     == Some('?')
381                                 {
382                                     skip_char_and_emit(
383                                         &mut chars,
384                                         FormatSpecifier::QuestionMark,
385                                         &mut callback,
386                                     );
387                                 }
388
389                                 // can be either width (indicated by dollar sign, or type in which case
390                                 // the next sign has to be `}`)
391                                 let next =
392                                     chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
393
394                                 match next {
395                                     Some('$') => skip_char_and_emit(
396                                         &mut chars,
397                                         FormatSpecifier::DollarSign,
398                                         &mut callback,
399                                     ),
400                                     Some('}') => {
401                                         skip_char_and_emit(
402                                             &mut chars,
403                                             FormatSpecifier::Close,
404                                             &mut callback,
405                                         );
406                                         continue;
407                                     }
408                                     _ => continue,
409                                 };
410                             }
411                             _ => {}
412                         }
413
414                         // precision
415                         if let Some((_, Ok('.'))) = chars.peek() {
416                             skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
417
418                             match chars
419                                 .peek()
420                                 .and_then(|next| next.1.as_ref().ok())
421                                 .copied()
422                                 .unwrap_or_default()
423                             {
424                                 '*' => {
425                                     skip_char_and_emit(
426                                         &mut chars,
427                                         FormatSpecifier::Asterisk,
428                                         &mut callback,
429                                     );
430                                 }
431                                 '0'..='9' => {
432                                     read_integer(&mut chars, &mut callback);
433                                     if let Some((_, Ok('$'))) = chars.peek() {
434                                         skip_char_and_emit(
435                                             &mut chars,
436                                             FormatSpecifier::DollarSign,
437                                             &mut callback,
438                                         );
439                                     }
440                                 }
441                                 c if c == '_' || c.is_alphabetic() => {
442                                     read_identifier(&mut chars, &mut callback);
443                                     if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
444                                         != Some('$')
445                                     {
446                                         continue;
447                                     }
448                                     skip_char_and_emit(
449                                         &mut chars,
450                                         FormatSpecifier::DollarSign,
451                                         &mut callback,
452                                     );
453                                 }
454                                 _ => {
455                                     continue;
456                                 }
457                             }
458                         }
459
460                         // type
461                         match chars
462                             .peek()
463                             .and_then(|next| next.1.as_ref().ok())
464                             .copied()
465                             .unwrap_or_default()
466                         {
467                             '?' => {
468                                 skip_char_and_emit(
469                                     &mut chars,
470                                     FormatSpecifier::QuestionMark,
471                                     &mut callback,
472                                 );
473                             }
474                             c if c == '_' || c.is_alphabetic() => {
475                                 read_identifier(&mut chars, &mut callback);
476
477                                 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
478                                     == Some('?')
479                                 {
480                                     skip_char_and_emit(
481                                         &mut chars,
482                                         FormatSpecifier::QuestionMark,
483                                         &mut callback,
484                                     );
485                                 }
486                             }
487                             _ => {}
488                         }
489                     }
490
491                     if let Some((_, Ok('}'))) = chars.peek() {
492                         skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
493                     } else {
494                         continue;
495                     }
496                 }
497                 _ => {
498                     while let Some((_, Ok(next_char))) = chars.peek() {
499                         match next_char {
500                             '{' => break,
501                             _ => {}
502                         }
503                         chars.next();
504                     }
505                 }
506             };
507         }
508
509         fn skip_char_and_emit<'a, I, F>(
510             chars: &mut std::iter::Peekable<I>,
511             emit: FormatSpecifier,
512             callback: &mut F,
513         ) where
514             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
515             F: FnMut(TextRange, FormatSpecifier),
516         {
517             let (range, _) = chars.next().unwrap();
518             callback(*range, emit);
519         }
520
521         fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
522         where
523             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
524             F: FnMut(TextRange, FormatSpecifier),
525         {
526             let (mut range, c) = chars.next().unwrap();
527             assert!(c.as_ref().unwrap().is_ascii_digit());
528             while let Some((r, Ok(next_char))) = chars.peek() {
529                 if next_char.is_ascii_digit() {
530                     chars.next();
531                     range = range.cover(*r);
532                 } else {
533                     break;
534                 }
535             }
536             callback(range, FormatSpecifier::Integer);
537         }
538
539         fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
540         where
541             I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
542             F: FnMut(TextRange, FormatSpecifier),
543         {
544             let (mut range, c) = chars.next().unwrap();
545             assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
546             while let Some((r, Ok(next_char))) = chars.peek() {
547                 if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
548                     chars.next();
549                     range = range.cover(*r);
550                 } else {
551                     break;
552                 }
553             }
554             callback(range, FormatSpecifier::Identifier);
555         }
556     }
557 }
558
559 impl HasFormatSpecifier for ast::String {
560     fn char_ranges(
561         &self,
562     ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
563         let text = self.text();
564         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
565         let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
566
567         let mut res = Vec::with_capacity(text.len());
568         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
569             res.push((
570                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
571                     + offset,
572                 unescaped_char,
573             ))
574         });
575
576         Some(res)
577     }
578 }
579
580 impl ast::IntNumber {
581     pub fn radix(&self) -> Radix {
582         match self.text().get(..2).unwrap_or_default() {
583             "0b" => Radix::Binary,
584             "0o" => Radix::Octal,
585             "0x" => Radix::Hexadecimal,
586             _ => Radix::Decimal,
587         }
588     }
589
590     pub fn value(&self) -> Option<u128> {
591         let token = self.syntax();
592
593         let mut text = token.text();
594         if let Some(suffix) = self.suffix() {
595             text = &text[..text.len() - suffix.len()]
596         }
597
598         let radix = self.radix();
599         text = &text[radix.prefix_len()..];
600
601         let buf;
602         if text.contains('_') {
603             buf = text.replace('_', "");
604             text = buf.as_str();
605         };
606
607         let value = u128::from_str_radix(text, radix as u32).ok()?;
608         Some(value)
609     }
610
611     pub fn suffix(&self) -> Option<&str> {
612         let text = self.text();
613         let radix = self.radix();
614         let mut indices = text.char_indices();
615         if radix != Radix::Decimal {
616             indices.next()?;
617             indices.next()?;
618         }
619         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
620             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
621             _ => |(_, c)| c.is_ascii_alphabetic(),
622         };
623         let (suffix_start, _) = indices.find(is_suffix_start)?;
624         Some(&text[suffix_start..])
625     }
626 }
627
628 impl ast::FloatNumber {
629     pub fn suffix(&self) -> Option<&str> {
630         let text = self.text();
631         let mut indices = text.char_indices();
632         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
633         if c == 'e' || c == 'E' {
634             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
635         }
636         Some(&text[suffix_start..])
637     }
638 }
639
/// Radix of an integer literal; the discriminant is the numeric base.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];

    /// Length in bytes of the literal prefix that selects this radix.
    const fn prefix_len(&self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
659
#[cfg(test)]
mod tests {
    use crate::ast::{self, make, FloatNumber, IntNumber};

    /// Asserts that the float literal `lit` has the `expected` suffix.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the integer literal `lit` has the `expected` suffix.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef32", Some("f32")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f32", Some("f32")),
        ];
        for &(lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: &[(&str, Option<&str>)] = &[
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for &(lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts the unescaped value.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let quoted = format!("\"{}\"", lit);
        let token = ast::String { syntax: make::tokens::literal(&quoted) };
        assert_eq!(token.value().as_deref(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        let cases: &[(&str, Option<&str>)] = &[
            (r"foobar", Some("foobar")),
            (r"\foobar", None),
            (r"\nfoobar", Some("\nfoobar")),
            (r"C:\\Windows\\System32\\", Some("C:\\Windows\\System32\\")),
        ];
        for &(lit, expected) in cases {
            check_string_value(lit, expected);
        }
    }
}