crates/syntax/src/ast/token_ext.rs

   1 //! There are many AstNodes, but only a few tokens, so we hand-write them here.
   2
   3 use std::borrow::Cow;
   4
   5 use rustc_lexer::unescape::{unescape_literal, Mode};
   6
   7 use crate::{
   8     ast::{self, AstToken},
   9     TextRange, TextSize,
  10 };
  11
  12 impl ast::Comment {
  13     pub fn kind(&self) -> CommentKind {
  14         CommentKind::from_text(self.text())
  15     }
  16
  17     pub fn is_doc(&self) -> bool {
  18         self.kind().doc.is_some()
  19     }
  20
  21     pub fn is_inner(&self) -> bool {
  22         self.kind().doc == Some(CommentPlacement::Inner)
  23     }
  24
  25     pub fn is_outer(&self) -> bool {
  26         self.kind().doc == Some(CommentPlacement::Outer)
  27     }
  28
  29     pub fn prefix(&self) -> &'static str {
  30         let &(prefix, _kind) = CommentKind::BY_PREFIX
  31             .iter()
  32             .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
  33             .unwrap();
  34         prefix
  35     }
  36
  37     /// Returns the textual content of a doc comment node as a single string with prefix and suffix
  38     /// removed.
  39     pub fn doc_comment(&self) -> Option<&str> {
  40         let kind = self.kind();
  41         match kind {
  42             CommentKind { shape, doc: Some(_) } => {
  43                 let prefix = kind.prefix();
  44                 let text = &self.text()[prefix.len()..];
  45                 let text = if shape == CommentShape::Block {
  46                     text.strip_suffix("*/").unwrap_or(text)
  47                 } else {
  48                     text
  49                 };
  50                 Some(text)
  51             }
  52             _ => None,
  53         }
  54     }
  55 }
  56
  57 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
  58 pub struct CommentKind {
  59     pub shape: CommentShape,
  60     pub doc: Option<CommentPlacement>,
  61 }
  62
  63 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
  64 pub enum CommentShape {
  65     Line,
  66     Block,
  67 }
  68
  69 impl CommentShape {
  70     pub fn is_line(self) -> bool {
  71         self == CommentShape::Line
  72     }
  73
  74     pub fn is_block(self) -> bool {
  75         self == CommentShape::Block
  76     }
  77 }
  78
  79 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
  80 pub enum CommentPlacement {
  81     Inner,
  82     Outer,
  83 }
  84
  85 impl CommentKind {
  86     const BY_PREFIX: [(&'static str, CommentKind); 9] = [
  87         ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
  88         ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
  89         ("////", CommentKind { shape: CommentShape::Line, doc: None }),
  90         ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
  91         ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
  92         ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
  93         ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
  94         ("//", CommentKind { shape: CommentShape::Line, doc: None }),
  95         ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
  96     ];
  97
  98     pub(crate) fn from_text(text: &str) -> CommentKind {
  99         let &(_prefix, kind) = CommentKind::BY_PREFIX
 100             .iter()
 101             .find(|&(prefix, _kind)| text.starts_with(prefix))
 102             .unwrap();
 103         kind
 104     }
 105
 106     pub fn prefix(&self) -> &'static str {
 107         let &(prefix, _) =
 108             CommentKind::BY_PREFIX.iter().rev().find(|(_, kind)| kind == self).unwrap();
 109         prefix
 110     }
 111 }
 112
 113 impl ast::Whitespace {
 114     pub fn spans_multiple_lines(&self) -> bool {
 115         let text = self.text();
 116         text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
 117     }
 118 }
 119
 120 pub struct QuoteOffsets {
 121     pub quotes: (TextRange, TextRange),
 122     pub contents: TextRange,
 123 }
 124
 125 impl QuoteOffsets {
 126     fn new(literal: &str) -> Option<QuoteOffsets> {
 127         let left_quote = literal.find('"')?;
 128         let right_quote = literal.rfind('"')?;
 129         if left_quote == right_quote {
 130             // `literal` only contains one quote
 131             return None;
 132         }
 133
 134         let start = TextSize::from(0);
 135         let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
 136         let right_quote = TextSize::try_from(right_quote).unwrap();
 137         let end = TextSize::of(literal);
 138
 139         let res = QuoteOffsets {
 140             quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
 141             contents: TextRange::new(left_quote, right_quote),
 142         };
 143         Some(res)
 144     }
 145 }
 146
 147 pub trait IsString: AstToken {
 148     fn quote_offsets(&self) -> Option<QuoteOffsets> {
 149         let text = self.text();
 150         let offsets = QuoteOffsets::new(text)?;
 151         let o = self.syntax().text_range().start();
 152         let offsets = QuoteOffsets {
 153             quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
 154             contents: offsets.contents + o,
 155         };
 156         Some(offsets)
 157     }
 158     fn text_range_between_quotes(&self) -> Option<TextRange> {
 159         self.quote_offsets().map(|it| it.contents)
 160     }
 161     fn open_quote_text_range(&self) -> Option<TextRange> {
 162         self.quote_offsets().map(|it| it.quotes.0)
 163     }
 164     fn close_quote_text_range(&self) -> Option<TextRange> {
 165         self.quote_offsets().map(|it| it.quotes.1)
 166     }
 167     fn escaped_char_ranges(
 168         &self,
 169         cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
 170     ) {
 171         let text_range_no_quotes = match self.text_range_between_quotes() {
 172             Some(it) => it,
 173             None => return,
 174         };
 175
 176         let start = self.syntax().text_range().start();
 177         let text = &self.text()[text_range_no_quotes - start];
 178         let offset = text_range_no_quotes.start() - start;
 179
 180         unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
 181             let text_range =
 182                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
 183             cb(text_range + offset, unescaped_char);
 184         });
 185     }
 186 }
 187
 188 impl IsString for ast::String {}
 189
 190 impl ast::String {
 191     pub fn is_raw(&self) -> bool {
 192         self.text().starts_with('r')
 193     }
 194     pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
 195         let contents_range = self.text_range_between_quotes()?;
 196         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
 197         Some(range + contents_range.start())
 198     }
 199
 200     pub fn value(&self) -> Option<Cow<'_, str>> {
 201         if self.is_raw() {
 202             let text = self.text();
 203             let text =
 204                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
 205             return Some(Cow::Borrowed(text));
 206         }
 207
 208         let text = self.text();
 209         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
 210
 211         let mut buf = String::new();
 212         let mut text_iter = text.chars();
 213         let mut has_error = false;
 214         unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
 215             unescaped_char,
 216             buf.capacity() == 0,
 217         ) {
 218             (Ok(c), false) => buf.push(c),
 219             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
 220             (Ok(c), true) => {
 221                 buf.reserve_exact(text.len());
 222                 buf.push_str(&text[..char_range.start]);
 223                 buf.push(c);
 224             }
 225             (Err(_), _) => has_error = true,
 226         });
 227
 228         match (has_error, buf.capacity() == 0) {
 229             (true, _) => None,
 230             (false, true) => Some(Cow::Borrowed(text)),
 231             (false, false) => Some(Cow::Owned(buf)),
 232         }
 233     }
 234 }
 235
 236 impl IsString for ast::ByteString {}
 237
 238 impl ast::ByteString {
 239     pub fn is_raw(&self) -> bool {
 240         self.text().starts_with("br")
 241     }
 242
 243     pub fn value(&self) -> Option<Cow<'_, [u8]>> {
 244         if self.is_raw() {
 245             let text = self.text();
 246             let text =
 247                 &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
 248             return Some(Cow::Borrowed(text.as_bytes()));
 249         }
 250
 251         let text = self.text();
 252         let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
 253
 254         let mut buf: Vec<u8> = Vec::new();
 255         let mut text_iter = text.chars();
 256         let mut has_error = false;
 257         unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match (
 258             unescaped_char,
 259             buf.capacity() == 0,
 260         ) {
 261             (Ok(c), false) => buf.push(c as u8),
 262             (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (),
 263             (Ok(c), true) => {
 264                 buf.reserve_exact(text.len());
 265                 buf.extend_from_slice(text[..char_range.start].as_bytes());
 266                 buf.push(c as u8);
 267             }
 268             (Err(_), _) => has_error = true,
 269         });
 270
 271         match (has_error, buf.capacity() == 0) {
 272             (true, _) => None,
 273             (false, true) => Some(Cow::Borrowed(text.as_bytes())),
 274             (false, false) => Some(Cow::Owned(buf)),
 275         }
 276     }
 277 }
 278
 279 impl ast::IntNumber {
 280     pub fn radix(&self) -> Radix {
 281         match self.text().get(..2).unwrap_or_default() {
 282             "0b" => Radix::Binary,
 283             "0o" => Radix::Octal,
 284             "0x" => Radix::Hexadecimal,
 285             _ => Radix::Decimal,
 286         }
 287     }
 288
 289     pub fn split_into_parts(&self) -> (&str, &str, &str) {
 290         let radix = self.radix();
 291         let (prefix, mut text) = self.text().split_at(radix.prefix_len());
 292
 293         let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
 294             Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
 295             _ => |(_, c)| c.is_ascii_alphabetic(),
 296         };
 297
 298         let mut suffix = "";
 299         if let Some((suffix_start, _)) = text.char_indices().find(is_suffix_start) {
 300             let (text2, suffix2) = text.split_at(suffix_start);
 301             text = text2;
 302             suffix = suffix2;
 303         };
 304
 305         (prefix, text, suffix)
 306     }
 307
 308     pub fn value(&self) -> Option<u128> {
 309         let (_, text, _) = self.split_into_parts();
 310         let value = u128::from_str_radix(&text.replace("_", ""), self.radix() as u32).ok()?;
 311         Some(value)
 312     }
 313
 314     pub fn suffix(&self) -> Option<&str> {
 315         let (_, _, suffix) = self.split_into_parts();
 316         if suffix.is_empty() {
 317             None
 318         } else {
 319             Some(suffix)
 320         }
 321     }
 322 }
 323
 324 impl ast::FloatNumber {
 325     pub fn suffix(&self) -> Option<&str> {
 326         let text = self.text();
 327         let mut indices = text.char_indices();
 328         let (mut suffix_start, c) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())?;
 329         if c == 'e' || c == 'E' {
 330             suffix_start = indices.find(|(_, c)| c.is_ascii_alphabetic())?.0;
 331         }
 332         Some(&text[suffix_start..])
 333     }
 334 }
 335
 336 #[derive(Debug, PartialEq, Eq, Copy, Clone)]
 337 pub enum Radix {
 338     Binary = 2,
 339     Octal = 8,
 340     Decimal = 10,
 341     Hexadecimal = 16,
 342 }
 343
 344 impl Radix {
 345     pub const ALL: &'static [Radix] =
 346         &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];
 347
 348     const fn prefix_len(self) -> usize {
 349         match self {
 350             Self::Decimal => 0,
 351             _ => 2,
 352         }
 353     }
 354 }
 355
 356 #[cfg(test)]
 357 mod tests {
 358     use crate::ast::{self, make, FloatNumber, IntNumber};
 359
 360     fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
 361         assert_eq!(FloatNumber { syntax: make::tokens::literal(lit) }.suffix(), expected.into());
 362     }
 363
 364     fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
 365         assert_eq!(IntNumber { syntax: make::tokens::literal(lit) }.suffix(), expected.into());
 366     }
 367
 368     #[test]
 369     fn test_float_number_suffix() {
 370         check_float_suffix("123.0", None);
 371         check_float_suffix("123f32", "f32");
 372         check_float_suffix("123.0e", None);
 373         check_float_suffix("123.0e4", None);
 374         check_float_suffix("123.0ef32", "f32");
 375         check_float_suffix("123.0E4f32", "f32");
 376         check_float_suffix("1_2_3.0_f32", "f32");
 377     }
 378
 379     #[test]
 380     fn test_int_number_suffix() {
 381         check_int_suffix("123", None);
 382         check_int_suffix("123i32", "i32");
 383         check_int_suffix("1_0_1_l_o_l", "l_o_l");
 384         check_int_suffix("0b11", None);
 385         check_int_suffix("0o11", None);
 386         check_int_suffix("0xff", None);
 387         check_int_suffix("0b11u32", "u32");
 388         check_int_suffix("0o11u32", "u32");
 389         check_int_suffix("0xffu32", "u32");
 390     }
 391
 392     fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
 393         assert_eq!(
 394             ast::String { syntax: make::tokens::literal(&format!("\"{}\"", lit)) }
 395                 .value()
 396                 .as_deref(),
 397             expected.into()
 398         );
 399     }
 400
 401     #[test]
 402     fn test_string_escape() {
 403         check_string_value(r"foobar", "foobar");
 404         check_string_value(r"\foobar", None);
 405         check_string_value(r"\nfoobar", "\nfoobar");
 406         check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\");
 407     }
 408 }