git.lizzy.rs Git - rust.git/blob - compiler/rustc_ast/src/util/literal.rs
Remove double spaces after dots in comments
[rust.git] / compiler / rustc_ast / src / util / literal.rs
1 //! Code related to parsing literals.
2
3 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
4 use crate::token::{self, Token};
5 use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
6 use rustc_span::symbol::{kw, sym, Symbol};
7 use rustc_span::Span;
8 use std::{ascii, fmt, str};
9
10 // Escapes a string, represented as a symbol. Reuses the original symbol,
11 // avoiding interning, if no changes are required.
12 pub fn escape_string_symbol(symbol: Symbol) -> Symbol {
13     let s = symbol.as_str();
14     let escaped = s.escape_default().to_string();
15     if s == escaped { symbol } else { Symbol::intern(&escaped) }
16 }
17
18 // Escapes a char.
19 pub fn escape_char_symbol(ch: char) -> Symbol {
20     let s: String = ch.escape_default().map(Into::<char>::into).collect();
21     Symbol::intern(&s)
22 }
23
24 // Escapes a byte string.
25 pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol {
26     let s = bytes.escape_ascii().to_string();
27     Symbol::intern(&s)
28 }
29
/// Reasons why a token literal could not be converted into a semantic literal.
#[derive(Debug)]
pub enum LitError {
    /// Unescaping the literal reported a fatal error, or an integer literal
    /// in a base below 10 contains a decimal digit that is not valid for
    /// that base.
    LexerError,
    /// A suffix is present on a literal kind for which `may_have_suffix()`
    /// is false.
    InvalidSuffix,
    /// The suffix of an integer literal is neither a known integer type nor
    /// something starting with `f`.
    InvalidIntSuffix,
    /// The suffix of a float literal is something other than `f32` or `f64`.
    InvalidFloatSuffix,
    /// A float literal was written in a base other than 10; the payload is
    /// that base.
    NonDecimalFloat(u32),
    /// The digits of an integer literal could not be parsed into a `u128`
    /// (and the failure was not attributed to the lexer); the payload is the
    /// base of the literal.
    IntTooLarge(u32),
}
39
40 impl LitKind {
41     /// Converts literal token into a semantic literal.
42     pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
43         let token::Lit { kind, symbol, suffix } = lit;
44         if suffix.is_some() && !kind.may_have_suffix() {
45             return Err(LitError::InvalidSuffix);
46         }
47
48         Ok(match kind {
49             token::Bool => {
50                 assert!(symbol.is_bool_lit());
51                 LitKind::Bool(symbol == kw::True)
52             }
53             token::Byte => {
54                 return unescape_byte(symbol.as_str())
55                     .map(LitKind::Byte)
56                     .map_err(|_| LitError::LexerError);
57             }
58             token::Char => {
59                 return unescape_char(symbol.as_str())
60                     .map(LitKind::Char)
61                     .map_err(|_| LitError::LexerError);
62             }
63
64             // There are some valid suffixes for integer and float literals,
65             // so all the handling is done internally.
66             token::Integer => return integer_lit(symbol, suffix),
67             token::Float => return float_lit(symbol, suffix),
68
69             token::Str => {
70                 // If there are no characters requiring special treatment we can
71                 // reuse the symbol from the token. Otherwise, we must generate a
72                 // new symbol because the string in the LitKind is different to the
73                 // string in the token.
74                 let s = symbol.as_str();
75                 let symbol = if s.contains(['\\', '\r']) {
76                     let mut buf = String::with_capacity(s.len());
77                     let mut error = Ok(());
78                     // Force-inlining here is aggressive but the closure is
79                     // called on every char in the string, so it can be
80                     // hot in programs with many long strings.
81                     unescape_literal(
82                         s,
83                         Mode::Str,
84                         &mut #[inline(always)]
85                         |_, unescaped_char| match unescaped_char {
86                             Ok(c) => buf.push(c),
87                             Err(err) => {
88                                 if err.is_fatal() {
89                                     error = Err(LitError::LexerError);
90                                 }
91                             }
92                         },
93                     );
94                     error?;
95                     Symbol::intern(&buf)
96                 } else {
97                     symbol
98                 };
99                 LitKind::Str(symbol, ast::StrStyle::Cooked)
100             }
101             token::StrRaw(n) => {
102                 // Ditto.
103                 let s = symbol.as_str();
104                 let symbol =
105                     if s.contains('\r') {
106                         let mut buf = String::with_capacity(s.len());
107                         let mut error = Ok(());
108                         unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
109                             match unescaped_char {
110                                 Ok(c) => buf.push(c),
111                                 Err(err) => {
112                                     if err.is_fatal() {
113                                         error = Err(LitError::LexerError);
114                                     }
115                                 }
116                             }
117                         });
118                         error?;
119                         Symbol::intern(&buf)
120                     } else {
121                         symbol
122                     };
123                 LitKind::Str(symbol, ast::StrStyle::Raw(n))
124             }
125             token::ByteStr => {
126                 let s = symbol.as_str();
127                 let mut buf = Vec::with_capacity(s.len());
128                 let mut error = Ok(());
129                 unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
130                     Ok(c) => buf.push(byte_from_char(c)),
131                     Err(err) => {
132                         if err.is_fatal() {
133                             error = Err(LitError::LexerError);
134                         }
135                     }
136                 });
137                 error?;
138                 LitKind::ByteStr(buf.into(), StrStyle::Cooked)
139             }
140             token::ByteStrRaw(n) => {
141                 let s = symbol.as_str();
142                 let bytes = if s.contains('\r') {
143                     let mut buf = Vec::with_capacity(s.len());
144                     let mut error = Ok(());
145                     unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
146                         Ok(c) => buf.push(byte_from_char(c)),
147                         Err(err) => {
148                             if err.is_fatal() {
149                                 error = Err(LitError::LexerError);
150                             }
151                         }
152                     });
153                     error?;
154                     buf
155                 } else {
156                     symbol.to_string().into_bytes()
157                 };
158
159                 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
160             }
161             token::Err => LitKind::Err,
162         })
163     }
164 }
165
166 impl fmt::Display for LitKind {
167     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
168         match *self {
169             LitKind::Byte(b) => {
170                 let b: String = ascii::escape_default(b).map(Into::<char>::into).collect();
171                 write!(f, "b'{b}'")?;
172             }
173             LitKind::Char(ch) => write!(f, "'{}'", escape_char_symbol(ch))?,
174             LitKind::Str(sym, StrStyle::Cooked) => write!(f, "\"{}\"", escape_string_symbol(sym))?,
175             LitKind::Str(sym, StrStyle::Raw(n)) => write!(
176                 f,
177                 "r{delim}\"{string}\"{delim}",
178                 delim = "#".repeat(n as usize),
179                 string = sym
180             )?,
181             LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
182                 write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
183             }
184             LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
185                 // Unwrap because raw byte string literals can only contain ASCII.
186                 let symbol = str::from_utf8(bytes).unwrap();
187                 write!(
188                     f,
189                     "br{delim}\"{string}\"{delim}",
190                     delim = "#".repeat(n as usize),
191                     string = symbol
192                 )?;
193             }
194             LitKind::Int(n, ty) => {
195                 write!(f, "{n}")?;
196                 match ty {
197                     ast::LitIntType::Unsigned(ty) => write!(f, "{}", ty.name())?,
198                     ast::LitIntType::Signed(ty) => write!(f, "{}", ty.name())?,
199                     ast::LitIntType::Unsuffixed => {}
200                 }
201             }
202             LitKind::Float(symbol, ty) => {
203                 write!(f, "{symbol}")?;
204                 match ty {
205                     ast::LitFloatType::Suffixed(ty) => write!(f, "{}", ty.name())?,
206                     ast::LitFloatType::Unsuffixed => {}
207                 }
208             }
209             LitKind::Bool(b) => write!(f, "{}", if b { "true" } else { "false" })?,
210             LitKind::Err => {
211                 // This only shows up in places like `-Zunpretty=hir` output, so we
212                 // don't bother to produce something useful.
213                 write!(f, "<bad-literal>")?;
214             }
215         }
216
217         Ok(())
218     }
219 }
220
221 impl MetaItemLit {
222     /// Converts a token literal into a meta item literal.
223     pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError> {
224         Ok(MetaItemLit {
225             symbol: token_lit.symbol,
226             suffix: token_lit.suffix,
227             kind: LitKind::from_token_lit(token_lit)?,
228             span,
229         })
230     }
231
232     /// Cheaply converts a meta item literal into a token literal.
233     pub fn as_token_lit(&self) -> token::Lit {
234         let kind = match self.kind {
235             LitKind::Bool(_) => token::Bool,
236             LitKind::Str(_, ast::StrStyle::Cooked) => token::Str,
237             LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
238             LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
239             LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
240             LitKind::Byte(_) => token::Byte,
241             LitKind::Char(_) => token::Char,
242             LitKind::Int(..) => token::Integer,
243             LitKind::Float(..) => token::Float,
244             LitKind::Err => token::Err,
245         };
246
247         token::Lit::new(kind, self.symbol, self.suffix)
248     }
249
250     /// Converts an arbitrary token into meta item literal.
251     pub fn from_token(token: &Token) -> Option<MetaItemLit> {
252         token::Lit::from_token(token)
253             .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok())
254     }
255 }
256
257 fn strip_underscores(symbol: Symbol) -> Symbol {
258     // Do not allocate a new string unless necessary.
259     let s = symbol.as_str();
260     if s.contains('_') {
261         let mut s = s.to_string();
262         s.retain(|c| c != '_');
263         return Symbol::intern(&s);
264     }
265     symbol
266 }
267
268 fn filtered_float_lit(
269     symbol: Symbol,
270     suffix: Option<Symbol>,
271     base: u32,
272 ) -> Result<LitKind, LitError> {
273     debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
274     if base != 10 {
275         return Err(LitError::NonDecimalFloat(base));
276     }
277     Ok(match suffix {
278         Some(suf) => LitKind::Float(
279             symbol,
280             ast::LitFloatType::Suffixed(match suf {
281                 sym::f32 => ast::FloatTy::F32,
282                 sym::f64 => ast::FloatTy::F64,
283                 _ => return Err(LitError::InvalidFloatSuffix),
284             }),
285         ),
286         None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
287     })
288 }
289
290 fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
291     debug!("float_lit: {:?}, {:?}", symbol, suffix);
292     filtered_float_lit(strip_underscores(symbol), suffix, 10)
293 }
294
295 fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
296     debug!("integer_lit: {:?}, {:?}", symbol, suffix);
297     let symbol = strip_underscores(symbol);
298     let s = symbol.as_str();
299
300     let base = match s.as_bytes() {
301         [b'0', b'x', ..] => 16,
302         [b'0', b'o', ..] => 8,
303         [b'0', b'b', ..] => 2,
304         _ => 10,
305     };
306
307     let ty = match suffix {
308         Some(suf) => match suf {
309             sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
310             sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
311             sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
312             sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
313             sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
314             sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
315             sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
316             sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
317             sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
318             sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
319             sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
320             sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
321             // `1f64` and `2f32` etc. are valid float literals, and
322             // `fxxx` looks more like an invalid float literal than invalid integer literal.
323             _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
324             _ => return Err(LitError::InvalidIntSuffix),
325         },
326         _ => ast::LitIntType::Unsuffixed,
327     };
328
329     let s = &s[if base != 10 { 2 } else { 0 }..];
330     u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
331         // Small bases are lexed as if they were base 10, e.g, the string
332         // might be `0b10201`. This will cause the conversion above to fail,
333         // but these kinds of errors are already reported by the lexer.
334         let from_lexer =
335             base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
336         if from_lexer { LitError::LexerError } else { LitError::IntTooLarge(base) }
337     })
338 }