1 //! Code related to parsing literals.
3 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
4 use crate::token::{self, Token};
5 use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
6 use rustc_span::symbol::{kw, sym, Symbol};
8 use std::{ascii, fmt, str};
10 // Escapes a string, represented as a symbol. Reuses the original symbol,
11 // avoiding interning, if no changes are required.
12 pub fn escape_string_symbol(symbol: Symbol) -> Symbol {
13 let s = symbol.as_str();
// `escape_default` renders the text with its escape sequences spelled out.
14 let escaped = s.escape_default().to_string();
// Identical text means nothing needed escaping, so the input symbol is handed
// back untouched; only a changed string is interned as a new symbol.
15 if s == escaped { symbol } else { Symbol::intern(&escaped) }
// Builds the escaped textual form of a single `char`.
// NOTE(review): the step turning `s` into the returned `Symbol` is not visible
// here; presumably it is interned -- confirm.
19 pub fn escape_char_symbol(ch: char) -> Symbol {
// `char::escape_default` already yields `char`s, so the `Into::<char>` map is
// an identity conversion before the pieces are collected into a `String`.
20 let s: String = ch.escape_default().map(Into::<char>::into).collect();
24 // Escapes a byte string.
// NOTE(review): the step turning `s` into the returned `Symbol` is not visible
// here; presumably it is interned -- confirm.
25 pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol {
// `escape_ascii` produces the ASCII-escaped rendering of the byte slice, which
// is materialized as a `String`.
26 let s = bytes.escape_ascii().to_string();
41 /// Converts literal token into a semantic literal.
///
/// Returns `Err(LitError::InvalidSuffix)` when a suffix is present on a token
/// kind that may not carry one, and `Err(LitError::LexerError)` when
/// unescaping the literal's contents reports an error. Integer and float
/// tokens are delegated to `integer_lit` / `float_lit`, which produce their
/// own errors.
42 pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
43 let token::Lit { kind, symbol, suffix } = lit;
// Reject suffixes up front for every kind that cannot have one.
44 if suffix.is_some() && !kind.may_have_suffix() {
45 return Err(LitError::InvalidSuffix);
// Boolean literal: the symbol is asserted to be a bool literal, and the
// value is decided by comparing it against the `true` keyword.
50 assert!(symbol.is_bool_lit());
51 LitKind::Bool(symbol == kw::True)
// Byte literal contents: any unescape failure is reported as `LexerError`.
54 return unescape_byte(symbol.as_str())
56 .map_err(|_| LitError::LexerError);
// Char literal contents: any unescape failure is reported as `LexerError`.
59 return unescape_char(symbol.as_str())
61 .map_err(|_| LitError::LexerError);
64 // There are some valid suffixes for integer and float literals,
65 // so all the handling is done internally.
66 token::Integer => return integer_lit(symbol, suffix),
67 token::Float => return float_lit(symbol, suffix),
70 // If there are no characters requiring special treatment we can
71 // reuse the symbol from the token. Otherwise, we must generate a
72 // new symbol because the string in the LitKind is different to the
73 // string in the token.
74 let s = symbol.as_str();
// Only text containing a backslash or a carriage return takes the
// unescaping path.
75 let symbol = if s.contains(['\\', '\r']) {
// The buffer is pre-sized to the source text length.
76 let mut buf = String::with_capacity(s.len());
// Records whether the unescape callback reported a failure.
77 let mut error = Ok(());
78 // Force-inlining here is aggressive but the closure is
79 // called on every char in the string, so it can be
80 // hot in programs with many long strings.
84 &mut #[inline(always)]
// The callback's first argument is ignored; only the per-character result
// is inspected.
85 |_, unescaped_char| match unescaped_char {
89 error = Err(LitError::LexerError);
99 LitKind::Str(symbol, ast::StrStyle::Cooked)
// Raw string with `n` carried through to `StrStyle::Raw(n)`.
101 token::StrRaw(n) => {
103 let s = symbol.as_str();
// Raw strings are only run through `unescape_literal` when they contain a
// carriage return.
105 if s.contains('\r') {
106 let mut buf = String::with_capacity(s.len());
107 let mut error = Ok(());
108 unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
109 match unescaped_char {
110 Ok(c) => buf.push(c),
113 error = Err(LitError::LexerError);
123 LitKind::Str(symbol, ast::StrStyle::Raw(n))
// Byte string: the text is unescaped into a byte buffer, converting each
// unescaped char to a byte with `byte_from_char`.
126 let s = symbol.as_str();
127 let mut buf = Vec::with_capacity(s.len());
128 let mut error = Ok(());
129 unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
130 Ok(c) => buf.push(byte_from_char(c)),
133 error = Err(LitError::LexerError);
138 LitKind::ByteStr(buf.into(), StrStyle::Cooked)
// Raw byte string with `n` carried through to `StrStyle::Raw(n)`.
140 token::ByteStrRaw(n) => {
141 let s = symbol.as_str();
// As with raw strings, unescaping only runs when a carriage return is
// present; otherwise the symbol's text bytes are used directly.
142 let bytes = if s.contains('\r') {
143 let mut buf = Vec::with_capacity(s.len());
144 let mut error = Ok(());
145 unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
146 Ok(c) => buf.push(byte_from_char(c)),
149 error = Err(LitError::LexerError);
156 symbol.to_string().into_bytes()
159 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
// An error token maps straight to the error literal kind.
161 token::Err => LitKind::Err,
// Formats a `LitKind` as literal text: byte, char and cooked string/byte-string
// contents are escaped, raw forms are wrapped in `n` `#` delimiters, and
// suffixed numeric forms get their type name appended.
166 impl fmt::Display for LitKind {
167 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169 LitKind::Byte(b) => {
// Escape the single byte and wrap it as `b'...'`.
170 let b: String = ascii::escape_default(b).map(Into::<char>::into).collect();
171 write!(f, "b'{b}'")?;
173 LitKind::Char(ch) => write!(f, "'{}'", escape_char_symbol(ch))?,
174 LitKind::Str(sym, StrStyle::Cooked) => write!(f, "\"{}\"", escape_string_symbol(sym))?,
// Raw strings are delimited by `n` repetitions of `#` on each side.
175 LitKind::Str(sym, StrStyle::Raw(n)) => write!(
177 "r{delim}\"{string}\"{delim}",
178 delim = "#".repeat(n as usize),
181 LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
182 write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
184 LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
185 // Unwrap because raw byte string literals can only contain ASCII.
186 let symbol = str::from_utf8(bytes).unwrap();
189 "br{delim}\"{string}\"{delim}",
190 delim = "#".repeat(n as usize),
194 LitKind::Int(n, ty) => {
// A typed integer gets its type name written; an unsuffixed one adds nothing.
197 ast::LitIntType::Unsigned(ty) => write!(f, "{}", ty.name())?,
198 ast::LitIntType::Signed(ty) => write!(f, "{}", ty.name())?,
199 ast::LitIntType::Unsuffixed => {}
202 LitKind::Float(symbol, ty) => {
// The float's symbol is written as-is, followed by the type name if suffixed.
203 write!(f, "{symbol}")?;
205 ast::LitFloatType::Suffixed(ty) => write!(f, "{}", ty.name())?,
206 ast::LitFloatType::Unsuffixed => {}
209 LitKind::Bool(b) => write!(f, "{}", if b { "true" } else { "false" })?,
211 // This only shows up in places like `-Zunpretty=hir` output, so we
212 // don't bother to produce something useful.
213 write!(f, "<bad-literal>")?;
222 /// Converts a token literal into a meta item literal.
///
/// Any error from `LitKind::from_token_lit` is propagated to the caller.
// NOTE(review): how `span` is stored is not visible here -- confirm.
223 pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError> {
// `symbol` and `suffix` are read from `token_lit` before it is passed by
// value into `LitKind::from_token_lit`.
225 symbol: token_lit.symbol,
226 suffix: token_lit.suffix,
227 kind: LitKind::from_token_lit(token_lit)?,
232 /// Cheaply converts a meta item literal into a token literal.
///
/// Only the token kind is derived from `self.kind`; the stored `symbol` and
/// `suffix` are reused as-is.
233 pub fn as_token_lit(&self) -> token::Lit {
// Each `LitKind` variant maps to one token kind; raw variants carry their `n`
// across unchanged.
234 let kind = match self.kind {
235 LitKind::Bool(_) => token::Bool,
236 LitKind::Str(_, ast::StrStyle::Cooked) => token::Str,
237 LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
238 LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
239 LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
240 LitKind::Byte(_) => token::Byte,
241 LitKind::Char(_) => token::Char,
242 LitKind::Int(..) => token::Integer,
243 LitKind::Float(..) => token::Float,
244 LitKind::Err => token::Err,
247 token::Lit::new(kind, self.symbol, self.suffix)
250 /// Converts an arbitrary token into meta item literal.
///
/// Returns `None` when `token::Lit::from_token` yields nothing for the token,
/// or when the conversion to a meta item literal fails (the error is dropped
/// by `.ok()`).
251 pub fn from_token(token: &Token) -> Option<MetaItemLit> {
252 token::Lit::from_token(token)
253 .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok())
// Produces a symbol whose text has every `_` removed.
// NOTE(review): the condition guarding the allocating path and the
// fall-through return are not visible here; presumably the input symbol is
// returned unchanged when it has no underscores -- confirm.
257 fn strip_underscores(symbol: Symbol) -> Symbol {
258 // Do not allocate a new string unless necessary.
259 let s = symbol.as_str();
// Copy the text, drop the underscores in place, and intern the result.
261 let mut s = s.to_string();
262 s.retain(|c| c != '_');
263 return Symbol::intern(&s);
// Builds a float `LitKind` from `symbol`, with the float type chosen by the
// optional suffix.
// NOTE(review): two parameters (`symbol` and `base`, as named in the debug
// line below) and the condition guarding the `NonDecimalFloat` return are not
// visible here; presumably non-decimal bases are rejected -- confirm.
268 fn filtered_float_lit(
270 suffix: Option<Symbol>,
272 ) -> Result<LitKind, LitError> {
273 debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
275 return Err(LitError::NonDecimalFloat(base));
// Only `f32` and `f64` are accepted as suffixes; any other suffix is an
// `InvalidFloatSuffix` error.
278 Some(suf) => LitKind::Float(
280 ast::LitFloatType::Suffixed(match suf {
281 sym::f32 => ast::FloatTy::F32,
282 sym::f64 => ast::FloatTy::F64,
283 _ => return Err(LitError::InvalidFloatSuffix),
// No suffix: the literal stays unsuffixed.
286 None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
// Converts a float token's symbol and optional suffix into a `LitKind`.
// Underscores are stripped from the symbol first, and the literal is treated
// as base 10.
290 fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
291 debug!("float_lit: {:?}, {:?}", symbol, suffix);
292 filtered_float_lit(strip_underscores(symbol), suffix, 10)
// Converts an integer token's symbol and optional suffix into a `LitKind`.
// Underscores are stripped, the base is read from a `0x` / `0o` / `0b` prefix,
// the suffix selects the integer type, and the digits are parsed as a `u128`.
// Suffixes starting with `f` are handed to `filtered_float_lit` instead.
295 fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
296 debug!("integer_lit: {:?}, {:?}", symbol, suffix);
297 let symbol = strip_underscores(symbol);
298 let s = symbol.as_str();
// NOTE(review): the fall-through arm is not visible here; presumably it
// yields base 10, given the `base != 10` test further down -- confirm.
300 let base = match s.as_bytes() {
301 [b'0', b'x', ..] => 16,
302 [b'0', b'o', ..] => 8,
303 [b'0', b'b', ..] => 2,
307 let ty = match suffix {
308 Some(suf) => match suf {
309 sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
310 sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
311 sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
312 sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
313 sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
314 sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
315 sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
316 sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
317 sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
318 sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
319 sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
320 sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
321 // `1f64` and `2f32` etc. are valid float literals, and
322 // `fxxx` looks more like an invalid float literal than invalid integer literal.
323 _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
324 _ => return Err(LitError::InvalidIntSuffix),
326 _ => ast::LitIntType::Unsuffixed,
// Skip the two-character base prefix before parsing non-decimal literals.
329 let s = &s[if base != 10 { 2 } else { 0 }..];
330 u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
331 // Small bases are lexed as if they were base 10, e.g., the string
332 // might be `0b10201`. This will cause the conversion above to fail,
333 // but these kinds of errors are already reported by the lexer.
// The check below looks for a decimal digit that is out of range for a base
// below 10; such failures map to `LexerError`, all others to `IntTooLarge`.
335 base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
336 if from_lexer { LitError::LexerError } else { LitError::IntTooLarge(base) }