1 //! This module implements syntax validation that the parser doesn't handle.
3 //! A failed validation emits a diagnostic.
8 use rustc_lexer::unescape::{
9 self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode,
14 ast::{self, HasVisibility},
15 match_ast, AstNode, SyntaxError,
16 SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS},
17 SyntaxNode, SyntaxToken, TextSize, T,
/// Entry point of this module's syntax validation.
///
/// Iterates over `root.descendants()` and hands each recognised AST node to
/// its dedicated validator, passing the shared `errors` vector by mutable
/// reference so every validator can append to it. The signature promises a
/// `Vec<SyntaxError>`; presumably that is the accumulated `errors` vector.
20 pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
22 // * Add unescape validation of raw string literals and raw byte string literals
23 // * Add validation that doc comments are attached to nodes
25 let mut errors = Vec::new();
26 for node in root.descendants() {
// One arm per AST node type; each arm forwards the typed node (`it`) to the
// matching validator together with the shared error accumulator.
29 ast::Literal(it) => validate_literal(it, &mut errors),
30 ast::Const(it) => validate_const(it, &mut errors),
31 ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
// Field expressions and record-expression fields share the same check on
// their name reference.
32 ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
33 ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
34 ast::Visibility(it) => validate_visibility(it, &mut errors),
35 ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
36 ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
// Reference, pointer and fn-pointer types each get a trait-object check.
37 ast::RefType(it) => validate_trait_object_ref_ty(it, &mut errors),
38 ast::PtrType(it) => validate_trait_object_ptr_ty(it, &mut errors),
39 ast::FnPtrType(it) => validate_trait_object_fn_ptr_ret_ty(it, &mut errors),
40 ast::MacroRules(it) => validate_macro_rules(it, &mut errors),
41 ast::LetExpr(it) => validate_let_expr(it, &mut errors),
/// Translates an `unescape::EscapeError` into a fixed diagnostic message.
///
/// The return type is `&'static str`; each visible arm of the `match` on
/// `err` yields a string literal describing that escape error.
49 fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
// Short local alias so the match arms stay compact.
50 use unescape::EscapeError as EE;
53 let err_message = match err {
55 "Literal must not be empty"
57 EE::MoreThanOneChar => {
58 "Literal must be one character long"
// `\\` in the literal below is a single backslash in the emitted message.
61 "Character must be escaped: `\\`"
63 EE::InvalidEscape => {
// Both carriage-return variants share one message.
// NOTE(review): `\r` in the literal below is an escape sequence, so the
// emitted message contains an actual carriage-return character rather than
// the two characters `\` and `r` — confirm this is intended.
66 EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
67 "Character must be escaped: `\r`"
69 EE::EscapeOnlyChar => {
70 "Escape character `\\` must be escaped itself"
// --- ASCII hex escapes ---
72 EE::TooShortHexEscape => {
73 "ASCII hex escape code must have exactly two digits"
75 EE::InvalidCharInHexEscape => {
76 "ASCII hex escape code must contain only hex characters"
78 EE::OutOfRangeHexEscape => {
79 "ASCII hex escape code must be at most 0x7F"
// --- Unicode escapes ---
81 EE::NoBraceInUnicodeEscape => {
82 "Missing `{` to begin the unicode escape"
84 EE::InvalidCharInUnicodeEscape => {
85 "Unicode escape must contain only hex characters and underscores"
87 EE::EmptyUnicodeEscape => {
88 "Unicode escape must not be empty"
90 EE::UnclosedUnicodeEscape => {
91 "Missing `}` to terminate the unicode escape"
93 EE::LeadingUnderscoreUnicodeEscape => {
94 "Unicode escape code must not begin with an underscore"
96 EE::OverlongUnicodeEscape => {
97 "Unicode escape code must have at most 6 digits"
99 EE::LoneSurrogateUnicodeEscape => {
100 "Unicode escape code must not be a surrogate"
102 EE::OutOfRangeUnicodeEscape => {
103 "Unicode escape code must be at most 0x10FFFF"
// --- Byte-literal-specific errors ---
105 EE::UnicodeEscapeInByte => {
106 "Byte literals must not contain unicode escapes"
// The byte and byte-string variants of the non-ASCII error share one message.
108 EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
109 "Byte literals must not contain non-ASCII characters"
/// Checks the escape sequences of a literal and records any unescape errors
/// in `acc`.
///
/// String and byte-string literals are unescaped with a callback that
/// reports every failing range; char and byte literals are unescaped as a
/// single unit. Integer, float and bool literals need no check.
116 fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
117 // FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
// Returns the text between the first `prefix_len` bytes and the LAST
// occurrence of `end_delimiter`. Yields `None` when the delimiter is absent
// or when `prefix_len..end` is not a valid range of `text` (`str::get`).
118 fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
119 text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
122 let token = literal.token();
123 let text = token.text();
125 // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
// `off` arrives relative to the unquoted contents; adding `prefix_len` and
// the token's start offset turns it into an absolute position. The `unwrap`
// panics only if `off + prefix_len` does not fit into a `TextSize`.
126 let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
127 let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
128 acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
131 match literal.kind() {
132 ast::LiteralKind::String(s) => {
// Prefix length 1: presumably the opening `"` — the same value is passed to
// `push_err` so that reported offsets line up with the original token text.
134 if let Some(without_quotes) = unquote(text, 1, '"') {
135 unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
136 if let Err(err) = char {
137 push_err(1, (range.start, err));
143 ast::LiteralKind::ByteString(s) => {
// Prefix length 2: presumably the `b` marker plus the opening `"`.
145 if let Some(without_quotes) = unquote(text, 2, '"') {
146 unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
147 if let Err(err) = char {
148 push_err(2, (range.start, err));
154 ast::LiteralKind::Char => {
// `unescape_char` is applied to the whole unquoted text; only the
// `Some(Err(_))` case (quotes found, unescape failed) is of interest.
155 if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) {
159 ast::LiteralKind::Byte => {
// Same as the char case, with the 2-byte prefix and `unescape_byte`.
160 if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) {
// Numeric and boolean literals: nothing to validate (empty arm body).
164 ast::LiteralKind::IntNumber(_)
165 | ast::LiteralKind::FloatNumber(_)
166 | ast::LiteralKind::Bool(_) => {}
/// Consistency check on the pairing of curly braces in the tree under `root`.
///
/// Walks all nodes and tokens, pushing each `{` element onto a stack and
/// popping an entry to obtain its `pair`. The format strings below
/// ("unpaired curlys", "floating curlys") suggest the checks are assertions
/// that fail with a diagnostic dump — NOTE(review): confirm against the full
/// function body. Returns nothing.
170 pub(crate) fn validate_block_structure(root: &SyntaxNode) {
// Holds the `{` elements that are still waiting for their partner.
171 let mut stack = Vec::new();
172 for node in root.descendants_with_tokens() {
174 T!['{'] => stack.push(node),
// `pair` is the most recently pushed `{` element.
176 if let Some(pair) = stack.pop() {
180 "\nunpaired curlys:\n{}\n{:#?}\n",
// Condition checked here: the current element has no following sibling and
// the paired `{` has no preceding sibling.
185 node.next_sibling_or_token().is_none()
186 && pair.prev_sibling_or_token().is_none(),
187 "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n",
/// Reports numeric field names that are not plain decimal integers.
///
/// If `name_ref` resolves (via `int_token`) to an integer token whose text
/// contains any character that is not a base-10 digit, a `SyntaxError`
/// covering that token's range is pushed onto `errors`. A `None` name or a
/// non-integer name produces no error.
199 fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
200 if let Some(int_token) = int_token(name_ref) {
// Any non-digit character (per the message: an underscore or a suffix)
// makes the name invalid.
201 if int_token.text().chars().any(|c| !c.is_digit(10)) {
202 errors.push(SyntaxError::new(
203 "Tuple (struct) field access is only allowed through \
204 decimal integers with no underscores or suffix",
205 int_token.text_range(),
/// Returns the first child element of the name reference when it is a token
/// of kind `INT_NUMBER`.
///
/// Yields `None` if `name_ref` is `None`, if the syntax node has no first
/// child or token, if that first element is a node rather than a token, or
/// if the token's kind is not `INT_NUMBER`.
210 fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
211 name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
/// Validates a visibility qualifier and pushes any problems onto `errors`.
///
/// Two checks are visible:
/// * a visibility with no `in` token whose path is a single name reference
///   carrying an identifier token is reported as
///   "incorrect visibility restriction";
/// * a visibility whose enclosing `impl` has a trait
///   (`impl_def.trait_().is_some()`) is reported as
///   "Unnecessary visibility qualifier".
215 fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
// True when there is no `in` keyword and the path is one plain identifier.
216 let path_without_in_token = vis.in_token().is_none()
217 && vis.path().and_then(|p| p.as_single_name_ref()).and_then(|n| n.ident_token()).is_some();
218 if path_without_in_token {
219 errors.push(SyntaxError::new("incorrect visibility restriction", vis.syntax.text_range()));
// The node the visibility is attached to.
221 let parent = match vis.syntax().parent() {
225 match parent.kind() {
// Functions, consts and type aliases fall through to the impl check below.
226 FN | CONST | TYPE_ALIAS => (),
// Two levels above `parent`, cast to `ast::Impl`; presumably this is the
// impl block containing the item — verify against the tree layout.
230 let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) {
// Only impls that name a trait trigger the diagnostic.
234 if impl_def.trait_().is_some() {
235 errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
/// Reports inclusive range expressions that lack an end expression.
///
/// When `expr.op_kind()` is `RangeOp::Inclusive` and `expr.end()` is `None`,
/// a `SyntaxError` spanning the whole range expression is pushed onto
/// `errors`.
239 fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
240 if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
241 errors.push(SyntaxError::new(
242 "An inclusive range must have an end expression",
243 expr.syntax().text_range(),
/// Checks the position of the `self` and `crate` keywords inside a path.
///
/// A segment counts as the path start when it has no `::` token and its
/// parent path has no qualifier. For a `crate` segment an error is pushed
/// when it is not at the path start or when `use_prefix` finds an enclosing
/// use-tree prefix path. A `self` segment has its own diagnostic; its exact
/// trigger condition is presumably `!is_path_start` — TODO confirm.
248 fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
249 let path = segment.parent_path();
// No leading `::` on the segment and no qualifier on its path.
250 let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
252 if let Some(token) = segment.self_token() {
254 errors.push(SyntaxError::new(
255 "The `self` keyword is only allowed as the first segment of a path",
259 } else if let Some(token) = segment.crate_token() {
// `crate` is rejected if it is not first, or if a use-tree prefix precedes it.
260 if !is_path_start || use_prefix(path).is_some() {
261 errors.push(SyntaxError::new(
262 "The `crate` keyword is only allowed as the first segment of a path",
/// Searches the ancestors of `path` for a use tree whose path differs from
/// `path`, returning that path when found.
///
/// The walk starts at the parent (`skip(1)` drops the path's own node).
/// Enclosing `Path` nodes replace the tracked `path`, `UseTreeList` nodes are
/// stepped over, and a `UseTree` whose path is not the tracked one yields
/// `Some(tree_path)`.
268 fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
269 for node in path.syntax().ancestors().skip(1) {
272 ast::UseTree(it) => if let Some(tree_path) = it.path() {
273 // Even a top-level path exists within a `UseTree` so we must explicitly
274 // allow our path but disallow anything else
275 if tree_path != path {
276 return Some(tree_path);
// Use-tree lists carry no path of their own; keep climbing.
279 ast::UseTreeList(_) => continue,
// Track the outermost path seen so far, so the `UseTree` comparison above
// is made against the complete path.
280 ast::Path(parent) => path = parent,
/// Runs the trait-object check on the referent of a reference type.
///
/// Only acts when `ty.ty()` is a `DynTraitType`; a `Some(err)` result from
/// `validate_trait_object_ty` is then handled (presumably pushed onto
/// `errors` — TODO confirm).
289 fn validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>) {
// The inner `ty` shadows the parameter with the `dyn Trait` type it wraps.
290 if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
291 if let Some(err) = validate_trait_object_ty(ty) {
/// Runs the trait-object check on the pointee of a pointer type.
///
/// Only acts when `ty.ty()` is a `DynTraitType`; a `Some(err)` result from
/// `validate_trait_object_ty` is then handled (presumably pushed onto
/// `errors` — TODO confirm).
297 fn validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>) {
// The inner `ty` shadows the parameter with the `dyn Trait` type it wraps.
298 if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
299 if let Some(err) = validate_trait_object_ty(ty) {
/// Runs the trait-object check on the return type of a fn-pointer type.
///
/// Only acts when the fn pointer has a return type whose type is a
/// `DynTraitType`; a `Some(err)` result from `validate_trait_object_ty` is
/// then handled (presumably pushed onto `errors` — TODO confirm).
305 fn validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>) {
// `ret_type()` may be absent; `and_then` unwraps down to the actual type.
306 if let Some(ast::Type::DynTraitType(ty)) = ty.ret_type().and_then(|ty| ty.ty()) {
307 if let Some(err) = validate_trait_object_ty(ty) {
/// Detects an ambiguous `+` in a `dyn` trait-object type.
///
/// Returns `Some(SyntaxError)` covering the whole type when it has more than
/// one bound and the first non-trivia token before `dyn` is none of `(`, `<`
/// or `=`. The `?` operators return `None` early when the bound list, the
/// `dyn` token or a preceding token is missing.
313 fn validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError> {
314 let tbl = ty.type_bound_list()?;
// A single bound contains no `+`, so there is nothing to disambiguate.
316 if tbl.bounds().count() > 1 {
317 let dyn_token = ty.dyn_token()?;
// Step backwards from `dyn`, skipping trivia, to the token that precedes the
// type.
318 let potential_parenthesis =
319 algo::skip_trivia_token(dyn_token.prev_token()?, Direction::Prev)?;
320 let kind = potential_parenthesis.kind();
// `(`, `<` and `=` before `dyn` are the accepted contexts; anything else is
// reported.
321 if !matches!(kind, T!['('] | T![<] | T![=]) {
322 return Some(SyntaxError::new("ambiguous `+` in a type", ty.syntax().text_range()));
/// Rejects visibility qualifiers on `macro_rules!` definitions.
///
/// If `mac.visibility()` is present, a `SyntaxError` spanning that visibility
/// node is pushed onto `errors`.
328 fn validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>) {
329 if let Some(vis) = mac.visibility() {
330 errors.push(SyntaxError::new(
331 "visibilities are not allowed on `macro_rules!` items",
332 vis.syntax().text_range(),
/// Reports a `mut` keyword on a const item.
///
/// Starting from a token of `const_` (presumably the `const` keyword — TODO
/// confirm), the chain takes the next token, skips trivia forwards, and keeps
/// the result only if it is a `mut` token. When such a token exists, a
/// `SyntaxError` covering it is pushed onto `errors`.
337 fn validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>) {
338 if let Some(mut_token) = const_
340 .and_then(|t| t.next_token())
// Ignore trivia between the starting token and what follows.
341 .and_then(|t| algo::skip_trivia_token(t, Direction::Next))
342 .filter(|t| t.kind() == T![mut])
344 errors.push(SyntaxError::new("const globals cannot be mutable", mut_token.text_range()));
/// Reports `let` expressions that appear in an unsupported position.
///
/// Climbs from the `let` expression through its ancestors, inspecting each
/// one: parenthesised expressions, binary expressions using the logical-and
/// operator, and `if` / `while` / match-guard nodes are treated specially.
/// Otherwise a `SyntaxError` covering the `let` expression is pushed onto
/// `errors`. NOTE(review): the per-branch actions (continue climbing vs.
/// accept) should be confirmed against the full function body.
348 fn validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>) {
// Despite its name, `token` holds a syntax node: it starts as the `let`
// expression's own node and is replaced by its parent as the walk proceeds.
349 let mut token = let_.syntax().clone();
351 token = match token.parent() {
// Parentheses around the expression.
356 if ast::ParenExpr::can_cast(token.kind()) {
358 } else if let Some(it) = ast::BinExpr::cast(token.clone()) {
// Only the logical-and operator is singled out among binary expressions.
359 if it.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And)) {
362 } else if ast::IfExpr::can_cast(token.kind())
363 || ast::WhileExpr::can_cast(token.kind())
364 || ast::MatchGuard::can_cast(token.kind())
366 // It must be part of the condition since the expressions are inside a block.
372 errors.push(SyntaxError::new(
373 "`let` expressions are not supported here",
374 let_.syntax().text_range(),