1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
21 use std::collections::HashMap;
26 use syntax::parse::lexer;
27 use rustc::session::{self, config};
30 use syntax::ast::Name;
31 use syntax::parse::token;
32 use syntax::parse::lexer::TokenAndSpan;
// Parses a token-list file of `NAME=NUM` lines (produced for the ANTLR
// grammar) into a map from the ANTLR token number (kept as a string) to a
// prototype rustc token. Payload-carrying prototypes (literals, idents,
// lifetimes) use dummy `Name(0)` payloads; the real payload is filled in
// later from the matched text in `parse_antlr_token`.
// NOTE(review): interior lines of the original function are elided in this
// excerpt (the `match` header on `val`, several arms, closing braces, and
// the final return of `res`) — comments only describe visible lines.
34 fn parse_token_list(file: &str) -> HashMap<String, token::Token> {
// Helper: a throwaway identifier token used for names that only need to
// lex as "some identifier" (e.g. FLOAT_SUFFIX below).
35 fn id() -> token::Token {
36 token::Ident(ast::Ident { name: Name(0), ctxt: 0, }, token::Plain)
39 let mut res = HashMap::new();
// ANTLR reports EOF as token type -1, so seed the map with it.
41 res.insert("-1".to_string(), token::Eof);
43 for line in file.split('\n') {
// Split each line at the LAST '=' into name (left) and number (right).
44 let eq = match line.trim().rfind('=') {
49 let val = line.slice_to(eq);
50 let num = line.slice_from(eq + 1);
// Map the grammar's symbolic token name to a prototype rustc token.
53 "SHR" => token::BinOp(token::Shr),
54 "DOLLAR" => token::Dollar,
56 "STAR" => token::BinOp(token::Star),
57 "FLOAT_SUFFIX" => id(),
59 "SHL" => token::BinOp(token::Shl),
60 "LBRACE" => token::OpenDelim(token::Brace),
61 "RARROW" => token::RArrow,
62 "LIT_STR" => token::Literal(token::Str_(Name(0)), None),
63 "DOTDOT" => token::DotDot,
64 "MOD_SEP" => token::ModSep,
65 "DOTDOTDOT" => token::DotDotDot,
67 "AND" => token::BinOp(token::And),
68 "LPAREN" => token::OpenDelim(token::Paren),
69 "ANDAND" => token::AndAnd,
71 "LBRACKET" => token::OpenDelim(token::Bracket),
72 "LIT_STR_RAW" => token::Literal(token::StrRaw(Name(0), 0), None),
73 "RPAREN" => token::CloseDelim(token::Paren),
74 "SLASH" => token::BinOp(token::Slash),
75 "COMMA" => token::Comma,
76 "LIFETIME" => token::Lifetime(ast::Ident { name: Name(0), ctxt: 0 }),
77 "CARET" => token::BinOp(token::Caret),
78 "TILDE" => token::Tilde,
80 "PLUS" => token::BinOp(token::Plus),
81 "LIT_CHAR" => token::Literal(token::Char(Name(0)), None),
82 "LIT_BYTE" => token::Literal(token::Byte(Name(0)), None),
84 "RBRACKET" => token::CloseDelim(token::Bracket),
85 "COMMENT" => token::Comment,
86 "DOC_COMMENT" => token::DocComment(Name(0)),
88 "EQEQ" => token::EqEq,
91 "PERCENT" => token::BinOp(token::Percent),
92 "RBRACE" => token::CloseDelim(token::Brace),
// `Plus` is an arbitrary stand-in for BINOP/BINOPEQ; the actual operator
// is recovered later from the matched text via `str_to_binop`.
93 "BINOP" => token::BinOp(token::Plus),
94 "POUND" => token::Pound,
95 "OROR" => token::OrOr,
96 "LIT_INTEGER" => token::Literal(token::Integer(Name(0)), None),
97 "BINOPEQ" => token::BinOpEq(token::Plus),
98 "LIT_FLOAT" => token::Literal(token::Float(Name(0)), None),
99 "WHITESPACE" => token::Whitespace,
100 "UNDERSCORE" => token::Underscore,
101 "MINUS" => token::BinOp(token::Minus),
102 "SEMI" => token::Semi,
103 "COLON" => token::Colon,
104 "FAT_ARROW" => token::FatArrow,
105 "OR" => token::BinOp(token::Or),
108 "LIT_BINARY" => token::Literal(token::Binary(Name(0)), None),
109 "LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0), None),
110 "QUESTION" => token::Question,
// Key by the numeric id so ANTLR's `<toknum>` output can be looked up
// directly as a string, with no re-parsing.
114 res.insert(num.to_string(), tok);
117 debug!("Token map: {:?}", res);
// Maps an operator's source text (e.g. "%") to the corresponding
// `token::BinOpToken`; panics on unrecognized input.
// NOTE(review): most match arms are elided in this excerpt — only the
// "%" arm and the catch-all panic are visible.
121 fn str_to_binop(s: &str) -> token::BinOpToken {
127 "%" => token::Percent,
// Unknown operator text is a hard error: the token list and the matched
// content are expected to agree.
133 _ => panic!("Bad binop str `{}`", s),
137 /// Assuming a string/binary literal, strip out the leading/trailing
138 /// hashes and surrounding quotes/raw/binary prefix.
///
/// Returns the interned `Name` of the bare literal contents. Accepts the
/// `r`, `rb`, and `b` prefixes; `count` determines how many leading `#`s
/// a raw literal carries so the same number can be dropped from the tail.
139 fn fix(mut lit: &str) -> ast::Name {
140 if lit.char_at(0) == 'r' {
// `rb"..."` — raw binary literal: drop both prefix characters.
141 if lit.char_at(1) == 'b' {
142 lit = lit.slice_from(2)
// Plain raw literal `r"..."`: drop just the `r`.
144 lit = lit.slice_from(1);
// Binary literal `b"..."`: drop the `b`.
146 } else if lit.char_at(0) == 'b' {
147 lit = lit.slice_from(1);
150 let leading_hashes = count(lit);
// +1/-1 to adjust for single quotes
// NOTE(review): "single quotes" here presumably means the surrounding
// quote characters in general (string literals use `"`), trimmed along
// with the hashes — confirm against the original file.
153 parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
156 /// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
///
/// Returns the interned `Name` of the bare literal contents.
157 fn fixchar(mut lit: &str) -> ast::Name {
// Byte literal `b'...'`: drop the `b` prefix first.
158 if lit.char_at(0) == 'b' {
159 lit = lit.slice_from(1);
// Trim one quote character from each end and intern what remains.
162 parse::token::intern(lit.slice(1, lit.len() - 1))
// Counts the leading '#' characters of a raw-literal body — i.e. how
// many hashes delimit an `r#"..."#`-style literal after its prefix has
// been stripped by `fix`.
165 fn count(lit: &str) -> usize {
166 lit.chars().take_while(|c| *c == '#').count()
// Parses one line of ANTLR token-dump output — the
// `[@seq,start:end='content',<toknum>,line:col]` format — into a rustc
// `TokenAndSpan`, using `tokens` (from `parse_token_list`) to find the
// prototype token for the numeric type and then rebuilding its payload
// from the matched `content` text.
// NOTE(review): interior lines are elided in this excerpt (regex
// construction around the pattern, several match arms' continuations,
// the `offset` computation body, and the final return) — comments only
// describe visible lines.
169 fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
// Named groups pull out the sequence number, byte range, matched text,
// and numeric token type (which may be -1 for EOF).
171 r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
174 let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
175 let start = m.name("start").unwrap_or("");
176 let end = m.name("end").unwrap_or("");
177 let toknum = m.name("toknum").unwrap_or("");
178 let content = m.name("content").unwrap_or("");
// The prototype only fixes the token *kind*; its payload is a dummy.
180 let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
183 let nm = parse::token::intern(content);
185 debug!("What we got: content (`{}`), proto: {:?}", content, proto_tok);
// Rebuild the real token: substitute the actual operator / literal /
// identifier payload derived from `content` into the prototype's shape.
187 let real_tok = match *proto_tok {
188 token::BinOp(..) => token::BinOp(str_to_binop(content)),
// BINOPEQ's text ends in '='; strip it before operator lookup.
189 token::BinOpEq(..) => token::BinOpEq(str_to_binop(content.slice_to(
190 content.len() - 1))),
191 token::Literal(token::Str_(..), n) => token::Literal(token::Str_(fix(content)), n),
192 token::Literal(token::StrRaw(..), n) => token::Literal(token::StrRaw(fix(content),
194 token::Literal(token::Char(..), n) => token::Literal(token::Char(fixchar(content)), n),
195 token::Literal(token::Byte(..), n) => token::Literal(token::Byte(fixchar(content)), n),
196 token::DocComment(..) => token::DocComment(nm),
197 token::Literal(token::Integer(..), n) => token::Literal(token::Integer(nm), n),
198 token::Literal(token::Float(..), n) => token::Literal(token::Float(nm), n),
199 token::Literal(token::Binary(..), n) => token::Literal(token::Binary(nm), n),
200 token::Literal(token::BinaryRaw(..), n) => token::Literal(token::BinaryRaw(fix(content),
202 token::Ident(..) => token::Ident(ast::Ident { name: nm, ctxt: 0 },
204 token::Lifetime(..) => token::Lifetime(ast::Ident { name: nm, ctxt: 0 }),
// EOF spans need an adjustment relative to ordinary tokens; the value
// chosen is on the elided lines — TODO confirm against the original.
208 let offset = if real_tok == token::Eof
// Convert ANTLR's inclusive byte positions into a rustc span. `hi` is
// exclusive, hence the +1 on `end`.
215 let sp = syntax::codemap::Span {
216 lo: syntax::codemap::BytePos(start.parse::<u32>().unwrap() - offset),
217 hi: syntax::codemap::BytePos(end.parse::<u32>().unwrap() + 1),
218 expn_id: syntax::codemap::NO_EXPANSION
// Compares two tokens for the purposes of this checker. For identifiers
// only the interned name is compared (the hygiene context / style carried
// alongside is ignored); handling of other token kinds is on lines elided
// from this excerpt.
227 fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
229 &token::Ident(id, _) => match b {
230 &token::Ident(id2, _) => id == id2,
// Pulls the next token (with its span) from rustc's own lexer.
// The `Reader` trait must be in scope for the method call; the body
// itself is on lines elided from this excerpt.
238 fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
239 use syntax::parse::lexer::Reader;
// --- Driver (interior of `fn main`; the header line is elided in this
// excerpt, as are many interior lines — comments describe visible code
// only). Reads the token list (argv[2]) and the source file (argv[1]),
// lexes the source with rustc's lexer, reads the ANTLR token dump from
// stdin, and asserts the two token streams agree.
243 let args = std::os::args();
// argv[2]: the NAME=NUM token-list file consumed by parse_token_list.
245 let mut token_file = File::open(&Path::new(args[2].as_slice()));
246 let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());
// The ANTLR token dump arrives on stdin, one token per line; parse
// lazily so comparison happens as lines stream in.
248 let mut stdin = std::io::stdin();
249 let mut lock = stdin.lock();
250 let lines = lock.lines();
251 let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().as_slice().trim(),
// argv[1]: the Rust source to lex with rustc's own lexer.
254 let code = File::open(&Path::new(args[1].as_slice())).unwrap().read_to_string().unwrap();
// Minimal session/filemap setup needed to drive the string lexer.
255 let options = config::basic_options();
256 let session = session::build_session(options, None,
257 syntax::diagnostics::registry::Registry::new(&[]));
258 let filemap = parse::string_to_filemap(&session.parse_sess,
260 String::from_str("<n/a>"));
261 let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
// Walk the two streams in lockstep; both must hit EOF together and every
// pair of tokens must match in both span and kind.
263 for antlr_tok in antlr_tokens {
264 let rustc_tok = next(&mut lexer);
265 if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof {
269 assert!(rustc_tok.sp == antlr_tok.sp, "{:?} and {:?} have different spans", rustc_tok,
// Local macro: for the listed token patterns, compare via tok_cmp (which
// ignores payload details the two lexers can legitimately disagree on)
// and only warn on name mismatches; everything else must be exactly equal.
272 macro_rules! matches {
274 match rustc_tok.tok {
275 $($x => match antlr_tok.tok {
277 if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
278 // FIXME #15677: needs more robust escaping in
280 warn!("Different names for {:?} and {:?}", rustc_tok, antlr_tok);
283 _ => panic!("{:?} is not {:?}", antlr_tok, rustc_tok)
285 ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", rustc_tok, antlr_tok)
// Token kinds given the lenient (tok_cmp/warn) treatment: all literal
// forms, plus interpolated tokens and doc comments (whose payloads the
// ANTLR side cannot reproduce exactly).
291 token::Literal(token::Byte(..), _),
292 token::Literal(token::Char(..), _),
293 token::Literal(token::Integer(..), _),
294 token::Literal(token::Float(..), _),
295 token::Literal(token::Str_(..), _),
296 token::Literal(token::StrRaw(..), _),
297 token::Literal(token::Binary(..), _),
298 token::Literal(token::BinaryRaw(..), _),
301 token::Interpolated(..),
302 token::DocComment(..),