1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 #![feature(globs, phase, macro_rules)]
19 #[phase(link, plugin)]
22 #[phase(plugin)] extern crate regex_macros;
24 use std::collections::HashMap;
28 use syntax::parse::lexer;
29 use rustc::driver::{session, config};
32 use syntax::ast::Name;
33 use syntax::parse::token::*;
34 use syntax::parse::lexer::TokenAndSpan;
/// Builds the lookup table from token-number text to a prototype `Token`.
///
/// `file` is scanned line by line; each line is split at its last `=` into a
/// name part (`val`) and a number part (`num`), and the prototype token chosen
/// for the name is stored under `num`. The payloads of the prototype tokens
/// (`Name(0)`, `PLUS`, ...) are dummies: `parse_antlr_token` matches on the
/// variant only and rebuilds the payload from the token text.
36 fn parse_token_list(file: &str) -> HashMap<String, Token> {
// Placeholder identifier token (name 0, ctxt 0, flag `false`); presumably the
// body of the `id()` helper used in the table below — TODO confirm.
38 IDENT(ast::Ident { name: Name(0), ctxt: 0, }, false)
41 let mut res = HashMap::new();
// Token number "-1" is registered as EOF before any line is read.
43 res.insert("-1".to_string(), EOF);
45 for line in file.split('\n') {
// `rfind` picks the LAST '=' so any earlier '=' stays in the name part.
46 let eq = match line.trim().rfind('=') {
// NOTE(review): `eq` indexes into `line.trim()` but is applied to the
// untrimmed `line`; the two disagree if a line has leading whitespace, and
// `num` keeps any trailing whitespace — confirm the input has neither.
51 let val = line.slice_to(eq);
52 let num = line.slice_from(eq + 1);
// Name -> prototype token table.
58 "STAR" => BINOP(STAR),
59 "FLOAT_SUFFIX" => id(),
64 "LIT_STR" => LIT_STR(Name(0)),
67 "DOTDOTDOT" => DOTDOTDOT,
73 "LBRACKET" => LBRACKET,
74 "LIT_STR_RAW" => LIT_STR_RAW(Name(0), 0),
76 "SLASH" => BINOP(SLASH),
78 "LIFETIME" => LIFETIME(ast::Ident { name: Name(0), ctxt: 0 }),
79 "CARET" => BINOP(CARET),
82 "PLUS" => BINOP(PLUS),
83 "LIT_CHAR" => LIT_CHAR(Name(0)),
84 "LIT_BYTE" => LIT_BYTE(Name(0)),
86 "RBRACKET" => RBRACKET,
88 "DOC_COMMENT" => DOC_COMMENT(Name(0)),
93 "PERCENT" => BINOP(PERCENT),
// Generic BINOP / BINOPEQ entries use PLUS as a stand-in operator.
95 "BINOP" => BINOP(PLUS),
98 "LIT_INTEGER" => LIT_INTEGER(Name(0)),
99 "BINOPEQ" => BINOPEQ(PLUS),
100 "LIT_FLOAT" => LIT_FLOAT(Name(0)),
102 "UNDERSCORE" => UNDERSCORE,
103 "MINUS" => BINOP(MINUS),
106 "FAT_ARROW" => FAT_ARROW,
110 "LIT_BINARY" => LIT_BINARY(Name(0)),
111 "LIT_BINARY_RAW" => LIT_BINARY_RAW(Name(0), 0),
// Keyed by the number text, so lookups can use the captured string directly.
115 res.insert(num.to_string(), tok);
118 debug!("Token map: {}", res);
/// Maps the textual form of a binary operator to its `BinOp` value.
///
/// Any string that matches no arm aborts the program via `fail!`.
122 fn str_to_binop(s: &str) -> BinOp {
// Fallback arm: unrecognised operator text is treated as a fatal error.
134 _ => fail!("Bad binop str `{}`", s)
138 /// Assuming a string/binary literal, strip out the leading/trailing
139 /// hashes and surrounding quotes/raw/binary prefix.
///
/// Returns the interned name of what remains. Position 0 of `lit` is read
/// unconditionally, so `lit` must not be empty.
140 fn fix(mut lit: &str) -> ast::Name {
// Prefix stripping: "rb" drops two characters, a lone "r" or "b" drops one.
// NOTE(review): this tests for 'r' followed by 'b'. Rust spells raw byte
// strings `br"..."`, which would take the 'b' branch below and leave the 'r'
// in place, so `count` would then see no hashes — confirm which spelling the
// ANTLR token text actually uses.
141 if lit.char_at(0) == 'r' {
142 if lit.char_at(1) == 'b' {
143 lit = lit.slice_from(2)
145 lit = lit.slice_from(1);
147 } else if lit.char_at(0) == 'b' {
148 lit = lit.slice_from(1);
// Number of '#' characters now at the front of the literal; the same number
// is assumed to trail the closing quote.
151 let leading_hashes = count(lit);
153 // +1/-1 to skip the opening and closing quote characters
154 parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
157 /// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
///
/// Returns the interned name of the text between the quotes. Position 0 of
/// `lit` is read unconditionally, so `lit` must not be empty.
158 fn fixchar(mut lit: &str) -> ast::Name {
// Drop the byte-literal marker, if present.
159 if lit.char_at(0) == 'b' {
160 lit = lit.slice_from(1);
// Drop the first and last character (the surrounding quotes).
163 parse::token::intern(lit.slice(1, lit.len() - 1))
/// Returns how many consecutive `#` characters `lit` starts with.
///
/// Counting stops at the first character that is not `#`, so a string that
/// begins with anything else yields 0.
166 fn count(lit: &str) -> uint {
167 lit.chars().take_while(|c| *c == '#').count()
/// Converts one line of ANTLR token output into a `TokenAndSpan`.
///
/// `s` must match the pattern below; `tokens` is the token-number table
/// produced by `parse_token_list`. Panics if the line does not match or if
/// its token number is missing from `tokens`.
170 fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
// Expected shape: [@seq,start:end='content',<toknum>,N:N]
// `content` is matched non-greedily; `toknum` may be negative.
172 r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
175 let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
176 let start = m.name("start");
177 let end = m.name("end");
178 let toknum = m.name("toknum");
179 let content = m.name("content");
// The prototype token only tells us which variant to build.
181 let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map",
// Interned raw token text, used as-is by the variants that need no trimming.
184 let nm = parse::token::intern(content);
186 debug!("What we got: content (`{}`), proto: {}", content, proto_tok);
// Rebuild the token's payload from the text ANTLR reported.
188 let real_tok = match *proto_tok {
189 BINOP(..) => BINOP(str_to_binop(content)),
// Drop the last character of the text (presumably the trailing '=') before
// looking up the operator.
190 BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
191 LIT_STR(..) => LIT_STR(fix(content)),
// NOTE(review): `count` only counts '#' at the very start of its argument, and
// `content` here is the unstripped text. If it still begins with an 'r'/'b'
// prefix (which `fix` expects), the hash count is always 0 — confirm. The
// same applies to LIT_BINARY_RAW below.
192 LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
193 LIT_CHAR(..) => LIT_CHAR(fixchar(content)),
194 LIT_BYTE(..) => LIT_BYTE(fixchar(content)),
195 DOC_COMMENT(..) => DOC_COMMENT(nm),
196 LIT_INTEGER(..) => LIT_INTEGER(nm),
197 LIT_FLOAT(..) => LIT_FLOAT(nm),
198 LIT_BINARY(..) => LIT_BINARY(nm),
199 LIT_BINARY_RAW(..) => LIT_BINARY_RAW(fix(content), count(content)),
// Identifiers are rebuilt with the flag set to `true`, unlike the `false`
// used for the placeholder in `parse_token_list`.
200 IDENT(..) => IDENT(ast::Ident { name: nm, ctxt: 0 }, true),
201 LIFETIME(..) => LIFETIME(ast::Ident { name: nm, ctxt: 0 }),
// EOF gets a different start-position adjustment than every other token.
205 let offset = if real_tok == EOF {
// `start`/`end` are parsed as u32; `hi` is `end + 1`, presumably because
// ANTLR's end index is inclusive — TODO confirm.
211 let sp = syntax::codemap::Span {
212 lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),
213 hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),
/// Compares two tokens for the purposes of the verifier.
///
/// For two `IDENT` tokens only the identifier is compared; the second field
/// of each is ignored.
223 fn tok_cmp(a: &Token, b: &Token) -> bool {
225 &IDENT(id, _) => match b {
// Same variant on both sides: equal iff the identifiers are equal.
226 &IDENT(id2, _) => id == id2,
/// Produces a `TokenAndSpan` from the given rustc `StringReader`.
234 fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
// Bring the `Reader` trait into scope locally, presumably so its methods can
// be called on `r`.
235 use syntax::parse::lexer::Reader;
// Driver: compares the token stream ANTLR produced (read from stdin) against
// the tokens rustc's own lexer produces for the same source file.
// Argument 1 is the source file path, argument 2 the token-list file path.
239 let args = std::os::args();
241 let mut token_file = File::open(&Path::new(args.get(2).as_slice()));
242 let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());
// One ANTLR token per stdin line; each line is trimmed before parsing.
244 let mut stdin = std::io::stdin();
245 let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(),
// Load the source and set up a rustc session plus a lexer over it.
248 let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap();
249 let options = config::basic_options();
250 let session = session::build_session(options, None,
251 syntax::diagnostics::registry::Registry::new([]));
252 let filemap = parse::string_to_filemap(&session.parse_sess,
254 String::from_str("<n/a>"));
255 let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
// Walk both token streams in lock step, driven by the ANTLR side.
257 for antlr_tok in antlr_tokens {
258 let rustc_tok = next(&mut lexer);
// Both lexers reaching EOF together is handled as a special case.
259 if rustc_tok.tok == EOF && antlr_tok.tok == EOF {
// Spans must agree exactly for every token pair.
263 assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok,
// `matches!` takes a list of token patterns. For those, both tokens must be
// the same variant, but differing payloads (per `tok_cmp`) only produce a
// warning. Tokens not in the list must compare equal.
266 macro_rules! matches (
268 match rustc_tok.tok {
269 $($x => match antlr_tok.tok {
271 if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
272 // FIXME #15677: needs more robust escaping in
274 warn!("Different names for {} and {}", rustc_tok, antlr_tok);
// Same rustc variant but a different ANTLR variant is fatal.
277 _ => fail!("{} is not {}", antlr_tok, rustc_tok)
// Every token not listed in the macro invocation needs full equality.
279 ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
284 matches!(LIT_BYTE(..),