1 #![feature(globs, phase, macro_rules)]
12 #[phase(plugin)] extern crate regex_macros;
14 use std::collections::HashMap;
18 use syntax::parse::lexer;
19 use rustc::driver::{session, config};
22 use syntax::ast::Name;
23 use syntax::parse::token::*;
24 use syntax::parse::lexer::TokenAndSpan;
/// Builds a lookup table from token numbers (kept as strings) to prototype
/// `Token` values, reading `file` one line at a time.
///
/// Each line is split at its last `=`: the text before it is taken as the
/// token name and the text after it as the token number, which becomes the
/// map key. The key `"-1"` is pre-seeded with `EOF`.
///
/// The payloads stored in the prototypes (`Name(0)`, `PLUS`, `0`, ...) are
/// dummy values; `parse_antlr_token` matches the prototypes by variant only
/// and rebuilds the payload from the token text.
26 fn parse_token_list(file: &str) -> HashMap<String, Token> {
// Placeholder identifier token; presumably the body of the local `id()`
// helper referenced by the "FLOAT_SUFFIX" arm below -- TODO confirm.
28 IDENT(ast::Ident { name: Name(0), ctxt: 0, }, false)
31 let mut res = HashMap::new();
33 res.insert("-1".to_string(), EOF);
35 for line in file.split('\n') {
// NOTE(review): the index of `=` is computed on `line.trim()` but is then
// used to slice the untrimmed `line`; with leading whitespace the two
// would disagree, and any trailing whitespace stays in `num` -- confirm
// the input never contains either.
36 let eq = match line.trim().rfind('=') {
41 let val = line.slice_to(eq);
42 let num = line.slice_from(eq + 1);
// Token name -> prototype token (presumably a match on `val`).
48 "STAR" => BINOP(STAR),
49 "FLOAT_SUFFIX" => id(),
54 "LIT_STR" => LIT_STR(Name(0)),
57 "DOTDOTDOT" => DOTDOTDOT,
63 "LBRACKET" => LBRACKET,
64 "LIT_STR_RAW" => LIT_STR_RAW(Name(0), 0),
66 "SLASH" => BINOP(SLASH),
68 "LIFETIME" => LIFETIME(ast::Ident { name: Name(0), ctxt: 0 }),
69 "CARET" => BINOP(CARET),
72 "PLUS" => BINOP(PLUS),
73 "LIT_CHAR" => LIT_CHAR(Name(0)),
74 "LIT_BYTE" => LIT_BYTE(Name(0)),
76 "RBRACKET" => RBRACKET,
78 "DOC_COMMENT" => DOC_COMMENT(Name(0)),
83 "PERCENT" => BINOP(PERCENT),
// Generic operator entries use `PLUS` as a stand-in operator.
85 "BINOP" => BINOP(PLUS),
88 "LIT_INTEGER" => LIT_INTEGER(Name(0)),
89 "BINOPEQ" => BINOPEQ(PLUS),
90 "LIT_FLOAT" => LIT_FLOAT(Name(0)),
92 "UNDERSCORE" => UNDERSCORE,
93 "MINUS" => BINOP(MINUS),
96 "FAT_ARROW" => FAT_ARROW,
100 "LIT_BINARY" => LIT_BINARY(Name(0)),
101 "LIT_BINARY_RAW" => LIT_BINARY_RAW(Name(0), 0),
// Keyed by the numeric text after the `=`, stored as a `String`.
105 res.insert(num.to_string(), tok);
108 debug!("Token map: {}", res);
/// Converts the textual spelling of a binary operator into a `BinOp`.
///
/// Any string that no arm recognises aborts via `fail!` with a
/// "Bad binop str" message that includes the offending text.
112 fn str_to_binop(s: &str) -> BinOp {
// Fallback arm: unknown operator text is treated as a fatal error.
124 _ => fail!("Bad binop str `{}`", s)
128 /// Assuming a string/binary literal, strip out the leading/trailing
129 /// hashes and surrounding quotes/raw/binary prefix.
///
/// Returns the interned name of what is left after removing the prefix,
/// `leading_hashes` bytes plus one quote byte from the front, and the same
/// number of bytes from the back.
///
/// Assumes `lit` is non-empty and well-formed (matching hashes and quotes on
/// both ends) -- TODO confirm against the callers.
130 fn fix(mut lit: &str) -> ast::Name {
// NOTE(review): this tests for `r` followed by `b`, but Rust spells raw byte
// strings `br"..."`. With a `br` literal the `b` branch below would strip
// only one byte, leaving the `r` in place so that `count` sees no leading
// hashes -- confirm which spelling actually reaches this function.
131 if lit.char_at(0) == 'r' {
132 if lit.char_at(1) == 'b' {
133 lit = lit.slice_from(2)
135 lit = lit.slice_from(1);
137 } else if lit.char_at(0) == 'b' {
138 lit = lit.slice_from(1);
// Number of `#` characters at the start of the prefix-stripped literal.
141 let leading_hashes = count(lit);
143 // +1/-1 to adjust for single quotes
144 parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
147 /// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
///
/// Returns the interned name of the text between the first and last byte of
/// the (prefix-stripped) literal.
///
/// Assumes `lit` is non-empty and still has a quote on each end -- TODO confirm.
148 fn fixchar(mut lit: &str) -> ast::Name {
// Drop the byte-literal marker, if present.
149 if lit.char_at(0) == 'b' {
150 lit = lit.slice_from(1);
// Cut one byte off each end (the surrounding quotes) and intern the rest.
153 parse::token::intern(lit.slice(1, lit.len() - 1))
/// Returns how many `#` characters `lit` starts with.
///
/// Counting stops at the first character that is not `#`, so a string that
/// begins with anything else (including a literal prefix such as `r`)
/// yields 0.
156 fn count(lit: &str) -> uint {
157 lit.chars().take_while(|c| *c == '#').count()
/// Parses one line of token output, of the form
/// `[@seq,start:end='content',<toknum>,line:col]`, into a `TokenAndSpan`.
///
/// `toknum` is looked up in `tokens` to obtain a prototype token; the real
/// token is then rebuilt from `content` according to the prototype's variant,
/// and the span is derived from `start` and `end`.
///
/// Fails if the line does not match the expected format, if `toknum` is not
/// present in `tokens`, or if `start`/`end` do not parse as `u32`.
160 fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
// `content` is matched lazily (`.+?`); `toknum` may be negative (`-?\d+`).
161 let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]");
163 let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
164 let start = m.name("start");
165 let end = m.name("end");
166 let toknum = m.name("toknum");
167 let content = m.name("content");
169 let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice());
// The raw token text, interned as-is; used by the variants that need no
// prefix/quote stripping.
171 let nm = parse::token::intern(content);
173 debug!("What we got: content (`{}`), proto: {}", content, proto_tok);
// Only the prototype's variant matters here; its payload is replaced with
// one derived from `content`.
175 let real_tok = match *proto_tok {
176 BINOP(..) => BINOP(str_to_binop(content)),
// Drops the last byte of `content` (presumably the trailing `=`) before
// looking up the operator.
177 BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
178 LIT_STR(..) => LIT_STR(fix(content)),
// NOTE(review): `count(content)` runs on the literal with its prefix still
// attached; if `content` starts with `r`/`br` the hash count would be 0 --
// confirm this is the intended value.
179 LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
180 LIT_CHAR(..) => LIT_CHAR(fixchar(content)),
181 LIT_BYTE(..) => LIT_BYTE(fixchar(content)),
182 DOC_COMMENT(..) => DOC_COMMENT(nm),
183 LIT_INTEGER(..) => LIT_INTEGER(nm),
184 LIT_FLOAT(..) => LIT_FLOAT(nm),
// NOTE(review): unlike `LIT_STR`, the non-raw binary literal keeps its
// prefix and quotes (`nm` is the unmodified text) -- confirm intended.
185 LIT_BINARY(..) => LIT_BINARY(nm),
186 LIT_BINARY_RAW(..) => LIT_BINARY_RAW(fix(content), count(content)),
187 IDENT(..) => IDENT(ast::Ident { name: nm, ctxt: 0 }, true),
188 LIFETIME(..) => LIFETIME(ast::Ident { name: nm, ctxt: 0 }),
// The low end of the span is shifted by an offset that depends on whether
// the token is `EOF`.
192 let offset = if real_tok == EOF {
198 let sp = syntax::codemap::Span {
199 lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),
// `end + 1`: presumably converts an inclusive end index into an exclusive
// upper bound -- TODO confirm.
200 hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),
/// Compares two tokens and reports whether they are considered equal.
///
/// When both are `IDENT`s, only the identifiers are compared; the boolean
/// flag carried by each `IDENT` is ignored.
210 fn tok_cmp(a: &Token, b: &Token) -> bool {
212 &IDENT(id, _) => match b {
213 &IDENT(id2, _) => id == id2,
/// Obtains a `TokenAndSpan` from the string reader `r`.
221 fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
// Imported locally, presumably so the `Reader` trait's methods can be
// called on `r` -- TODO confirm.
222 use syntax::parse::lexer::Reader;
226 let args = std::os::args();
228 let token_map = parse_token_list(File::open(&Path::new(args.get(2).as_slice())).unwrap().read_to_string().unwrap().as_slice());
229 let mut stdin = std::io::stdin();
230 let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
232 let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap();
233 let options = config::basic_options();
234 let session = session::build_session(options, None,
235 syntax::diagnostics::registry::Registry::new([]));
236 let filemap = parse::string_to_filemap(&session.parse_sess,
238 String::from_str("<n/a>"));
239 let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
241 for antlr_tok in antlr_tokens {
242 let rustc_tok = next(&mut lexer);
243 if rustc_tok.tok == EOF && antlr_tok.tok == EOF {
247 assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok);
249 macro_rules! matches (
251 match rustc_tok.tok {
252 $($x => match antlr_tok.tok {
254 if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
255 // FIXME #15677: needs more robust escaping in
257 warn!("Different names for {} and {}", rustc_tok, antlr_tok);
260 _ => fail!("{} is not {}", antlr_tok, rustc_tok)
262 ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
267 matches!(LIT_BYTE(..),