// Lexer grammar for Rust source text (ANTLR 4 syntax).
//
// Layout of this file:
//   1. target-language helper members used by semantic predicates
//   2. the declared token vocabulary
//   3. operator and structural symbols
//   4. literals (char, byte, integer, float, string, raw string)
//   5. identifiers, lifetimes, whitespace and comments
//   6. tokens that are only valid at the very start of the input
//
// Rule order matters: when two rules match the same longest prefix, ANTLR
// picks the rule that appears first in the grammar.

lexer grammar RustLexer;

@lexer::members {
  // True when the lexer's input stream is currently positioned at `pos`
  // (number of characters consumed so far). Used by the start-of-file rules.
  public boolean is_at(int pos) {
    return _input.index() == pos;
  }
}

tokens {
    EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
    MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
    BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
    MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
    LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
    LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
    LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
    COMMENT, SHEBANG, UTF8_BOM
}

// XID_Start and XID_Continue (used by IDENT) come from these grammars.
import xidstart , xidcontinue;


/* Expression-operator symbols */

EQ      : '=' ;
LT      : '<' ;
LE      : '<=' ;
EQEQ    : '==' ;
NE      : '!=' ;
GE      : '>=' ;
GT      : '>' ;
ANDAND  : '&&' ;
OROR    : '||' ;
NOT     : '!' ;
TILDE   : '~' ;
PLUS    : '+' ;
MINUS   : '-' ;
STAR    : '*' ;
SLASH   : '/' ;
PERCENT : '%' ;
CARET   : '^' ;
AND     : '&' ;
OR      : '|' ;
SHL     : '<<' ;
SHR     : '>>' ;

// Any single binary operator. It exists mainly as a building block for
// BINOPEQ: every alternative is also its own token rule declared above.
BINOP
    : PLUS
    | SLASH
    | MINUS
    | STAR
    | PERCENT
    | CARET
    | AND
    | OR
    | SHL
    | SHR
    ;

// Compound assignment: a binary operator immediately followed by '='.
BINOPEQ : BINOP EQ ;

/* "Structural symbols" */

AT         : '@' ;
DOT        : '.' ;
DOTDOT     : '..' ;
DOTDOTDOT  : '...' ;
COMMA      : ',' ;
SEMI       : ';' ;
COLON      : ':' ;
MOD_SEP    : '::' ;
RARROW     : '->' ;
FAT_ARROW  : '=>' ;
LPAREN     : '(' ;
RPAREN     : ')' ;
LBRACKET   : '[' ;
RBRACKET   : ']' ;
LBRACE     : '{' ;
RBRACE     : '}' ;
POUND      : '#';
DOLLAR     : '$' ;
UNDERSCORE : '_' ;

// Literals

// One hexadecimal digit, either case.
fragment HEXIT
  : [0-9a-fA-F]
  ;

// The part of an escape sequence that follows the backslash:
// a simple one-character escape, a two-digit hex escape, a fixed-width
// 4- or 8-digit unicode escape, or a braced unicode escape of 1 to 6 digits.
fragment CHAR_ESCAPE
  : [nrt\\'"0]
  | [xX] HEXIT HEXIT
  | 'u' HEXIT HEXIT HEXIT HEXIT
  | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
  | 'u{' HEXIT '}'
  | 'u{' HEXIT HEXIT '}'
  | 'u{' HEXIT HEXIT HEXIT '}'
  | 'u{' HEXIT HEXIT HEXIT HEXIT '}'
  | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
  | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
  ;

// A literal suffix is lexically just an identifier glued onto the literal.
fragment SUFFIX
  : IDENT
  ;

// A suffix for integer literals. The predicate refuses a suffix that starts
// with 'e' or 'E' -- presumably so that text such as 1e10 is left for
// LIT_FLOAT's exponent part instead of being taken as integer + suffix.
fragment INTEGER_SUFFIX
  : { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
  ;

// Character literal: a backslash escape, a single character other than
// backslash, quote, newline, tab or carriage return, or a UTF-16 surrogate
// pair (high surrogate followed by low surrogate); optional suffix.
LIT_CHAR
  : '\'' ( '\\' CHAR_ESCAPE
         | ~[\\'\n\t\r]
         | '\ud800' .. '\udbff' '\udc00' .. '\udfff'
         )
    '\'' SUFFIX?
  ;

// Byte literal: like LIT_CHAR with a leading 'b', but only the simple and
// two-digit hex escapes are accepted. The unescaped alternative allows an
// optional trailing low surrogate after the character.
LIT_BYTE
  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT
                 | [nrt\\'"0] )
          | ~[\\'\n\t\r] '\udc00'..'\udfff'?
          )
    '\'' SUFFIX?
  ;

// Integer literal in decimal, binary (0b), octal (0o) or hexadecimal (0x),
// with '_' permitted as a digit separator and an optional suffix.
LIT_INTEGER

  : [0-9][0-9_]* INTEGER_SUFFIX?
  | '0b' [01_]+ INTEGER_SUFFIX?
  | '0o' [0-7_]+ INTEGER_SUFFIX?
  | '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
  ;

// Float literal: decimal digits followed by either
//   * a bare trailing '.', guarded by the predicate below, or
//   * an optional fraction, an optional exponent and an optional suffix.
// Every part of the second alternative is optional, so this rule can also
// match a plain run of digits; LIT_INTEGER is declared first and therefore
// takes precedence when both match the same text.
LIT_FLOAT
  : [0-9][0-9_]* ('.' {
        /* dot followed by another dot is a range, not a float */
        _input.LA(1) != '.' &&
        /* dot followed by an identifier is an integer with a function call, not a float */
        _input.LA(1) != '_' &&
        !(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
        !(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
  }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
  ;

// String literal. The body is matched non-greedily and may contain a
// backslash-newline (LF or CRLF) continuation, a backslash escape, or any
// other character; optional suffix after the closing quote.
LIT_STR
  : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
  ;

// Byte-string forms: the corresponding string literal prefixed with 'b'.
LIT_BYTE_STR : 'b' LIT_STR ;
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;

/* this is a bit messy */

// Raw string body: a quoted run of arbitrary characters, optionally wrapped
// in matching pairs of '#'. The two fragments recurse into each other so
// that each '#' opened on the left is closed by one '#' on the right.
fragment LIT_STR_RAW_INNER
  : '"' .*? '"'
  | LIT_STR_RAW_INNER2
  ;

fragment LIT_STR_RAW_INNER2
  : POUND LIT_STR_RAW_INNER POUND
  ;

// Raw string literal: 'r', the (possibly '#'-wrapped) body, optional suffix.
LIT_STR_RAW
  : 'r' LIT_STR_RAW_INNER SUFFIX?
  ;


QUESTION : '?';

// Identifier: one XID_Start character followed by any number of
// XID_Continue characters (both defined in the imported grammars).
IDENT : XID_Start XID_Continue* ;

// Identifier with an optional leading '?'. Not referenced by any rule in
// this file.
fragment QUESTION_IDENTIFIER : QUESTION? IDENT;

// Lifetime: a single quote immediately followed by an identifier.
LIFETIME : '\'' IDENT ;

WHITESPACE : [ \r\n\t]+ ;

// Line comments. Four or more slashes is an ordinary comment; exactly three
// slashes, or two slashes and '!', is a doc comment; anything else starting
// with two slashes is an ordinary comment.
// NOTE(review): the COMMENT-typed rules stop only at '\n' (so a preceding
// '\r' becomes part of the token text) while the DOC_COMMENT-typed rules
// stop at '\r' or '\n' -- confirm this asymmetry is intended.
// NOTE(review): OUTER_DOC_COMMENT matches the `//!` form, which Rust calls an
// inner doc comment; the rule name is kept because it is also a token-type
// name that may be referenced elsewhere.
UNDOC_COMMENT     : '////' ~[\n]* -> type(COMMENT) ;
YESDOC_COMMENT    : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
LINE_COMMENT      : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;

// Block doc comment: slash-star-star followed by a character that is neither
// '*' nor '/', or slash-star-bang; nested block doc comments are allowed
// inside. Excluding '/' after the second star keeps the empty block comment
// (slash, star, star, slash) out of this rule, so it is lexed by
// BLOCK_COMMENT as an ordinary comment instead of opening a doc comment that
// would run on to some later comment terminator.
DOC_BLOCK_COMMENT
  : ('/**' ~[*/] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
  ;

// Ordinary block comment, with nesting.
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;

/* these appear at the beginning of a file */

// Shebang line: '#!' whose two characters end at input index 2 (that is, it
// opens the input) and is not followed by '['; runs to end of line.
// Presumably the '[' exclusion keeps `#![...]` inner attributes out of this
// rule.
SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;

// Byte-order mark: only recognised as the first character of the input, and
// dropped from the token stream.
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;