1 lexer grammar RustLexer;
4 public boolean is_at(int pos) {
5 return _input.index() == pos;
11 EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
12 MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
13 BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
14 MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
15 LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
16 LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
17 LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
18 COMMENT, SHEBANG, UTF8_BOM
21 import xidstart , xidcontinue;
24 /* Expression-operator symbols */
65 /* "Structural symbols" */
96 | 'u' HEXIT HEXIT HEXIT HEXIT
97 | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
99 | 'u{' HEXIT HEXIT '}'
100 | 'u{' HEXIT HEXIT HEXIT '}'
101 | 'u{' HEXIT HEXIT HEXIT HEXIT '}'
102 | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
103 | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
110 fragment INTEGER_SUFFIX
111 : { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
115 : '\'' ( '\\' CHAR_ESCAPE
117 | '\ud800' .. '\udbff' '\udc00' .. '\udfff'
123 : 'b\'' ( '\\' ( [xX] HEXIT HEXIT
125 | ~[\\'\n\t\r] '\udc00'..'\udfff'?
132 : [0-9][0-9_]* INTEGER_SUFFIX?
133 | '0b' [01_]+ INTEGER_SUFFIX?
134 | '0o' [0-7_]+ INTEGER_SUFFIX?
135 | '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
139 : [0-9][0-9_]* ('.' {
140 /* dot followed by another dot is a range, not a float */
141 _input.LA(1) != '.' &&
142 /* dot followed by an identifier is an integer with a function call, not a float */
143 _input.LA(1) != '_' &&
144 !(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
145 !(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
146 }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
150 : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
/* Byte string literal: the letter 'b' immediately followed by anything the
   LIT_STR rule matches. */
153 LIT_BYTE_STR : 'b' LIT_STR ;
/* Raw byte string literal: the letter 'b' immediately followed by anything the
   LIT_STR_RAW rule matches. */
154 LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;
156 /* this is a bit messy */
158 fragment LIT_STR_RAW_INNER
163 fragment LIT_STR_RAW_INNER2
164 : POUND LIT_STR_RAW_INNER POUND
168 : 'r' LIT_STR_RAW_INNER SUFFIX?
/* Identifier: one XID_Start character followed by zero or more XID_Continue
   characters. NOTE(review): XID_Start / XID_Continue are not defined in the
   visible part of this file; presumably they come from the imported
   xidstart / xidcontinue grammars -- confirm. */
174 IDENT : XID_Start XID_Continue* ;
/* Fragment (never emitted as a token on its own): an IDENT optionally
   preceded by QUESTION. NOTE(review): no use of this fragment is visible in
   this excerpt -- confirm it is still referenced. */
176 fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
/* Lifetime: a single quote immediately followed by an IDENT, with no closing
   quote. */
178 LIFETIME : '\'' IDENT ;
/* One or more spaces, carriage returns, line feeds or tabs. There is no
   skip/channel command here, so the run is emitted as a WHITESPACE token. */
180 WHITESPACE : [ \r\n\t]+ ;
/*
 * Line comments. All four rules stop before the first '\r' or '\n', so for
 * any given input the candidate rules match the same length and the rule
 * listed first wins the tie:
 *   - '////...'  -> COMMENT      (four or more slashes is not a doc comment)
 *   - '///...'   -> DOC_COMMENT
 *   - '//!...'   -> DOC_COMMENT
 *   - '//...'    -> COMMENT      (anything else, including a bare '//')
 *
 * The plain-comment rules must not consume '\r': if they did, on CRLF input
 * LINE_COMMENT would match one character more than OUTER_DOC_COMMENT for a
 * '//!' comment, and the longest-match rule would type it COMMENT instead of
 * DOC_COMMENT. The trailing '\r' is left for WHITESPACE.
 */
182 UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ;
183 YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
184 OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
185 LINE_COMMENT : '//' ( ~[/\r\n] ~[\r\n]* )? -> type(COMMENT) ;
188 : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
// Block comment, emitted with token type COMMENT. The body is a non-greedy
// loop over either a nested BLOCK_COMMENT (the rule refers to itself, so
// nested comments are consumed as a unit) or any single character, ending at
// the closing delimiter that is not part of a nested comment.
191 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
193 /* these appear at the beginning of a file */
/* Shebang line: '#!' followed by everything up to (not including) the next
   '\r' or '\n'. The predicate runs right after '#!' has been consumed and
   requires (a) is_at(2), i.e. the input index is 2, so '#!' were the first
   two characters of the input, and (b) the next character is not '['.
   NOTE(review): (b) presumably keeps a leading '#![' attribute from being
   lexed as a shebang -- confirm. */
195 SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
/* Byte-order mark U+FEFF. The predicate runs after the character has been
   consumed and requires is_at(1), i.e. the input index is 1, so the mark was
   the very first character of the input. A match is skipped and produces no
   token. */
197 UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;