1 lexer grammar RustLexer;
/* Declared token type names. DOC_COMMENT is the type that several comment
 * rules further down are re-typed to. PLUS was previously misspelled PLUT.
 * NOTE(review): the opening and closing lines of this declaration are not
 * visible in this excerpt. */
4 EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
5 MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
6 BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
7 MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
8 LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
9 LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
10 LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
14 /* Note: due to ANTLR limitations, we can't represent XID_start and
15 * XID_continue properly, so these are ASCII-only substitutes. */
/* First character of an identifier: an underscore or an ASCII letter. */
17 fragment XID_start : '_' | 'a'..'z' | 'A'..'Z' ;
/* Any later identifier character: whatever XID_start accepts, or an ASCII digit. */
18 fragment XID_continue : XID_start | '0'..'9' ;
21 /* Expression-operator symbols */
60 /* "Structural symbols" */
91 | 'u' HEXIT HEXIT HEXIT HEXIT
92 | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
100 : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' SUFFIX?
104 : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX?
108 : [0-9][0-9_]* SUFFIX?
109 | '0b' [01][01_]* SUFFIX?
110 | '0o' [0-7][0-7_]* SUFFIX?
111 | '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX?
/* Float literal body: leading digits, then either
 *   - a bare trailing dot, accepted only when the predicate holds, i.e. the
 *     character after the dot is neither another dot nor something that could
 *     start an identifier ('_', a-z, A-Z); or
 *   - an optional fraction, an optional exponent and an optional SUFFIX
 *     (every part is optional, so this alternative may match nothing more).
 * The predicate is one boolean expression: the final term ('Z') must not be
 * followed by '&&'. */
115 : [0-9][0-9_]* ('.' {
116 /* dot followed by another dot is a range, no float */
117 _input.LA(1) != '.' &&
118 /* dot followed by an identifier is an integer with a function call, no float */
119 _input.LA(1) != '_' &&
120 _input.LA(1) != 'a' &&
121 _input.LA(1) != 'b' &&
122 _input.LA(1) != 'c' &&
123 _input.LA(1) != 'd' &&
124 _input.LA(1) != 'e' &&
125 _input.LA(1) != 'f' &&
126 _input.LA(1) != 'g' &&
127 _input.LA(1) != 'h' &&
128 _input.LA(1) != 'i' &&
129 _input.LA(1) != 'j' &&
130 _input.LA(1) != 'k' &&
131 _input.LA(1) != 'l' &&
132 _input.LA(1) != 'm' &&
133 _input.LA(1) != 'n' &&
134 _input.LA(1) != 'o' &&
135 _input.LA(1) != 'p' &&
136 _input.LA(1) != 'q' &&
137 _input.LA(1) != 'r' &&
138 _input.LA(1) != 's' &&
139 _input.LA(1) != 't' &&
140 _input.LA(1) != 'u' &&
141 _input.LA(1) != 'v' &&
142 _input.LA(1) != 'w' &&
143 _input.LA(1) != 'x' &&
144 _input.LA(1) != 'y' &&
145 _input.LA(1) != 'z' &&
146 _input.LA(1) != 'A' &&
147 _input.LA(1) != 'B' &&
148 _input.LA(1) != 'C' &&
149 _input.LA(1) != 'D' &&
150 _input.LA(1) != 'E' &&
151 _input.LA(1) != 'F' &&
152 _input.LA(1) != 'G' &&
153 _input.LA(1) != 'H' &&
154 _input.LA(1) != 'I' &&
155 _input.LA(1) != 'J' &&
156 _input.LA(1) != 'K' &&
157 _input.LA(1) != 'L' &&
158 _input.LA(1) != 'M' &&
159 _input.LA(1) != 'N' &&
160 _input.LA(1) != 'O' &&
161 _input.LA(1) != 'P' &&
162 _input.LA(1) != 'Q' &&
163 _input.LA(1) != 'R' &&
164 _input.LA(1) != 'S' &&
165 _input.LA(1) != 'T' &&
166 _input.LA(1) != 'U' &&
167 _input.LA(1) != 'V' &&
168 _input.LA(1) != 'W' &&
169 _input.LA(1) != 'X' &&
170 _input.LA(1) != 'Y' &&
171 _input.LA(1) != 'Z'
172 }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
176 : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
/* Byte-string literal: the letter 'b' immediately followed by a LIT_STR match,
 * then an optional SUFFIX.
 * NOTE(review): the visible string-literal alternative already ends in
 * SUFFIX?; if that is LIT_STR, the trailing SUFFIX? here can never consume
 * anything - confirm. */
179 LIT_BINARY : 'b' LIT_STR SUFFIX?;
/* Raw byte-string literal, written br"..." / br#"..."# in Rust: the letter
 * 'b' followed by a LIT_STR_RAW match, then an optional SUFFIX. This mirrors
 * LIT_BINARY ('b' + LIT_STR). The previous 'rb' prefix, combined with a raw
 * string rule that itself begins with 'r', could only match rbr"...".
 * NOTE(review): assumes LIT_STR_RAW is the rule whose body starts with
 * 'r' LIT_STR_RAW_INNER - its name line is not visible here; confirm. */
180 LIT_BINARY_RAW : 'b' LIT_STR_RAW SUFFIX?;
182 /* this is a bit messy */
184 fragment LIT_STR_RAW_INNER
189 fragment LIT_STR_RAW_INNER2
190 : POUND LIT_STR_RAW_INNER POUND
194 : 'r' LIT_STR_RAW_INNER SUFFIX?
/* Identifier: one XID_start character followed by zero or more XID_continue
 * characters (both fragments are ASCII-only in this grammar). */
200 IDENT : XID_start XID_continue* ;
/* Fragment (never emitted as a token on its own): an IDENT optionally
 * preceded by QUESTION. QUESTION is not defined in the visible part of the
 * grammar, and no visible rule references this fragment. */
202 fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
/* Lifetime name: a single quote immediately followed by an IDENT, with no
 * closing quote (e.g. 'a). */
204 LIFETIME : '\'' IDENT ;
/* A run of one or more spaces, tabs, carriage returns or line feeds. The rule
 * carries no lexer command, so the run is emitted as a WHITESPACE token. */
206 WHITESPACE : (' ' | '\t' | '\r' | '\n')+ ;
/* Line comments. Each rule consumes everything up to, but not including, the
 * next CR or LF and is re-typed to COMMENT or DOC_COMMENT rather than keeping
 * its own token type.
 *
 * Order matters: for a line that starts with four slashes, every rule except
 * the '//!' one matches exactly the same text, and ANTLR resolves equal-length
 * matches in favour of the rule listed first. UNDOC_COMMENT must therefore stay
 * ahead of YESDOC_COMMENT, and both doc forms ahead of the LINE_COMMENT
 * catch-all.
 *
 * NOTE(review): Rust terminology calls the '//!' form an inner doc comment;
 * the rule name OUTER_DOC_COMMENT is left as is in case it is referenced
 * elsewhere. */
208 UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ;
209 YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
210 OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
211 LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ;
214 : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
// Block comment: slash-star, then non-greedily either a nested BLOCK_COMMENT
// or any single character, up to the matching star-slash. The recursive
// reference is what lets nested block comments balance. The token is
// re-typed to COMMENT.
217 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;