]> git.lizzy.rs Git - rust.git/blob - src/libsyntax/parse/mod.rs
4f1d41a4a7a171ed9c09759d8776ca3461a62389
[rust.git] / src / libsyntax / parse / mod.rs
1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! The main parser interface
12
13
14 use ast::node_id;
15 use ast;
16 use codemap::{span, CodeMap, FileMap, FileSubstr};
17 use codemap;
18 use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter};
19 use parse::attr::parser_attr;
20 use parse::lexer::reader;
21 use parse::parser::Parser;
22 use parse::token::{ident_interner, mk_ident_interner};
23
24 use core::io;
25 use core::option::{None, Option, Some};
26 use core::path::Path;
27 use core::result::{Err, Ok};
28
29 pub mod lexer;
30 pub mod parser;
31 pub mod token;
32 pub mod comments;
33 pub mod attr;
34
35
36 /// Common routines shared by parser mods
37 pub mod common;
38
39 /// Routines the parser uses to classify AST nodes
40 pub mod classify;
41
42 /// Reporting obsolete syntax
43 pub mod obsolete;
44
45 // info about a parsing session.
46 // This structure and the reader both have
47 // an interner associated with them. If they're
48 // not the same, bad things can happen.
49 pub struct ParseSess {
50     cm: @codemap::CodeMap, // better be the same as the one in the reader!
51     next_id: node_id,
52     span_diagnostic: @span_handler, // better be the same as the one in the reader!
53     interner: @ident_interner,
54 }
55
56 pub fn new_parse_sess(demitter: Option<Emitter>) -> @mut ParseSess {
57     let cm = @CodeMap::new();
58     @mut ParseSess {
59         cm: cm,
60         next_id: 1,
61         span_diagnostic: mk_span_handler(mk_handler(demitter), cm),
62         interner: mk_ident_interner(),
63     }
64 }
65
66 pub fn new_parse_sess_special_handler(sh: @span_handler,
67                                       cm: @codemap::CodeMap)
68                                    -> @mut ParseSess {
69     @mut ParseSess {
70         cm: cm,
71         next_id: 1,
72         span_diagnostic: sh,
73         interner: mk_ident_interner(),
74     }
75 }
76
77 // a bunch of utility functions of the form parse_<thing>_from_<source>
78 // where <thing> includes crate, expr, item, stmt, tts, and one that
79 // uses a HOF to parse anything, and <source> includes file and
80 // source_str.
81
82 pub fn parse_crate_from_file(
83     input: &Path,
84     cfg: ast::crate_cfg,
85     sess: @mut ParseSess
86 ) -> @ast::crate {
87     new_parser_from_file(sess, /*bad*/ copy cfg, input).parse_crate_mod()
88     // why is there no p.abort_if_errors here?
89 }
90
91 pub fn parse_crate_from_source_str(
92     name: ~str,
93     source: @~str,
94     cfg: ast::crate_cfg,
95     sess: @mut ParseSess
96 ) -> @ast::crate {
97     let p = new_parser_from_source_str(
98         sess,
99         /*bad*/ copy cfg,
100         /*bad*/ copy name,
101         source
102     );
103     maybe_aborted(p.parse_crate_mod(),p)
104 }
105
106 pub fn parse_expr_from_source_str(
107     name: ~str,
108     source: @~str,
109     cfg: ast::crate_cfg,
110     sess: @mut ParseSess
111 ) -> @ast::expr {
112     let p = new_parser_from_source_str(
113         sess,
114         cfg,
115         /*bad*/ copy name,
116         source
117     );
118     maybe_aborted(p.parse_expr(), p)
119 }
120
121 pub fn parse_item_from_source_str(
122     name: ~str,
123     source: @~str,
124     cfg: ast::crate_cfg,
125     attrs: ~[ast::attribute],
126     sess: @mut ParseSess
127 ) -> Option<@ast::item> {
128     let p = new_parser_from_source_str(
129         sess,
130         cfg,
131         /*bad*/ copy name,
132         source
133     );
134     maybe_aborted(p.parse_item(attrs),p)
135 }
136
137 pub fn parse_meta_from_source_str(
138     name: ~str,
139     source: @~str,
140     cfg: ast::crate_cfg,
141     sess: @mut ParseSess
142 ) -> @ast::meta_item {
143     let p = new_parser_from_source_str(
144         sess,
145         cfg,
146         /*bad*/ copy name,
147         source
148     );
149     maybe_aborted(p.parse_meta_item(),p)
150 }
151
152 pub fn parse_stmt_from_source_str(
153     name: ~str,
154     source: @~str,
155     cfg: ast::crate_cfg,
156     attrs: ~[ast::attribute],
157     sess: @mut ParseSess
158 ) -> @ast::stmt {
159     let p = new_parser_from_source_str(
160         sess,
161         cfg,
162         /*bad*/ copy name,
163         source
164     );
165     maybe_aborted(p.parse_stmt(attrs),p)
166 }
167
168 pub fn parse_tts_from_source_str(
169     name: ~str,
170     source: @~str,
171     cfg: ast::crate_cfg,
172     sess: @mut ParseSess
173 ) -> ~[ast::token_tree] {
174     let p = new_parser_from_source_str(
175         sess,
176         cfg,
177         /*bad*/ copy name,
178         source
179     );
180     *p.quote_depth += 1u;
181     // right now this is re-creating the token trees from ... token trees.
182     maybe_aborted(p.parse_all_token_trees(),p)
183 }
184
185 // given a function and parsing information (source str,
186 // filename, crate cfg, and sess), create a parser,
187 // apply the function, and check that the parser
188 // consumed all of the input before returning the function's
189 // result.
190 pub fn parse_from_source_str<T>(
191     f: &fn(&Parser) -> T,
192     name: ~str, ss: codemap::FileSubstr,
193     source: @~str,
194     cfg: ast::crate_cfg,
195     sess: @mut ParseSess
196 ) -> T {
197     let p = new_parser_from_source_substr(
198         sess,
199         cfg,
200         name,
201         ss,
202         source
203     );
204     let r = f(&p);
205     if !p.reader.is_eof() {
206         p.reader.fatal(~"expected end-of-string");
207     }
208     maybe_aborted(r,p)
209 }
210
211 // return the next unused node id.
212 pub fn next_node_id(sess: @mut ParseSess) -> node_id {
213     let rv = sess.next_id;
214     sess.next_id += 1;
215     // ID 0 is reserved for the crate and doesn't actually exist in the AST
216     assert!(rv != 0);
217     return rv;
218 }
219
220 // Create a new parser from a source string
221 pub fn new_parser_from_source_str(sess: @mut ParseSess,
222                                   cfg: ast::crate_cfg,
223                                   name: ~str,
224                                   source: @~str)
225                                -> Parser {
226     filemap_to_parser(sess,string_to_filemap(sess,source,name),cfg)
227 }
228
229 // Create a new parser from a source string where the origin
230 // is specified as a substring of another file.
231 pub fn new_parser_from_source_substr(sess: @mut ParseSess,
232                                   cfg: ast::crate_cfg,
233                                   name: ~str,
234                                   ss: codemap::FileSubstr,
235                                   source: @~str)
236                                -> Parser {
237     filemap_to_parser(sess,substring_to_filemap(sess,source,name,ss),cfg)
238 }
239
240 /// Create a new parser, handling errors as appropriate
241 /// if the file doesn't exist
242 pub fn new_parser_from_file(
243     sess: @mut ParseSess,
244     cfg: ast::crate_cfg,
245     path: &Path
246 ) -> Parser {
247     filemap_to_parser(sess,file_to_filemap(sess,path,None),cfg)
248 }
249
250 /// Given a session, a crate config, a path, and a span, add
251 /// the file at the given path to the codemap, and return a parser.
252 /// On an error, use the given span as the source of the problem.
253 pub fn new_sub_parser_from_file(
254     sess: @mut ParseSess,
255     cfg: ast::crate_cfg,
256     path: &Path,
257     sp: span
258 ) -> Parser {
259     filemap_to_parser(sess,file_to_filemap(sess,path,Some(sp)),cfg)
260 }
261
262 /// Given a filemap and config, return a parser
263 pub fn filemap_to_parser(sess: @mut ParseSess,
264                          filemap: @FileMap,
265                          cfg: ast::crate_cfg) -> Parser {
266     tts_to_parser(sess,filemap_to_tts(sess,filemap),cfg)
267 }
268
269 // must preserve old name for now, because quote! from the *existing*
270 // compiler expands into it
271 pub fn new_parser_from_tts(sess: @mut ParseSess,
272                      cfg: ast::crate_cfg,
273                      tts: ~[ast::token_tree]) -> Parser {
274     tts_to_parser(sess,tts,cfg)
275 }
276
277
278 // base abstractions
279
280 /// Given a session and a path and an optional span (for error reporting),
281 /// add the path to the session's codemap and return the new filemap.
282 pub fn file_to_filemap(sess: @mut ParseSess, path: &Path, spanopt: Option<span>)
283     -> @FileMap {
284     match io::read_whole_file_str(path) {
285         Ok(src) => string_to_filemap(sess, @src, path.to_str()),
286         Err(e) => {
287             match spanopt {
288                 Some(span) => sess.span_diagnostic.span_fatal(span, e),
289                 None => sess.span_diagnostic.handler().fatal(e)
290             }
291         }
292     }
293 }
294
295 // given a session and a string, add the string to
296 // the session's codemap and return the new filemap
297 pub fn string_to_filemap(sess: @mut ParseSess, source: @~str, path: ~str)
298     -> @FileMap {
299     sess.cm.new_filemap(path, source)
300 }
301
302 // given a session and a string and a path and a FileSubStr, add
303 // the string to the CodeMap and return the new FileMap
304 pub fn substring_to_filemap(sess: @mut ParseSess, source: @~str, path: ~str,
305                            filesubstr: FileSubstr) -> @FileMap {
306     sess.cm.new_filemap_w_substr(path,filesubstr,source)
307 }
308
309 // given a filemap, produce a sequence of token-trees
310 pub fn filemap_to_tts(sess: @mut ParseSess, filemap: @FileMap)
311     -> ~[ast::token_tree] {
312     // it appears to me that the cfg doesn't matter here... indeed,
313     // parsing tt's probably shouldn't require a parser at all.
314     let cfg = ~[];
315     let srdr = lexer::new_string_reader(copy sess.span_diagnostic,
316                                         filemap,
317                                         sess.interner);
318     let p1 = Parser(sess, cfg, srdr as @reader);
319     p1.parse_all_token_trees()
320 }
321
322 // given tts and cfg, produce a parser
323 pub fn tts_to_parser(sess: @mut ParseSess,
324                      tts: ~[ast::token_tree],
325                      cfg: ast::crate_cfg) -> Parser {
326     let trdr = lexer::new_tt_reader(
327         copy sess.span_diagnostic,
328         sess.interner,
329         None,
330         tts
331     );
332     Parser(sess, cfg, trdr as @reader)
333 }
334
335 // abort if necessary
336 pub fn maybe_aborted<T>(result : T, p: Parser) -> T {
337     p.abort_if_errors();
338     result
339 }
340
341
342
343 #[cfg(test)]
344 mod test {
345     use super::*;
346     use std::serialize::Encodable;
347     use std;
348     use core::io;
349     use core::option::Option;
350     use core::option::Some;
351     use core::option::None;
352     use core::int;
353     use core::num::NumCast;
354     use core::path::Path;
355     use codemap::{dummy_sp, CodeMap, span, BytePos, spanned};
356     use opt_vec;
357     use ast;
358     use abi;
359     use ast_util::mk_ident;
360     use parse::parser::Parser;
361     use parse::token::{ident_interner, mk_ident_interner, mk_fresh_ident_interner};
362     use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter};
363
364     // add known names to interner for testing
365     fn mk_testing_interner() -> @ident_interner {
366         let i = mk_fresh_ident_interner();
367         // baby hack; in order to put the identifiers
368         // 'a' and 'b' at known locations, we're going
369         // to fill up the interner to length 100. If
370         // the # of preloaded items on the interner
371         // ever gets larger than 100, we'll have to
372         // adjust this number (say, to 200) and
373         // change the numbers in the identifier
374         // test cases below.
375
376         assert!(i.len() < 100);
377         for int::range(0,100-((i.len()).to_int())) |_dc| {
378             i.gensym(@~"dontcare");
379         }
380         i.intern(@~"a");
381         i.intern(@~"b");
382         i.intern(@~"c");
383         i.intern(@~"d");
384         i.intern(@~"return");
385         assert!(i.get(ast::ident{repr:101,ctxt:0}) == @~"b");
386         i
387     }
388
389     // make a parse_sess that's closed over a
390     // testing interner (where a -> 100, b -> 101)
391     fn mk_testing_parse_sess() -> @mut ParseSess {
392         let interner = mk_testing_interner();
393         let cm = @CodeMap::new();
394         @mut ParseSess {
395             cm: cm,
396             next_id: 1,
397             span_diagnostic: mk_span_handler(mk_handler(None), cm),
398             interner: interner,
399         }
400     }
401
402     // map a string to tts, using a made-up filename: return both the token_trees
403     // and the ParseSess
404     fn string_to_tts_t (source_str : @~str) -> (~[ast::token_tree],@mut ParseSess) {
405         let ps = mk_testing_parse_sess();
406         (filemap_to_tts(ps,string_to_filemap(ps,source_str,~"bogofile")),ps)
407     }
408
409     // map a string to tts, return the tt without its parsesess
410     fn string_to_tts_only(source_str : @~str) -> ~[ast::token_tree] {
411         let (tts,ps) = string_to_tts_t(source_str);
412         tts
413     }
414
415     // map string to parser (via tts)
416     fn string_to_parser(source_str: @~str) -> Parser {
417         let ps = mk_testing_parse_sess();
418         new_parser_from_source_str(ps,~[],~"bogofile",source_str)
419     }
420
421     #[cfg(test)] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
422         do io::with_str_writer |writer| {
423             val.encode(~std::json::Encoder(writer));
424         }
425     }
426
427     fn string_to_crate (source_str : @~str) -> @ast::crate {
428         string_to_parser(source_str).parse_crate_mod()
429     }
430
431     fn string_to_expr (source_str : @~str) -> @ast::expr {
432         string_to_parser(source_str).parse_expr()
433     }
434
435     fn string_to_item (source_str : @~str) -> Option<@ast::item> {
436         string_to_parser(source_str).parse_item(~[])
437     }
438
439     fn string_to_stmt (source_str : @~str) -> @ast::stmt {
440         string_to_parser(source_str).parse_stmt(~[])
441     }
442
443     // produce a codemap::span
444     fn sp (a: uint, b: uint) -> span {
445         span{lo:BytePos(a),hi:BytePos(b),expn_info:None}
446     }
447
448     // convert a vector of uints to a vector of ast::idents
449     fn ints_to_idents(ids: ~[uint]) -> ~[ast::ident] {
450         ids.map(|u| mk_ident(*u))
451     }
452
453     #[test] fn path_exprs_1 () {
454         assert_eq!(string_to_expr(@~"a"),
455                    @ast::expr{id:1,
456                               callee_id:2,
457                               node:ast::expr_path(@ast::Path {span:sp(0,1),
458                                                               global:false,
459                                                               idents:~[mk_ident(100)],
460                                                               rp:None,
461                                                               types:~[]}),
462                               span:sp(0,1)})
463     }
464
465     #[test] fn path_exprs_2 () {
466         assert_eq!(string_to_expr(@~"::a::b"),
467                    @ast::expr{id:1,
468                                callee_id:2,
469                                node:ast::expr_path(@ast::Path {span:sp(0,6),
470                                                                global:true,
471                                                                idents:ints_to_idents(~[100,101]),
472                                                                rp:None,
473                                                                types:~[]}),
474                               span:sp(0,6)})
475     }
476
477     #[should_fail]
478     #[test] fn bad_path_expr_1() {
479         string_to_expr(@~"::abc::def::return");
480     }
481
482     #[test] fn string_to_tts_1 () {
483         let (tts,ps) = string_to_tts_t(@~"fn a (b : int) { b; }");
484         assert_eq!(to_json_str(@tts),
485                    ~"[\
486                 [\"tt_tok\",null,[\"IDENT\",\"fn\",false]],\
487                 [\"tt_tok\",null,[\"IDENT\",\"a\",false]],\
488                 [\
489                     \"tt_delim\",\
490                     [\
491                         [\"tt_tok\",null,\"LPAREN\"],\
492                         [\"tt_tok\",null,[\"IDENT\",\"b\",false]],\
493                         [\"tt_tok\",null,\"COLON\"],\
494                         [\"tt_tok\",null,[\"IDENT\",\"int\",false]],\
495                         [\"tt_tok\",null,\"RPAREN\"]\
496                     ]\
497                 ],\
498                 [\
499                     \"tt_delim\",\
500                     [\
501                         [\"tt_tok\",null,\"LBRACE\"],\
502                         [\"tt_tok\",null,[\"IDENT\",\"b\",false]],\
503                         [\"tt_tok\",null,\"SEMI\"],\
504                         [\"tt_tok\",null,\"RBRACE\"]\
505                     ]\
506                 ]\
507             ]"
508                   );
509     }
510
511     #[test] fn ret_expr() {
512         assert_eq!(string_to_expr(@~"return d"),
513                    @ast::expr{id:3,
514                               callee_id:4,
515                               node:ast::expr_ret(
516                                   Some(@ast::expr{id:1,callee_id:2,
517                                                   node:ast::expr_path(
518                                                       @ast::Path{span:sp(7,8),
519                                                                  global:false,
520                                                                  idents:~[mk_ident(103)],
521                                                                  rp:None,
522                                                                  types:~[]
523                                                                 }),
524                                                   span:sp(7,8)})),
525                               span:sp(0,8)})
526     }
527
528     #[test] fn parse_stmt_1 () {
529         assert_eq!(string_to_stmt(@~"b;"),
530                    @spanned{
531                        node: ast::stmt_expr(@ast::expr{
532                            id: 1,
533                            callee_id: 2,
534                            node: ast::expr_path(
535                                @ast::Path{
536                                    span:sp(0,1),
537                                    global:false,
538                                    idents:~[mk_ident(101)],
539                                    rp:None,
540                                    types: ~[]}),
541                            span: sp(0,1)},
542                                             3), // fixme
543                        span: sp(0,1)})
544
545     }
546
547     fn parser_done(p: Parser){
548         assert_eq!(*p.token,token::EOF);
549     }
550
551     #[test] fn parse_ident_pat () {
552         let parser = string_to_parser(@~"b");
553         assert_eq!(parser.parse_pat(false),
554                    @ast::pat{id:1, // fixme
555                              node: ast::pat_ident(ast::bind_by_copy,
556                                                   @ast::Path{
557                                                       span:sp(0,1),
558                                                       global:false,
559                                                       idents:~[mk_ident(101)],
560                                                       rp: None,
561                                                       types: ~[]},
562                                                   None // no idea
563                                                  ),
564                              span: sp(0,1)});
565         parser_done(parser);
566     }
567
568     #[test] fn parse_arg () {
569         let parser = string_to_parser(@~"b : int");
570         assert_eq!(parser.parse_arg_general(true),
571                    ast::arg{
572                        is_mutbl: false,
573                        ty: @ast::Ty{id:3, // fixme
574                                     node: ast::ty_path(@ast::Path{
575                                         span:sp(4,4), // this is bizarre...
576                                         // check this in the original parser?
577                                         global:false,
578                                         idents:~[mk_ident(105)],
579                                         rp: None,
580                                         types: ~[]},
581                                                        2),
582                                     span:sp(4,7)},
583                        pat: @ast::pat{id:1,
584                                       node: ast::pat_ident(ast::bind_by_copy,
585                                                            @ast::Path{
586                                                                span:sp(0,1),
587                                                                global:false,
588                                                                idents:~[mk_ident(101)],
589                                                                rp: None,
590                                                                types: ~[]},
591                                                            None // no idea
592                                                           ),
593                                       span: sp(0,3)}, // really?
594                        id: 4 // fixme
595                    })
596     }
597
598     // check the contents of the tt manually:
599     #[test] fn parse_fundecl () {
600         // this test depends on the intern order of "fn" and "int", and on the
601         // assignment order of the node_ids.
602         assert_eq!(string_to_item(@~"fn a (b : int) { b; }"),
603                   Some(
604                       @ast::item{ident:mk_ident(100),
605                             attrs:~[],
606                             id: 10, // fixme
607                             node: ast::item_fn(ast::fn_decl{
608                                 inputs: ~[ast::arg{
609                                     is_mutbl: false,
610                                     ty: @ast::Ty{id:3, // fixme
611                                                 node: ast::ty_path(@ast::Path{
612                                         span:sp(10,13),
613                                         global:false,
614                                         idents:~[mk_ident(106)],
615                                         rp: None,
616                                         types: ~[]},
617                                                        2),
618                                                 span:sp(10,13)},
619                                     pat: @ast::pat{id:1, // fixme
620                                                    node: ast::pat_ident(
621                                                        ast::bind_by_copy,
622                                                        @ast::Path{
623                                                            span:sp(6,7),
624                                                            global:false,
625                                                            idents:~[mk_ident(101)],
626                                                            rp: None,
627                                                            types: ~[]},
628                                                        None // no idea
629                                                    ),
630                                                   span: sp(6,9)}, // bleah.
631                                     id: 4 // fixme
632                                 }],
633                                 output: @ast::Ty{id:5, // fixme
634                                                  node: ast::ty_nil,
635                                                  span:sp(15,15)}, // not sure
636                                 cf: ast::return_val
637                             },
638                                     ast::impure_fn,
639                                     abi::AbiSet::Rust(),
640                                     ast::Generics{ // no idea on either of these:
641                                         lifetimes: opt_vec::Empty,
642                                         ty_params: opt_vec::Empty,
643                                     },
644                                     spanned{
645                                         span: sp(15,21),
646                                         node: ast::blk_{
647                                             view_items: ~[],
648                                             stmts: ~[@spanned{
649                                                 node: ast::stmt_semi(@ast::expr{
650                                                     id: 6,
651                                                     callee_id: 7,
652                                                     node: ast::expr_path(
653                                                         @ast::Path{
654                                                             span:sp(17,18),
655                                                             global:false,
656                                                             idents:~[mk_ident(101)],
657                                                             rp:None,
658                                                             types: ~[]}),
659                                                     span: sp(17,18)},
660                                                                      8), // fixme
661                                                 span: sp(17,18)}],
662                                             expr: None,
663                                             id: 9, // fixme
664                                             rules: ast::default_blk // no idea
665                                         }}),
666                             vis: ast::inherited,
667                             span: sp(0,21)}));
668     }
669
670
671     #[test] fn parse_exprs () {
672         // just make sure that they parse....
673         string_to_expr(@~"3 + 4");
674         string_to_expr(@~"a::z.froob(b,@(987+3))");
675     }
676 }