# Provenance: rust.git -- src/etc/extract_grammar.py (gitweb navigation header removed)
1 #!/usr/bin/env python
2 #
3 # Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
4 # file at the top-level directory of this distribution and at
5 # http://rust-lang.org/COPYRIGHT.
6 #
7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10 # option. This file may not be copied, modified, or distributed
11 # except according to those terms.
12
13 # This script is for extracting the grammar from the rust docs.
14
15 import fileinput
16
# One bucket per chunk type pulled out of the docs: "gram" keeps raw
# grammar lines verbatim; the other four collect individual words.
collections = {name: [] for name in ("gram", "keyword", "reserved", "binop", "unop")}

# Scanner state: whether we are inside a fenced (~~~~) block, and which
# bucket that block feeds.
in_coll = False
coll = ""
26
# Scan the input for fenced code blocks (~~~~) whose fence line is tagged
# with ".<name>" for one of the collections, and route the block contents
# into the matching bucket.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a fence: an opening ~~~~ line tagged ".<name>" selects
        # which bucket the following lines go to.
        if line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
        continue
    # Inside a fence: the next ~~~~ line closes it.
    if line.startswith("~~~~"):
        in_coll = False
        continue
    if coll in ["keyword", "reserved", "binop", "unop"]:
        # Word buckets are deduplicated, preserving first-seen order.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # Grammar text ("gram") is kept as whole lines.
        collections[coll].append(line)
46
# Extra token names, followed by the operator symbol-name table

# Token names that are neither keywords nor operator symbols; keyword and
# symbol tokens get appended to this list later.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
51
# Operator / punctuation spellings mapped to the identifier used as that
# symbol's token name in the emitted grammar.
# NOTE(review): the iteration order of this dict determines the order of
# symbol tokens in the %token declaration emitted below.
symnames = {
# arithmetic
".": "dot",
"+": "plus",
"-": "minus",
"/": "slash",
"*": "star",
"%": "percent",

# unary sigils
"~": "tilde",
"@": "at",

# logical / bitwise
"!": "not",
"&": "and",
"|": "or",
"^": "xor",

# shifts
"<<": "lsl",
">>": "lsr",
">>>": "asr",

# short-circuit logical
"&&": "andand",
"||": "oror",

# comparisons
"<" : "lt",
"<=" : "le",
"==" : "eqeq",
">=" : "ge",
">" : "gt",

# assignment
"=": "eq",

# compound assignment
"+=": "plusequal",
"-=": "minusequal",
"/=": "divequal",
"*=": "starequal",
"%=": "percentequal",

"&=": "andequal",
"|=": "orequal",
"^=": "xorequal",

">>=": "lsrequal",
">>>=": "asrequal",
"<<=": "lslequal",

# paths
"::": "coloncolon",

# arrows
"->": "rightarrow",
"<-": "leftarrow",
"<->": "swaparrow",

# comments and misc
"//": "linecomment",
"/*": "openblockcomment",
"*/": "closeblockcomment",
"macro_rules": "macro_rules",
"=>" : "eg",  # NOTE(review): "eg" looks like a typo (eq? fatarrow?) -- renaming would change the generated output, confirm first
".." : "dotdot",
","  : "comma"
}
111
# Translate the raw grammar lines: every double-quoted terminal becomes
# either a symbolic token name (via symnames) or a keyword token.  New
# keyword tokens encountered here are also recorded in `tokens`.
lines = []

for raw in collections["gram"]:
    parts = []
    for word in raw.split():
        if word.startswith("\""):
            word = word[1:-1]  # strip the surrounding quotes
            if word in symnames:
                word = symnames[word]
            else:
                # Anything quoted that is not a symbol must be purely
                # alphabetic -- i.e. a keyword or reserved word.
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                        word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        parts.append(word)
    # Each word is prefixed with a space, matching the original layout
    # (an empty grammar line stays empty).
    lines.append("".join(" " + p for p in parts))
137
138
# Declare a token for every keyword / reserved word, including ones the
# grammar text itself never referenced.
for kw in collections["keyword"] + collections["reserved"]:
    if kw not in tokens:
        tokens.append(kw)
142
# Declare a token for every operator symbol.
# BUGFIX: on Python 3, dict.keys() returns a view and cannot be
# concatenated to a list with `+` (TypeError); materialize it with
# list() first.  This is equally valid on Python 2 and preserves the
# dict's iteration order.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147
148
# Emit the generated grammar: the start/token declarations first, then one
# rule per word collection, then the translated grammar productions.
print("%start parser, token;")
print("%%token %s ;" % "\n\t, ".join(tokens))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    alternatives = "\n\t| ".join(symnames[x] for x in collections[coll])
    print("%s: %s ; " % (coll, alternatives))
print("\n".join(lines))