# src/etc/extract_grammar.py (from rust.git)
# Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
10
# This script is for extracting the grammar from the rust docs.

import fileinput
14
# One accumulator list per section of the docs we extract: the grammar
# productions themselves plus the keyword/operator word lists.
collections = {name: [] for name in ("gram", "keyword", "reserved",
                                     "binop", "unop")}
20
21
# Scan the input as a small state machine: a "~~~~" fence line tagged with
# ".<name>" opens collection <name>; the next "~~~~" line closes it.
in_coll = False  # are we currently inside a fenced section?
coll = ""        # which collection the current section feeds

for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    fence = line.startswith("~~~~")
    if in_coll:
        if fence:
            in_coll = False
        elif coll == "gram":
            # Grammar sections keep whole lines verbatim.
            collections[coll].append(line)
        else:
            # Word-list sections (keyword/reserved/binop/unop) are split
            # into de-duplicated words.
            for tok in line.split():
                if tok not in collections[coll]:
                    collections[coll].append(tok)
    elif fence:
        # Opening fence: pick whichever collection the fence names.
        for cname in collections:
            if ("." + cname) in line:
                coll = cname
                in_coll = True
                break
44
45 # Define operator symbol-names here
46
# Operator/character-class token names used by the grammar but not defined
# as keywords anywhere in the docs.
tokens = ("non_star non_slash non_eol "
          "non_single_quote non_double_quote ident").split()
49
# Map every operator/symbol spelling to the identifier that stands for it
# in the generated grammar, grouped roughly by operator family.
symnames = dict([
    (".", "dot"), ("+", "plus"), ("-", "minus"),
    ("/", "slash"), ("*", "star"), ("%", "percent"),

    ("~", "tilde"), ("@", "at"),

    ("!", "not"), ("&", "and"), ("|", "or"), ("^", "xor"),

    ("<<", "lsl"), (">>", "lsr"), (">>>", "asr"),

    ("&&", "andand"), ("||", "oror"),

    ("<", "lt"), ("<=", "le"), ("==", "eqeq"), (">=", "ge"), (">", "gt"),

    ("=", "eq"),

    ("+=", "plusequal"), ("-=", "minusequal"), ("/=", "divequal"),
    ("*=", "starequal"), ("%=", "percentequal"),

    ("&=", "andequal"), ("|=", "orequal"), ("^=", "xorequal"),

    (">>=", "lsrequal"), (">>>=", "asrequal"), ("<<=", "lslequal"),

    ("::", "coloncolon"),

    ("->", "rightarrow"), ("<-", "leftarrow"), ("<->", "swaparrow"),

    ("//", "linecomment"), ("/*", "openblockcomment"),
    ("*/", "closeblockcomment"),
    ("macro_rules", "macro_rules"), ("=>", "eg"),
    ("..", "dotdot"), (",", "comma"),
])
109
# Rewrite each grammar production: every quoted string becomes either a
# symbol name from `symnames` or a keyword token.  Keywords encountered here
# are registered in `tokens`; anything unrecognized is an error.
lines = []

for line in collections["gram"]:
    rebuilt = ""
    for word in line.split():
        if word.startswith("\""):
            word = word[1:-1]  # strip the surrounding quotes
            if word in symnames:
                word = symnames[word]
            else:
                # Not an operator symbol: must be a purely alphabetic
                # keyword that the docs declared as keyword/reserved.
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                            word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)

        rebuilt += " " + word
    lines.append(rebuilt)
135
136
# Ensure every keyword/reserved word and every operator symbol name is
# declared as a token, without duplicating entries already present.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# list(...) is required on Python 3: dict.keys() returns a view, and
# concatenating a view to a list raises TypeError.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
145
146
# Emit the grammar skeleton: the token declarations first, then one
# production per word/operator class, then the rewritten grammar lines.
print("%start parser, token;")
print("%%token %s ;" % "\n\t, ".join(tokens))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    print("%s: %s ; " % (coll,
                         "\n\t| ".join(symnames[x]
                                       for x in collections[coll])))
print("\n".join(lines))