]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
extract_grammar symnames
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 # xfail-license
3
4 # This script is for extracting the grammar from the rust docs.
5
6 import fileinput
7
# Buckets for text extracted from the docs, keyed by the class name that
# appears in the doc's ~~~~ fenced blocks (e.g. "~~~~ {.keyword}").
collections = {name: [] for name in ("gram", "keyword", "reserved",
                                     "binop", "unop")}


# Parser state: are we inside a fenced block, and which bucket does it feed.
in_coll = False
coll = ""
17
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a fenced block: a ~~~~ fence whose info string names one
        # of our buckets (".gram", ".keyword", ...) opens that bucket.
        if line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
    elif line.startswith("~~~~"):
        # A fence while inside a block closes it.
        in_coll = False
    elif coll in ("keyword", "reserved", "binop", "unop"):
        # Word-list buckets collect unique whitespace-separated words.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # The grammar bucket keeps whole lines verbatim.
        collections[coll].append(line)
37
# Terminal token classes that occur in the grammar but have no single
# literal spelling of their own.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]

# Map each operator / punctuation literal to the identifier used for it in
# the emitted token declarations.  Grouped roughly by operator family.
symnames = {
    # arithmetic
    ".": "dot", "+": "plus", "-": "minus",
    "/": "slash", "*": "star", "%": "percent",
    # unary sigils
    "~": "tilde", "@": "at",
    # bitwise / logical
    "!": "not", "&": "and", "|": "or", "^": "xor",
    # shifts
    "<<": "lsl", ">>": "lsr", ">>>": "asr",
    # short-circuit
    "&&": "andand", "||": "oror",
    # comparison
    "<": "lt", "<=": "le", "==": "eqeq", ">=": "ge", ">": "gt",
    # assignment and compound assignment
    "=": "eq",
    "+=": "plusequal", "-=": "minusequal", "/=": "divequal",
    "*=": "starequal", "%=": "percentequal",
    "&=": "andequal", "|=": "orequal", "^=": "xorequal",
    ">>=": "lsrequal", ">>>=": "asrequal", "<<=": "lslequal",
    # paths and arrows
    "::": "coloncolon",
    "->": "rightarrow", "<-": "leftarrow", "<->": "swaparrow",
    # comment delimiters
    "//": "linecomment", "/*": "openblockcomment", "*/": "closeblockcomment",
    # misc
    "macro_rules": "macro_rules",
    "=>": "eg",   # NOTE(review): "eg" looks like a typo (cf. "eq") — kept as-is
    "..": "dotdot",
    ",": "comma",
}
102
lines = []

# Rewrite each grammar line: every double-quoted literal becomes either its
# symbol name from the table above or, if purely alphabetic, a keyword token
# (which is registered on first sight).
for gram_line in collections["gram"]:
    rebuilt = ""
    for word in gram_line.split():
        if word.startswith("\""):
            # Drop the surrounding quotes.
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                if not all(ch.isalpha() for ch in word):
                    raise Exception("non-alpha apparent keyword: "
                                    + word)
                if word not in tokens:
                    if (word in collections["keyword"]
                            or word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        rebuilt += " " + word
    lines.append(rebuilt)
128
129
# Any keyword/reserved word not already introduced by the grammar walk
# above still needs a token declaration.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise for operator symbols.  The original concatenated symnames.keys()
# directly, which is a TypeError under Python 3 (a dict view cannot be added
# to a list); wrap it in list() so the script runs on both Python 2 and 3.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
138
139
# Emit the result in yacc-ish form: the token declarations first, then one
# rule per word/operator collection, then the grammar productions themselves.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    ops = [symnames[x] for x in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(ops)))
print("\n".join(lines))