]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
Merge pull request #4506 from thestinger/mkdtemp
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 # xfail-license
3
4 # This script is for extracting the grammar from the rust docs.
5
6 import fileinput
7
# Buckets for each section extracted from the docs: "gram" keeps raw
# production lines verbatim; the word buckets are de-duplicated lists.
collections = { "gram": [],
                "keyword": [],
                "reserved": [],
                "binop": [],
                "unop": [] }

in_coll = False
coll = ""

# Scan the doc line by line.  A "~~~~ {.name}" fence opens the matching
# bucket; the next "~~~~" fence closes it.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    fence = line.startswith("~~~~")
    if not in_coll:
        # Outside a fenced block: only an opening fence is interesting.
        if fence:
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
        continue
    if fence:
        # Closing fence terminates the current collection.
        in_coll = False
        continue
    if coll in ("keyword", "reserved", "binop", "unop"):
        # Word buckets: split on whitespace, keep each word once.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # Grammar productions are collected as whole lines.
        collections[coll].append(line)
37
# Define operator symbol-names here

# Token names that have no literal spelling in the grammar text.
tokens = ["non_star", "non_slash", "non_eol",
          "non_single_quote", "non_double_quote", "ident"]

# Map each operator/punctuation spelling to the symbolic token name
# emitted in the grammar output.
symnames = {
    ".": "dot", "+": "plus", "-": "minus",
    "/": "slash", "*": "star", "%": "percent",

    "~": "tilde", "@": "at",

    "!": "not", "&": "and", "|": "or", "^": "xor",

    "<<": "lsl", ">>": "lsr", ">>>": "asr",

    "&&": "andand", "||": "oror",

    "<": "lt", "<=": "le", "==": "eqeq", ">=": "ge", ">": "gt",

    "=": "eq",

    "+=": "plusequal", "-=": "minusequal", "/=": "divequal",
    "*=": "starequal", "%=": "percentequal",

    "&=": "andequal", "|=": "orequal", "^=": "xorequal",

    ">>=": "lsrequal", ">>>=": "asrequal", "<<=": "lslequal",

    "::": "coloncolon",

    "->": "rightarrow", "<-": "leftarrow", "<->": "swaparrow",

    "//": "linecomment", "/*": "openblockcomment", "*/": "closeblockcomment",
}
98
lines = []

def _rename_terminal(word):
    # Map one quoted terminal (still wearing its quotes) to its token
    # name; keywords/reserved words are registered in `tokens` on first
    # sight, anything else is rejected.
    word = word[1:-1]  # strip the surrounding double quotes
    if word in symnames:
        return symnames[word]
    if not all(ch.isalpha() for ch in word):
        raise Exception("non-alpha apparent keyword: "
                        + word)
    if word not in tokens:
        if (word in collections["keyword"] or
            word in collections["reserved"]):
            tokens.append(word)
        else:
            raise Exception("unknown keyword/reserved word: "
                            + word)
    return word

# Rewrite each grammar production, replacing every quoted string with
# its keyword-name or symbol-name from the table above.
for line in collections["gram"]:
    rebuilt = ""
    for word in line.split():
        if word.startswith("\""):
            word = _rename_terminal(word)
        rebuilt += " " + word
    lines.append(rebuilt)
124
125
# Make sure every keyword/reserved word appears in the token list,
# even if no grammar production mentioned it.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise every operator symbol name.  Use list(symnames) rather than
# symnames.keys(): under Python 3 dict.keys() returns a view, and
# list + dict_keys raises TypeError (it concatenated fine on Python 2).
for sym in collections["unop"] + collections["binop"] + list(symnames):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
134
135
# Emit the grammar: token declarations first, then one rule per word
# collection, then the rewritten productions themselves.
print("%start parser, token;")
print("%%token %s ;" % "\n\t, ".join(tokens))
for coll in ["keyword", "reserved"]:
    alternatives = "\n\t| ".join(collections[coll])
    print("%s: %s ; " % (coll, alternatives))
for coll in ["binop", "unop"]:
    alternatives = "\n\t| ".join(symnames[x] for x in collections[coll])
    print("%s: %s ; " % (coll, alternatives))
print("\n".join(lines))