]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
More doc porting.
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2
3 # This script is for extracting the grammar from the rust docs.
4
5 import fileinput
6
# One accumulator list per doc section we harvest: grammar productions,
# keywords, reserved words, binary operators, unary operators.
collections = dict((name, [])
                   for name in ("gram", "keyword", "reserved",
                                "binop", "unop"))


# Scanner state: are we inside a fenced (~~~~) block, and which
# collection does that block feed?
in_coll = False
coll = ""
# Walk the input files (or stdin) line by line.  A "~~~~" fence toggles
# collection mode; the fence line that opens a block names the target
# collection via a ".<name>" tag somewhere on the line.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    fence = line.startswith("~~~~")
    if in_coll and fence:
        # Closing fence: stop collecting.
        in_coll = False
    elif in_coll:
        if coll in ("keyword", "reserved", "binop", "unop"):
            # Word-oriented collections: dedupe individual words.
            for token in line.split():
                if token not in collections[coll]:
                    collections[coll].append(token)
        else:
            # Grammar text is kept as whole lines.
            collections[coll].append(line)
    elif fence:
        # Opening fence: pick the collection tagged on this line.
        for cname in collections:
            if ("." + cname) in line:
                coll = cname
                in_coll = True
                break
36
37 # Define operator symbol-names here
38
# Character-class tokens used by the lexical grammar; keyword and
# operator tokens are appended to this list later.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
41
# Map each operator/punctuation spelling to its symbolic token name.
symnames = {
    # arithmetic
    ".":    "dot",
    "+":    "plus",
    "-":    "minus",
    "/":    "slash",
    "*":    "star",
    "%":    "percent",
    # pointer sigils
    "~":    "tilde",
    "@":    "at",
    # bitwise / logical
    "!":    "not",
    "&":    "and",
    "|":    "or",
    "^":    "xor",
    # shifts
    "<<":   "lsl",
    ">>":   "lsr",
    ">>>":  "asr",
    # short-circuit
    "&&":   "andand",
    "||":   "oror",
    # comparisons
    "<":    "lt",
    "<=":   "le",
    "==":   "eqeq",
    ">=":   "ge",
    ">":    "gt",
    # assignment
    "=":    "eq",
    # compound assignment
    "+=":   "plusequal",
    "-=":   "minusequal",
    "/=":   "divequal",
    "*=":   "starequal",
    "%=":   "percentequal",
    "&=":   "andequal",
    "|=":   "orequal",
    "^=":   "xorequal",
    ">>=":  "lsrequal",
    ">>>=": "asrequal",
    "<<=":  "lslequal",
    # paths
    "::":   "coloncolon",
    # arrows
    "->":   "rightarrow",
    "<-":   "leftarrow",
    "<->":  "swaparrow",
    # comment delimiters
    "//":   "linecomment",
    "/*":   "openblockcomment",
    "*/":   "closeblockcomment",
}
97
# Rewrite each grammar line: every double-quoted terminal is replaced by
# its symbolic name (operators via symnames, keywords by their own
# spelling, which is also recorded as a token).
lines = []

for rule in collections["gram"]:
    rendered = []
    for word in rule.split():
        if word.startswith("\""):
            # Drop the surrounding quotes.
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                # Not an operator, so it must be purely alphabetic.
                if not all(ch.isalpha() for ch in word):
                    raise Exception("non-alpha apparent keyword: "
                                    + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                        word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        rendered.append(word)
    # Same shape as the original accumulation: one leading space per word.
    lines.append("".join(" " + w for w in rendered))
123
124
# Fold every keyword and reserved word into the token list, then the
# symbolic names of all operators (document-listed unops/binops first,
# then the full symnames table).
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# list(symnames) is required for Python 3, where dict.keys() returns a
# view object that cannot be concatenated to a list with `+`.
for sym in collections["unop"] + collections["binop"] + list(symnames):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
133
134
# Emit the collected grammar in yacc-ish form: start declaration, the
# token list, one production per word collection, then the rewritten
# grammar lines themselves.
print("%start parser, token;")
print("%%token %s ;" % "\n\t, ".join(tokens))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    names = [symnames[x] for x in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(names)))
print("\n".join(lines))