]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
Begin shift over to using pandoc, markdown and llnextgen for reference manual. Fix...
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2
3 # This script is for extracting the grammar from the rust docs.
4
5 import fileinput
6
# Buckets for the grammar fragments harvested from the docs: "gram"
# accumulates raw production lines verbatim; the other four hold
# de-duplicated word lists.
collections = {name: [] for name in
               ("gram", "keyword", "reserved", "binop", "unop")}


in_coll = False
coll = ""

for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if in_coll:
        # A second ~~~~ fence closes the collection being gathered.
        if line.startswith("~~~~"):
            in_coll = False
        elif coll == "gram":
            # Grammar productions are kept line-by-line, unsplit.
            collections[coll].append(line)
        else:
            # Word collections are tokenized and de-duplicated.
            for word in line.split():
                if word not in collections[coll]:
                    collections[coll].append(word)
    elif line.startswith("~~~~"):
        # An opening fence tagged ".<name>" starts collecting that bucket.
        for cname in collections:
            if ("." + cname) in line:
                coll = cname
                in_coll = True
                break
36
# Define operator symbol-names here

# Terminal names that are neither keywords nor operator symbols.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]

# Map each operator / punctuation literal to the identifier used for it
# in the emitted grammar.  Insertion order matters: it fixes the order
# of the token declarations printed later.
symnames = {
    # arithmetic
    ".": "dot", "+": "plus", "-": "minus",
    "/": "slash", "*": "star", "%": "percent",
    # sigils
    "~": "tilde", "@": "at",
    # bitwise / logical
    "!": "not", "&": "and", "|": "or", "^": "xor",
    # shifts
    "<<": "lsl", ">>": "lsr", ">>>": "asr",
    # short-circuit
    "&&": "andand", "||": "oror",
    # comparisons
    "<": "lt", "<=": "le", "==": "eqeq", ">=": "ge", ">": "gt",
    # assignment
    "=": "eq",
    # compound assignment
    "+=": "plusequal", "-=": "minusequal", "/=": "divequal",
    "*=": "starequal", "%=": "percentequal",
    "&=": "andequal", "|=": "orequal", "^=": "xorequal",
    ">>=": "lsrequal", ">>>=": "asrequal", "<<=": "lslequal",
    # paths
    "::": "coloncolon",
    # comments
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",
}
93
lines = []

# Rewrite each grammar production: quoted literals become symbolic
# token names, and any quoted keyword not yet declared is appended to
# the token list (after being validated against the docs' word lists).
for line in collections["gram"]:
    rewritten = ""
    for word in line.split():
        if word.startswith('"'):
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                # A quoted terminal that is not an operator must be a
                # purely alphabetic keyword.
                if any(not ch.isalpha() for ch in word):
                    raise Exception("non-alpha apparent keyword: "
                                    + word)
                if word not in tokens:
                    known = (word in collections["keyword"]
                             or word in collections["reserved"])
                    if not known:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
                    tokens.append(word)
        rewritten += " " + word
    lines.append(rewritten)
119
120
# Ensure every keyword and reserved word is declared as a token.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise declare a token for every operator symbol.  symnames is
# wrapped in list() because under Python 3 dict.keys() returns a view
# object that cannot be concatenated with a list (the old
# `+ symnames.keys()` form raised TypeError); list(symnames) preserves
# the same key order.
for sym in collections["unop"] + collections["binop"] + list(symnames):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
129
130
# Emit the LLnextgen grammar: the %start/%token prologue first, then a
# rule per word/operator class, then the rewritten productions.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    names = [symnames[x] for x in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(names)))
print("\n".join(lines))