]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
extract_grammar symnames
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 # xfail-license
3
4 # This script is for extracting the grammar from the rust docs.
5
6 import fileinput
7
# Buckets for text extracted from the docs, keyed by the class name that
# appears in the doc's ~~~~ fenced blocks (e.g. "~~~~ {.keyword}").
collections = {name: [] for name in ("gram", "keyword", "reserved",
                                     "binop", "unop")}


# Parser state: are we inside a fenced block, and which bucket does it feed.
in_coll = False
coll = ""
17
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a fenced block: a ~~~~ fence whose info string names one
        # of our buckets (".gram", ".keyword", ...) opens that bucket.
        if line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
    elif line.startswith("~~~~"):
        # A fence while inside a block closes it.
        in_coll = False
    elif coll in ("keyword", "reserved", "binop", "unop"):
        # Word-list buckets collect unique whitespace-separated words.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # The grammar bucket keeps whole lines verbatim.
        collections[coll].append(line)
37
# Terminal token classes that occur in the grammar but have no single
# literal spelling of their own.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]

# Map each operator / punctuation literal to the identifier used for it in
# the emitted token declarations.  Grouped roughly by operator family.
symnames = {
    # arithmetic
    ".": "dot", "+": "plus", "-": "minus",
    "/": "slash", "*": "star", "%": "percent",
    # unary sigils
    "~": "tilde", "@": "at",
    # bitwise / logical
    "!": "not", "&": "and", "|": "or", "^": "xor",
    # shifts
    "<<": "lsl", ">>": "lsr", ">>>": "asr",
    # short-circuit
    "&&": "andand", "||": "oror",
    # comparison
    "<": "lt", "<=": "le", "==": "eqeq", ">=": "ge", ">": "gt",
    # assignment and compound assignment
    "=": "eq",
    "+=": "plusequal", "-=": "minusequal", "/=": "divequal",
    "*=": "starequal", "%=": "percentequal",
    "&=": "andequal", "|=": "orequal", "^=": "xorequal",
    ">>=": "lsrequal", ">>>=": "asrequal", "<<=": "lslequal",
    # paths and arrows
    "::": "coloncolon",
    "->": "rightarrow", "<-": "leftarrow", "<->": "swaparrow",
    # comment delimiters
    "//": "linecomment", "/*": "openblockcomment", "*/": "closeblockcomment",
    # misc
    "macro_rules": "macro_rules",
    "=>": "eg",   # NOTE(review): "eg" looks like a typo (cf. "eq") — kept as-is
    "..": "dotdot",
    ",": "comma",
}
102
lines = []

# Rewrite each grammar line: every double-quoted literal becomes either its
# symbol name from the table above or, if purely alphabetic, a keyword token
# (which is registered on first sight).
for gram_line in collections["gram"]:
    rebuilt = ""
    for word in gram_line.split():
        if word.startswith("\""):
            # Drop the surrounding quotes.
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                if not all(ch.isalpha() for ch in word):
                    raise Exception("non-alpha apparent keyword: "
                                    + word)
                if word not in tokens:
                    if (word in collections["keyword"]
                            or word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        rebuilt += " " + word
    lines.append(rebuilt)
128
129
# Any keyword/reserved word not already introduced by the grammar walk
# above still needs a token declaration.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise for operator symbols.  The original concatenated symnames.keys()
# directly, which is a TypeError under Python 3 (a dict view cannot be added
# to a list); wrap it in list() so the script runs on both Python 2 and 3.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
138
139
# Emit the result in yacc-ish form: the token declarations first, then one
# rule per word/operator collection, then the grammar productions themselves.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    ops = [symnames[x] for x in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(ops)))
print("\n".join(lines))