]> git.lizzy.rs Git - rust.git/blob - src/etc/extract_grammar.py
Merge pull request #4506 from thestinger/mkdtemp
[rust.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 # xfail-license
3
4 # This script is for extracting the grammar from the rust docs.
5
6 import fileinput
7
# Buckets for each section extracted from the docs: "gram" keeps raw
# production lines verbatim; the word buckets are de-duplicated lists.
collections = { "gram": [],
                "keyword": [],
                "reserved": [],
                "binop": [],
                "unop": [] }

in_coll = False
coll = ""

# Scan the doc line by line.  A "~~~~ {.name}" fence opens the matching
# bucket; the next "~~~~" fence closes it.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    fence = line.startswith("~~~~")
    if not in_coll:
        # Outside a fenced block: only an opening fence is interesting.
        if fence:
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
        continue
    if fence:
        # Closing fence terminates the current collection.
        in_coll = False
        continue
    if coll in ("keyword", "reserved", "binop", "unop"):
        # Word buckets: split on whitespace, keep each word once.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # Grammar productions are collected as whole lines.
        collections[coll].append(line)
37
# Define operator symbol-names here

# Token names that have no literal spelling in the grammar text.
tokens = ["non_star", "non_slash", "non_eol",
          "non_single_quote", "non_double_quote", "ident"]

# Map each operator/punctuation spelling to the symbolic token name
# emitted in the grammar output.
symnames = {
    ".": "dot", "+": "plus", "-": "minus",
    "/": "slash", "*": "star", "%": "percent",

    "~": "tilde", "@": "at",

    "!": "not", "&": "and", "|": "or", "^": "xor",

    "<<": "lsl", ">>": "lsr", ">>>": "asr",

    "&&": "andand", "||": "oror",

    "<": "lt", "<=": "le", "==": "eqeq", ">=": "ge", ">": "gt",

    "=": "eq",

    "+=": "plusequal", "-=": "minusequal", "/=": "divequal",
    "*=": "starequal", "%=": "percentequal",

    "&=": "andequal", "|=": "orequal", "^=": "xorequal",

    ">>=": "lsrequal", ">>>=": "asrequal", "<<=": "lslequal",

    "::": "coloncolon",

    "->": "rightarrow", "<-": "leftarrow", "<->": "swaparrow",

    "//": "linecomment", "/*": "openblockcomment", "*/": "closeblockcomment",
}
98
lines = []

def _rename_terminal(word):
    # Map one quoted terminal (still wearing its quotes) to its token
    # name; keywords/reserved words are registered in `tokens` on first
    # sight, anything else is rejected.
    word = word[1:-1]  # strip the surrounding double quotes
    if word in symnames:
        return symnames[word]
    if not all(ch.isalpha() for ch in word):
        raise Exception("non-alpha apparent keyword: "
                        + word)
    if word not in tokens:
        if (word in collections["keyword"] or
            word in collections["reserved"]):
            tokens.append(word)
        else:
            raise Exception("unknown keyword/reserved word: "
                            + word)
    return word

# Rewrite each grammar production, replacing every quoted string with
# its keyword-name or symbol-name from the table above.
for line in collections["gram"]:
    rebuilt = ""
    for word in line.split():
        if word.startswith("\""):
            word = _rename_terminal(word)
        rebuilt += " " + word
    lines.append(rebuilt)
124
125
# Make sure every keyword/reserved word appears in the token list,
# even if no grammar production mentioned it.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise every operator symbol name.  Use list(symnames) rather than
# symnames.keys(): under Python 3 dict.keys() returns a view, and
# list + dict_keys raises TypeError (it concatenated fine on Python 2).
for sym in collections["unop"] + collections["binop"] + list(symnames):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
134
135
# Emit the grammar: token declarations first, then one rule per word
# collection, then the rewritten productions themselves.
print("%start parser, token;")
print("%%token %s ;" % "\n\t, ".join(tokens))
for coll in ["keyword", "reserved"]:
    alternatives = "\n\t| ".join(collections[coll])
    print("%s: %s ; " % (coll, alternatives))
for coll in ["binop", "unop"]:
    alternatives = "\n\t| ".join(symnames[x] for x in collections[coll])
    print("%s: %s ; " % (coll, alternatives))
print("\n".join(lines))