-----------------------------------------------------------------------\r
--- Metalua.\r
---\r
--- Summary: parser generator. Collection of higher order functors,\r
--- which allow to build and combine parsers. Relies on a lexer\r
--- that supports the same API as the one exposed in mll.lua.\r
---\r
-----------------------------------------------------------------------\r
---\r
--- Copyright (c) 2006-2008, Fabien Fleutot <metalua@gmail.com>.\r
---\r
--- This software is released under the MIT Licence, see licence.txt\r
--- for details.\r
---\r
-----------------------------------------------------------------------\r
-\r
---------------------------------------------------------------------------------\r
---\r
--- Exported API:\r
---\r
--- Parser generators:\r
--- * [gg.sequence()]\r
--- * [gg.multisequence()]\r
--- * [gg.expr()]\r
--- * [gg.list()]\r
--- * [gg.onkeyword()]\r
--- * [gg.optkeyword()]\r
---\r
--- Other functions: \r
--- * [gg.parse_error()]\r
--- * [gg.make_parser()]\r
--- * [gg.is_parser()]\r
---\r
---------------------------------------------------------------------------------\r
-\r
-module("gg", package.seeall)\r
-\r
--------------------------------------------------------------------------------\r
--- parser metatable, which maps __call to method parse, and adds some\r
--- error tracing boilerplate.\r
--------------------------------------------------------------------------------\r
-local parser_metatable = { }\r
-function parser_metatable.__call (parser, lx, ...)\r
- --printf ("Call parser %q of type %q", parser.name or "?", parser.kind)\r
- if mlc.metabugs then \r
- return parser:parse (lx, ...) \r
- --local x = parser:parse (lx, ...) \r
- --printf ("Result of parser %q: %s", \r
- -- parser.name or "?",\r
- -- _G.table.tostring(x, "nohash", 80))\r
- --return x\r
- else\r
- local li = lx:lineinfo_right() or { "?", "?", "?", "?" }\r
- local status, ast = pcall (parser.parse, parser, lx, ...) \r
- if status then return ast else\r
- error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s", \r
- ast:strmatch "gg.lua:%d+: (.*)" or ast,\r
- li[1], li[2], li[3], parser.name or parser.kind))\r
- end\r
- end\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Turn a table into a parser, mainly by setting the metatable.\r
--------------------------------------------------------------------------------\r
-function make_parser(kind, p)\r
- p.kind = kind\r
- if not p.transformers then p.transformers = { } end\r
- function p.transformers:add (x)\r
- table.insert (self, x)\r
- end\r
- setmetatable (p, parser_metatable)\r
- return p\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Return true iff [x] is a parser.\r
--- If it's a gg-generated parser, reutrn the name of its kind.\r
--------------------------------------------------------------------------------\r
-function is_parser (x)\r
- return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Parse a sequence, without applying builder nor transformers\r
--------------------------------------------------------------------------------\r
-local function raw_parse_sequence (lx, p)\r
- local r = { }\r
- for i=1, #p do\r
- e=p[i]\r
- if type(e) == "string" then \r
- if not lx:is_keyword (lx:next(), e) then\r
- parse_error (lx, "Keyword '%s' expected", e) end\r
- elseif is_parser (e) then\r
- table.insert (r, e (lx)) \r
- else \r
- gg.parse_error (lx,"Sequence `%s': element #%i is not a string "..\r
- "nor a parser: %s", \r
- p.name, i, table.tostring(e))\r
- end\r
- end\r
- return r\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Parse a multisequence, without applying multisequence transformers.\r
--- The sequences are completely parsed.\r
--------------------------------------------------------------------------------\r
-local function raw_parse_multisequence (lx, sequence_table, default)\r
- local seq_parser = sequence_table[lx:is_keyword(lx:peek())]\r
- if seq_parser then return seq_parser (lx)\r
- elseif default then return default (lx)\r
- else return false end\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Applies all transformers listed in parser on ast.\r
--------------------------------------------------------------------------------\r
-local function transform (ast, parser, fli, lli)\r
- if parser.transformers then\r
- for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end\r
- end\r
- if type(ast) == 'table'then\r
- local ali = ast.lineinfo\r
- if not ali or ali.first~=fli or ali.last~=lli then\r
- ast.lineinfo = { first = fli, last = lli }\r
- end\r
- end\r
- return ast\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Generate a tracable parsing error (not implemented yet)\r
--------------------------------------------------------------------------------\r
-function parse_error(lx, fmt, ...)\r
- local li = lx:lineinfo_left() or {-1,-1,-1, "<unknown file>"}\r
- local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...) \r
- local src = lx.src\r
- if li[3]>0 and src then\r
- local i, j = li[3], li[3]\r
- while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end\r
- while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end \r
- local srcline = src:sub (i+1, j-1)\r
- local idx = string.rep (" ", li[2]).."^"\r
- msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)\r
- end\r
- error(msg)\r
-end\r
- \r
--------------------------------------------------------------------------------\r
---\r
--- Sequence parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- Input fields:\r
---\r
--- * [builder]: how to build an AST out of sequence parts. let [x] be the list\r
--- of subparser results (keywords are simply omitted). [builder] can be:\r
--- - [nil], in which case the result of parsing is simply [x]\r
--- - a string, which is then put as a tag on [x]\r
--- - a function, which takes [x] as a parameter and returns an AST.\r
---\r
--- * [name]: the name of the parser. Used for debug messages\r
---\r
--- * [transformers]: a list of AST->AST functions, applied in order on ASTs\r
--- returned by the parser.\r
---\r
--- * Table-part entries corresponds to keywords (strings) and subparsers \r
--- (function and callable objects).\r
---\r
--- After creation, the following fields are added:\r
--- * [parse] the parsing function lexer->AST\r
--- * [kind] == "sequence"\r
--- * [name] is set, if it wasn't in the input.\r
---\r
--------------------------------------------------------------------------------\r
-function sequence (p)\r
- make_parser ("sequence", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
- -- Raw parsing:\r
- local fli = lx:lineinfo_right()\r
- local seq = raw_parse_sequence (lx, self)\r
- local lli = lx:lineinfo_left()\r
-\r
- -- Builder application:\r
- local builder, tb = self.builder, type (self.builder)\r
- if tb == "string" then seq.tag = builder\r
- elseif tb == "function" or builder and builder.__call then seq = builder(seq)\r
- elseif builder == nil then -- nothing\r
- else error ("Invalid builder of type "..tb.." in sequence") end\r
- seq = transform (seq, self, fli, lli)\r
- assert (not seq or seq.lineinfo)\r
- return seq\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- -- Try to build a proper name\r
- if not p.name and type(p[1])=="string" then \r
- p.name = p[1].." ..." \r
- if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end\r
- else\r
- p.name = "<anonymous>"\r
- end\r
-\r
- return p\r
-end --</sequence>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Multiple, keyword-driven, sequence parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- in [p], useful fields are:\r
---\r
--- * [transformers]: as usual\r
---\r
--- * [name]: as usual\r
---\r
--- * Table-part entries must be sequence parsers, or tables which can\r
--- be turned into a sequence parser by [gg.sequence]. These\r
--- sequences must start with a keyword, and this initial keyword\r
--- must be different for each sequence. The table-part entries will\r
--- be removed after [gg.multisequence] returns.\r
---\r
--- * [default]: the parser to run if the next keyword in the lexer is\r
--- none of the registered initial keywords. If there's no default\r
--- parser and no suitable initial keyword, the multisequence parser\r
--- simply returns [false].\r
---\r
--- After creation, the following fields are added:\r
---\r
--- * [parse] the parsing function lexer->AST\r
---\r
--- * [sequences] the table of sequences, indexed by initial keywords.\r
---\r
--- * [add] method takes a sequence parser or a config table for\r
--- [gg.sequence], and adds/replaces the corresponding sequence\r
--- parser. If the keyword was already used, the former sequence is\r
--- removed and a warning is issued.\r
---\r
--- * [get] method returns a sequence by its initial keyword\r
---\r
--- * [kind] == "multisequence"\r
---\r
--------------------------------------------------------------------------------\r
-function multisequence (p) \r
- make_parser ("multisequence", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Add a sequence (might be just a config table for [gg.sequence])\r
- -------------------------------------------------------------------\r
- function p:add (s)\r
- -- compile if necessary:\r
- if not is_parser(s) then sequence(s) end\r
- if type(s[1]) ~= "string" then \r
- error "Invalid sequence for multiseq"\r
- elseif self.sequences[s[1]] then \r
- eprintf (" *** Warning: keyword %q overloaded in multisequence ***", s[1])\r
- end\r
- self.sequences[s[1]] = s\r
- end -- </multisequence.add>\r
-\r
- -------------------------------------------------------------------\r
- -- Get the sequence starting with this keyword. [kw :: string]\r
- -------------------------------------------------------------------\r
- function p:get (kw) return self.sequences [kw] end\r
-\r
- -------------------------------------------------------------------\r
- -- Remove the sequence starting with keyword [kw :: string]\r
- -------------------------------------------------------------------\r
- function p:del (kw) \r
- if not self.sequences[kw] then \r
- eprintf("*** Warning: trying to delete sequence starting "..\r
- "with %q from a multisequence having no such "..\r
- "entry ***", kw) end\r
- local removed = self.sequences[kw]\r
- self.sequences[kw] = nil \r
- return removed\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
- local fli = lx:lineinfo_right()\r
- local x = raw_parse_multisequence (lx, self.sequences, self.default)\r
- local lli = lx:lineinfo_left()\r
- return transform (x, self, fli, lli)\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- -- Register the sequences passed to the constructor. They're going\r
- -- from the array part of the parser to the hash part of field\r
- -- [sequences]\r
- p.sequences = { }\r
- for i=1, #p do p:add (p[i]); p[i] = nil end\r
-\r
- -- FIXME: why is this commented out?\r
- --if p.default and not is_parser(p.default) then sequence(p.default) end\r
- return p\r
-end --</multisequence>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Expression parser generator\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- Expression configuration relies on three tables: [prefix], [infix]\r
--- and [suffix]. Moreover, the primary parser can be replaced by a\r
--- table: in this case the [primary] table will be passed to\r
--- [gg.multisequence] to create a parser.\r
---\r
--- Each of these tables is a modified multisequence parser: the\r
--- differences with respect to regular multisequence config tables are:\r
---\r
--- * the builder takes specific parameters:\r
--- - for [prefix], it takes the result of the prefix sequence parser,\r
--- and the prefixed expression\r
--- - for [infix], it takes the left-hand-side expression, the results \r
--- of the infix sequence parser, and the right-hand-side expression.\r
--- - for [suffix], it takes the suffixed expression, and theresult \r
--- of the suffix sequence parser.\r
---\r
--- * the default field is a list, with parameters:\r
--- - [parser] the raw parsing function\r
--- - [transformers], as usual\r
--- - [prec], the operator's precedence\r
--- - [assoc] for [infix] table, the operator's associativity, which\r
--- can be "left", "right" or "flat" (default to left)\r
---\r
--- In [p], useful fields are:\r
--- * [transformers]: as usual\r
--- * [name]: as usual\r
--- * [primary]: the atomic expression parser, or a multisequence config \r
--- table (mandatory)\r
--- * [prefix]: prefix operators config table, see above.\r
--- * [infix]: infix operators config table, see above.\r
--- * [suffix]: suffix operators config table, see above.\r
---\r
--- After creation, these fields are added:\r
--- * [kind] == "expr"\r
--- * [parse] as usual\r
--- * each table is turned into a multisequence, and therefore has an \r
--- [add] method\r
---\r
--------------------------------------------------------------------------------\r
-function expr (p)\r
- make_parser ("expr", p)\r
-\r
- -------------------------------------------------------------------\r
- -- parser method.\r
- -- In addition to the lexer, it takes an optional precedence:\r
- -- it won't read expressions whose precedence is lower or equal\r
- -- to [prec].\r
- -------------------------------------------------------------------\r
- function p:parse (lx, prec)\r
- prec = prec or 0\r
-\r
- ------------------------------------------------------\r
- -- Extract the right parser and the corresponding\r
- -- options table, for (pre|in|suff)fix operators.\r
- -- Options include prec, assoc, transformers.\r
- ------------------------------------------------------\r
- local function get_parser_info (tab)\r
- local p2 = tab:get (lx:is_keyword (lx:peek()))\r
- if p2 then -- keyword-based sequence found\r
- local function parser(lx) return raw_parse_sequence(lx, p2) end\r
- return parser, p2\r
- else -- Got to use the default parser\r
- local d = tab.default\r
- if d then return d.parse or d.parser, d\r
- else return false, false end\r
- end\r
- end\r
-\r
- ------------------------------------------------------\r
- -- Look for a prefix sequence. Multiple prefixes are\r
- -- handled through the recursive [p.parse] call.\r
- -- Notice the double-transform: one for the primary\r
- -- expr, and one for the one with the prefix op.\r
- ------------------------------------------------------\r
- local function handle_prefix ()\r
- local fli = lx:lineinfo_right()\r
- local p2_func, p2 = get_parser_info (self.prefix)\r
- local op = p2_func and p2_func (lx)\r
- if op then -- Keyword-based sequence found\r
- local ili = lx:lineinfo_right() -- Intermediate LineInfo\r
- local e = p2.builder (op, self:parse (lx, p2.prec))\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e, p2, ili, lli), self, fli, lli)\r
- else -- No prefix found, get a primary expression \r
- local e = self.primary(lx)\r
- local lli = lx:lineinfo_left()\r
- return transform (e, self, fli, lli)\r
- end\r
- end --</expr.parse.handle_prefix>\r
-\r
- ------------------------------------------------------\r
- -- Look for an infix sequence+right-hand-side operand.\r
- -- Return the whole binary expression result,\r
- -- or false if no operator was found.\r
- ------------------------------------------------------\r
- local function handle_infix (e)\r
- local p2_func, p2 = get_parser_info (self.infix)\r
- if not p2 then return false end\r
-\r
- -----------------------------------------\r
- -- Handle flattening operators: gather all operands\r
- -- of the series in [list]; when a different operator \r
- -- is found, stop, build from [list], [transform] and\r
- -- return.\r
- -----------------------------------------\r
- if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then\r
- local fli = lx:lineinfo_right()\r
- local pflat, list = p2, { e }\r
- repeat\r
- local op = p2_func(lx)\r
- if not op then break end\r
- table.insert (list, self:parse (lx, p2.prec))\r
- local _ -- We only care about checking that p2==pflat\r
- _, p2 = get_parser_info (self.infix)\r
- until p2 ~= pflat\r
- local e2 = pflat.builder (list)\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e2, pflat, fli, lli), self, fli, lli)\r
- \r
- -----------------------------------------\r
- -- Handle regular infix operators: [e] the LHS is known,\r
- -- just gather the operator and [e2] the RHS.\r
- -- Result goes in [e3].\r
- -----------------------------------------\r
- elseif p2.prec and p2.prec>prec or \r
- p2.prec==prec and p2.assoc=="right" then\r
- local fli = e.lineinfo.first -- lx:lineinfo_right()\r
- local op = p2_func(lx)\r
- if not op then return false end\r
- local e2 = self:parse (lx, p2.prec)\r
- local e3 = p2.builder (e, op, e2)\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e3, p2, fli, lli), self, fli, lli)\r
-\r
- -----------------------------------------\r
- -- Check for non-associative operators, and complain if applicable. \r
- -----------------------------------------\r
- elseif p2.assoc=="none" and p2.prec==prec then\r
- parser_error (lx, "non-associative operator!")\r
-\r
- -----------------------------------------\r
- -- No infix operator suitable at that precedence\r
- -----------------------------------------\r
- else return false end\r
-\r
- end --</expr.parse.handle_infix>\r
-\r
- ------------------------------------------------------\r
- -- Look for a suffix sequence.\r
- -- Return the result of suffix operator on [e],\r
- -- or false if no operator was found.\r
- ------------------------------------------------------\r
- local function handle_suffix (e)\r
- -- FIXME bad fli, must take e.lineinfo.first\r
- local p2_func, p2 = get_parser_info (self.suffix)\r
- if not p2 then return false end\r
- if not p2.prec or p2.prec>=prec then\r
- --local fli = lx:lineinfo_right()\r
- local fli = e.lineinfo.first\r
- local op = p2_func(lx)\r
- if not op then return false end\r
- local lli = lx:lineinfo_left()\r
- e = p2.builder (e, op)\r
- e = transform (transform (e, p2, fli, lli), self, fli, lli)\r
- return e\r
- end\r
- return false\r
- end --</expr.parse.handle_suffix>\r
-\r
- ------------------------------------------------------\r
- -- Parser body: read suffix and (infix+operand) \r
- -- extensions as long as we're able to fetch more at\r
- -- this precedence level.\r
- ------------------------------------------------------\r
- local e = handle_prefix()\r
- repeat\r
- local x = handle_suffix (e); e = x or e\r
- local y = handle_infix (e); e = y or e\r
- until not (x or y)\r
-\r
- -- No transform: it already happened in operators handling\r
- return e\r
- end --</expr.parse>\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.primary then p.primary=p[1]; p[1]=nil end\r
- for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do\r
- if not p[t] then p[t] = { } end\r
- if not is_parser(p[t]) then multisequence(p[t]) end\r
- end\r
- function p:add(...) return self.primary:add(...) end\r
- return p\r
-end --</expr>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- List parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- In [p], the following fields can be provided in input:\r
---\r
--- * [builder]: takes list of subparser results, returns AST\r
--- * [transformers]: as usual\r
--- * [name]: as usual\r
---\r
--- * [terminators]: list of strings representing the keywords which\r
--- might mark the end of the list. When non-empty, the list is\r
--- allowed to be empty. A string is treated as a single-element\r
--- table, whose element is that string, e.g. ["do"] is the same as\r
--- [{"do"}].\r
---\r
--- * [separators]: list of strings representing the keywords which can\r
--- separate elements of the list. When non-empty, one of these\r
--- keyword has to be found between each element. Lack of a separator\r
--- indicates the end of the list. A string is treated as a\r
--- single-element table, whose element is that string, e.g. ["do"]\r
--- is the same as [{"do"}]. If [terminators] is empty/nil, then\r
--- [separators] has to be non-empty.\r
---\r
--- After creation, the following fields are added:\r
--- * [parse] the parsing function lexer->AST\r
--- * [kind] == "list"\r
---\r
--------------------------------------------------------------------------------\r
-function list (p)\r
- make_parser ("list", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
-\r
- ------------------------------------------------------\r
- -- Used to quickly check whether there's a terminator \r
- -- or a separator immediately ahead\r
- ------------------------------------------------------\r
- local function peek_is_in (keywords) \r
- return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end\r
-\r
- local x = { }\r
- local fli = lx:lineinfo_right()\r
-\r
- -- if there's a terminator to start with, don't bother trying\r
- if not peek_is_in (self.terminators) then \r
- repeat table.insert (x, self.primary (lx)) -- read one element\r
- until\r
- -- First reason to stop: There's a separator list specified,\r
- -- and next token isn't one. Otherwise, consume it with [lx:next()]\r
- self.separators and not(peek_is_in (self.separators) and lx:next()) or\r
- -- Other reason to stop: terminator token ahead\r
- peek_is_in (self.terminators) or\r
- -- Last reason: end of file reached\r
- lx:peek().tag=="Eof"\r
- end\r
-\r
- local lli = lx:lineinfo_left()\r
- \r
- -- Apply the builder. It can be a string, or a callable value, \r
- -- or simply nothing.\r
- local b = self.builder\r
- if b then\r
- if type(b)=="string" then x.tag = b -- b is a string, use it as a tag\r
- elseif type(b)=="function" then x=b(x)\r
- else\r
- local bmt = getmetatable(b)\r
- if bmt and bmt.__call then x=b(x) end\r
- end\r
- end\r
- return transform (x, self, fli, lli)\r
- end --</list.parse>\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.primary then p.primary = p[1]; p[1] = nil end\r
- if type(p.terminators) == "string" then p.terminators = { p.terminators }\r
- elseif p.terminators and #p.terminators == 0 then p.terminators = nil end\r
- if type(p.separators) == "string" then p.separators = { p.separators }\r
- elseif p.separators and #p.separators == 0 then p.separators = nil end\r
-\r
- return p\r
-end --</list>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Keyword-conditionned parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- \r
--- Only apply a parser if a given keyword is found. The result of\r
--- [gg.onkeyword] parser is the result of the subparser (modulo\r
--- [transformers] applications).\r
---\r
--- lineinfo: the keyword is *not* included in the boundaries of the\r
--- resulting lineinfo. A review of all usages of gg.onkeyword() in the\r
--- implementation of metalua has shown that it was the appropriate choice\r
--- in every case.\r
---\r
--- Input fields:\r
---\r
--- * [name]: as usual\r
---\r
--- * [transformers]: as usual\r
---\r
--- * [peek]: if non-nil, the conditionning keyword is left in the lexeme\r
--- stream instead of being consumed.\r
---\r
--- * [primary]: the subparser. \r
---\r
--- * [keywords]: list of strings representing triggering keywords.\r
---\r
--- * Table-part entries can contain strings, and/or exactly one parser.\r
--- Strings are put in [keywords], and the parser is put in [primary].\r
---\r
--- After the call, the following fields will be set:\r
--- \r
--- * [parse] the parsing method\r
--- * [kind] == "onkeyword"\r
--- * [primary]\r
--- * [keywords]\r
---\r
--------------------------------------------------------------------------------\r
-function onkeyword (p)\r
- make_parser ("onkeyword", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse(lx)\r
- if lx:is_keyword (lx:peek(), unpack(self.keywords)) then\r
- --local fli = lx:lineinfo_right()\r
- if not self.peek then lx:next() end\r
- local content = self.primary (lx)\r
- --local lli = lx:lineinfo_left()\r
- local fli, lli = content.lineinfo.first, content.lineinfo.last\r
- return transform (content, p, fli, lli)\r
- else return false end\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.keywords then p.keywords = { } end\r
- for _, x in ipairs(p) do\r
- if type(x)=="string" then table.insert (p.keywords, x)\r
- else assert (not p.primary and is_parser (x)); p.primary = x end\r
- end\r
- assert (p.primary, 'no primary parser in gg.onkeyword')\r
- return p\r
-end --</onkeyword>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Optional keyword consummer pseudo-parser generator\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- This doesn't return a real parser, just a function. That function parses\r
--- one of the keywords passed as parameters, and returns it. It returns \r
--- [false] if no matching keyword is found.\r
---\r
--- Notice that tokens returned by lexer already carry lineinfo, therefore\r
--- there's no need to add them, as done usually through transform() calls.\r
--------------------------------------------------------------------------------\r
-function optkeyword (...)\r
- local args = {...}\r
- if type (args[1]) == "table" then \r
- assert (#args == 1)\r
- args = args[1]\r
- end\r
- for _, v in ipairs(args) do assert (type(v)=="string") end\r
- return function (lx)\r
- local x = lx:is_keyword (lx:peek(), unpack (args))\r
- if x then lx:next(); return x\r
- else return false end\r
- end\r
-end\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Run a parser with a special lexer\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- This doesn't return a real parser, just a function.\r
--- First argument is the lexer class to be used with the parser,\r
--- 2nd is the parser itself.\r
--- The resulting parser returns whatever the argument parser does.\r
---\r
--------------------------------------------------------------------------------\r
-function with_lexer(new_lexer, parser)\r
-\r
- -------------------------------------------------------------------\r
- -- Most gg functions take their parameters in a table, so it's \r
- -- better to silently accept when with_lexer{ } is called with\r
- -- its arguments in a list:\r
- -------------------------------------------------------------------\r
- if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then\r
- return with_lexer(unpack(new_lexer))\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Save the current lexer, switch it for the new one, run the parser,\r
- -- restore the previous lexer, even if the parser caused an error.\r
- -------------------------------------------------------------------\r
- return function (lx)\r
- local old_lexer = getmetatable(lx)\r
- lx:sync()\r
- setmetatable(lx, new_lexer)\r
- local status, result = pcall(parser, lx)\r
- lx:sync()\r
- setmetatable(lx, old_lexer)\r
- if status then return result else error(result) end\r
- end\r
-end\r
+----------------------------------------------------------------------
+-- Metalua.
+--
+-- Summary: parser generator. Collection of higher order functors,
+-- which allow to build and combine parsers. Relies on a lexer
+-- that supports the same API as the one exposed in mll.lua.
+--
+----------------------------------------------------------------------
+--
+-- Copyright (c) 2006-2008, Fabien Fleutot <metalua@gmail.com>.
+--
+-- This software is released under the MIT Licence, see licence.txt
+-- for details.
+--
+----------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+--
+-- Exported API:
+--
+-- Parser generators:
+-- * [gg.sequence()]
+-- * [gg.multisequence()]
+-- * [gg.expr()]
+-- * [gg.list()]
+-- * [gg.onkeyword()]
+-- * [gg.optkeyword()]
+--
+-- Other functions:
+-- * [gg.parse_error()]
+-- * [gg.make_parser()]
+-- * [gg.is_parser()]
+--
+--------------------------------------------------------------------------------
+
+module("gg", package.seeall)
+
+-------------------------------------------------------------------------------
+-- parser metatable, which maps __call to method parse, and adds some
+-- error tracing boilerplate.
+-------------------------------------------------------------------------------
+local parser_metatable = { }
+function parser_metatable.__call (parser, lx, ...)
+ --printf ("Call parser %q of type %q", parser.name or "?", parser.kind)
+ if mlc.metabugs then
+ return parser:parse (lx, ...)
+ --local x = parser:parse (lx, ...)
+ --printf ("Result of parser %q: %s",
+ -- parser.name or "?",
+ -- _G.table.tostring(x, "nohash", 80))
+ --return x
+ else
+ local li = lx:lineinfo_right() or { "?", "?", "?", "?" }
+ local status, ast = pcall (parser.parse, parser, lx, ...)
+ if status then return ast else
+ error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s",
+ ast:strmatch "gg.lua:%d+: (.*)" or ast,
+ li[1], li[2], li[3], parser.name or parser.kind))
+ end
+ end
+end
+
+-------------------------------------------------------------------------------
+-- Turn a table into a parser, mainly by setting the metatable.
+-------------------------------------------------------------------------------
+function make_parser(kind, p)
+ p.kind = kind
+ if not p.transformers then p.transformers = { } end
+ function p.transformers:add (x)
+ table.insert (self, x)
+ end
+ setmetatable (p, parser_metatable)
+ return p
+end
+
+-------------------------------------------------------------------------------
+-- Return true iff [x] is a parser.
+-- If it's a gg-generated parser, return the name of its kind.
+-------------------------------------------------------------------------------
+function is_parser (x)
+ return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
+end
+
+-------------------------------------------------------------------------------
+-- Parse a sequence, without applying builder nor transformers
+-------------------------------------------------------------------------------
+-- Parse sequence parser [p] against lexer [lx] and return the list of
+-- sub-results. Keyword entries (strings) are checked and consumed but not
+-- stored; sub-parser results are appended to the returned list. Raises a
+-- parse error on a missing keyword or an invalid sequence entry.
+local function raw_parse_sequence (lx, p)
+ local r = { }
+ for i=1, #p do
+ local e = p[i] -- fixed: was an accidental global variable
+ if type(e) == "string" then
+ -- Literal keyword: the next lexeme must match, and is consumed.
+ if not lx:is_keyword (lx:next(), e) then
+ parse_error (lx, "Keyword '%s' expected", e) end
+ elseif is_parser (e) then
+ table.insert (r, e (lx))
+ else
+ -- Neither a keyword nor a parser: the sequence is misconfigured.
+ parse_error (lx, "Sequence `%s': element #%i is not a string "..
+ "nor a parser: %s",
+ p.name, i, table.tostring(e))
+ end
+ end
+ return r
+end
+
+-------------------------------------------------------------------------------
+-- Parse a multisequence, without applying multisequence transformers.
+-- The sequences are completely parsed.
+-------------------------------------------------------------------------------
+-- Dispatch on the next keyword: run the sequence registered for it in
+-- [sequence_table], fall back on [default] when no sequence matches, or
+-- return false when neither applies. The multisequence's own transformers
+-- are NOT applied here.
+local function raw_parse_multisequence (lx, sequence_table, default)
+ local kw = lx:is_keyword (lx:peek())
+ local handler = sequence_table[kw] or default
+ if handler then return handler (lx) end
+ return false
+end
+
+-------------------------------------------------------------------------------
+-- Applies all transformers listed in parser on ast.
+-------------------------------------------------------------------------------
+-- Run every transformer registered on [parser] over [ast] (a transformer
+-- returning nil/false leaves the AST unchanged), then stamp the result
+-- with the lineinfo boundaries [fli]..[lli] when it is a table whose
+-- current lineinfo is absent or different.
+local function transform (ast, parser, fli, lli)
+ local ts = parser.transformers
+ if ts then
+ for i = 1, #ts do ast = ts[i] (ast) or ast end
+ end
+ if type (ast) == 'table' then
+ local li = ast.lineinfo
+ if not (li and li.first == fli and li.last == lli) then
+ ast.lineinfo = { first = fli, last = lli }
+ end
+ end
+ return ast
+end
+
+-------------------------------------------------------------------------------
+-- Generate a traceable parsing error, echoing the offending source line.
+-------------------------------------------------------------------------------
+-- Raise a formatted parsing error. [fmt] and the extra arguments are
+-- interpolated with string.format; the lexer's current line/char position
+-- is prepended, and when the source text is available the offending line
+-- is echoed with a caret marking the column.
+function parse_error(lx, fmt, ...)
+ -- lineinfo_left() = { line, column, char-offset, source-name },
+ -- presumably of the last consumed lexeme -- see the lexer module.
+ local li = lx:lineinfo_left() or {-1,-1,-1, "<unknown file>"}
+ local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...)
+ local src = lx.src
+ if li[3]>0 and src then
+ -- Widen [i..j] outwards to the newlines surrounding the error offset,
+ -- so src:sub(i+1, j-1) is exactly the offending source line.
+ local i, j = li[3], li[3]
+ while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end
+ while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
+ local srcline = src:sub (i+1, j-1)
+ -- Caret marker under the failing column.
+ local idx = string.rep (" ", li[2]).."^"
+ msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
+ end
+ error(msg)
+end
+
+-------------------------------------------------------------------------------
+--
+-- Sequence parser generator
+--
+-------------------------------------------------------------------------------
+-- Input fields:
+--
+-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
+-- of subparser results (keywords are simply omitted). [builder] can be:
+-- - [nil], in which case the result of parsing is simply [x]
+-- - a string, which is then put as a tag on [x]
+-- - a function, which takes [x] as a parameter and returns an AST.
+--
+-- * [name]: the name of the parser. Used for debug messages
+--
+-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
+-- returned by the parser.
+--
+-- * Table-part entries corresponds to keywords (strings) and subparsers
+-- (function and callable objects).
+--
+-- After creation, the following fields are added:
+-- * [parse] the parsing function lexer->AST
+-- * [kind] == "sequence"
+-- * [name] is set, if it wasn't in the input.
+--
+-------------------------------------------------------------------------------
+-- Create a sequence parser from config table [p]; see the comment block
+-- above for the accepted fields. Returns [p], promoted to a parser.
+function sequence (p)
+ make_parser ("sequence", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method
+ -------------------------------------------------------------------
+ function p:parse (lx)
+ -- Raw parsing: lineinfo captured around the whole sequence.
+ local fli = lx:lineinfo_right()
+ local seq = raw_parse_sequence (lx, self)
+ local lli = lx:lineinfo_left()
+
+ -- Builder application: a string builder tags the result list, a
+ -- function or callable builder replaces it, nil keeps the raw list.
+ local builder, tb = self.builder, type (self.builder)
+ if tb == "string" then seq.tag = builder
+ elseif tb == "function" or builder and
+ (builder.__call or (getmetatable(builder) or { }).__call) then
+ -- also accept tables callable through their metatable's __call,
+ -- consistently with gg.list's builder handling
+ seq = builder(seq)
+ elseif builder == nil then -- nothing
+ else error ("Invalid builder of type "..tb.." in sequence") end
+ seq = transform (seq, self, fli, lli)
+ assert (not seq or seq.lineinfo)
+ return seq
+ end
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ -- Try to build a proper name from the initial/final keywords; a
+ -- caller-supplied name is kept (it was previously clobbered with
+ -- "<anonymous>", contradicting the documented contract above).
+ if not p.name and type(p[1])=="string" then
+ p.name = p[1].." ..."
+ if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end
+ else
+ p.name = p.name or "<anonymous>"
+ end
+
+ return p
+end --</sequence>
+
+
+-------------------------------------------------------------------------------
+--
+-- Multiple, keyword-driven, sequence parser generator
+--
+-------------------------------------------------------------------------------
+-- in [p], useful fields are:
+--
+-- * [transformers]: as usual
+--
+-- * [name]: as usual
+--
+-- * Table-part entries must be sequence parsers, or tables which can
+-- be turned into a sequence parser by [gg.sequence]. These
+-- sequences must start with a keyword, and this initial keyword
+-- must be different for each sequence. The table-part entries will
+-- be removed after [gg.multisequence] returns.
+--
+-- * [default]: the parser to run if the next keyword in the lexer is
+-- none of the registered initial keywords. If there's no default
+-- parser and no suitable initial keyword, the multisequence parser
+-- simply returns [false].
+--
+-- After creation, the following fields are added:
+--
+-- * [parse] the parsing function lexer->AST
+--
+-- * [sequences] the table of sequences, indexed by initial keywords.
+--
+-- * [add] method takes a sequence parser or a config table for
+-- [gg.sequence], and adds/replaces the corresponding sequence
+-- parser. If the keyword was already used, the former sequence is
+-- removed and a warning is issued.
+--
+-- * [get] method returns a sequence by its initial keyword
+--
+-- * [kind] == "multisequence"
+--
+-------------------------------------------------------------------------------
+function multisequence (p)
+ make_parser ("multisequence", p)
+
+ -------------------------------------------------------------------
+ -- Add a sequence (might be just a config table for [gg.sequence])
+ -------------------------------------------------------------------
+ function p:add (s)
+ -- compile if necessary:
+ if not is_parser(s) then sequence(s) end
+ -- Sequences are indexed by their initial keyword, which must exist.
+ if type(s[1]) ~= "string" then
+ error "Invalid sequence for multiseq"
+ elseif self.sequences[s[1]] then
+ -- Replacing an existing entry is allowed, but worth a warning.
+ eprintf (" *** Warning: keyword %q overloaded in multisequence ***", s[1])
+ end
+ self.sequences[s[1]] = s
+ end -- </multisequence.add>
+
+ -------------------------------------------------------------------
+ -- Get the sequence starting with this keyword. [kw :: string]
+ -------------------------------------------------------------------
+ function p:get (kw) return self.sequences [kw] end
+
+ -------------------------------------------------------------------
+ -- Remove the sequence starting with keyword [kw :: string];
+ -- returns the removed sequence, or nil if there was none.
+ -------------------------------------------------------------------
+ function p:del (kw)
+ if not self.sequences[kw] then
+ -- Only a warning: deletion of a missing entry is not fatal.
+ eprintf("*** Warning: trying to delete sequence starting "..
+ "with %q from a multisequence having no such "..
+ "entry ***", kw) end
+ local removed = self.sequences[kw]
+ self.sequences[kw] = nil
+ return removed
+ end
+
+ -------------------------------------------------------------------
+ -- Parsing method: delegate to the sequence selected by the next
+ -- keyword (or to [default]), then apply this parser's transformers.
+ -------------------------------------------------------------------
+ function p:parse (lx)
+ local fli = lx:lineinfo_right()
+ local x = raw_parse_multisequence (lx, self.sequences, self.default)
+ local lli = lx:lineinfo_left()
+ return transform (x, self, fli, lli)
+ end
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ -- Register the sequences passed to the constructor. They're going
+ -- from the array part of the parser to the hash part of field
+ -- [sequences]
+ p.sequences = { }
+ for i=1, #p do p:add (p[i]); p[i] = nil end
+
+ -- FIXME: why is this commented out?
+ --if p.default and not is_parser(p.default) then sequence(p.default) end
+ return p
+end --</multisequence>
+
+
+-------------------------------------------------------------------------------
+--
+-- Expression parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Expression configuration relies on three tables: [prefix], [infix]
+-- and [suffix]. Moreover, the primary parser can be replaced by a
+-- table: in this case the [primary] table will be passed to
+-- [gg.multisequence] to create a parser.
+--
+-- Each of these tables is a modified multisequence parser: the
+-- differences with respect to regular multisequence config tables are:
+--
+-- * the builder takes specific parameters:
+-- - for [prefix], it takes the result of the prefix sequence parser,
+-- and the prefixed expression
+-- - for [infix], it takes the left-hand-side expression, the results
+-- of the infix sequence parser, and the right-hand-side expression.
+-- - for [suffix], it takes the suffixed expression, and the result
+-- of the suffix sequence parser.
+--
+-- * the default field is a list, with parameters:
+-- - [parser] the raw parsing function
+-- - [transformers], as usual
+-- - [prec], the operator's precedence
+-- - [assoc] for [infix] table, the operator's associativity, which
+-- can be "left", "right" or "flat" (default to left)
+--
+-- In [p], useful fields are:
+-- * [transformers]: as usual
+-- * [name]: as usual
+-- * [primary]: the atomic expression parser, or a multisequence config
+-- table (mandatory)
+-- * [prefix]: prefix operators config table, see above.
+-- * [infix]: infix operators config table, see above.
+-- * [suffix]: suffix operators config table, see above.
+--
+-- After creation, these fields are added:
+-- * [kind] == "expr"
+-- * [parse] as usual
+-- * each table is turned into a multisequence, and therefore has an
+-- [add] method
+--
+-------------------------------------------------------------------------------
+-- Create a precedence-climbing expression parser from config table [p];
+-- see the comment block above for the accepted fields. Returns [p],
+-- promoted to a parser.
+function expr (p)
+ make_parser ("expr", p)
+
+ -------------------------------------------------------------------
+ -- parser method.
+ -- In addition to the lexer, it takes an optional precedence:
+ -- it won't read expressions whose precedence is lower or equal
+ -- to [prec].
+ -------------------------------------------------------------------
+ function p:parse (lx, prec)
+ prec = prec or 0
+
+ ------------------------------------------------------
+ -- Extract the right parser and the corresponding
+ -- options table, for (pre|in|suff)fix operators.
+ -- Options include prec, assoc, transformers.
+ -- Returns false, false when no operator applies.
+ ------------------------------------------------------
+ local function get_parser_info (tab)
+ local p2 = tab:get (lx:is_keyword (lx:peek()))
+ if p2 then -- keyword-based sequence found
+ local function parser(lx) return raw_parse_sequence(lx, p2) end
+ return parser, p2
+ else -- Got to use the default parser
+ local d = tab.default
+ if d then return d.parse or d.parser, d
+ else return false, false end
+ end
+ end
+
+ ------------------------------------------------------
+ -- Look for a prefix sequence. Multiple prefixes are
+ -- handled through the recursive [p.parse] call.
+ -- Notice the double-transform: one for the primary
+ -- expr, and one for the one with the prefix op.
+ ------------------------------------------------------
+ local function handle_prefix ()
+ local fli = lx:lineinfo_right()
+ local p2_func, p2 = get_parser_info (self.prefix)
+ local op = p2_func and p2_func (lx)
+ if op then -- Keyword-based sequence found
+ local ili = lx:lineinfo_right() -- Intermediate LineInfo
+ local e = p2.builder (op, self:parse (lx, p2.prec))
+ local lli = lx:lineinfo_left()
+ return transform (transform (e, p2, ili, lli), self, fli, lli)
+ else -- No prefix found, get a primary expression
+ local e = self.primary(lx)
+ local lli = lx:lineinfo_left()
+ return transform (e, self, fli, lli)
+ end
+ end --</expr.parse.handle_prefix>
+
+ ------------------------------------------------------
+ -- Look for an infix sequence+right-hand-side operand.
+ -- Return the whole binary expression result,
+ -- or false if no operator was found.
+ ------------------------------------------------------
+ local function handle_infix (e)
+ local p2_func, p2 = get_parser_info (self.infix)
+ if not p2 then return false end
+
+ -----------------------------------------
+ -- Handle flattening operators: gather all operands
+ -- of the series in [list]; when a different operator
+ -- is found, stop, build from [list], [transform] and
+ -- return.
+ -----------------------------------------
+ if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
+ local fli = lx:lineinfo_right()
+ local pflat, list = p2, { e }
+ repeat
+ local op = p2_func(lx)
+ if not op then break end
+ table.insert (list, self:parse (lx, p2.prec))
+ local _ -- We only care about checking that p2==pflat
+ _, p2 = get_parser_info (self.infix)
+ until p2 ~= pflat
+ local e2 = pflat.builder (list)
+ local lli = lx:lineinfo_left()
+ return transform (transform (e2, pflat, fli, lli), self, fli, lli)
+
+ -----------------------------------------
+ -- Handle regular infix operators: [e] the LHS is known,
+ -- just gather the operator and [e2] the RHS.
+ -- Result goes in [e3].
+ -----------------------------------------
+ elseif p2.prec and p2.prec>prec or
+ p2.prec==prec and p2.assoc=="right" then
+ local fli = e.lineinfo.first -- lx:lineinfo_right()
+ local op = p2_func(lx)
+ if not op then return false end
+ local e2 = self:parse (lx, p2.prec)
+ local e3 = p2.builder (e, op, e2)
+ local lli = lx:lineinfo_left()
+ return transform (transform (e3, p2, fli, lli), self, fli, lli)
+
+ -----------------------------------------
+ -- Check for non-associative operators, and complain if applicable.
+ -----------------------------------------
+ elseif p2.assoc=="none" and p2.prec==prec then
+ -- fixed: was a call to the undefined global [parser_error],
+ -- which crashed instead of reporting the error.
+ parse_error (lx, "non-associative operator!")
+
+ -----------------------------------------
+ -- No infix operator suitable at that precedence
+ -----------------------------------------
+ else return false end
+
+ end --</expr.parse.handle_infix>
+
+ ------------------------------------------------------
+ -- Look for a suffix sequence.
+ -- Return the result of suffix operator on [e],
+ -- or false if no operator was found.
+ ------------------------------------------------------
+ local function handle_suffix (e)
+ -- FIXME bad fli, must take e.lineinfo.first
+ local p2_func, p2 = get_parser_info (self.suffix)
+ if not p2 then return false end
+ if not p2.prec or p2.prec>=prec then
+ --local fli = lx:lineinfo_right()
+ local fli = e.lineinfo.first
+ local op = p2_func(lx)
+ if not op then return false end
+ local lli = lx:lineinfo_left()
+ e = p2.builder (e, op)
+ e = transform (transform (e, p2, fli, lli), self, fli, lli)
+ return e
+ end
+ return false
+ end --</expr.parse.handle_suffix>
+
+ ------------------------------------------------------
+ -- Parser body: read suffix and (infix+operand)
+ -- extensions as long as we're able to fetch more at
+ -- this precedence level.
+ ------------------------------------------------------
+ local e = handle_prefix()
+ repeat
+ local x = handle_suffix (e); e = x or e
+ local y = handle_infix (e); e = y or e
+ until not (x or y)
+
+ -- No transform: it already happened in operators handling
+ return e
+ end --</expr.parse>
+
+ -------------------------------------------------------------------
+ -- Construction: each operator table becomes a multisequence, and
+ -- [add] on the expr parser delegates to the primary parser.
+ -------------------------------------------------------------------
+ if not p.primary then p.primary=p[1]; p[1]=nil end
+ for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
+ if not p[t] then p[t] = { } end
+ if not is_parser(p[t]) then multisequence(p[t]) end
+ end
+ function p:add(...) return self.primary:add(...) end
+ return p
+end --</expr>
+
+
+-------------------------------------------------------------------------------
+--
+-- List parser generator
+--
+-------------------------------------------------------------------------------
+-- In [p], the following fields can be provided in input:
+--
+-- * [builder]: takes list of subparser results, returns AST
+-- * [transformers]: as usual
+-- * [name]: as usual
+--
+-- * [terminators]: list of strings representing the keywords which
+-- might mark the end of the list. When non-empty, the list is
+-- allowed to be empty. A string is treated as a single-element
+-- table, whose element is that string, e.g. ["do"] is the same as
+-- [{"do"}].
+--
+-- * [separators]: list of strings representing the keywords which can
+-- separate elements of the list. When non-empty, one of these
+-- keyword has to be found between each element. Lack of a separator
+-- indicates the end of the list. A string is treated as a
+-- single-element table, whose element is that string, e.g. ["do"]
+-- is the same as [{"do"}]. If [terminators] is empty/nil, then
+-- [separators] has to be non-empty.
+--
+-- After creation, the following fields are added:
+-- * [parse] the parsing function lexer->AST
+-- * [kind] == "list"
+--
+-------------------------------------------------------------------------------
+function list (p)
+ make_parser ("list", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method
+ -------------------------------------------------------------------
+ function p:parse (lx)
+
+ ------------------------------------------------------
+ -- Used to quickly check whether there's a terminator
+ -- or a separator immediately ahead
+ ------------------------------------------------------
+ local function peek_is_in (keywords)
+ return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
+
+ local x = { }
+ local fli = lx:lineinfo_right()
+
+ -- if there's a terminator to start with, don't bother trying
+ -- (only possible when [terminators] is set: the list may be empty)
+ if not peek_is_in (self.terminators) then
+ repeat table.insert (x, self.primary (lx)) -- read one element
+ until
+ -- First reason to stop: There's a separator list specified,
+ -- and next token isn't one. Otherwise, consume it with [lx:next()]
+ self.separators and not(peek_is_in (self.separators) and lx:next()) or
+ -- Other reason to stop: terminator token ahead
+ peek_is_in (self.terminators) or
+ -- Last reason: end of file reached
+ lx:peek().tag=="Eof"
+ end
+
+ local lli = lx:lineinfo_left()
+
+ -- Apply the builder. It can be a string, or a callable value,
+ -- or simply nothing.
+ local b = self.builder
+ if b then
+ if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
+ elseif type(b)=="function" then x=b(x)
+ else
+ -- Callable table: detected through its metatable's __call.
+ local bmt = getmetatable(b)
+ if bmt and bmt.__call then x=b(x) end
+ end
+ end
+ return transform (x, self, fli, lli)
+ end --</list.parse>
+
+ -------------------------------------------------------------------
+ -- Construction: normalize [terminators]/[separators] -- a lone
+ -- string becomes a one-element list, an empty list becomes nil.
+ -------------------------------------------------------------------
+ if not p.primary then p.primary = p[1]; p[1] = nil end
+ if type(p.terminators) == "string" then p.terminators = { p.terminators }
+ elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
+ if type(p.separators) == "string" then p.separators = { p.separators }
+ elseif p.separators and #p.separators == 0 then p.separators = nil end
+
+ return p
+end --</list>
+
+
+-------------------------------------------------------------------------------
+--
+-- Keyword-conditioned parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Only apply a parser if a given keyword is found. The result of
+-- [gg.onkeyword] parser is the result of the subparser (modulo
+-- [transformers] applications).
+--
+-- lineinfo: the keyword is *not* included in the boundaries of the
+-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
+-- implementation of metalua has shown that it was the appropriate choice
+-- in every case.
+--
+-- Input fields:
+--
+-- * [name]: as usual
+--
+-- * [transformers]: as usual
+--
+-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
+-- stream instead of being consumed.
+--
+-- * [primary]: the subparser.
+--
+-- * [keywords]: list of strings representing triggering keywords.
+--
+-- * Table-part entries can contain strings, and/or exactly one parser.
+-- Strings are put in [keywords], and the parser is put in [primary].
+--
+-- After the call, the following fields will be set:
+--
+-- * [parse] the parsing method
+-- * [kind] == "onkeyword"
+-- * [primary]
+-- * [keywords]
+--
+-------------------------------------------------------------------------------
+function onkeyword (p)
+ make_parser ("onkeyword", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method: if the next lexeme is one of [self.keywords],
+ -- consume it (unless [self.peek] is set) and run [self.primary];
+ -- return false without consuming anything otherwise.
+ -------------------------------------------------------------------
+ function p:parse(lx)
+ if lx:is_keyword (lx:peek(), unpack(self.keywords)) then
+ --local fli = lx:lineinfo_right()
+ if not self.peek then lx:next() end
+ local content = self.primary (lx)
+ --local lli = lx:lineinfo_left()
+ -- The triggering keyword is deliberately excluded from lineinfo;
+ -- boundaries come from the primary content itself (see comment
+ -- block above).
+ -- NOTE(review): assumes [primary] returns a table with lineinfo;
+ -- a primary returning false would fail here -- confirm callers.
+ local fli, lli = content.lineinfo.first, content.lineinfo.last
+ return transform (content, p, fli, lli)
+ else return false end
+ end
+
+ -------------------------------------------------------------------
+ -- Construction: strings in the array part become triggering
+ -- keywords; the single non-string entry becomes the primary parser.
+ -------------------------------------------------------------------
+ if not p.keywords then p.keywords = { } end
+ for _, x in ipairs(p) do
+ if type(x)=="string" then table.insert (p.keywords, x)
+ else assert (not p.primary and is_parser (x)); p.primary = x end
+ end
+ assert (p.primary, 'no primary parser in gg.onkeyword')
+ return p
+end --</onkeyword>
+
+
+-------------------------------------------------------------------------------
+--
+-- Optional keyword consumer pseudo-parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function. That function parses
+-- one of the keywords passed as parameters, and returns it. It returns
+-- [false] if no matching keyword is found.
+--
+-- Notice that tokens returned by lexer already carry lineinfo, therefore
+-- there's no need to add them, as done usually through transform() calls.
+-------------------------------------------------------------------------------
+-- Build a pseudo-parser that consumes one of the given keywords when it
+-- is next in the stream, returning that keyword; otherwise it returns
+-- false and consumes nothing. Keywords may be passed either as varargs
+-- or as a single list.
+function optkeyword (...)
+ local kws = {...}
+ if type (kws[1]) == "table" then
+ -- Single-table call form: optkeyword{ "kw1", "kw2", ... }
+ assert (#kws == 1)
+ kws = kws[1]
+ end
+ for i = 1, #kws do assert (type (kws[i]) == "string") end
+ return function (lx)
+ local found = lx:is_keyword (lx:peek(), unpack (kws))
+ if not found then return false end
+ lx:next()
+ return found
+ end
+end
+
+
+-------------------------------------------------------------------------------
+--
+-- Run a parser with a special lexer
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function.
+-- First argument is the lexer class to be used with the parser,
+-- 2nd is the parser itself.
+-- The resulting parser returns whatever the argument parser does.
+--
+-------------------------------------------------------------------------------
+-- Wrap [parser] so it runs with [new_lexer] as the lexeme stream's
+-- metatable, restoring the previous lexer afterwards even on error.
+function with_lexer(new_lexer, parser)
+
+ -------------------------------------------------------------------
+ -- Most gg functions take their parameters in a table, so it's
+ -- better to silently accept when with_lexer{ } is called with
+ -- its arguments in a list:
+ -------------------------------------------------------------------
+ if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
+ return with_lexer(unpack(new_lexer))
+ end
+
+ -------------------------------------------------------------------
+ -- Save the current lexer, switch it for the new one, run the parser,
+ -- restore the previous lexer, even if the parser caused an error.
+ -------------------------------------------------------------------
+ return function (lx)
+ local old_lexer = getmetatable(lx)
+ -- NOTE(review): lx:sync() presumably flushes lookahead so that no
+ -- lexeme is tokenized under the wrong lexer -- confirm in mll.lua.
+ lx:sync()
+ setmetatable(lx, new_lexer)
+ -- pcall guarantees the original lexer is restored before re-raising.
+ local status, result = pcall(parser, lx)
+ lx:sync()
+ setmetatable(lx, old_lexer)
+ if status then return result else error(result) end
+ end
+end