-% \documentclass{article}\r
-% \title{MetaLua Abstract Syntax Trees}\r
-% \setlength{\columnseprule}{1pt}\r
-\def\T#1{{\bf`#1}}\r
-\def\L#1{\{#1\}}\r
-\def\P#1{{\it #1}}\r
-\def\C#1{[\hspace{-.4em}[#1]\hspace{-.4em}]}\r
-\def\V#1{{\it #1}}\r
-\def\plus{\ensuremath{{}^{+}}}\r
-\def\star{\ensuremath{{}^{*}}}\r
-\def\TRANS{\ensuremath{\Longrightarrow}}\r
-% \usepackage{alltt}\r
-% \begin{document}\r
-% \maketitle\r
-\r
-\section{Abstract Syntax Tree grammar}\r
-\r
-\begin{alltt}\r
-\r
-block: \L{ stat\star }\r
-\r
-stat:\r
-| \T{Do}\L{ block }\r
-| \T{Set}\L{ \L{lhs\plus} \L{expr\plus} }\r
-| \T{While}\L{ expr block }\r
-| \T{Repeat}\L{ block expr }\r
-| \T{If}\L{ (expr block)\plus block? }\r
-| \T{Fornum}\L{ ident expr expr expr? block }\r
-| \T{Forin}\L{ \L{ident\plus} \L{expr\plus} block }\r
-| \T{Local}\L{ \L{ident\plus} \L{expr\plus}? }\r
-| \T{Localrec}\L{ \L{ident\plus} \L{expr\plus}? }\r
-| \T{Goto}\L{\P{string}}\r
-| \T{Label}\L{\P{string}}\r
-| \T{Return}\r
-| \T{Break}\r
-| apply\r
-\r
-expr:\r
-| \T{Nil} | \T{Dots} | \T{True} | \T{False}\r
-| \T{Number}\L{ \P{number} }\r
-| \T{String}\L{ \P{string} }\r
-| \T{Function}\L{ \L{ ident\star \T{Dots}? } block } \r
-| \T{Table}\L{ ( \T{Pair}\L{ expr expr } | expr )\star }\r
-| \T{Op}\L{ binopid expr expr } | \T{Op}\L{ unopid expr }\r
-| \T{Paren}\L{ expr }\r
-| \T{Stat}\L{ block expr }\r
-| apply\r
-| lhs\r
-\r
-apply:\r
-| \T{Call}\L{ expr expr\star }\r
-| \T{Invoke}\L{ expr \T{String}\L{ \P{string} } expr\star }\r
-\r
-lhs: ident | \T{Index}\L{ expr expr }\r
-\r
-ident: \T{Id}\L{ \P{string} }\r
-\r
-binopid: "add" | "sub" | "mul" | "div"\r
- | "mod" | "pow" | "concat" | "eq"\r
- | "lt" | "le" | "and" | "or"\r
-\r
-unopid: "not" | "len" | "unm"\r
-\end{alltt}\r
- \r
-% {\bf{}Relaxed forms:}\r
-\r
-% stat +=\r
-% | \L{ stat\star }\r
-% | \T{Let}\L{ (\L{lhs\plus}|lhs) (\L{expr\plus}|expr) }\r
-% | \T{Forin}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr) block }\r
-% | \T{Local}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr)? }\r
-\r
-% lhs += \T{Index}\L{ expr expr\plus }\r
-\r
-% expr += \r
-% | \T{Function}\L{ ident? block }\r
-% | \P{number}\r
-% | \P{string}\r
-\r
-% {\bf{}Translations:}\r
-% \C{do \V{foo}; \V{bar} end} \TRANS \T{Do}\L{ \C{\V{foo}}, \C{\V{bar}}}\r
-% \C{\V{v1},\V{v2},\V{v3} = \V{e1},\V{e2},\V{e3}} \TRANS \T{Let}\L{\L{\C{\V{v1}},\C{\V{v2}},\C{\V{v3}}}, \L{\C{\V{e1}},\C{\V{e2}},\C{\V{e3}}}}\r
-% \C{\V{foo}(\V{bar1}, \V{bar2})} \TRANS \T{Call}\L{\C{\V{foo}},\C{\V{bar1}},\C{\V{bar2}}}\r
-% \C{while \V{foo} do \V{bar}; \V{gna} end} \TRANS \T{While}\L{\C{\V{foo}},\T{Do}\L{\C{\V{bar}},\C{\V{gna}}}}\r
-% \C{repeat \V{foo}; \V{bar} until \V{gna}} \TRANS \T{Repeat}\L{\T{Do}\L{\C{\V{foo}},\C{\V{bar}}},\C{\V{gna}}}\r
-\r
-% \end{alltt}\r
-\r
-\r
-%\end{document}\r
-\r
-%stat:\r
-%| \T{Splice}\L{ expr }\r
-\r
-%expr:\r
-%| \T{Quote}\L{ \T{Exprr}, expr }\r
-%| \T{Quote}\L{ \T{Stat}, stat }\r
-%| \T{Quote}\L{ \T{Id}, ident }\r
-%| \T{Splice}\L{ expr }\r
-\r
-%expr relaxed:\r
-%| \T{Quote}\L{ expr }\r
+% \documentclass{article}
+% \title{MetaLua Abstract Syntax Trees}
+% \setlength{\columnseprule}{1pt}
+\def\T#1{{\bf`#1}}
+\def\L#1{\{#1\}}
+\def\P#1{{\it #1}}
+\def\C#1{[\hspace{-.4em}[#1]\hspace{-.4em}]}
+\def\V#1{{\it #1}}
+\def\plus{\ensuremath{{}^{+}}}
+\def\star{\ensuremath{{}^{*}}}
+\def\TRANS{\ensuremath{\Longrightarrow}}
+% \usepackage{alltt}
+% \begin{document}
+% \maketitle
+
+\section{Abstract Syntax Tree grammar}
+
+\begin{alltt}
+
+block: \L{ stat\star }
+
+stat:
+| \T{Do}\L{ block }
+| \T{Set}\L{ \L{lhs\plus} \L{expr\plus} }
+| \T{While}\L{ expr block }
+| \T{Repeat}\L{ block expr }
+| \T{If}\L{ (expr block)\plus block? }
+| \T{Fornum}\L{ ident expr expr expr? block }
+| \T{Forin}\L{ \L{ident\plus} \L{expr\plus} block }
+| \T{Local}\L{ \L{ident\plus} \L{expr\plus}? }
+| \T{Localrec}\L{ \L{ident\plus} \L{expr\plus}? }
+| \T{Goto}\L{\P{string}}
+| \T{Label}\L{\P{string}}
+| \T{Return}
+| \T{Break}
+| apply
+
+expr:
+| \T{Nil} | \T{Dots} | \T{True} | \T{False}
+| \T{Number}\L{ \P{number} }
+| \T{String}\L{ \P{string} }
+| \T{Function}\L{ \L{ ident\star \T{Dots}? } block }
+| \T{Table}\L{ ( \T{Pair}\L{ expr expr } | expr )\star }
+| \T{Op}\L{ binopid expr expr } | \T{Op}\L{ unopid expr }
+| \T{Paren}\L{ expr }
+| \T{Stat}\L{ block expr }
+| apply
+| lhs
+
+apply:
+| \T{Call}\L{ expr expr\star }
+| \T{Invoke}\L{ expr \T{String}\L{ \P{string} } expr\star }
+
+lhs: ident | \T{Index}\L{ expr expr }
+
+ident: \T{Id}\L{ \P{string} }
+
+binopid: "add" | "sub" | "mul" | "div"
+ | "mod" | "pow" | "concat" | "eq"
+ | "lt" | "le" | "and" | "or"
+
+unopid: "not" | "len" | "unm"
+\end{alltt}
+
+% {\bf{}Relaxed forms:}
+
+% stat +=
+% | \L{ stat\star }
+% | \T{Let}\L{ (\L{lhs\plus}|lhs) (\L{expr\plus}|expr) }
+% | \T{Forin}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr) block }
+% | \T{Local}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr)? }
+
+% lhs += \T{Index}\L{ expr expr\plus }
+
+% expr +=
+% | \T{Function}\L{ ident? block }
+% | \P{number}
+% | \P{string}
+
+% {\bf{}Translations:}
+% \C{do \V{foo}; \V{bar} end} \TRANS \T{Do}\L{ \C{\V{foo}}, \C{\V{bar}}}
+% \C{\V{v1},\V{v2},\V{v3} = \V{e1},\V{e2},\V{e3}} \TRANS \T{Let}\L{\L{\C{\V{v1}},\C{\V{v2}},\C{\V{v3}}}, \L{\C{\V{e1}},\C{\V{e2}},\C{\V{e3}}}}
+% \C{\V{foo}(\V{bar1}, \V{bar2})} \TRANS \T{Call}\L{\C{\V{foo}},\C{\V{bar1}},\C{\V{bar2}}}
+% \C{while \V{foo} do \V{bar}; \V{gna} end} \TRANS \T{While}\L{\C{\V{foo}},\T{Do}\L{\C{\V{bar}},\C{\V{gna}}}}
+% \C{repeat \V{foo}; \V{bar} until \V{gna}} \TRANS \T{Repeat}\L{\T{Do}\L{\C{\V{foo}},\C{\V{bar}}},\C{\V{gna}}}
+
+% \end{alltt}
+
+
+%\end{document}
+
+%stat:
+%| \T{Splice}\L{ expr }
+
+%expr:
+%| \T{Quote}\L{ \T{Exprr}, expr }
+%| \T{Quote}\L{ \T{Stat}, stat }
+%| \T{Quote}\L{ \T{Id}, ident }
+%| \T{Splice}\L{ expr }
+
+%expr relaxed:
+%| \T{Quote}\L{ expr }
-\section{{\tt clopts}: command line options parsing}\r
-This library allows to parse command line options in a generic, standard and\r
-reasonably powerful way. Using it for your programs helps ensure that they\r
-behave in an unsurprizing way to users. the {\tt metalua} compiler parses its\r
-parameters with clopts.\r
-\r
-FIXME: Rest of the doc to be written\r
+\section{{\tt clopts}: command line options parsing}
+This library allows you to parse command line options in a generic, standard and
+reasonably powerful way. Using it for your programs helps ensure that they
+behave in an unsurprising way to users. The {\tt metalua} compiler parses its
+parameters with clopts.
+
+FIXME: Rest of the doc to be written
-\section{Extension {\tt declare\_globals}: mandatory declaration of global\r
- variables}\r
-There is a {\tt strict} library in Lua, also provided by metalua, which\r
-complains when one tries to access a global variable which hasn't been\r
-initialized, and when a global variable is initialized anywhere but at the\r
-program's top-level. This catches many errors, especially typos on variable\r
-names. \r
-\r
-However, with a macro-enabled language, we can do slightly better: we add a {\tt\r
- global} keyword, which works as {\tt local}, allows to declare global\r
-variables, which can subsequently be used freely. Undeclared global variables\r
-cause an error when one tries to read or write them.\r
-\r
-FIXME: need a ``global function ...'' syntax; interactions with sub-modules is\r
-not tested correctly.\r
+\section{Extension {\tt declare\_globals}: mandatory declaration of global
+ variables}
+There is a {\tt strict} library in Lua, also provided by metalua, which
+complains when one tries to access a global variable which hasn't been
+initialized, and when a global variable is initialized anywhere but at the
+program's top-level. This catches many errors, especially typos on variable
+names.
+
+However, with a macro-enabled language, we can do slightly better: we add a {\tt
+  global} keyword, which works as {\tt local}, allows you to declare global
+variables, which can subsequently be used freely. Undeclared global variables
+cause an error when one tries to read or write them.
+
+FIXME: need a ``global function ...'' syntax; interactions with sub-modules is
+not tested correctly.
-\section[Dollar extension]{{\tt dollar}: generic syntax for macros}\r
-When you write a short-lived macro which takes reasonably short arguments, you\r
-generally don't want to write a supporting syntax for it. The dollar library\r
-allows you to call it in a generic way: if you store your macro in the table\r
-{\tt mlp.macros}, say as function {\tt mlp.macros.foobar}, then you can call it\r
-in your code as {\tt\$foobar(arg1, arg2...)}: it will receive as parameters the\r
-ASTs of the pseudo-call's arguments.\r
-\r
-\paragraph{Example}~\r
-\begin{verbatim}\r
--{ block:\r
- require 'dollar'\r
- function doller.LOG(id)\r
- match id with `Id{ id_name } -> \r
- return +{ printf("%s = %s", id_name, \r
- table.tostring(-{id})) }\r
- end\r
- end }\r
-\r
-local x = { 1, 2, 3 }\r
-$LOG(x) -- prints "x = { 1, 2, 3 }" when executed\r
-\end{verbatim}\r
+\section[Dollar extension]{{\tt dollar}: generic syntax for macros}
+When you write a short-lived macro which takes reasonably short arguments, you
+generally don't want to write a supporting syntax for it. The dollar library
+allows you to call it in a generic way: if you store your macro in the table
+{\tt mlp.macros}, say as function {\tt mlp.macros.foobar}, then you can call it
+in your code as {\tt\$foobar(arg1, arg2...)}: it will receive as parameters the
+ASTs of the pseudo-call's arguments.
+
+\paragraph{Example}~
+\begin{verbatim}
+-{ block:
+ require 'dollar'
+   function dollar.LOG(id)
+ match id with `Id{ id_name } ->
+ return +{ printf("%s = %s", id_name,
+ table.tostring(-{id})) }
+ end
+ end }
+
+local x = { 1, 2, 3 }
+$LOG(x) -- prints "x = { 1, 2, 3 }" when executed
+\end{verbatim}
-\section{Library {\tt H}: hygienic macros}\r
-\r
-\paragraph{Warning} This hygienic macro system is quite new and\r
-experimental. Its API is likely to evolve over the next versions of\r
-metalua. Feedbacks are especially welcome.\r
-\r
-A common problem with meta-programming tools is variable capture, and\r
-the art of automatically avoiding capture is called hygiene. the {\tt\r
- H} library automates as much as possible the handling of hygiene.\r
-\r
-The design of H tries to respect metalua's core principles:\r
-\begin{itemize}\r
-\item Nothing taboo under the hood: the underlying mechanisms of the\r
- language must remain simple enough to be intelligible to their\r
- intended users. Black magic should be banned from the desing. This\r
- rules out hygienic macros as a primitive: these either rely on very\r
- advanced and hard to predict mechanisms, or severely limit the\r
- manipulation tools available to the macro authors.\r
-\item Simple by default: advanced users should know what happens under\r
- the hood, but more casual users should be able to simply turn the\r
- ignition and drive. It should be possible to use H, for regular\r
- macros, without much understanding of its advanced principles and\r
- implementation.\r
-\item Everything's a regular program: again, most macro systems\r
- offering hygiene limit macro manipulations to a term rewriting\r
- framework, which might be Turing complete, but makes many advanced\r
- stuff cumbersome to write. AST are regular data, which must be\r
- manipulable by regular programs.\r
-\item Extension: metalua tries to offer the most extensible possible\r
- language to its users. If the author of the language couldn't\r
- implement a feature as a regular extension, it would probably\r
- outline a severe limitation of the system.\r
-\end{itemize}\r
-\r
-\paragraph{Inside captures}\r
-There are two kind of captures, inside a macro and outside a\r
-macro. People often think about inside captures, in parts because the\r
-C preprocessor is subject to it. It happens when a macro inserts user\r
-code in a quote, and the quote declares a local variable that shadows\r
-a user one:\r
-\r
-\begin{verbatim}\r
--{ block:\r
- require 'dollar'\r
- function dollar.TRICOND(cond, iftrue, iffalse)\r
- local code = +{ block:\r
- local tmp, result\r
- tmp = -{cond}\r
- if tmp then result = -{iftrue} \r
- else result = -{iffalse} end }\r
- return `Stat{ code, +{result} }\r
- end }\r
-\r
-local tmp = 5\r
-local foo = $TRICOND(tmp%2==0, "even", "odd")\r
-\end{verbatim}\r
-\r
-Here, the \verb|tmp| local variable used in the macro code captures\r
-the user's one, and cause a failure (an attempt to get the modulus of\r
-\verb|nil|). The expanded code looks like:\r
-\begin{Verbatim}\r
-local tmp = 5\r
-local foo = -{ `Stat{ +{ block:\r
- local tmp, result\r
- tmp = tmp%2==0\r
- if tmp then result = "even"\r
- else result = "odd" end\r
-}, +{result} } }\r
-\end{Verbatim}\r
-\r
-This is fixed by renaming automatically all local variables in the\r
-macro with fresh names. H provides an AST walker which does\r
-that. However, it needs to rename only macro code, not user-provided\r
-code; therefore the macro writer has to somehow mark the user\r
-code. This is done with the ``!'' prefix operator: sub-trees marked\r
-with ``!'' won't experience any renaming. The following version is\r
-therefore safe w.r.t. inside variable capture:\r
-\r
-\begin{verbatim}\r
--{ block:\r
- require 'dollar'\r
- function dollar.TRICOND(cond, iftrue, iffalse)\r
- local code = +{ block:\r
- local tmp, result\r
- tmp = -{!cond}\r
- if tmp then result = -{!iftrue} \r
- else result = -{!iffalse} end }\r
- return H(`Stat{ code, +{result} })\r
- end }\r
-\r
-local tmp = 5\r
-local foo = $TRICOND(tmp%2==0, "even", "odd")\r
-\end{verbatim}\r
-\r
-It expands to:\r
-\r
-\begin{Verbatim}\r
-local tmp = 5\r
-local foo = -{ `Stat{ +{ block:\r
- local -{`Id '.1.L.tmp'}, -{`Id '.2.L.result'} -- new fresh names\r
- -{`Id '.1.L.tmp'} = tmp%2==0 -- no capture!\r
- if -{`Id '.1.L.tmp'} then -{`Id '.2.L.result'} = "even"\r
- else -{`Id '.2.L.result'} = "odd" end\r
-}, `Id '.2.L.result' } }\r
-\end{Verbatim}\r
-\r
-\paragraph{Outside captures}\r
-We've seen that macros can capture the user's variables; but the\r
-opposite can also happen: the user can capture the variables required\r
-by a macro:\r
-\r
-\begin{Verbatim}\r
--{ block:\r
- require 'dollar'\r
- dollar.log = |x| +{ printf("%s = %s", \r
- -{table.tostring(x)}, \r
- table.tostring(-{x})) } }\r
-local x = { 1, 2, 3 }\r
-\$log(x) -- prints "`Id 'x' = { 1, 2, 3 }"\r
-\end{Verbatim}\r
-\r
-This expands to:\r
-\begin{Verbatim}\r
-printf("%s = %s", "`Id 'x'", table.tostring(x))\r
-\end{Verbatim}\r
-\r
-Now, replace "x" with "table", and you get an outside capture: "local\r
-table = { 1, 2, 3 } shadows the table module from which the macro\r
-tries to get table.tostring().\r
-\r
-The most widespread language which supports non-hygienic macros,\r
-Common Lisp, deals with that issue by being a Lisp-2: it has separate\r
-namespaces for functions and ``normal'' variables. This happens to\r
-remove many common capture cases. \r
-\r
-H fixes this by renaming the free variables in hygienized\r
-macros. After this, a "local new\_names = old\_names" statement is\r
-generated, which re-establishes the correspondance between\r
-names. Let's make the examples above hygienic:\r
-\r
-\begin{Verbatim}\r
--{ block:\r
- require 'dollar'\r
- dollar.log = |x| H+{ printf("%s = %s", \r
- -{!table.tostring(x)}, \r
- table.tostring(-{!x})) } }\r
-local table = { 1, 2, 3 }\r
-\$log(table) -- prints "`Id 'table' = { 1, 2, 3 }"\r
-\end{Verbatim}\r
-\r
-The code above expands into:\r
-\r
-\begin{Verbatim}\r
-local table = { 1, 2, 3 }\r
-(-{`Id '.1.X.printf'}) ("%s = %s",\r
- "`Id 'table'",\r
- (-{`Id '.2.X.table'}.tostring(table)))\r
-\end{Verbatim}\r
-\r
-To make this work, we need to introduce, somewhere where no variable\r
-is captured, the following local statement:\r
-\begin{Verbatim}\r
-local -{`Id '.1.X.printf'}, -{`Id '.2.X.table'} = printf, table\r
-\end{Verbatim}\r
-\r
-The correct way would be to let the user decide where this statement\r
-should go.\r
-\r
+\section{Library {\tt H}: hygienic macros}
+
+\paragraph{Warning} This hygienic macro system is quite new and
+experimental. Its API is likely to evolve over the next versions of
+metalua. Feedback is especially welcome.
+
+A common problem with meta-programming tools is variable capture, and
+the art of automatically avoiding capture is called hygiene. The {\tt
+  H} library automates as much as possible the handling of hygiene.
+
+The design of H tries to respect metalua's core principles:
+\begin{itemize}
+\item Nothing taboo under the hood: the underlying mechanisms of the
+ language must remain simple enough to be intelligible to their
+  intended users. Black magic should be banned from the design. This
+ rules out hygienic macros as a primitive: these either rely on very
+ advanced and hard to predict mechanisms, or severely limit the
+ manipulation tools available to the macro authors.
+\item Simple by default: advanced users should know what happens under
+ the hood, but more casual users should be able to simply turn the
+ ignition and drive. It should be possible to use H, for regular
+ macros, without much understanding of its advanced principles and
+ implementation.
+\item Everything's a regular program: again, most macro systems
+ offering hygiene limit macro manipulations to a term rewriting
+ framework, which might be Turing complete, but makes many advanced
+ stuff cumbersome to write. AST are regular data, which must be
+ manipulable by regular programs.
+\item Extension: metalua tries to offer the most extensible possible
+ language to its users. If the author of the language couldn't
+ implement a feature as a regular extension, it would probably
+ outline a severe limitation of the system.
+\end{itemize}
+
+\paragraph{Inside captures}
+There are two kinds of captures, inside a macro and outside a
+macro. People often think about inside captures, in part because the
+C preprocessor is subject to it. It happens when a macro inserts user
+code in a quote, and the quote declares a local variable that shadows
+a user one:
+
+\begin{verbatim}
+-{ block:
+ require 'dollar'
+ function dollar.TRICOND(cond, iftrue, iffalse)
+ local code = +{ block:
+ local tmp, result
+ tmp = -{cond}
+ if tmp then result = -{iftrue}
+ else result = -{iffalse} end }
+ return `Stat{ code, +{result} }
+ end }
+
+local tmp = 5
+local foo = $TRICOND(tmp%2==0, "even", "odd")
+\end{verbatim}
+
+Here, the \verb|tmp| local variable used in the macro code captures
+the user's one, and causes a failure (an attempt to get the modulus of
+\verb|nil|). The expanded code looks like:
+\begin{Verbatim}
+local tmp = 5
+local foo = -{ `Stat{ +{ block:
+ local tmp, result
+ tmp = tmp%2==0
+ if tmp then result = "even"
+ else result = "odd" end
+}, +{result} } }
+\end{Verbatim}
+
+This is fixed by renaming automatically all local variables in the
+macro with fresh names. H provides an AST walker which does
+that. However, it needs to rename only macro code, not user-provided
+code; therefore the macro writer has to somehow mark the user
+code. This is done with the ``!'' prefix operator: sub-trees marked
+with ``!'' won't experience any renaming. The following version is
+therefore safe w.r.t. inside variable capture:
+
+\begin{verbatim}
+-{ block:
+ require 'dollar'
+ function dollar.TRICOND(cond, iftrue, iffalse)
+ local code = +{ block:
+ local tmp, result
+ tmp = -{!cond}
+ if tmp then result = -{!iftrue}
+ else result = -{!iffalse} end }
+ return H(`Stat{ code, +{result} })
+ end }
+
+local tmp = 5
+local foo = $TRICOND(tmp%2==0, "even", "odd")
+\end{verbatim}
+
+It expands to:
+
+\begin{Verbatim}
+local tmp = 5
+local foo = -{ `Stat{ +{ block:
+ local -{`Id '.1.L.tmp'}, -{`Id '.2.L.result'} -- new fresh names
+ -{`Id '.1.L.tmp'} = tmp%2==0 -- no capture!
+ if -{`Id '.1.L.tmp'} then -{`Id '.2.L.result'} = "even"
+ else -{`Id '.2.L.result'} = "odd" end
+}, `Id '.2.L.result' } }
+\end{Verbatim}
+
+\paragraph{Outside captures}
+We've seen that macros can capture the user's variables; but the
+opposite can also happen: the user can capture the variables required
+by a macro:
+
+\begin{Verbatim}
+-{ block:
+ require 'dollar'
+ dollar.log = |x| +{ printf("%s = %s",
+ -{table.tostring(x)},
+ table.tostring(-{x})) } }
+local x = { 1, 2, 3 }
+\$log(x) -- prints "`Id 'x' = { 1, 2, 3 }"
+\end{Verbatim}
+
+This expands to:
+\begin{Verbatim}
+printf("%s = %s", "`Id 'x'", table.tostring(x))
+\end{Verbatim}
+
+Now, replace "x" with "table", and you get an outside capture: "local
+table = { 1, 2, 3 }" shadows the table module from which the macro
+tries to get table.tostring().
+
+The most widespread language which supports non-hygienic macros,
+Common Lisp, deals with that issue by being a Lisp-2: it has separate
+namespaces for functions and ``normal'' variables. This happens to
+remove many common capture cases.
+
+H fixes this by renaming the free variables in hygienized
+macros. After this, a "local new\_names = old\_names" statement is
+generated, which re-establishes the correspondence between
+names. Let's make the examples above hygienic:
+
+\begin{Verbatim}
+-{ block:
+ require 'dollar'
+ dollar.log = |x| H+{ printf("%s = %s",
+ -{!table.tostring(x)},
+ table.tostring(-{!x})) } }
+local table = { 1, 2, 3 }
+\$log(table) -- prints "`Id 'table' = { 1, 2, 3 }"
+\end{Verbatim}
+
+The code above expands into:
+
+\begin{Verbatim}
+local table = { 1, 2, 3 }
+(-{`Id '.1.X.printf'}) ("%s = %s",
+ "`Id 'table'",
+ (-{`Id '.2.X.table'}.tostring(table)))
+\end{Verbatim}
+
+To make this work, we need to introduce, somewhere where no variable
+is captured, the following local statement:
+\begin{Verbatim}
+local -{`Id '.1.X.printf'}, -{`Id '.2.X.table'} = printf, table
+\end{Verbatim}
+
+The correct way would be to let the user decide where this statement
+should go.
+
-\section{Extension {\tt match}: structural pattern matching}\r
-Pattern matching is an extremely pleasant and powerful way to handle tree-like\r
-structures, such as ASTs. Unsurprisingly, it's a feature found in most\r
-ML-inspired languages, which excel at compilation-related tasks. There is a\r
-pattern matching extension for metalua, which is extremely useful for most\r
-meta-programming purposes.\r
-\r
-\subsection{Purpose}\r
-\r
-First, to clear a common misconception: structural pattern matching has\r
-absolutely nothing to do with regular expresssions matching on strings: it works\r
-on arbitrary structured data.\r
-\r
-When manipulating trees, you want to check whether they have a certain structure\r
-(e.g. a `Local{ } node with as first child a list of variables whose tags are\r
-`Id{ }); when you've found a data that matches a certain pattern, you want to\r
-name the interesting sub-parts of it, so that you can manipulate them easily;\r
-finally, most of the time, you have a series of different possible patterns, and\r
-you want to apply the one that matches a given data. These are the needs\r
-addressed by pattern matching: it lets you give a list of ({\tt pattern ->\r
- code\_to\_execute\_if\_match}) associations, selects the first matching\r
-pattern, and executes the corresponding code. Patterns both describe the\r
-expected structures and bind local variables to interesting parts of the data.\r
-Those variables' scope is obviously the code to execute upon matching success.\r
-\r
-\paragraph{Match statement} \r
-A match statement has the form:\r
-\r
-\begin{verbatim}\r
-match <some_value> with\r
-| <pattern_1> -> <block_1>\r
-| <pattern_2> -> <block_2>\r
-...\r
-| <pattern_n> -> <block_n>\r
-end\r
-\end{verbatim}\r
-\r
-The first vertical bar after the "with" is optional; moreover, a pattern can\r
-actually be a list of patterns, separated by bars. In this case, it's enough for\r
-one of them to match, to get the block to be executed:\r
-\r
-\begin{verbatim}\r
-match <some_value> with\r
-| <pattern_1> | <pattern_1_bis > -> <block_1>\r
-...\r
-end\r
-\end{verbatim}\r
-\r
-When the match statement is executed, the first pattern which matches\r
-{\tt<some\_value>} is selected, the corresponding block is executed, and all\r
-other patterns and blocks are ignored. If no pattern matches, an error\r
-{\tt"mismatch"} is raised. However, we'll see it's easy to add a catch-all\r
-pattern at the end of the match, when we want it to be failproof.\r
-\r
-\subsection{Patterns definition}\r
-\r
-\paragraph{Atomic litterals}\r
-Syntactically, a pattern is mostly identical to the values it matches: numbers,\r
-booleans and strings, when used as patterns, match identical values.\r
-\r
-\begin{verbatim}\r
-match x with\r
-| 1 -> print 'x is one'\r
-| 2 -> print 'x is two'\r
-end\r
-\end{verbatim}\r
-\r
-\paragraph{Tables}\r
-Tables as patterns match tables with the same number of array-part elements, if\r
-each pattern field matches the corresponding value field. For instance, \{1, 2,\r
-3\} as a pattern matches \{1, 2, 3\} as a value. Pattern \{1, 2, 3\} matches\r
-value \{1, 2, 3, foo=4\}, but pattern \{1, 2, 3, foo=4\} doesn't match value\r
-\{1, 2, 3\}: there can be extra hash-part fields in the value, not in the\r
-pattern. Notice that field 'tag' is a regular hash-part field, therefore \{1, 2,\r
-3\} matches `Foo\{1, 2, 3\} (but not the other way around). Of course, table\r
-patterns can be nested. The table keys must currently be integers or strings.\r
-It's not difficult to add more, but the need hasn't yet emerged.\r
-\r
-If you want to match tables of arbitrary array-part size, you can add a "..." as\r
-the pattern's final element. For instance, pattern \{1, 2, ...\} will match all\r
-table with at least two array-part elements whose two first elements are 1 and\r
-2.\r
-\r
-\paragraph{Identifiers}\r
-The other fundamental kind of patterns are identifiers: they match everything,\r
-and bind whatever they match to themselves. For instance, pattern {1, 2, x} will\r
-match value {1, 2, 3}, and in the corresponding block, local variable x will be\r
-set to 3. By mixing tables and identifiers, we can already do interesting\r
-things, such as getting the identifiers list out of a local statement, as\r
-mentionned above:\r
-\r
-\begin{verbatim}\r
-match stat with\r
-| `Local{ identifiers, values } ->\r
- table.foreach(identifiers, |x| print(x[1])\r
-... -- other cases\r
-end\r
-\end{verbatim}\r
-\r
-When a variable appears several times in a single pattern, all the elements they\r
-match must be equal, in the sense of the "==" operator. Fore instance, pattern \{\r
- x, x \} will match value \{ 1, 1 \}, but not \{ 1, 2 \}. Both values would be\r
-matched by pattern \{ x, y \}, though. A special identifier is "\_", which doesn't\r
-bind its content. Even if it appears more than once in the pattern, metched\r
-value parts aren't required to be equal. The pattern "\_" is therefore the\r
-simplest catch-all one, and a match statement with a "{\tt| \_ ->}" final\r
-statement will never throw a "mismatch" error.\r
-\r
-\paragraph{Guards}\r
-\r
-Some tests can't be performed by pattern matching. For these cases, the pattern\r
-can be followed by an "if" keyword, followed by a condition.\r
-\r
-\begin{verbatim}\r
-match x with\r
-| n if n%2 == 0 -> print 'odd'\r
-| _ -> print 'even'\r
-end\r
-\end{verbatim}\r
-\r
-Notice that the identifiers bound by the pattern are available in the guard\r
-condition. Moreover, the guard can apply to several patterns:\r
-\r
-\begin{verbatim}\r
-match x with\r
-| n | {n} if n%2 == 0 -> print 'odd'\r
-| _ -> print 'even'\r
-end\r
-\end{verbatim}\r
-\r
-\paragraph{Multi-match}\r
-\r
-If you want to match several values, let's say 'a' and 'b', there's an easy way:\r
-\r
-\begin{verbatim}\r
-match {a,b} with\r
-| {pattern_for_a, pattern_for_b} -> block\r
-...\r
-end\r
-\end{verbatim}\r
-\r
-However, it introduces quite a lot of useless tables and checks. Since this kind\r
-of multiple matches are fairly common, they are supported natively:\r
-\r
-\begin{verbatim}\r
-match a, b with\r
-| pattern_for_a, pattern_for_b -> block\r
-...\r
-end\r
-\end{verbatim}\r
-\r
-This will save some useless tests and computation, and the compiler will\r
-complain if the number of patterns doesn't match the number of values.\r
-\r
-\paragraph{String regular expressions}\r
-There is a way to use Lua's regular exressions with match, through the division\r
-operator ``/'': the left operand is expected to be a literal string, interpreted\r
-as a regular expression. The variables it captures are stored in a table, which\r
-is matched as a value against the right-hand-side operand. For instance, the\r
-following case succeeds when {\tt foo} is a string composed of 3 words separated\r
-by spaces. In case of success, these words are bound to variables {\tt w1}, {\tt\r
- w2} and {\tt w3} in the executed block:\r
-\r
-\begin{verbatim}\r
-match foo with\r
-| "^(%w+) +(%w+) +(%w+)$"/{ w1, w2, w3 } -> \r
- do_stuff (w1, w2, w3)\r
-end\r
-\end{verbatim}\r
-\r
-\subsection{Examples}\r
-There are quite a lot of samples using match in the metalua distribution, and\r
-more will come in the next one. Dig in the samples for fairly simple usages, or\r
-in the standard libs for more advanced ones. Look for instance at examples\r
-provided with the {\tt walk} library.\r
+\section{Extension {\tt match}: structural pattern matching}
+Pattern matching is an extremely pleasant and powerful way to handle tree-like
+structures, such as ASTs. Unsurprisingly, it's a feature found in most
+ML-inspired languages, which excel at compilation-related tasks. There is a
+pattern matching extension for metalua, which is extremely useful for most
+meta-programming purposes.
+
+\subsection{Purpose}
+
+First, to clear a common misconception: structural pattern matching has
+absolutely nothing to do with regular expressions matching on strings: it works
+on arbitrary structured data.
+
+When manipulating trees, you want to check whether they have a certain structure
+(e.g. a `Local{ } node with as first child a list of variables whose tags are
+`Id{ }); when you've found data that matches a certain pattern, you want to
+name the interesting sub-parts of it, so that you can manipulate them easily;
+finally, most of the time, you have a series of different possible patterns, and
+you want to apply the one that matches a given data. These are the needs
+addressed by pattern matching: it lets you give a list of ({\tt pattern ->
+ code\_to\_execute\_if\_match}) associations, selects the first matching
+pattern, and executes the corresponding code. Patterns both describe the
+expected structures and bind local variables to interesting parts of the data.
+Those variables' scope is obviously the code to execute upon matching success.
+
+\paragraph{Match statement}
+A match statement has the form:
+
+\begin{verbatim}
+match <some_value> with
+| <pattern_1> -> <block_1>
+| <pattern_2> -> <block_2>
+...
+| <pattern_n> -> <block_n>
+end
+\end{verbatim}
+
+The first vertical bar after the "with" is optional; moreover, a pattern can
+actually be a list of patterns, separated by bars. In this case, it's enough for
+one of them to match, to get the block to be executed:
+
+\begin{verbatim}
+match <some_value> with
+| <pattern_1> | <pattern_1_bis > -> <block_1>
+...
+end
+\end{verbatim}
+
+When the match statement is executed, the first pattern which matches
+{\tt<some\_value>} is selected, the corresponding block is executed, and all
+other patterns and blocks are ignored. If no pattern matches, an error
+{\tt"mismatch"} is raised. However, we'll see it's easy to add a catch-all
+pattern at the end of the match, when we want it to be failproof.
+
+\subsection{Patterns definition}
+
+\paragraph{Atomic literals}
+Syntactically, a pattern is mostly identical to the values it matches: numbers,
+booleans and strings, when used as patterns, match identical values.
+
+\begin{verbatim}
+match x with
+| 1 -> print 'x is one'
+| 2 -> print 'x is two'
+end
+\end{verbatim}
+
+\paragraph{Tables}
+Tables as patterns match tables with the same number of array-part elements, if
+each pattern field matches the corresponding value field. For instance, \{1, 2,
+3\} as a pattern matches \{1, 2, 3\} as a value. Pattern \{1, 2, 3\} matches
+value \{1, 2, 3, foo=4\}, but pattern \{1, 2, 3, foo=4\} doesn't match value
+\{1, 2, 3\}: there can be extra hash-part fields in the value, not in the
+pattern. Notice that field 'tag' is a regular hash-part field, therefore \{1, 2,
+3\} matches `Foo\{1, 2, 3\} (but not the other way around). Of course, table
+patterns can be nested. The table keys must currently be integers or strings.
+It's not difficult to add more, but the need hasn't yet emerged.
+
+If you want to match tables of arbitrary array-part size, you can add a "..." as
+the pattern's final element. For instance, pattern \{1, 2, ...\} will match all
+table with at least two array-part elements whose two first elements are 1 and
+2.
+
+\paragraph{Identifiers}
+The other fundamental kind of patterns are identifiers: they match everything,
+and bind whatever they match to themselves. For instance, pattern \{1, 2, x\} will
+match value \{1, 2, 3\}, and in the corresponding block, local variable x will be
+set to 3. By mixing tables and identifiers, we can already do interesting
+things, such as getting the identifiers list out of a local statement, as
+mentioned above:
+
+\begin{verbatim}
+match stat with
+| `Local{ identifiers, values } ->
+  table.foreach(identifiers, |x| print(x[1]))
+... -- other cases
+end
+\end{verbatim}
+
+When a variable appears several times in a single pattern, all the elements they
+match must be equal, in the sense of the "==" operator. For instance, pattern \{
+ x, x \} will match value \{ 1, 1 \}, but not \{ 1, 2 \}. Both values would be
+matched by pattern \{ x, y \}, though. A special identifier is "\_", which doesn't
+bind its content. Even if it appears more than once in the pattern, matched
+value parts aren't required to be equal. The pattern "\_" is therefore the
+simplest catch-all one, and a match statement with a "{\tt| \_ ->}" final
+statement will never throw a "mismatch" error.
+
+\paragraph{Guards}
+
+Some tests can't be performed by pattern matching. For these cases, the pattern
+can be followed by an "if" keyword, followed by a condition.
+
+\begin{verbatim}
+match x with
+| n if n%2 == 0 -> print 'even'
+| _ -> print 'odd'
+end
+\end{verbatim}
+
+Notice that the identifiers bound by the pattern are available in the guard
+condition. Moreover, the guard can apply to several patterns:
+
+\begin{verbatim}
+match x with
+| n | {n} if n%2 == 0 -> print 'even'
+| _ -> print 'odd'
+end
+\end{verbatim}
+
+\paragraph{Multi-match}
+
+If you want to match several values, let's say 'a' and 'b', there's an easy way:
+
+\begin{verbatim}
+match {a,b} with
+| {pattern_for_a, pattern_for_b} -> block
+...
+end
+\end{verbatim}
+
+However, it introduces quite a lot of useless tables and checks. Since this kind
+of multiple matches are fairly common, they are supported natively:
+
+\begin{verbatim}
+match a, b with
+| pattern_for_a, pattern_for_b -> block
+...
+end
+\end{verbatim}
+
+This will save some useless tests and computation, and the compiler will
+complain if the number of patterns doesn't match the number of values.
+
+\paragraph{String regular expressions}
+There is a way to use Lua's regular expressions with match, through the division
+operator ``/'': the left operand is expected to be a literal string, interpreted
+as a regular expression. The variables it captures are stored in a table, which
+is matched as a value against the right-hand-side operand. For instance, the
+following case succeeds when {\tt foo} is a string composed of 3 words separated
+by spaces. In case of success, these words are bound to variables {\tt w1}, {\tt
+ w2} and {\tt w3} in the executed block:
+
+\begin{verbatim}
+match foo with
+| "^(%w+) +(%w+) +(%w+)$"/{ w1, w2, w3 } ->
+ do_stuff (w1, w2, w3)
+end
+\end{verbatim}
+
+\subsection{Examples}
+There are quite a lot of samples using match in the metalua distribution, and
+more will come in the next one. Dig in the samples for fairly simple usages, or
+in the standard libs for more advanced ones. Look for instance at examples
+provided with the {\tt walk} library.
-\def\tableHeader{\begin{tabular}{|c|c|p{5cm}|}\hline\r
-\bf name & \bf type & \multicolumn{1}{c|}{\bf description} \\\hline}\r
-\def\entry#1#2#3{{#1} & {\tt#2} & {#3} \\\hline}\r
-\def\tableFooter{\hline\end{tabular}}\r
-\r
-\section{{\tt mlp}, the metalua parser}\r
-\r
-Metalua parser is built on top of \verb|gg|, and cannot be understood\r
-without some knowledge of it. Basically, \verb|gg| allows not only to\r
-build parsers, but to build {\em extensible} parsers. Depending on a\r
-parser's type (sequence, sequence set, list, expression\ldots),\r
-different extension methods are available, which are documented in\r
-\verb|gg| reference. The current section will give the information\r
-needed to extend Metalua syntax:\r
-\begin{itemize}\r
-\item what \verb|mlp| entries are accessible for extension;\r
-\item what do they parse;\r
-\item what is the underlying parser type (and therefore, what\r
- extension methods are supported)\r
-\end{itemize}\r
-\r
-\vfill\pagebreak\r
-\r
-\subsection{Parsing expressions}\r
-\tableHeader\r
-\r
-\entry{mlp.expr}{gg.expr}{Top-level expression parser, and the main\r
- extension point for Metalua expression. Supports all of the methods\r
- defined by {\tt gg.expr}.}\r
-\r
-\entry{mlp.func\_val}{gg.sequence}{Read a function definition,\r
- from the arguments' openning parenthesis to the final {\tt end}, but\r
- excluding the initial {\tt function} keyword, so that it can be used\r
- both for anonymous functions, for {\tt function some\_name(...) end}\r
- and for {\tt local function some\_name(...) end}.}\r
-\r
-% \entry{mlp.func\_params\_content}{gg.list}{Read a potentially empty\r
-% (``{\tt)}''- or ``{\tt|}''-terminated) list of function definition\r
-% parameters, i.e. identifiers or ``{\tt ...}'' varargs. Surrounding\r
-% parentheses are excluded. Don't get confused between parameters and\r
-% arguments: parameters are the variable names used in a function\r
-% definition; arguments are the values passed in a function call.}\r
-\r
-% \entry{mlp.func\_args\_content}{gg.list}{Read a potentially emtpy list\r
-% of function call arguments. Surrounding parentheses are excluded.}\r
-\r
-% \entry{mlp.func\_args}{gg.sequence\_set}{Read function arguments: a\r
-% list of expressions between parenthses, or a litteral table, or a\r
-% litteral string.}\r
-\r
-%\entry{mlp.func\_params}{}{}\r
-\entry{mlp.expr\_list}{}{}\r
-\r
-%\entry{mlp.adt}{\rm custom function}{Read an algebraic datatype\r
-% without its leading backquote.}\r
-\r
-\entry{mlp.table\_content}{gg.list}{Read the content of a table,\r
- excluding the surrounding braces}\r
-\r
-\entry{mlp.table}{gg.sequence}{Read a litteral table,\r
- including the surrounding braces}\r
-\r
-\entry{mlp.table\_field}{\rm custom function}{Read a table entry: {\tt\r
- [foo]=bar}, {\tt foo=bar} or {\tt bar}.}\r
-\r
-\entry{mlp.opt\_id}{\rm custom function}{Try to read an identifier, or\r
- an identifier splice. On failure, returns false.}\r
-\r
-\entry{mlp.id}{\rm custom function}{Read an identifier, or\r
- an identifier splice. Cause an error if there is no identifier.}\r
-\r
-\tableFooter\r
-\r
-\vfill\pagebreak\r
-\r
-\subsection{Parsing statements}\r
-\tableHeader\r
-\entry{mlp.block}{gg.list}{Read a sequence of statements, optionally\r
- separated by semicolons. When introducing syntax extensions, it's\r
- often necessary to add block terminators with {\tt\r
- mlp.block.terminators:add().}}\r
-\entry{mlp.for\_header}{\rm custom function}{Read a {\tt for} header,\r
-from just after the ``{\tt for}'' to just before the ``{\tt do}''.}\r
-\entry{mlp.stat}{gg.multisequence}{Read a single statement.}\r
-\tableFooter\r
-\r
-Actually, {\tt mlp.stat} is an extended version of a multisequence: it\r
-supports easy addition of new assignment operator. It has a field {\tt\r
-assignments}, whose keys are assignment keywords, and values are\r
-assignment builders taking left-hand-side and right-hand-side as\r
-parameters. for instance, C's ``+='' operator could be added as:\r
-\begin{verbatim}\r
-mlp.lexer:add "+="\r
-mlp.stat.assignments["+="] = function (lhs, rhs)\r
- assert(#lhs==1 and #rhs==1)\r
- local a, b = lhs[1], rhs[1]\r
- return +{stat: (-{a}) = -{a} + -{b} }\r
-end \r
-\end{verbatim}\r
-\r
-\subsection{Other useful functions and variables}\r
-\r
-\begin{itemize}\r
-\item{\tt mlp.gensym()} generates a unique identifier. The uniqueness\r
- is guaranteed, therefore this identifier cannot capture another\r
- variable; it is useful to write hygienic\footnote{Hygienic macros\r
- are macros which take care not to use names that might interfere\r
- with user-provided names. The typical non-hygienic macro in C\r
- is {\tt \#define SWAP( a, b) \{ int c=a; a=b; b=c; \}}: this macro\r
- will misearbly fail if you ever call it with a parameter named\r
- {\tt c}. There are well-known techniques to automatically make a\r
- macro hygienic. Without them, you'd have to generate a unique name\r
- for the temporary variable, if you had a {\tt gensym()} operator\r
- in C's preprocessor} macros. \r
-\end{itemize}\r
+\def\tableHeader{\begin{tabular}{|c|c|p{5cm}|}\hline
+\bf name & \bf type & \multicolumn{1}{c|}{\bf description} \\\hline}
+\def\entry#1#2#3{{#1} & {\tt#2} & {#3} \\\hline}
+\def\tableFooter{\hline\end{tabular}}
+
+\section{{\tt mlp}, the metalua parser}
+
+Metalua parser is built on top of \verb|gg|, and cannot be understood
+without some knowledge of it. Basically, \verb|gg| allows not only to
+build parsers, but to build {\em extensible} parsers. Depending on a
+parser's type (sequence, sequence set, list, expression\ldots),
+different extension methods are available, which are documented in
+\verb|gg| reference. The current section will give the information
+needed to extend Metalua syntax:
+\begin{itemize}
+\item what \verb|mlp| entries are accessible for extension;
+\item what do they parse;
+\item what is the underlying parser type (and therefore, what
+ extension methods are supported)
+\end{itemize}
+
+\vfill\pagebreak
+
+\subsection{Parsing expressions}
+\tableHeader
+
+\entry{mlp.expr}{gg.expr}{Top-level expression parser, and the main
+ extension point for Metalua expression. Supports all of the methods
+ defined by {\tt gg.expr}.}
+
+\entry{mlp.func\_val}{gg.sequence}{Read a function definition,
+  from the arguments' opening parenthesis to the final {\tt end}, but
+ excluding the initial {\tt function} keyword, so that it can be used
+ both for anonymous functions, for {\tt function some\_name(...) end}
+ and for {\tt local function some\_name(...) end}.}
+
+% \entry{mlp.func\_params\_content}{gg.list}{Read a potentially empty
+% (``{\tt)}''- or ``{\tt|}''-terminated) list of function definition
+% parameters, i.e. identifiers or ``{\tt ...}'' varargs. Surrounding
+% parentheses are excluded. Don't get confused between parameters and
+% arguments: parameters are the variable names used in a function
+% definition; arguments are the values passed in a function call.}
+
+% \entry{mlp.func\_args\_content}{gg.list}{Read a potentially empty list
+% of function call arguments. Surrounding parentheses are excluded.}
+
+% \entry{mlp.func\_args}{gg.sequence\_set}{Read function arguments: a
+%   list of expressions between parentheses, or a literal table, or a
+%   literal string.}
+
+%\entry{mlp.func\_params}{}{}
+\entry{mlp.expr\_list}{}{}
+
+%\entry{mlp.adt}{\rm custom function}{Read an algebraic datatype
+% without its leading backquote.}
+
+\entry{mlp.table\_content}{gg.list}{Read the content of a table,
+  excluding the surrounding braces.}
+
+\entry{mlp.table}{gg.sequence}{Read a literal table,
+  including the surrounding braces.}
+
+\entry{mlp.table\_field}{\rm custom function}{Read a table entry: {\tt
+ [foo]=bar}, {\tt foo=bar} or {\tt bar}.}
+
+\entry{mlp.opt\_id}{\rm custom function}{Try to read an identifier, or
+ an identifier splice. On failure, returns false.}
+
+\entry{mlp.id}{\rm custom function}{Read an identifier, or
+ an identifier splice. Cause an error if there is no identifier.}
+
+\tableFooter
+
+\vfill\pagebreak
+
+\subsection{Parsing statements}
+\tableHeader
+\entry{mlp.block}{gg.list}{Read a sequence of statements, optionally
+ separated by semicolons. When introducing syntax extensions, it's
+ often necessary to add block terminators with {\tt
+ mlp.block.terminators:add().}}
+\entry{mlp.for\_header}{\rm custom function}{Read a {\tt for} header,
+from just after the ``{\tt for}'' to just before the ``{\tt do}''.}
+\entry{mlp.stat}{gg.multisequence}{Read a single statement.}
+\tableFooter
+
+Actually, {\tt mlp.stat} is an extended version of a multisequence: it
+supports easy addition of new assignment operators. It has a field {\tt
+assignments}, whose keys are assignment keywords, and values are
+assignment builders taking left-hand-side and right-hand-side as
+parameters. For instance, C's ``+='' operator could be added as:
+\begin{verbatim}
+mlp.lexer:add "+="
+mlp.stat.assignments["+="] = function (lhs, rhs)
+ assert(#lhs==1 and #rhs==1)
+ local a, b = lhs[1], rhs[1]
+ return +{stat: (-{a}) = -{a} + -{b} }
+end
+\end{verbatim}
+
+\subsection{Other useful functions and variables}
+
+\begin{itemize}
+\item{\tt mlp.gensym()} generates a unique identifier. The uniqueness
+ is guaranteed, therefore this identifier cannot capture another
+ variable; it is useful to write hygienic\footnote{Hygienic macros
+ are macros which take care not to use names that might interfere
+ with user-provided names. The typical non-hygienic macro in C
+ is {\tt \#define SWAP( a, b) \{ int c=a; a=b; b=c; \}}: this macro
+  will miserably fail if you ever call it with a parameter named
+ {\tt c}. There are well-known techniques to automatically make a
+ macro hygienic. Without them, you'd have to generate a unique name
+ for the temporary variable, if you had a {\tt gensym()} operator
+ in C's preprocessor} macros.
+\end{itemize}
-\section*{Reading guide}\r
-This manual tries to be as comprehensive as possible; however, you don't\r
-necessarily have to read all of it before starting to do interesting stuff with\r
-metalua. Here's a brief summary of what the different parts of the manual\r
-address, and why you might want to read them immediately---or not.\r
-\r
-\begin{itemize}\r
-\item{\bf Before reading this manual:} metalua is based on Lua, so\r
- you'll need a minimal level of Lua proficiency. You can probably get\r
- away without knowing much about metatables, environments or\r
- coroutines, but you need to be at ease with basic flow control,\r
- scoping rules, first-class functions, and the whole\r
- everything-is-a-table approach.\r
-\item{\bf Meta-programming in metalua:} this chapter exposes the generic principles\r
- of static meta-programming: meta-levels in sources, AST representation of\r
- code, meta-operators. You need to read this carefully if you plan to write any\r
- non-trivial meta-programming code and you've never used languages like, Common\r
- Lisp, camlp4 or Converge. If you're familiar with one of these, a cursory look\r
- over this chapter might be enough for you.\r
-\item{\bf Standard meta-programming libraries:} these are the tools that will allow\r
- you to manipulate code effectively; the more advanced an extension you want to\r
- write the more of these you'll want to know.\r
- \begin{itemize}\r
- \item{\bf mlp} is the dynamically extensible metalua parser. You need to know it\r
- if you want to change or extend the language's syntax\r
- \item{\bf gg} is the grammar generator, the library which lets you manipulate\r
- dynamic parsers. You need to know it in order to do anything useful with\r
- mlp.\r
- \item{\bf match} is an extension supporting structural pattern matching (which has\r
- almost nothing to do with regular expressions on strings). It's a construct\r
- taken from the ML language familly, which lets you manipulate advanced data\r
- structures in vrey powerful ways. It's extremely helpful, among others, when\r
- working with AST, i.e. for most interesting meta-programs.\r
- \item{\bf walk} is a code walker generator: smomething akin to a visitor pattern,\r
- which will help you to write code analysers or transformers. Whenever you\r
- want to find and transform all return statements in an AST, rename some\r
- conflicting local variables, check for the presence of nested for loops\r
- etc., you'll have to write a code walker, and walk will get you there much\r
- faster. \r
- \item{\bf hygiene} offers hygienic macros, i.e. protects you from accidental\r
- variable captures. As opposed to e.g. Scheme, macro writing is not limited\r
- to a term rewriting system in metalua, which lets more power to the\r
- programmer, but prevents from completely automating macro hygienization. If\r
- you wrote an extension and you want to raise it to production-quality,\r
- you'll need among others to protect its users from variable captures, and\r
- you'll need to hygienize it. If you don't feel like cluttering your code\r
- with dozens of {\tt gensym} calls, you'll want to use the macro hygienizer.\r
- \item{\bf dollar:} if you wrote a macro, but don't feel the need to give it a\r
- dedicated syntax extension, this library will let you call this macro as a\r
- regular function call, except that it will be prefixed with a ``{\tt\$}''.\r
- \end{itemize}\r
- \item{\bf General purpose libraries:} Lua strives at staying minimalist, and does\r
- not come with batteries included; you're expected to grab them separately,\r
- currently from luaforge, and eventually from a Lua Rocks repository. Metalua\r
- needs quite some support to run, and relies on a number of imported and\r
- custom-built libraries. Most of them can be reused for many other purposes\r
- including yours.\\\r
- A whole category of metalua users, who want to use third party libraries\r
- rather than reinventing their own wheels, will be primarily interested by\r
- these.\r
- \begin{itemize}\r
- \item{\bf metalua.runtime:} extensions to Lua core libraries: base, table,\r
- string.\r
- \item{\bf metalua.compiler:} mlc offers a consistent interface to metalua\r
- compilation and code representation transformers. 'package', 'loadstring',\r
- 'dostring', 'loadfile' and 'dofile' are also updated to handle metalua\r
- source files.\r
- \item{\bf clopts} simplifies and unifies the handling of command line options\r
- for metalua programs.\r
- \item{\bf springs} brings together Lua Ring's handling of separate Lua universes\r
- with Pluto's communication capabilities.\r
- \item{\bf clist} offers an extended tables-as-list interface: lists by\r
- comprehension {\em \`a la} Haskell or Python, list chunks etc.\r
- \item{\bf xglobal} makes global variables declaration mandatory, for safer\r
- programming, with almost no runtime overhead, and a syntax consistant qith\r
- local variables declaration.\r
- \item{\bf anaphoric} introduces anaphoric control structures, akin to Common\r
- Lisp's {\tt aif}-familly macros.\r
- \item{\bf trycatch} provides a proper exception system, with reliable finally\r
- blocks and exception catching by structural pattern matching.\r
- \item{\bf log} eases the terminal logging of variables, mainly for those from\r
- the Printf-based School of Debugging.\r
- \item{\bf types} offers dynamic type checking to metalua programs. It supports\r
- variable typing as opposed to value typing, and advanced type system\r
- features (polymorphism, dependant types etc.).\r
- \end{itemize}\r
- \item{\bf Examples and tutorials}: this chapter lists a series of tiny\r
- meta-programs whose main purpose is didactic, and walks through the detailed\r
- implementation of a couple of non-trivial extensions.\r
-\end{itemize}\r
+\section*{Reading guide}
+This manual tries to be as comprehensive as possible; however, you don't
+necessarily have to read all of it before starting to do interesting stuff with
+metalua. Here's a brief summary of what the different parts of the manual
+address, and why you might want to read them immediately---or not.
+
+\begin{itemize}
+\item{\bf Before reading this manual:} metalua is based on Lua, so
+ you'll need a minimal level of Lua proficiency. You can probably get
+ away without knowing much about metatables, environments or
+ coroutines, but you need to be at ease with basic flow control,
+ scoping rules, first-class functions, and the whole
+ everything-is-a-table approach.
+\item{\bf Meta-programming in metalua:} this chapter exposes the generic principles
+ of static meta-programming: meta-levels in sources, AST representation of
+ code, meta-operators. You need to read this carefully if you plan to write any
+ non-trivial meta-programming code and you've never used languages like, Common
+ Lisp, camlp4 or Converge. If you're familiar with one of these, a cursory look
+ over this chapter might be enough for you.
+\item{\bf Standard meta-programming libraries:} these are the tools that will allow
+ you to manipulate code effectively; the more advanced an extension you want to
+ write the more of these you'll want to know.
+ \begin{itemize}
+ \item{\bf mlp} is the dynamically extensible metalua parser. You need to know it
+ if you want to change or extend the language's syntax
+ \item{\bf gg} is the grammar generator, the library which lets you manipulate
+ dynamic parsers. You need to know it in order to do anything useful with
+ mlp.
+ \item{\bf match} is an extension supporting structural pattern matching (which has
+ almost nothing to do with regular expressions on strings). It's a construct
+    taken from the ML language family, which lets you manipulate advanced data
+    structures in very powerful ways. It's extremely helpful, among others, when
+    working with AST, i.e. for most interesting meta-programs.
+  \item{\bf walk} is a code walker generator: something akin to a visitor pattern,
+ which will help you to write code analysers or transformers. Whenever you
+ want to find and transform all return statements in an AST, rename some
+ conflicting local variables, check for the presence of nested for loops
+ etc., you'll have to write a code walker, and walk will get you there much
+ faster.
+ \item{\bf hygiene} offers hygienic macros, i.e. protects you from accidental
+ variable captures. As opposed to e.g. Scheme, macro writing is not limited
+ to a term rewriting system in metalua, which lets more power to the
+ programmer, but prevents from completely automating macro hygienization. If
+ you wrote an extension and you want to raise it to production-quality,
+ you'll need among others to protect its users from variable captures, and
+ you'll need to hygienize it. If you don't feel like cluttering your code
+ with dozens of {\tt gensym} calls, you'll want to use the macro hygienizer.
+ \item{\bf dollar:} if you wrote a macro, but don't feel the need to give it a
+ dedicated syntax extension, this library will let you call this macro as a
+ regular function call, except that it will be prefixed with a ``{\tt\$}''.
+ \end{itemize}
+ \item{\bf General purpose libraries:} Lua strives at staying minimalist, and does
+ not come with batteries included; you're expected to grab them separately,
+ currently from luaforge, and eventually from a Lua Rocks repository. Metalua
+ needs quite some support to run, and relies on a number of imported and
+ custom-built libraries. Most of them can be reused for many other purposes
+ including yours.\\
+ A whole category of metalua users, who want to use third party libraries
+ rather than reinventing their own wheels, will be primarily interested by
+ these.
+ \begin{itemize}
+ \item{\bf metalua.runtime:} extensions to Lua core libraries: base, table,
+ string.
+ \item{\bf metalua.compiler:} mlc offers a consistent interface to metalua
+ compilation and code representation transformers. 'package', 'loadstring',
+ 'dostring', 'loadfile' and 'dofile' are also updated to handle metalua
+ source files.
+ \item{\bf clopts} simplifies and unifies the handling of command line options
+ for metalua programs.
+ \item{\bf springs} brings together Lua Ring's handling of separate Lua universes
+ with Pluto's communication capabilities.
+ \item{\bf clist} offers an extended tables-as-list interface: lists by
+ comprehension {\em \`a la} Haskell or Python, list chunks etc.
+ \item{\bf xglobal} makes global variables declaration mandatory, for safer
+    programming, with almost no runtime overhead, and a syntax consistent with
+ local variables declaration.
+ \item{\bf anaphoric} introduces anaphoric control structures, akin to Common
+    Lisp's {\tt aif}-family macros.
+ \item{\bf trycatch} provides a proper exception system, with reliable finally
+ blocks and exception catching by structural pattern matching.
+ \item{\bf log} eases the terminal logging of variables, mainly for those from
+ the Printf-based School of Debugging.
+ \item{\bf types} offers dynamic type checking to metalua programs. It supports
+ variable typing as opposed to value typing, and advanced type system
+ features (polymorphism, dependant types etc.).
+ \end{itemize}
+ \item{\bf Examples and tutorials}: this chapter lists a series of tiny
+ meta-programs whose main purpose is didactic, and walks through the detailed
+ implementation of a couple of non-trivial extensions.
+\end{itemize}
-\subsection{Structural pattern matching}\r
-\r
-FIXME: refer to the official match extension instead of re-explaining\r
-what pattern matching is about.\r
-\r
-\subsubsection{Basic principles}\r
-In many languages, including Lua, ``pattern matching'' generally\r
-refers to the ability to:\r
-\begin{itemize}\r
-\item Analyse the general shape of a string;\r
-\item capture specified parts of it into temporary variables\r
-\item do some computation when the string matches a certain pattern,\r
- generally using the temporary captured variables.\r
-\end{itemize}\r
-\r
-In languages derived from ML\footnote{for instance OCaml, SML, Haskell\r
- or Erlang.}, pattern matching can be done on arbitrary data, not\r
-only strings. One can write patterns which describe the shape of a\r
-data structure, bind some interesting sub-parts of it to variables,\r
-and execute some statements associated with a given pattern whenever\r
-the data matches.\r
-\r
-This sample aims to implement this capability into metalua. It will\r
-discuss:\r
-\begin{itemize}\r
-\item How to describe data patterns (it turns out that we'll hijack\r
- metalua's expression parser, which is a great time-saving trick when\r
- it can be pulled).\r
-\item How to compile them into equivalent code.\r
-\item How to wrap all this up into a working compiled statement\r
-\item What improvements can be done to the proposed design.\r
-\end{itemize}\r
-\r
-\subsubsection{Patterns}\r
-A match statement consists of a tested term, and a series of (pattern,\r
-block) pairs. At execution, the first pair whose pattern accurately\r
-describes the tested term is selected, and its corresponding block is\r
-evaluated. Other blocks are ignored. \r
-\r
-We will keep the implementation of patterns as simple as possible. To\r
-do that, we will put a first constraint: the AST of a pattern must be\r
-a legal expression AST. This way, we save most of the syntax handling\r
-trouble. More specifically:\r
-\r
-\begin{itemize}\r
-\item numbers are valid patterns, which only match identical numbers;\r
-\item strings are valid patterns, which only match identical strings;\r
-\item tables are valid patterns if all of their keys are integers or\r
- strings, and all of their values are valid patterns. A table pattern\r
- matches a tested term if:\r
- \begin{itemize}\r
- \item the tested term is a table;\r
- \item every key that appears in the pattern also appears in the\r
- tested term;\r
- \item for every key, the value associated to this key in the tested\r
- term is matched by the sub-pattern associated to this key in the\r
- pattern;\r
- \end{itemize}\r
-\item variables are valid patterns, and match everything. Moreover,\r
- the term matched by the variable captures it, i.e. in the\r
- corresponding block, that variable is set to the\r
- matched term.\r
-\end{itemize}\r
-\r
-Notice that since \verb|tag| is a regular field in metalua (albeit\r
-with an optional dedicated syntax), it doesn't need a special case in\r
-our pattern definition.\r
-\r
-\paragraph{Example} Let's consider implementing an evaluator for\r
-Metalua expressions. We will restrict ourselves to binary operators,\r
-variables and numbers; we will also consider that we have a\r
-\verb|values| table mapping variable names to their value, and\r
-\verb|binopfuncs| which associate an operator name with the corresponding\r
-function. The evaluator would be written:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-function eval(t)\r
- match t with\r
- | `Op{ op, a, b } -> return binopfuncs[op](eval(a), eval(b))\r
- | `Number{ n } -> return n\r
- | `Variable{ v } -> return values[v]\r
- end\r
-end\r
-\end{Verbatim}\r
-\r
-\subsubsection{Pattern compilation}\r
-\r
-A pattern in a case will translate into:\r
-\begin{itemize}\r
-\item tests: testing that the type of the tested case is the same as\r
- the pattern's type for numbers, strings and tables;\r
-\item local declarations: a variable must be bound to the\r
- tested term's value;\r
-\item nested tests for every pattern key/value pair, when the pattern\r
- is a table. Moreover, since there might be multiple tests to run\r
- against the tested term's field, it should be put in a temporary\r
- variable.\r
-\end{itemize}\r
-\r
-For instance, consider the following pattern:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-{ tag = "Foo", 10, { 20 }, { x = a }, b }\r
-\end{Verbatim}\r
-\r
-It corresponds to the series of tests and assignments on the left:\r
-\r
-\begin{minipage}{6cm}\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-let v1 = <tested_term>\r
-type(v1) == "table"\r
-let v2 = v1.tag\r
-v2 ~= nil\r
-type(v2) == "string"\r
-v2 == "Foo"\r
-let v2 = v1[1]\r
-v2 ~= nil\r
-type(v2) == "number"\r
-v2 == 10\r
-let v2 = v1[2]\r
-v2 ~= nil\r
-type(v2) == "table"\r
-let v3 = v2[1]\r
-v3 ~= nil\r
-type(v3) == "number"\r
-v3 == 20\r
-let v2 = v1[3]\r
-v2 ~= nil\r
-type(v2) == "table"\r
-let v3 = v2.x\r
-v3 ~= nil\r
-local a = v3\r
-let v2 = v1[4]\r
-v2 ~= nil\r
-local b = v2\r
-\r
-\r
-\end{Verbatim}\r
-\end{minipage}\r
-\begin{minipage}{6cm}\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-let v1 = tested_term\r
-if type(v1) == "table" then\r
- let v2 = v1.tag\r
- if v2 ~= nil then\r
- if type(v2) == "string" then\r
- if v2 == "Foo" then\r
- let v2 = v1[1]\r
- if v2 ~= nil then\r
- if type(v2) == "number" then\r
- if v2 == 10 then\r
- let v2 = v1[2]\r
- if v2 ~= nil then\r
- if type(v2) == "table" then\r
- let v3 = v2[1]\r
- if v3 ~= nil then\r
- if type(v3) == "number" then\r
- if v3 == 20 then\r
- let v2 = v1[3]\r
- if v2 ~= nil then\r
- if type(v2) == "table" then\r
- let v3 = v2.x\r
- if v3 ~= nil then\r
- local a = v3\r
- let v2 = v1[4]\r
- if v2 ~= nil then\r
- local b = v2\r
- <inner_term>\r
-end ... end -- (16 times)\r
-\end{Verbatim}\r
-\end{minipage}\r
-~\\~\\\r
-\r
-Notice that the relative order of tests and assignments is meaningful:\r
-we cannot put all assignments on one side, and all tests on an\r
-other, e.g. \verb|v2 = v1.tag| on line 3 doesn't make sense if\r
-\verb|type(v1) == table| on line 2 fails.\r
-\r
-We will compile patterns in several steps: first, accumulate all the\r
-tests and assignments in order; then, we will collapse them in a\r
-single big nesting of ``if'' statements. At first, we won't try\r
-to optimize the form of the final term. The list above left would be\r
-collapsed into the single compound statement above on the right.\r
-\r
-\paragraph{Accumulating constraints and tests}\r
-This is done by a \verb|parse_pattern()| function, which does just what\r
-is described in the bullet list above:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
- -------------------------------------------------------------------\r
- -- Turn a pattern into a list of conditions and assignations,\r
- -- stored into [acc]. [n] is the depth of the subpattern into the\r
- -- toplevel pattern; [tested_term] is the AST of the term to be \r
- -- tested; [pattern] is the AST of a pattern, or a subtree of that\r
- -- pattern when [n>0].\r
- -------------------------------------------------------------------\r
- local function parse_pattern (n, pattern)\r
- local v = var(n)\r
- if "Number" == pattern.tag or "String" == pattern.tag then\r
- accumulate (+{ -{v} == -{pattern} })\r
- elseif "Id" == pattern.tag then\r
- accumulate (+{stat: local -{pattern} = -{v} })\r
- elseif "Table" == pattern.tag then\r
- accumulate (+{ type( -{v} ) == "table" } )\r
- local idx = 1\r
- for _, x in ipairs (pattern) do\r
- local w = var(n+1)\r
- local key, sub_pattern\r
- if x.tag=="Key" \r
- then key = x[1]; sub_pattern = x[2]\r
- else key = `Number{ idx }; sub_pattern = x; idx=idx+1 end\r
- accumulate (+{stat: (-{w}) = -{v} [-{key}] })\r
- accumulate (+{ -{w} ~= nil })\r
- parse_pattern (n+1, sub_pattern)\r
- end\r
- else error "Invalid pattern type" end\r
- end\r
- -------------------------------------------------------------------\r
-\end{Verbatim}\r
-\r
-This function relies on the following helper functions:\r
-\begin{itemize}\r
-\item {\tt var($n$)} generates the variable name ``\verb|v|$n$'', which\r
- is used to store the tested term at depth level $n$. Indeed,\r
- sub-patterns in table fields are matched against sub-terms of the\r
- tested term. It also remembers of the biggest $n$ it ever received, \r
- and stores it into \verb|max_n| (this will be used to know which\r
- local vars have to be generated, see below);\r
-\item {\tt accumulate()} just stores an additional code snippet in a\r
- dedicated list;\r
-\end{itemize}\r
-\r
-In the quotes, you might notice the parentheses around the variable in\r
-``\verb|(-{w}) = -{v} [-{key}]|'': they're here to let the compiler\r
-know that what's in \verb|-{...}| is an expression, not a\r
-statement. Without them, it would expect {\tt w} to be the AST of a\r
-statement (since we're in a statement context at this point), then\r
-choke on the unexpected ``='' symbol. This is a common idiom, which\r
-you should think about everytime you generate an assignment to an\r
-anti-quoted identifier.\r
-\r
-\paragraph{Collapsing the accumulated quotes}\r
-As written above, our collapsing function will be kept as simple\r
-as possible, and will not try to minimize the amount of generated\r
-code. It takes as parameters {\tt n}, the index of the quote currently\r
-collapsed in the accumulator, and {\tt inner\_term} the statement\r
-block to put inside the innermost part of the test. It calls itself\r
-recursively, so that the collapsed term is built inside out (generally\r
-speaking, working with trees, including AST, involves a lot of\r
-recursive functions). \verb|acc| is the list filled by the\r
-\verb|accumulate()| function:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
- -------------------------------------------------------------------\r
- -- Turn a list of tests and assignations into [acc] into a\r
- -- single term of nested conditionals and assignments.\r
- -- [inner_term] is the AST of a term to be put into the innermost\r
- -- conditionnal, after all assignments. [n] is the index in [acc]\r
- -- of the term currently parsed.\r
- -- \r
- -- This is a recursive function, which builds the inner part of\r
- -- the statement first, then surrounds it with nested \r
- -- [if ... then ... end], [local ... = ...] and [let ... = ...]\r
- -- statements.\r
- -------------------------------------------------------------------\r
- local function collapse (n, inner_term)\r
- assert (not inner_term.tag, "collapse inner term must be a block")\r
- if n > #acc then return inner_term end\r
- local it = acc[n]\r
- local inside = collapse (n+1, inner_term)\r
- assert (not inside.tag, "collapse must produce a block")\r
- if "Op" == it.tag then \r
- -- [it] is a test, put it in an [if ... then .. end] statement\r
- return +{block: if -{it} then -{inside} end }\r
- else \r
- -- [it] is a statement, just add it at the result's beginning.\r
- assert ("Let" == it.tag or "Local" == it.tag)\r
- return { it, unpack (inside) }\r
- end\r
- end\r
-\end{Verbatim}\r
-\r
-To fully understand this function, one must remember that test\r
-operations are translated into {\tt`Op\{ <opname>, <arg1>, <arg2>\}}\r
-AST. That's why we didn't have to put an explicit reminder in the\r
-accumulator, remembering whether a quote was a test or a statement:\r
-we'll just check for \verb|`Op|'s presence.\r
-\r
-\subsubsection{Match statement compilation}\r
-We already know how to translate a single pattern into a Lua test\r
-statement. This statement will execute a given block, with all pattern\r
-variables bound appropriately, if and only if the tested term matches\r
-the pattern.\r
-\r
-To build a complete match statement, we need to test all patterns in\r
-sequence. When one of them succeeds, we must skip all the following\r
-cases. We could do that with some complex ``\verb|else|'' parts in the\r
-tests, but there is a simpler way to jump out of some nested blocks:\r
-the ``\verb|break|'' statement. Therefore, we will enclose the cases\r
-into a loop that executes exactly once, so that a ``\verb|break|''\r
-will jump just after the last case. Then, we will add a\r
-``\verb|break|'' at the end of every statement that doesn't already\r
-finish with a ``\verb|return|'', so that other cases are skipped upon\r
-successful matching. To sum up, we will translate this:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match <foo> with\r
-| <pattern_1> -> <x1>\r
-| <pattern_2> -> <x2>\r
- ...\r
-| <pattern_n> -> <xn>\r
-end\r
-\end{Verbatim}\r
-\r
-\noindent into this:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-repeat\r
- local v1, v2, ... vx -- variables used to store subpatterns\r
- let v1 = <tested_term>\r
- if <compilation of pattern_1> ... then x1; break end\r
- if <compilation of pattern_2> ... then x2; break end\r
- ...\r
- if <compilation of pattern_n> ... then xn; break end\r
-until true\r
-\end{Verbatim}\r
-\r
-First, we add final \verb|break| statements where required, and we\r
-compile all (pattern, block) pairs:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
- -------------------------------------------------------------------\r
- -- parse all [pattern ==> block] pairs. Result goes in [body].\r
- -------------------------------------------------------------------\r
- local body = { }\r
- for _, case in ipairs (cases) do\r
- acc = { } -- reset the accumulator\r
- parse_pattern (1, case[1], var(1)) -- fill [acc] with conds and lets\r
- local last_stat = case[2][#case[2]]\r
- if last_stat and last_stat.tag ~= "Break" and \r
- last_stat.tag ~= "Return" then\r
- table.insert (case[2], `Break) -- to skip other cases on success\r
- end\r
- local compiled_case = collapse (1, case[2])\r
- for _, x in ipairs (compiled_case) do table.insert (body, x) end\r
- end\r
-\end{Verbatim}\r
-\r
-\noindent Then, we can just splice it into the appropriate quote:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
- -------------------------------------------------------------------\r
- local local_vars = { }\r
- for i = 1, max_n do table.insert (local_vars, var(i)) end\r
- \r
- -------------------------------------------------------------------\r
- -- cases are put inside a [repeat until true], so that the [break]\r
- -- inserted after the value will jump after the last case on success.\r
- -------------------------------------------------------------------\r
- local result = +{ stat: \r
- repeat\r
- -{ `Local{ local_vars, { } } }\r
- (-{var(1)}) = -{tested_term}\r
- -{ body }\r
- until true }\r
- return result\r
- -------------------------------------------------------------------\r
-\end{Verbatim}\r
-\r
-There is one point to notice in this quote: \verb|body| is used where\r
-a statement is expected, although it contains a {\em list} if\r
-statements rather than a single statement. Metalua is designed to\r
-accept this, i.e. if {\tt a, b, c, d} are statements, AST {\tt\r
-`Do\{ a, b, c, d \}} and {\tt`Do\{ a, \{ b, c \}, d\} } are\r
-equivalent. This feature partially replaces Lisp's \verb|@(...)|\r
-operator.\r
-\r
-\subsubsection{Syntax extension}\r
-To use this, we provide a syntax inspired by OCaml\footnote{It is\r
- actually the same syntax as OCaml's, except that we introduced an\r
- explicit {\tt end} terminator, to stay homogeneous with Lua.}: \r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match <foo> with\r
- <pattern> -> block\r
-| <pattern> -> block\r
- ...\r
-| <pattern> -> block\r
-end\r
-\end{Verbatim}\r
-\r
-For this, we need to declare new keywords \verb|match|,\r
-\verb|with| and \verb|->|. Then, we build the (pattern, block) parser\r
-with \verb|gg.sequence{ }|, and read a list of them, separated with\r
-``\verb+|+'', thanks to \verb|gg.list{ }|. Moreover, we accept an\r
-optional ``\verb+|+'' before the first case, so that all cases line up\r
-nicely:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-----------------------------------------------------------------------\r
-mlp.lexer:add{ "match", "with", "->" }\r
-\r
-mlp.stat:add{ "match", mlp.expr, "with", gg.optkeyword "|",\r
- gg.list{ gg.sequence{ mlp.expr, "->", mlp.block },\r
- separators = "|",\r
- terminators = "end" },\r
- "end",\r
- builder = |x| match_parser (x[1], x[3]) }\r
-----------------------------------------------------------------------\r
-\end{Verbatim}\r
-\r
-\noindent Now, if you try this\ldots\ it won't work! Indeed, Metalua\r
-needs to know what keywords might terminate a block of statements. In\r
-this case, it doesn't know that ``\verb+|+'' can terminate a block. We\r
-need therefore to add the following statement:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-mlp.block.terminators:add "|"\r
-\end{Verbatim}\r
-\r
-\noindent Finally that's it, we have implemented a working pattern\r
-matching system in 75 lines of code!\r
-\r
-\subsubsection{Possible improvements}\r
-Here are a couple of suggestions to further improve the pattern\r
-matching system presented above. Some of these proposals can be\r
-implemented very quickly, some others more complex; all of them\r
-present some practical interest.\r
-\r
-The code of the basic version, as presented here, is available at\r
-\url{http://metalua.luaforge.net/FIXME}.\r
-\r
-\paragraph{Booleans} Boolean constants aren't handled in the system\r
-above, neither as table keys nor as patterns. They're easy to add, and\r
-doing it will help you get gently into the code.\r
-\r
-\paragraph{Gotos considered beneficial} Gotos might be harmful in\r
-hand-written programs, but they're a bliss for machine-generated\r
-code. They would slightly simplify the code of pattern matching as\r
-presented above; but for many extension proposals listed below, they\r
-will make reasonnably easy some things which would otherwise be\r
-awfully contrived. Exercice: simplify the implementation above as much\r
-as possible by using gotos.\r
-\r
-Labels and gotos in metalua ASTs are represented as {\tt`Label\{ id\r
- \}} and {\tt`Goto\{ id \}} respectively, with {\tt id} an\r
-identifier, typically generated by {\tt mlp.gensym()}. It is always\r
-safe to jump out of a block; jumping into a block is not guaranteed\r
-against weird interactions with local variables and upvalues.\r
-\r
-\paragraph{{\tt collapse()} optimization} Instead of nesting if\r
-statements systematically, two nested {\tt if}s without {\tt else}\r
-branches can be simplified in a single branch with an {\tt and}\r
-operator. Not sure it would change the bytecode's efficiency, but\r
-that's a good exercice of AST manipulation.\r
-\r
-\paragraph{Superfluous assignments} When parsing a table entry, we\r
-assign it to a variable, then recursively call {\tt parse\_pattern()}\r
-on it; in the frequent case where the entry was simply a variable, it\r
-re-assigns it again. This could be optimized away.\r
-\r
-\paragraph{Checking table arity} In many cases, it is practical to\r
-check the number of elements in the array-part of the table. Here is a\r
-simple modification proposal: by default, a table pattern matches a\r
-table term only if they have the same number of array-part\r
-elements. However, if the last element of the pattern is {\tt`Dots}\r
-(a.k.a. {\tt+\{...\}}), then the term simply has to have \r
-{\it at least} as many array-part elements as the pattern.\r
-\r
-\paragraph{Adding guards}\r
-It is sometimes desirable to add arbitrary conditions for a pattern to\r
-match, conditions which might no be expressed by a pattern. OCaml\r
-allows to add them with a ``\verb|when|'' keyword:\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match n with\r
-| 0 -> print "zero"\r
-| n when n%2 == 0 -> print "even number"\r
-| _ -> print "odd number"\r
-end\r
-\end{Verbatim}\r
-I'd advise you to prefer {\tt if} as a dedicated keyword, rather than\r
-{\tt when}: it's unambiguous in this context, and saves a keyword\r
-reservation. \r
-\r
-\paragraph{More bindings}\r
-The way pattern matching is currently implemented, one can either bind\r
-a subterm to a variable, or check its structure against a sub-pattern,\r
-not both simultaneously. OCaml provides an ``\verb|as|'' operator,\r
-which allows to do both (Haskell calls it ``\verb|@|''). For instance,\r
-in the following example, any ADT whose tag is \verb|"RepeatMe"| will\r
-be replaced by two occurrences of itself, while others will remain\r
-unchanged:\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match something with\r
-| `RepeatMe{ ... } as r -> { r, r }\r
-| x -> x\r
-end\r
-\end{Verbatim}\r
-``\verb|as|'' will have to be declared as an infix operator, whose\r
-meaning will remain undefined in expressions which are not patterns.\r
-\r
-As an alternative, you can reuse an existing infix operator, thus\r
-avoiding to mess the expression parser. For instance, use {\tt *}\r
-instead of {\tt as}. You can go further, and implement {\tt +} as an\r
-``or'' operator ({\tt pattern1 + pattern2} would match if either\r
-of the patterns matches), although this would significantly complicate\r
-the implementation of {\tt parse\_pattern()}. \r
-\r
-The {\tt+} operator might prove tricky to implement, if you don't\r
-convert your code generator to gotos and labels first.\r
-\r
-\paragraph{Linear bindings}\r
-We should check, when compiling a pattern, that it is left-linear,\r
-i.e. that variables don't appear more than once in the pattern. People\r
-might be tempted to write things like this to check whether a tree is\r
-symmetric:\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match t with\r
-| `Tree{ x, x } -> print "Symmetric!"\r
-| `Tree{ x, y } -> print "Not symmetric"\r
-| `Leaf{ _ } -> print "Symmetric!"\r
-end\r
-\end{Verbatim}\r
-However, this would work in Prolog but not with our pattern matching,\r
-as two occurences of the same variable in the pattern don't cause an\r
-equality test to be added. We should detect such non-linear variables,\r
-and implement a suitable reaction:\r
-\begin{itemize}\r
-\item throw an error, or at least a warning;\r
-\item or add an equality test between the terms matched by the\r
- non-linear variable;\r
-\item or offer a syntax extension that lets the user provide his own\r
- equality predicate.\r
-\end{itemize}\r
-\r
-Notice that the latter choice would drive us towards a Prolog\r
-unification algorithm, which opens interesting opportunities.\r
-\r
-You might offer an exception for variable ``{\tt\_}'', which is often\r
-intended as a dummy, unused variable. Non-linear occurences of it\r
-should probably be silently accepted, without even performing the\r
-corresponding binding. \r
-\r
-\paragraph{Generalized assignments}\r
-Yet another OCaml-inspired feature: assignments such as\r
-``\verb|foo = bar|'', is almost a special\r
-case of pattern matching with only one case: the left-hand side is\r
-the pattern, and the right-hand side is the ``raw'' ``\verb|foo=bar|''\r
-assignment. Seen this way, it allows to write things such as\r
-``\verb|`If{ cond, block } = some_ast }|'' to assign \verb|cond| and\r
-\verb|block| to the subparts of \verb|some_ast| (if we know that\r
-\verb|some_ast| is the AST of an \verb|if| statement). \r
-\r
-If you go this way, however, make sure that the code generated for\r
-simple {\tt let}s is as efficient as before! Moreover, there is an (easy) \r
-scoping issue: the variables assigned belong to the scope of the\r
-surrounding block.\r
-\r
-\paragraph{Pattern matchings as expressions}\r
-Pattern matching are currently statements, and take statements as\r
-right-hand sides of cases. We could allow pattern matchings where\r
-expressions are expected: these would take expressions instead of\r
-statements as right-hand sides. Two ways to implement this: the dirty\r
-one (hack with functions to change match statements into expressions),\r
-and the clean one (refactoring existing code, so that it is agnostic\r
-about its right-hand side type, and provide two specialized\r
-versions of it for statements and expressions).\r
-\r
-\paragraph{Bootstrap it}\r
-That's something language designers love to do, for largely mystic\r
-reasons: writing a language's compiler in the language itself. Here,\r
-the idea is to re-implement the pattern matching extension by using\r
-pattern matching, and compile it with the older version. Comparing the\r
-firsrt and second versions of the code will give you an idea of how\r
-much code clarification is brought to you by the pattern matching\r
-extension.\r
-\r
-\paragraph{Pattern conjunction} Another feature to take from OCaml is\r
-multiple patterns for a single block. Instead of associating one\r
-block with one pattern, cases associate a block with a (non-empty)\r
-list of patterns. All of these patterns have to bond the same\r
-variables, except for {\tt\_}. The first pattern in the list to match\r
-the tested term does the binding. Patterns are separated by\r
-``\verb+|+''. Example:\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-\r
-match x with\r
-| 1 | 2 | 3 -> print(x)\r
-| n -> print "more than 3"\r
-end\r
-\end{Verbatim}\r
-(Hint: put the block in a local function. $2^{\mathrm{nd}}$ hint: sort\r
-bound variables, e.g. by lexicographic order. Or much simpler and\r
-more effective: convert your code generator to gotos+labels first).\r
-\r
-\paragraph{XML munching} Ever tried to transform some XML document through\r
-XSLT? Did you feel that this was even more kludgy than XML itself? Here\r
-is a challenging proposal:\r
-\begin{itemize}\r
-\item Realize, if you didn't already, that Metalua's ADT are\r
- isomorphic to XML, if you identify string-keys in tables with\r
- attributes, and limit there content to strings and number. For\r
- instance, ``{\tt <foo bar=3><baz/>eek</foo>}'' easily maps to ``{\tt\r
- `foo\{ bar=3, `baz, "eek" \}}'';\r
-\item compare what ML-style pattern matching does with what XSLT\r
- does (and with what you'd like it to do);\r
-\item design, implement, publish. You might want to google\r
- ``CDuce''\footnote{\url{http://www.cduce.org}} for neat ideas.\r
-\end{itemize}\r
-\r
-If you do this, I'd be really interested to put back your contribution\r
-in the next version of Metalua!\r
-\r
-\subsubsection{Correction}\r
-Most of the improvements proposed here are actually implemented in the\r
-{\tt match} library provided with metalua. Check its (commented)\r
-sources!\r
+\subsection{Structural pattern matching}
+
+FIXME: refer to the official match extension instead of re-explaining
+what pattern matching is about.
+
+\subsubsection{Basic principles}
+In many languages, including Lua, ``pattern matching'' generally
+refers to the ability to:
+\begin{itemize}
+\item Analyse the general shape of a string;
+\item capture specified parts of it into temporary variables
+\item do some computation when the string matches a certain pattern,
+ generally using the temporary captured variables.
+\end{itemize}
+
+In languages derived from ML\footnote{for instance OCaml, SML, Haskell
+ or Erlang.}, pattern matching can be done on arbitrary data, not
+only strings. One can write patterns which describe the shape of a
+data structure, bind some interesting sub-parts of it to variables,
+and execute some statements associated with a given pattern whenever
+the data matches.
+
+This sample aims to implement this capability into metalua. It will
+discuss:
+\begin{itemize}
+\item How to describe data patterns (it turns out that we'll hijack
+ metalua's expression parser, which is a great time-saving trick when
+ it can be pulled).
+\item How to compile them into equivalent code.
+\item How to wrap all this up into a working compiled statement
+\item What improvements can be done to the proposed design.
+\end{itemize}
+
+\subsubsection{Patterns}
+A match statement consists of a tested term, and a series of (pattern,
+block) pairs. At execution, the first pair whose pattern accurately
+describes the tested term is selected, and its corresponding block is
+evaluated. Other blocks are ignored.
+
+We will keep the implementation of patterns as simple as possible. To
+do that, we will put a first constraint: the AST of a pattern must be
+a legal expression AST. This way, we save most of the syntax handling
+trouble. More specifically:
+
+\begin{itemize}
+\item numbers are valid patterns, which only match identical numbers;
+\item strings are valid patterns, which only match identical strings;
+\item tables are valid patterns if all of their keys are integers or
+ strings, and all of their values are valid patterns. A table pattern
+ matches a tested term if:
+ \begin{itemize}
+ \item the tested term is a table;
+ \item every key that appears in the pattern also appears in the
+ tested term;
+ \item for every key, the value associated to this key in the tested
+ term is matched by the sub-pattern associated to this key in the
+ pattern;
+ \end{itemize}
+\item variables are valid patterns, and match everything. Moreover,
+  the variable captures the term it matches, i.e. in the
+  corresponding block, that variable is set to the
+  matched term.
+\end{itemize}
+
+Notice that since \verb|tag| is a regular field in metalua (albeit
+with an optional dedicated syntax), it doesn't need a special case in
+our pattern definition.
+
+\paragraph{Example} Let's consider implementing an evaluator for
+Metalua expressions. We will restrict ourselves to binary operators,
+variables and numbers; we will also consider that we have a
+\verb|values| table mapping variable names to their value, and
+\verb|binopfuncs| which associates an operator name with the corresponding
+function. The evaluator would be written:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+function eval(t)
+ match t with
+ | `Op{ op, a, b } -> return binopfuncs[op](eval(a), eval(b))
+ | `Number{ n } -> return n
+ | `Variable{ v } -> return values[v]
+ end
+end
+\end{Verbatim}
+
+\subsubsection{Pattern compilation}
+
+A pattern in a case will translate into:
+\begin{itemize}
+\item tests: testing that the type of the tested term is the same as
+  the pattern's type for numbers, strings and tables;
+\item local declarations: a variable must be bound to the
+ tested term's value;
+\item nested tests for every pattern key/value pair, when the pattern
+ is a table. Moreover, since there might be multiple tests to run
+ against the tested term's field, it should be put in a temporary
+ variable.
+\end{itemize}
+
+For instance, consider the following pattern:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+{ tag = "Foo", 10, { 20 }, { x = a }, b }
+\end{Verbatim}
+
+It corresponds to the series of tests and assignments on the left:
+
+\begin{minipage}{6cm}
+\begin{Verbatim}[fontsize=\scriptsize]
+
+let v1 = <tested_term>
+type(v1) == "table"
+let v2 = v1.tag
+v2 ~= nil
+type(v2) == "string"
+v2 == "Foo"
+let v2 = v1[1]
+v2 ~= nil
+type(v2) == "number"
+v2 == 10
+let v2 = v1[2]
+v2 ~= nil
+type(v2) == "table"
+let v3 = v2[1]
+v3 ~= nil
+type(v3) == "number"
+v3 == 20
+let v2 = v1[3]
+v2 ~= nil
+type(v2) == "table"
+let v3 = v2.x
+v3 ~= nil
+local a = v3
+let v2 = v1[4]
+v2 ~= nil
+local b = v2
+
+
+\end{Verbatim}
+\end{minipage}
+\begin{minipage}{6cm}
+\begin{Verbatim}[fontsize=\scriptsize]
+
+let v1 = tested_term
+if type(v1) == "table" then
+ let v2 = v1.tag
+ if v2 ~= nil then
+ if type(v2) == "string" then
+ if v2 == "Foo" then
+ let v2 = v1[1]
+ if v2 ~= nil then
+ if type(v2) == "number" then
+ if v2 == 10 then
+ let v2 = v1[2]
+ if v2 ~= nil then
+ if type(v2) == "table" then
+ let v3 = v2[1]
+ if v3 ~= nil then
+ if type(v3) == "number" then
+ if v3 == 20 then
+ let v2 = v1[3]
+ if v2 ~= nil then
+ if type(v2) == "table" then
+ let v3 = v2.x
+ if v3 ~= nil then
+ local a = v3
+ let v2 = v1[4]
+ if v2 ~= nil then
+ local b = v2
+ <inner_term>
+end ... end -- (16 times)
+\end{Verbatim}
+\end{minipage}
+~\\~\\
+
+Notice that the relative order of tests and assignments is meaningful:
+we cannot put all assignments on one side, and all tests on an
+other, e.g. \verb|v2 = v1.tag| on line 3 doesn't make sense if
+\verb|type(v1) == table| on line 2 fails.
+
+We will compile patterns in several steps: first, accumulate all the
+tests and assignments in order; then, we will collapse them in a
+single big nesting of ``if'' statements. At first, we won't try
+to optimize the form of the final term. The list above left would be
+collapsed into the single compound statement above on the right.
+
+\paragraph{Accumulating constraints and tests}
+This is done by a \verb|parse_pattern()| function, which does just what
+is described in the bullet list above:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+ -------------------------------------------------------------------
+ -- Turn a pattern into a list of conditions and assignations,
+ -- stored into [acc]. [n] is the depth of the subpattern into the
+ -- toplevel pattern; [tested_term] is the AST of the term to be
+ -- tested; [pattern] is the AST of a pattern, or a subtree of that
+ -- pattern when [n>0].
+ -------------------------------------------------------------------
+ local function parse_pattern (n, pattern)
+ local v = var(n)
+ if "Number" == pattern.tag or "String" == pattern.tag then
+ accumulate (+{ -{v} == -{pattern} })
+ elseif "Id" == pattern.tag then
+ accumulate (+{stat: local -{pattern} = -{v} })
+ elseif "Table" == pattern.tag then
+ accumulate (+{ type( -{v} ) == "table" } )
+ local idx = 1
+ for _, x in ipairs (pattern) do
+ local w = var(n+1)
+ local key, sub_pattern
+ if x.tag=="Key"
+ then key = x[1]; sub_pattern = x[2]
+ else key = `Number{ idx }; sub_pattern = x; idx=idx+1 end
+ accumulate (+{stat: (-{w}) = -{v} [-{key}] })
+ accumulate (+{ -{w} ~= nil })
+ parse_pattern (n+1, sub_pattern)
+ end
+ else error "Invalid pattern type" end
+ end
+ -------------------------------------------------------------------
+\end{Verbatim}
+
+This function relies on the following helper functions:
+\begin{itemize}
+\item {\tt var($n$)} generates the variable name ``\verb|v|$n$'', which
+ is used to store the tested term at depth level $n$. Indeed,
+ sub-patterns in table fields are matched against sub-terms of the
+  tested term. It also remembers the biggest $n$ it ever received,
+ and stores it into \verb|max_n| (this will be used to know which
+ local vars have to be generated, see below);
+\item {\tt accumulate()} just stores an additional code snippet in a
+ dedicated list;
+\end{itemize}
+
+In the quotes, you might notice the parentheses around the variable in
+``\verb|(-{w}) = -{v} [-{key}]|'': they're here to let the compiler
+know that what's in \verb|-{...}| is an expression, not a
+statement. Without them, it would expect {\tt w} to be the AST of a
+statement (since we're in a statement context at this point), then
+choke on the unexpected ``='' symbol. This is a common idiom, which
+you should think about every time you generate an assignment to an
+anti-quoted identifier.
+
+\paragraph{Collapsing the accumulated quotes}
+As written above, our collapsing function will be kept as simple
+as possible, and will not try to minimize the amount of generated
+code. It takes as parameters {\tt n}, the index of the quote currently
+collapsed in the accumulator, and {\tt inner\_term} the statement
+block to put inside the innermost part of the test. It calls itself
+recursively, so that the collapsed term is built inside out (generally
+speaking, working with trees, including AST, involves a lot of
+recursive functions). \verb|acc| is the list filled by the
+\verb|accumulate()| function:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+ -------------------------------------------------------------------
+ -- Turn a list of tests and assignations into [acc] into a
+ -- single term of nested conditionals and assignments.
+ -- [inner_term] is the AST of a term to be put into the innermost
+  -- conditional, after all assignments. [n] is the index in [acc]
+ -- of the term currently parsed.
+ --
+ -- This is a recursive function, which builds the inner part of
+ -- the statement first, then surrounds it with nested
+ -- [if ... then ... end], [local ... = ...] and [let ... = ...]
+ -- statements.
+ -------------------------------------------------------------------
+ local function collapse (n, inner_term)
+ assert (not inner_term.tag, "collapse inner term must be a block")
+ if n > #acc then return inner_term end
+ local it = acc[n]
+ local inside = collapse (n+1, inner_term)
+ assert (not inside.tag, "collapse must produce a block")
+ if "Op" == it.tag then
+ -- [it] is a test, put it in an [if ... then .. end] statement
+ return +{block: if -{it} then -{inside} end }
+ else
+ -- [it] is a statement, just add it at the result's beginning.
+ assert ("Let" == it.tag or "Local" == it.tag)
+ return { it, unpack (inside) }
+ end
+ end
+\end{Verbatim}
+
+To fully understand this function, one must remember that test
+operations are translated into {\tt`Op\{ <opname>, <arg1>, <arg2>\}}
+AST. That's why we didn't have to put an explicit reminder in the
+accumulator, remembering whether a quote was a test or a statement:
+we'll just check for \verb|`Op|'s presence.
+
+\subsubsection{Match statement compilation}
+We already know how to translate a single pattern into a Lua test
+statement. This statement will execute a given block, with all pattern
+variables bound appropriately, if and only if the tested term matches
+the pattern.
+
+To build a complete match statement, we need to test all patterns in
+sequence. When one of them succeeds, we must skip all the following
+cases. We could do that with some complex ``\verb|else|'' parts in the
+tests, but there is a simpler way to jump out of some nested blocks:
+the ``\verb|break|'' statement. Therefore, we will enclose the cases
+into a loop that executes exactly once, so that a ``\verb|break|''
+will jump just after the last case. Then, we will add a
+``\verb|break|'' at the end of every statement that doesn't already
+finish with a ``\verb|return|'', so that other cases are skipped upon
+successful matching. To sum up, we will translate this:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match <foo> with
+| <pattern_1> -> <x1>
+| <pattern_2> -> <x2>
+ ...
+| <pattern_n> -> <xn>
+end
+\end{Verbatim}
+
+\noindent into this:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+repeat
+ local v1, v2, ... vx -- variables used to store subpatterns
+ let v1 = <tested_term>
+ if <compilation of pattern_1> ... then x1; break end
+ if <compilation of pattern_2> ... then x2; break end
+ ...
+ if <compilation of pattern_n> ... then xn; break end
+until true
+\end{Verbatim}
+
+First, we add final \verb|break| statements where required, and we
+compile all (pattern, block) pairs:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+ -------------------------------------------------------------------
+ -- parse all [pattern ==> block] pairs. Result goes in [body].
+ -------------------------------------------------------------------
+ local body = { }
+ for _, case in ipairs (cases) do
+ acc = { } -- reset the accumulator
+ parse_pattern (1, case[1], var(1)) -- fill [acc] with conds and lets
+ local last_stat = case[2][#case[2]]
+ if last_stat and last_stat.tag ~= "Break" and
+ last_stat.tag ~= "Return" then
+ table.insert (case[2], `Break) -- to skip other cases on success
+ end
+ local compiled_case = collapse (1, case[2])
+ for _, x in ipairs (compiled_case) do table.insert (body, x) end
+ end
+\end{Verbatim}
+
+\noindent Then, we can just splice it into the appropriate quote:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+ -------------------------------------------------------------------
+ local local_vars = { }
+ for i = 1, max_n do table.insert (local_vars, var(i)) end
+
+ -------------------------------------------------------------------
+ -- cases are put inside a [repeat until true], so that the [break]
+ -- inserted after the value will jump after the last case on success.
+ -------------------------------------------------------------------
+ local result = +{ stat:
+ repeat
+ -{ `Local{ local_vars, { } } }
+ (-{var(1)}) = -{tested_term}
+ -{ body }
+ until true }
+ return result
+ -------------------------------------------------------------------
+\end{Verbatim}
+
+There is one point to notice in this quote: \verb|body| is used where
+a statement is expected, although it contains a {\em list} of
+statements rather than a single statement. Metalua is designed to
+accept this, i.e. if {\tt a, b, c, d} are statements, AST {\tt
+`Do\{ a, b, c, d \}} and {\tt`Do\{ a, \{ b, c \}, d\} } are
+equivalent. This feature partially replaces Lisp's \verb|@(...)|
+operator.
+
+\subsubsection{Syntax extension}
+To use this, we provide a syntax inspired by OCaml\footnote{It is
+ actually the same syntax as OCaml's, except that we introduced an
+ explicit {\tt end} terminator, to stay homogeneous with Lua.}:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match <foo> with
+ <pattern> -> block
+| <pattern> -> block
+ ...
+| <pattern> -> block
+end
+\end{Verbatim}
+
+For this, we need to declare new keywords \verb|match|,
+\verb|with| and \verb|->|. Then, we build the (pattern, block) parser
+with \verb|gg.sequence{ }|, and read a list of them, separated with
+``\verb+|+'', thanks to \verb|gg.list{ }|. Moreover, we accept an
+optional ``\verb+|+'' before the first case, so that all cases line up
+nicely:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+----------------------------------------------------------------------
+mlp.lexer:add{ "match", "with", "->" }
+
+mlp.stat:add{ "match", mlp.expr, "with", gg.optkeyword "|",
+ gg.list{ gg.sequence{ mlp.expr, "->", mlp.block },
+ separators = "|",
+ terminators = "end" },
+ "end",
+ builder = |x| match_parser (x[1], x[3]) }
+----------------------------------------------------------------------
+\end{Verbatim}
+
+\noindent Now, if you try this\ldots\ it won't work! Indeed, Metalua
+needs to know what keywords might terminate a block of statements. In
+this case, it doesn't know that ``\verb+|+'' can terminate a block. We
+need therefore to add the following statement:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+mlp.block.terminators:add "|"
+\end{Verbatim}
+
+\noindent Finally that's it, we have implemented a working pattern
+matching system in 75 lines of code!
+
+\subsubsection{Possible improvements}
+Here are a couple of suggestions to further improve the pattern
+matching system presented above. Some of these proposals can be
+implemented very quickly, some others more complex; all of them
+present some practical interest.
+
+The code of the basic version, as presented here, is available at
+\url{http://metalua.luaforge.net/FIXME}.
+
+\paragraph{Booleans} Boolean constants aren't handled in the system
+above, neither as table keys nor as patterns. They're easy to add, and
+doing it will help you get gently into the code.
+
+\paragraph{Gotos considered beneficial} Gotos might be harmful in
+hand-written programs, but they're a bliss for machine-generated
+code. They would slightly simplify the code of pattern matching as
+presented above; but for many extension proposals listed below, they
+will make reasonably easy some things which would otherwise be
+awfully contrived. Exercise: simplify the implementation above as much
+as possible by using gotos.
+
+Labels and gotos in metalua ASTs are represented as {\tt`Label\{ id
+ \}} and {\tt`Goto\{ id \}} respectively, with {\tt id} an
+identifier, typically generated by {\tt mlp.gensym()}. It is always
+safe to jump out of a block; jumping into a block is not guaranteed
+against weird interactions with local variables and upvalues.
+
+\paragraph{{\tt collapse()} optimization} Instead of nesting if
+statements systematically, two nested {\tt if}s without {\tt else}
+branches can be simplified in a single branch with an {\tt and}
+operator. Not sure it would change the bytecode's efficiency, but
+that's a good exercise of AST manipulation.
+
+\paragraph{Superfluous assignments} When parsing a table entry, we
+assign it to a variable, then recursively call {\tt parse\_pattern()}
+on it; in the frequent case where the entry was simply a variable, it
+re-assigns it again. This could be optimized away.
+
+\paragraph{Checking table arity} In many cases, it is practical to
+check the number of elements in the array-part of the table. Here is a
+simple modification proposal: by default, a table pattern matches a
+table term only if they have the same number of array-part
+elements. However, if the last element of the pattern is {\tt`Dots}
+(a.k.a. {\tt+\{...\}}), then the term simply has to have
+{\it at least} as many array-part elements as the pattern.
+
+\paragraph{Adding guards}
+It is sometimes desirable to add arbitrary conditions for a pattern to
+match, conditions which might not be expressed by a pattern. OCaml
+allows to add them with a ``\verb|when|'' keyword:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match n with
+| 0 -> print "zero"
+| n when n%2 == 0 -> print "even number"
+| _ -> print "odd number"
+end
+\end{Verbatim}
+I'd advise you to prefer {\tt if} as a dedicated keyword, rather than
+{\tt when}: it's unambiguous in this context, and saves a keyword
+reservation.
+
+\paragraph{More bindings}
+The way pattern matching is currently implemented, one can either bind
+a subterm to a variable, or check its structure against a sub-pattern,
+not both simultaneously. OCaml provides an ``\verb|as|'' operator,
+which allows to do both (Haskell calls it ``\verb|@|''). For instance,
+in the following example, any ADT whose tag is \verb|"RepeatMe"| will
+be replaced by two occurrences of itself, while others will remain
+unchanged:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match something with
+| `RepeatMe{ ... } as r -> { r, r }
+| x -> x
+end
+\end{Verbatim}
+``\verb|as|'' will have to be declared as an infix operator, whose
+meaning will remain undefined in expressions which are not patterns.
+
+As an alternative, you can reuse an existing infix operator, thus
+avoiding to mess the expression parser. For instance, use {\tt *}
+instead of {\tt as}. You can go further, and implement {\tt +} as an
+``or'' operator ({\tt pattern1 + pattern2} would match if either
+of the patterns matches), although this would significantly complicate
+the implementation of {\tt parse\_pattern()}.
+
+The {\tt+} operator might prove tricky to implement, if you don't
+convert your code generator to gotos and labels first.
+
+\paragraph{Linear bindings}
+We should check, when compiling a pattern, that it is left-linear,
+i.e. that variables don't appear more than once in the pattern. People
+might be tempted to write things like this to check whether a tree is
+symmetric:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match t with
+| `Tree{ x, x } -> print "Symmetric!"
+| `Tree{ x, y } -> print "Not symmetric"
+| `Leaf{ _ } -> print "Symmetric!"
+end
+\end{Verbatim}
+However, this would work in Prolog but not with our pattern matching,
+as two occurrences of the same variable in the pattern don't cause an
+equality test to be added. We should detect such non-linear variables,
+and implement a suitable reaction:
+\begin{itemize}
+\item throw an error, or at least a warning;
+\item or add an equality test between the terms matched by the
+ non-linear variable;
+\item or offer a syntax extension that lets the user provide his own
+ equality predicate.
+\end{itemize}
+
+Notice that the latter choice would drive us towards a Prolog
+unification algorithm, which opens interesting opportunities.
+
+You might offer an exception for variable ``{\tt\_}'', which is often
+intended as a dummy, unused variable. Non-linear occurrences of it
+should probably be silently accepted, without even performing the
+corresponding binding.
+
+\paragraph{Generalized assignments}
+Yet another OCaml-inspired feature: an assignment such as
+``\verb|foo = bar|'' is almost a special
+case of pattern matching with only one case: the left-hand side is
+the pattern, and the right-hand side is the ``raw'' ``\verb|foo=bar|''
+assignment. Seen this way, it allows to write things such as
+``\verb|`If{ cond, block } = some_ast|'' to assign \verb|cond| and
+\verb|block| to the subparts of \verb|some_ast| (if we know that
+\verb|some_ast| is the AST of an \verb|if| statement).
+
+If you go this way, however, make sure that the code generated for
+simple {\tt let}s is as efficient as before! Moreover, there is an (easy)
+scoping issue: the variables assigned belong to the scope of the
+surrounding block.
+
+\paragraph{Pattern matchings as expressions}
+Pattern matching are currently statements, and take statements as
+right-hand sides of cases. We could allow pattern matchings where
+expressions are expected: these would take expressions instead of
+statements as right-hand sides. Two ways to implement this: the dirty
+one (hack with functions to change match statements into expressions),
+and the clean one (refactoring existing code, so that it is agnostic
+about its right-hand side type, and provide two specialized
+versions of it for statements and expressions).
+
+\paragraph{Bootstrap it}
+That's something language designers love to do, for largely mystic
+reasons: writing a language's compiler in the language itself. Here,
+the idea is to re-implement the pattern matching extension by using
+pattern matching, and compile it with the older version. Comparing the
+first and second versions of the code will give you an idea of how
+much code clarification is brought to you by the pattern matching
+extension.
+
+\paragraph{Pattern conjunction} Another feature to take from OCaml is
+multiple patterns for a single block. Instead of associating one
+block with one pattern, cases associate a block with a (non-empty)
+list of patterns. All of these patterns have to bind the same
+variables, except for {\tt\_}. The first pattern in the list to match
+the tested term does the binding. Patterns are separated by
+``\verb+|+''. Example:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match x with
+| 1 | 2 | 3 -> print(x)
+| n -> print "more than 3"
+end
+\end{Verbatim}
+(Hint: put the block in a local function. $2^{\mathrm{nd}}$ hint: sort
+bound variables, e.g. by lexicographic order. Or much simpler and
+more effective: convert your code generator to gotos+labels first).
+
+\paragraph{XML munching} Ever tried to transform some XML document through
+XSLT? Did you feel that this was even more kludgy than XML itself? Here
+is a challenging proposal:
+\begin{itemize}
+\item Realize, if you didn't already, that Metalua's ADT are
+  isomorphic to XML, if you identify string-keys in tables with
+  attributes, and limit their content to strings and numbers. For
+ instance, ``{\tt <foo bar=3><baz/>eek</foo>}'' easily maps to ``{\tt
+ `foo\{ bar=3, `baz, "eek" \}}'';
+\item compare what ML-style pattern matching does with what XSLT
+ does (and with what you'd like it to do);
+\item design, implement, publish. You might want to google
+ ``CDuce''\footnote{\url{http://www.cduce.org}} for neat ideas.
+\end{itemize}
+
+If you do this, I'd be really interested to put back your contribution
+in the next version of Metalua!
+
+\subsubsection{Correction}
+Most of the improvements proposed here are actually implemented in the
+{\tt match} library provided with metalua. Check its (commented)
+sources!
-\section{{\tt springs}: separate universes for Lua} \r
-\r
-\subsection{Origins and purpose}\r
-Springs (Serialization through Pluto for RINGS) is an extension of Lua Rings and\r
-Pluto: Lua Rings allow to create new Lua states from within Lua, but offers\r
-limited communication between them: a master universe can only send instruction\r
-to a slave universe through a ``{\tt dostring}'', and the slave universe can\r
-only send back strings, integers and booleans as results. Since Pluto allows to\r
-serialize pretty much any Lua value as a string, it's used to create powerful\r
-bidirectional communications between universes.\r
-\r
-Springs is used internally by metalua to prevent different files' compile time\r
-actions to interfere with each other: each file is compiled on a fresh clean\r
-single-use slate.\r
-\r
-The underlying projects can be found on the web:\r
-\begin{itemize}\r
-\item \verb|<http://www.keplerproject.org/rings/>|\r
-\item \verb|<http://luaforge.net/projects/pluto/>|\r
-\end{itemize}\r
-Notice however that the Pluto version used in metalua has significantly patched\r
-and debugged by Ivko Stanilov.\r
-\r
-\subsection{API}\r
-Go to Lua Rings web site for a reference on its original API. This API is\r
-extended by spring with:\r
-\begin{itemize}\r
-\item function {\tt springs.new()} which creates a new universe ready for Pluto\r
- communication;\r
-\item ({\tt:dostring()} works as usual)\r
-\item {\tt :pcall(f, arg1, ..., argn)} works as standard function pcall(),\r
- except that execution occurs in the sub-state. Arguments are passed and\r
- results are returned transparently acrosse universes. Moreover, 'f' can also\r
- be a string, rather than a function. If it's a string, it must eval to a\r
- function in the substate's context. This allows to pass standard functions\r
- easily. For instance:\\\r
- \verb|r:pcall('table.concat', {'a', 'b', 'c'}, ',')|\r
-\item {\tt :call()} is similar to :pcall(), except that in case of error, it\r
- actually throws the error in the sender universe's context. Therefore, it\r
- doesn't return a success status as does pcall(). For instance: \\\r
- \verb|assert('xxx' == r:call('string.rep', 'x', 3))|\r
-\end{itemize}\r
+\section{{\tt springs}: separate universes for Lua}
+
+\subsection{Origins and purpose}
+Springs (Serialization through Pluto for RINGS) is an extension of Lua Rings and
+Pluto: Lua Rings allow to create new Lua states from within Lua, but offers
+limited communication between them: a master universe can only send instruction
+to a slave universe through a ``{\tt dostring}'', and the slave universe can
+only send back strings, integers and booleans as results. Since Pluto allows to
+serialize pretty much any Lua value as a string, it's used to create powerful
+bidirectional communications between universes.
+
+Springs is used internally by metalua to prevent different files' compile time
+actions to interfere with each other: each file is compiled on a fresh clean
+single-use slate.
+
+The underlying projects can be found on the web:
+\begin{itemize}
+\item \verb|<http://www.keplerproject.org/rings/>|
+\item \verb|<http://luaforge.net/projects/pluto/>|
+\end{itemize}
+Notice however that the Pluto version used in metalua has been
+significantly patched and debugged by Ivko Stanilov.
+
+\subsection{API}
+Go to Lua Rings web site for a reference on its original API. This API is
+extended by springs with:
+\begin{itemize}
+\item function {\tt springs.new()} which creates a new universe ready for Pluto
+ communication;
+\item ({\tt:dostring()} works as usual)
+\item {\tt :pcall(f, arg1, ..., argn)} works as standard function pcall(),
+ except that execution occurs in the sub-state. Arguments are passed and
+  results are returned transparently across universes. Moreover, 'f' can also
+ be a string, rather than a function. If it's a string, it must eval to a
+ function in the substate's context. This allows to pass standard functions
+ easily. For instance:\\
+ \verb|r:pcall('table.concat', {'a', 'b', 'c'}, ',')|
+\item {\tt :call()} is similar to :pcall(), except that in case of error, it
+ actually throws the error in the sender universe's context. Therefore, it
+ doesn't return a success status as does pcall(). For instance: \\
+ \verb|assert('xxx' == r:call('string.rep', 'x', 3))|
+\end{itemize}
-\section{Extension {\tt trywith}: exceptions and finalization}\r
-Lua offers error handling primitives \verb+pcall()+ and\r
-\veb+xpcall()+. However, they are pretty low level, and their syntax\r
-is cumbersome to use and read back. This extension offers a proper\r
-syntax for the handling of such exceptions.\r
-\r
-\subsection{Syntax}\r
-An error handling statement has the following form:\r
-\r
-\begin{verbatim}\r
-try\r
- <protected block>\r
-catch <exception pattern #1> then\r
- <exception handling block #1>\r
-catch <exception pattern #2> then\r
- <exception handling block #2>\r
- ...\r
-catch <exception pattern #n> then\r
- <exception handling block #n>\r
-finally\r
- <finalization block>\r
-end\r
-\end{verbatim}\r
-\r
-\subsection{Semantics}\r
-When such a statement is executed:\r
-\begin{itemize}\r
-\item the protected code is executed\r
-\item if its execution causes an error, the error message is matched\r
- against all exception patterns, until one matches or the last one is\r
- reached (see the \verb+match+ extension for patterns\r
- semantics). Patterns can include guard clauses. The block\r
- corresponding to the first matching pattern is executed. If no\r
- pattern matches, the error will be rethrown.\r
-\item the block following the \verb+finally+ keyword will be executed\r
- no matter what: if the protected block executes succesfully, if it\r
- raises an error, if it causes a \verb+return+, if an error occurs in\r
- an exception handling block, whether an error is caught or\r
- not... The only reason why the finalization block might not be run\r
- is because of a sudden death of the process (call to {\tt os.exit()}\r
- or core dump).\r
-\end{itemize}\r
-\r
-The finally block can be omitted, or there can be no error catching\r
-case. The following examples are legal:\r
-\r
-\begin{verbatim}\r
-try\r
- f = io.open ('file.txt', 'r')\r
- num_char = #f:read '*a'\r
-finally\r
- f:close()\r
-end\r
-\r
-try\r
- do_stuff()\r
-catch "mismatch" then\r
- print "match statement failure"\r
-catch "[Ee]of"/_ then -- regexp pattern\r
- print "An end-of-file error seems to have happened"\r
-catch x if type(x)=='table' then\r
- print "The error is a table, not a string!"\r
-end\r
-\end{verbatim}\r
-\r
- \subsection{RAII}\r
- RAII, or ``Resource Acquisition Is Initialization'', is a common\r
- programming pattern in which an object's lifetime is strictly\r
- associated with a given lexical scope. For instance, if a file is\r
- opened in a given scope, it must be closed as soon as this scope is\r
- leaved, even if it's leaved due to a {\tt return} or an error. The\r
- ``finally'' block allows this, but since it's a very common use case,\r
- there is a dedicated extension ``with/do'': you initialize some resource\r
- behind the ``with'' keyword, and it will be closed after the ``do''\r
- block is left. The only constraint is that the resources must have a\r
- {\tt:close()} method which releases them. Here is a usage example:\r
-\r
-\begin{verbatim}\r
--{ extension 'withdo' }\r
-\r
-with f1, f2 = io.open 'file1.txt', io.open 'file2.txt' do\r
- local t1 = f1:read '*a'\r
- local t2 = f2:read '*a'\r
- printf("The files contain %i and %i chars respectively", t1, t2)\r
-end\r
-\end{verbatim}\r
-\r
+\section{Extension {\tt trywith}: exceptions and finalization}
+Lua offers error handling primitives \verb+pcall()+ and
+\verb+xpcall()+. However, they are pretty low level, and their syntax
+is cumbersome to use and read back. This extension offers a proper
+syntax for the handling of such exceptions.
+
+\subsection{Syntax}
+An error handling statement has the following form:
+
+\begin{verbatim}
+try
+ <protected block>
+catch <exception pattern #1> then
+ <exception handling block #1>
+catch <exception pattern #2> then
+ <exception handling block #2>
+ ...
+catch <exception pattern #n> then
+ <exception handling block #n>
+finally
+ <finalization block>
+end
+\end{verbatim}
+
+\subsection{Semantics}
+When such a statement is executed:
+\begin{itemize}
+\item the protected code is executed
+\item if its execution causes an error, the error message is matched
+ against all exception patterns, until one matches or the last one is
+ reached (see the \verb+match+ extension for patterns
+ semantics). Patterns can include guard clauses. The block
+ corresponding to the first matching pattern is executed. If no
+ pattern matches, the error will be rethrown.
+\item the block following the \verb+finally+ keyword will be executed
+  no matter what: if the protected block executes successfully, if it
+ raises an error, if it causes a \verb+return+, if an error occurs in
+ an exception handling block, whether an error is caught or
+ not... The only reason why the finalization block might not be run
+ is because of a sudden death of the process (call to {\tt os.exit()}
+ or core dump).
+\end{itemize}
+
+The finally block can be omitted, or there can be no error catching
+case. The following examples are legal:
+
+\begin{verbatim}
+try
+ f = io.open ('file.txt', 'r')
+ num_char = #f:read '*a'
+finally
+ f:close()
+end
+
+try
+ do_stuff()
+catch "mismatch" then
+ print "match statement failure"
+catch "[Ee]of"/_ then -- regexp pattern
+ print "An end-of-file error seems to have happened"
+catch x if type(x)=='table' then
+ print "The error is a table, not a string!"
+end
+\end{verbatim}
+
+ \subsection{RAII}
+ RAII, or ``Resource Acquisition Is Initialization'', is a common
+ programming pattern in which an object's lifetime is strictly
+ associated with a given lexical scope. For instance, if a file is
+  opened in a given scope, it must be closed as soon as this scope is
+  left, even if it's left due to a {\tt return} or an error. The
+ ``finally'' block allows this, but since it's a very common use case,
+ there is a dedicated extension ``with/do'': you initialize some resource
+ behind the ``with'' keyword, and it will be closed after the ``do''
+ block is left. The only constraint is that the resources must have a
+ {\tt:close()} method which releases them. Here is a usage example:
+
+\begin{verbatim}
+-{ extension 'withdo' }
+
+with f1, f2 = io.open 'file1.txt', io.open 'file2.txt' do
+ local t1 = f1:read '*a'
+ local t2 = f2:read '*a'
+ printf("The files contain %i and %i chars respectively", t1, t2)
+end
+\end{verbatim}
+
-\section{{\tt walk}, the code walker}\r
-\r
-When you write advanced macros, or when you're analyzing some code to check for\r
-some property, you often need to design a function that walks through an\r
-arbitrary piece of code, and does complex stuff on it. Such a function is called\r
-a code walker. Code walkers can be used for some punctual adjustments, e.g.\r
-changing a function's {\tt return} statements into something else, or checking\r
-that a loop performs no {\tt break}, up to pretty advanced transformations, such\r
-as CPS transformation (a way to encode full continuations into a language taht\r
-doesn't support them natively; see Paul Graham's On Lisp for an accessible\r
-description of how it works), lazy semantics...\r
-\r
-Anyway, code walkers are tricky to write, can involve a lot of boilerplate code,\r
-and are generally brittle. To ease things as much as possible, Metalua comes\r
-with a walk library, which intends to accelerate code walker implementation.\r
-Since code walking is intrinsically tricky, the lib won't magically make it\r
-trivial, but at least it will save you a lot of time and code, when compared to\r
-writing all walkers from scratch. Moreover, other people who took the time to\r
-learn the walker generator's API will enter into your code much faster.\r
-\r
-\subsection{Principles}\r
-\r
-Code walking is about traversing a tree, first from root to leaves, then from\r
-leaves back to the root. This tree is not uniform: some nodes are expressions,\r
-some others statements, some others blocks; and each of these node kinds is\r
-subdivided in several sub-cases (addition, numeric for loop...). The basic code\r
-walker just goes from root to leaves and back to root without doing anything.\r
-Then it's up to you to plug some action callbacks in that walker, so that it\r
-does interesting things for you.\r
-\r
-Without entering into the details of AST structure, here is a simplified version\r
-of the walking algorithm, pretending to work on a generic tree:\r
-\r
-\begin{verbatim}\r
-function traverse(node)\r
- local down_result = down(node)\r
- if down_result ~= 'break' then \r
- for c in children(node) do\r
- traverse(node)\r
- end\r
- end\r
- up(node)\r
-end\r
-\end{verbatim}\r
-\r
-The algorithm behind 'walk' is almost as simple as the one above, except that\r
-it's specialized for AST trees. You can essentially specialize it by providing\r
-your own up() and down() functions. These visitor functions perform whatever\r
-action you want on the AST; moreover, down() has the option to return 'break':\r
-in that case, the sub-nodes are not traversed. It allows the user to shun parts\r
-of the tree, or to provide his own special traversal method in the down()\r
-visitor.\r
-\r
-The real walker generator is only marginally more complex than that:\r
-\begin{itemize}\r
-\item It lets you define your up() and down(), and down() can return 'break' to\r
- cut the tree traversal; however, these up() and down() functions are\r
- specialized by node kind: there can be separate callbacks for expr nodes, stat\r
- nodes, block nodes.\r
-\item There's also a binder() visitor for identifier binders. Binders are\r
- variables which declare a new local variable; you'll find them in nodes\r
- `Local, `Localrec, `Forin, `Fornum, `Function. The binders are visited just\r
- before the variable's scope begins, i.e. before traversing a loop or a\r
- function's body, after traversing a `Local's right-hand side, before\r
- traversing a `Localrec's right-hand side. \\ \r
- Notice that separate up() and down() visitors wouldn't make sense for\r
- binders, since they're leave nodes.\r
-\item Visitor functions don't only take the visited node as parameter: they also\r
- take the list of all expr, stat and block nodes above it, up to the AST's\r
- root. This allows a child node to act on its parent nodes.\r
-\end{itemize}\r
-\r
-\subsection{API}\r
-There are 3 main tree walkers: {\tt walk.expr()}, {\tt walk.stat()} and {\tt\r
- walk.block()}, to walk through the corresponding kinds of ASTs. Each of these\r
-walker take as parameters a table {\tt cfg} containing the various visitor\r
-functions, and the AST to walk throuhg. the configuration table {\tt cfg} can\r
-contain fields:\r
-\begin{itemize}\r
-\item {\tt cfg.stat.down(node, parent, grandparent...)}, which applies when\r
- traversing a statement down, i.e. before its children nodes are parsed, and\r
- can modify the tree, and return {\tt nil} or {\tt'break'}. The way children\r
- are traversed is decided {\em after} the {\tt down()} visitor has been run:\r
- this point matters when the visitor modifies its children nodes.\r
-\item {\tt cfg.stat.up(node, parent, grandparent...)}, which is applies on the\r
- way back up. It is applied even if {\tt cfg.stat.down()} returned\r
- {\tt'break'}, but in that case, the children have not been (and will not be)\r
- traversed. \r
-\item {\tt cfg.expr.down()} and {\tt cfg.expr.up()}, which work just as their\r
- {\tt stat} equivalent, but apply to expression nodes.\\\r
- Notice that in Lua, function calls and method invocations can be used as\r
- statements as well as as espressions: in such cases, they are visited only by\r
- the statement visitor, not by the expression visitor.\r
-\item {\tt cfg.block.down()} and {\tt cfg.block.up()} do the same for statements\r
- blocks: loops, conditional and function bodies.\r
-\item {\tt cfg.binder(identifier, id\_parent, id\_grandparent...)}: this\r
- is run on identifiers which create a new local variable, jsut before that\r
- variable's scope begins.\r
-\end{itemize}\r
-\r
-Moreover, there is a {\tt walk.guess(cfg, ast)} walker which tries to guess the\r
-type of the AST it receives, and applies the appropriate walker. When an AST can\r
-be either an expression or a statement (nodes {\tt`Call} and {\tt`Invoke}), it\r
-is interpreted as an expression.\r
-\r
-\subsection{Examples}\r
-\r
-A bit of practice now. Let's build the simplest walker possible, that does\r
-nothing:\r
-\r
-\begin{verbatim}\r
-cfg = { }\r
-walker = |ast| walk.block(cfg, ast)\r
-\end{verbatim}\r
-\r
-Now, let's say we want to catch and remove all statement calls to function\r
-assert(). This can be done by removing its tag and content: an empty list is\r
-simply ignored in an AST. So we're only interested by `Call nodes, and within\r
-these nodes, we want the function to be `Id 'assert'. All of this is only\r
-relevant to stat nodes:\r
-\r
-\begin{verbatim}\r
-function cfg.stat.down (x)\r
- match x with\r
- | `Call{ `Id 'assert', ... } -> x.tag=nil; x <- { }\r
- | _ -> -- not interested by this node, do nothing\r
- end\r
-end\r
-\end{verbatim}\r
-\r
-You'll almost always want to use the 'match' extension to implement visitors.\r
-The imperative table overrider ({\tt x <- y} a.k.a. {\tt table.override(x, y)}\r
-also often comes handy to modify an AST.\r
-\r
-We'll now remove {\tt assert()} calls in non-statement; we cannot replace an\r
-expression by nothing, so we'll replace these nodes by these will simply be\r
-replaced by {\tt nil}:\r
-\r
-\begin{verbatim}\r
-function cfg.expr.down (x)\r
- match x with\r
- | `Call{ `Id 'assert', ... } -> x <- `Nil\r
- | _ -> -- not interested by this node, do nothing\r
- end\r
-end\r
-\end{verbatim}\r
-\r
-\r
-Here's a remark for functional programmers: this API is very imperative; you\r
-might cringe at seeing the `Call nodes transformed in-place. Well, I tend to\r
-agree but it's generally counter-productive to work against the grain of the\r
-wood: Lua is imperative at heart, and design attempts at doing this functionally\r
-sucked more than approaches that embraced imperativeness. \r
-\r
-\paragraph{Cuts}\r
-By making down() return 'break', you can prevent the traversal to go further\r
-down. This might be either because you're not interested by the subtrees, or\r
-because you want to traverse them in a special way. In that later case, just do\r
-the traversal by yourself in the down() function, and cut the walking by\r
-returning 'break', so that nodes aren't re-traversed by the default walking\r
-algorithm. We'll see that in the next, more complex example, listing of free\r
-variables.\r
-\r
-This example is exclusively there for demonstration purposes. For actual work on\r
-identifiers that require awareness of an identifier's binder of freedom, there\r
-is a dedicated {\tt walk.id} library.\r
-\r
-We'll progressively build a walker that gathers all global variables used in a\r
-given AST. This involves keeping, at all times, a set of the identifiers\r
-currently bound by a "local" declaration, by function parameters, as for loop\r
-variables etc. Then, every time an identifier is found in the AST, its presence\r
-is checked in the current set of bound variables. If it isn't in it, then it's a\r
-free (global) identifier.\r
-\r
-The first thing we'll need is a scope handling system: something that keeps\r
-track of what identifiers are currently in scope. It must also allow to save the\r
-current scope (e.g. before we enter a new block) and restore it afterwards (e.g.\r
-after we leave the block). This is quite straightforward and unrelated to code\r
-walking; here is the code:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-require 'std'\r
-require 'walk'\r
-\r
--{ extension 'match' }\r
-\r
---------------------------------------------------------------------------------\r
--- Scope handling: ':push()' saves the current scope, ':pop()'\r
--- restores the previously saved one. ':add(identifiers_list)' adds\r
--- identifiers to the current scope. Current scope is stored in\r
--- '.current', as a string->boolean hashtable.\r
---------------------------------------------------------------------------------\r
-\r
-local scope = { }\r
-scope.__index = scope\r
-\r
-function scope:new()\r
- local ret = { current = { } }\r
- ret.stack = { ret.current }\r
- setmetatable (ret, self)\r
- return ret\r
-end\r
-\r
-function scope:push()\r
- table.insert (self.stack, table.shallow_copy (self.current))\r
-end\r
-\r
-function scope:pop()\r
- self.current = table.remove (self.stack)\r
-end\r
-\r
-function scope:add (vars)\r
- for id in values (vars) do\r
- match id with `Id{ x } -> self.current[x] = true end\r
- end\r
-end\r
-\end{Verbatim}\r
-\r
-(There is an improved version of that class in library {\tt walk.scope}; cf.\r
-its documentation for details).\r
-\r
-Now let's start designing the walker. We'll keep a scope object up to date, as\r
-well as a set of found free identifiers, gathered every time we find an `Id{ }\r
-node. To slightly simplify matter, we'll consider that the AST represent a\r
-block.\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-local function fv (term)\r
- local freevars = { }\r
- local scope = scope:new()\r
- local cfg = { expr = { } }\r
-\r
- function cfg.expr.down(x)\r
- match x with\r
- | `Id{ name } -> if not scope.current[name] then freevars[name] = true end\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- walk.guess(cfg, term)\r
- return freevars\r
-end\r
-\end{Verbatim}\r
-\r
-Since we don't ever add any identifier to the scope, this will just list all the\r
-identifiers used in the AST, bound or not. Now let's start doing more\r
-interesting things:\r
-\r
-\begin{itemize}\r
-\item We'll save the scope's state before entering a new block, and restore it\r
- when we leave it. That will be done by providing functions {\tt cfg.block.down()}\r
- and {\tt cfg.block.up()}. Saving and restoring will be performed by methods\r
- {\tt :push()} and {\tt :pop()}.\r
-\item Whenever we find a {\tt local} declaration, we'll add the list of\r
- identifiers to the current scope, so that they won't be gathered when parsing\r
- the {\tt `Id} expression nodes. Notice that the identifiers declared by the\r
- 'local' statement only start being valid after the statement, so we'll add\r
- them in the {\tt cfg.stat.up()} function rather than {\tt cfg.stat.down()}.\r
-\end{itemize}\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-local cfg = { expr = { },\r
- stat = { },\r
- block = { } }\r
- [...]\r
- function cfg.stat.up(x)\r
- match x with\r
- | `Local{ vars, ... } -> scope:add(vars)\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- -----------------------------------------------------------------------------\r
- -- Create a separate scope for each block, close it when leaving.\r
- -----------------------------------------------------------------------------\r
- function cfg.block.down() scope:push() end\r
- function cfg.block.up() scope:pop() end \r
-\end{Verbatim}\r
-\r
-This starts to be useful. We can also easily add the case for `Localrec{ } nodes\r
-(the ones generated by {\tt "local function foo() ... end"}), where the variable\r
-is already bound in the {\tt`Localrec} statement's right-hand side; so we do the\r
-same as for {\tt`Local}, but we do it in the {\tt down()} function rather than\r
-in the up() one.\r
-\r
-We'll also take care of {\tt`Function}, {\tt`Forin} and {\tt`Fornum} nodes,\r
-which introduce new bound identifiers as function parameters or loop variables.\r
-This is quite straightforward; the only thing to take care of is to save the\r
-scope before entering the function/loop body (in {\tt down()}), and restore it\r
-when leaving (in {\tt up()}). The code becomes:\r
-\r
- \r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-local function fv (term)\r
- local freevars = { }\r
- local scope = scope:new()\r
- local cfg = { expr = { },\r
- stat = { },\r
- block = { } }\r
-\r
- -----------------------------------------------------------------------------\r
- -- Check identifiers; add functions parameters to newly created scope.\r
- -----------------------------------------------------------------------------\r
- function cfg.expr.down(x)\r
- match x with\r
- | `Id{ name } -> if not scope.current[name] then freevars[name] = true end\r
- | `Function{ params, _ } -> scope:push(); scope:add (params)\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- -----------------------------------------------------------------------------\r
- -- Close the function scope opened by 'down()'.\r
- -----------------------------------------------------------------------------\r
- function cfg.expr.up(x) \r
- match x with\r
- | `Function{ ... } -> scope:pop()\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- -----------------------------------------------------------------------------\r
- -- Create a new scope and register loop variable[s] in it\r
- -----------------------------------------------------------------------------\r
- function cfg.stat.down(x)\r
- match x with\r
- | `Forin{ vars, ... } -> scope:push(); scope:add(vars)\r
- | `Fornum{ var, ... } -> scope:push(); scope:add{var}\r
- | `Localrec{ vars, ... } -> scope:add(vars)\r
- | `Local{ ... } -> -- pass\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- -----------------------------------------------------------------------------\r
- -- Close the scopes opened by 'up()'\r
- -----------------------------------------------------------------------------\r
- function cfg.stat.up(x)\r
- match x with\r
- | `Forin{ ... } | `Fornum{ ... } -> scope:pop()\r
- | `Local{ vars, ... } -> scope:add(vars)\r
- | _ -> -- pass\r
- end\r
- end\r
-\r
- -----------------------------------------------------------------------------\r
- -- Create a separate scope for each block, close it when leaving.\r
- -----------------------------------------------------------------------------\r
- function cfg.block.down() scope:push() end\r
- function cfg.block.up() scope:pop() end\r
-\r
- walk.guess(cfg, term)\r
- return freevars\r
-end\r
-\end{Verbatim}\r
-\r
-This is almost correct now. There's one last tricky point of Lua's semantics\r
-that we need to address: in {\tt repeat foo until bar} loops, "bar" is included\r
-in "foo"'s scope. For instance, if we write {\tt repeat local x=foo() until\r
- x>3}, the "x" in the condition is the local variable "x" declared inside the\r
-body. This violates our way of handling block scopes: the scope must be kept\r
-alive after the block is finished. We'll fix this by providing a custom walking\r
-for the block inside `Repeat, and preventing the normal walking to happen by\r
-returning 'break':\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
- -----------------------------------------------------------------------------\r
- -- Create a new scope and register loop variable[s] in it\r
- -----------------------------------------------------------------------------\r
- function cfg.stat.down(x)\r
- match x with\r
- | `Forin{ vars, ... } -> scope:push(); scope:add(vars)\r
- | `Fornum{ var, ... } -> scope:push(); scope:add{var}\r
- | `Localrec{ vars, ... } -> scope:add(vars)\r
- | `Local{ ... } -> -- pass\r
- | `Repeat{ block, cond } -> -- 'cond' is in the scope of 'block'\r
- scope:push()\r
- for s in values (block) do walk.stat(cfg)(s) end -- no new scope\r
- walk.expr(cfg)(cond)\r
- scope:pop()\r
- return 'break' -- No automatic walking of subparts 'cond' and 'body'\r
- | _ -> -- pass\r
- end\r
- end\r
-\end{Verbatim}\r
-\r
-That's it, we've now got a full free variables lister, and have demonstrated\r
-most APIs offered by the basic 'walk' library. If you want to walk through\r
-identifiers in a scope-aware way, though, you'll want to look at the {\tt\r
- walk.id} library.\r
-\r
-\subsection{Library {\tt walk.id}, the scope-aware walker}\r
-\r
-This library walks AST to gather information about the identifiers in it. It\r
-call distinct visitor functions depending on whether an identifier is bound or\r
-free; moreover, when an identifier is bound, the visitor also receives its\r
-binder node as a parameter. For instance, in {\tt +\{function(x) print(x)\r
- end\}}, the bound identifier walker will be called on the \verb|+{x}| in the\r
-\verb|print| call, and the visitor's second parameter will be the {\tt`Function}\r
-node which created the local variable {\tt x}.\r
-\r
-\paragraph{API}\r
-The library is loaded with \verb|require 'walk.id'|. The walkers provided are:\r
-\begin{itemize}\r
-\item {\tt walk\_id.expr()};\r
-\item {\tt walk\_id.stat()};\r
-\item {\tt walk\_id.block()};\r
-\item {\tt walk\_id.guess()}.\r
-\end{itemize}\r
-\r
-They take the same config tables as regular walkers, except that they also\r
-recognize the following entries:\r
-\begin{itemize}\r
-\item {\tt cfg.id.free(identifier, parent, grandparent...)}, which is run on\r
- free variables;\r
-\item {\tt cfg.id.bound(identifier, binder, parent, grandparent...)}, which is\r
- run on bound variables. The statement or expression which created this bound\r
- veriable's scope is passed as a second parameter, before the parent nodes.\r
-\end{itemize}\r
-\r
-\paragraph{Examples}\r
-Let's rewrite the free variables walker above, with the id walker:\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
-function fv (term)\r
- local cfg = { id = { } }\r
- local freevars = { }\r
- function cfg.id.free(id)\r
- local id_name = id[1]\r
- freevars[id_name] = true\r
- end\r
- walk_id.guess (cfg, term)\r
- return freevars\r
-end\r
-\end{Verbatim}\r
-\r
-Now, let's $\alpha$-rename all bound variables in a term. This is slightly\r
-trickier than one could think: we need to first walk the whole tree, then\r
-perform all the replacement. If we renamed binders as we went, they would stop\r
-binding their variables, and something messy would happen. For instance, if we\r
-took {\tt +\{function(x) print(x) end\}} and renamed the binder {\tt x} into\r
-{\tt foo}, we'd then start walking the function body on the tree {\tt\r
- +\{function(foo) print(x) end\}}, where {\tt x} isn't bound anymore.\r
-\r
-\begin{Verbatim}[fontsize=\scriptsize]\r
---------------------------------------------------------------------------------\r
--- bound_vars keeps a binder node -> old_name -> new_name dictionary.\r
--- It allows to associate all instances of an identifier together,\r
--- with the binder that created them\r
---------------------------------------------------------------------------------\r
-local bound_vars = { }\r
-\r
---------------------------------------------------------------------------------\r
--- local_renames will keep all identifier nodes to rename as keys,\r
--- and their new name as values. The renaming must happen after \r
--- the whole tree has been visited, in order to avoid breaking captures.\r
---------------------------------------------------------------------------------\r
-local local_renames = { }\r
-\r
---------------------------------------------------------------------------------\r
--- Associate a new name in bound_vars when a local variable is created.\r
---------------------------------------------------------------------------------\r
-function cfg.binder (id, binder)\r
- local old_name = id[1]\r
- local binder_table = bound_vars[binder]\r
- if not binder_table then\r
- -- Create a new entry for this binder:\r
- binder_table = { }\r
- bound_vars[binder] = binder_table\r
- end\r
- local new_name = mlp.gensym(old_name)[1] -- generate a new name\r
- binder_table[old_name] = new_name -- remember name for next instances\r
- local_renames[id] = new_name -- add to the rename todo-list\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- Add a bound variable the the rename todo-list\r
---------------------------------------------------------------------------------\r
-function cfg.id.bound (id, binder)\r
- local old_name = id[1]\r
- local new_name = bound_vars[binder][old_name]\r
- local_renames[id] = new_name\r
-end\r
-\r
--- walk the tree and fill laocal_renames:\r
-walk_id.guess(cfg, ast)\r
-\r
--- perform the renaming actions:\r
-for id, new_name in pairs(local_renames) do id[1] = new_name end\r
-\end{Verbatim}\r
-\r
-\subsection{Library {\tt walk.scope}, the scope helper}\r
-This library allows to easily store data, in an AST walking function, in a scope\r
-aware way. Cf. comments in the sources for details.\r
+\section{{\tt walk}, the code walker}
+
+When you write advanced macros, or when you're analyzing some code to check for
+some property, you often need to design a function that walks through an
+arbitrary piece of code, and does complex stuff on it. Such a function is called
+a code walker. Code walkers can be used for some punctual adjustments, e.g.
+changing a function's {\tt return} statements into something else, or checking
+that a loop performs no {\tt break}, up to pretty advanced transformations, such
+as CPS transformation (a way to encode full continuations into a language that
+doesn't support them natively; see Paul Graham's On Lisp for an accessible
+description of how it works), lazy semantics...
+
+Anyway, code walkers are tricky to write, can involve a lot of boilerplate code,
+and are generally brittle. To ease things as much as possible, Metalua comes
+with a walk library, which intends to accelerate code walker implementation.
+Since code walking is intrinsically tricky, the lib won't magically make it
+trivial, but at least it will save you a lot of time and code, when compared to
+writing all walkers from scratch. Moreover, other people who took the time to
+learn the walker generator's API will enter into your code much faster.
+
+\subsection{Principles}
+
+Code walking is about traversing a tree, first from root to leaves, then from
+leaves back to the root. This tree is not uniform: some nodes are expressions,
+some others statements, some others blocks; and each of these node kinds is
+subdivided in several sub-cases (addition, numeric for loop...). The basic code
+walker just goes from root to leaves and back to root without doing anything.
+Then it's up to you to plug some action callbacks in that walker, so that it
+does interesting things for you.
+
+Without entering into the details of AST structure, here is a simplified version
+of the walking algorithm, pretending to work on a generic tree:
+
+\begin{verbatim}
+function traverse(node)
+ local down_result = down(node)
+ if down_result ~= 'break' then
+ for c in children(node) do
+      traverse(c)
+ end
+ end
+ up(node)
+end
+\end{verbatim}
+
+The algorithm behind 'walk' is almost as simple as the one above, except that
+it's specialized for AST trees. You can essentially specialize it by providing
+your own up() and down() functions. These visitor functions perform whatever
+action you want on the AST; moreover, down() has the option to return 'break':
+in that case, the sub-nodes are not traversed. It allows the user to shun parts
+of the tree, or to provide his own special traversal method in the down()
+visitor.
+
+The real walker generator is only marginally more complex than that:
+\begin{itemize}
+\item It lets you define your up() and down(), and down() can return 'break' to
+ cut the tree traversal; however, these up() and down() functions are
+ specialized by node kind: there can be separate callbacks for expr nodes, stat
+ nodes, block nodes.
+\item There's also a binder() visitor for identifier binders. Binders are
+ variables which declare a new local variable; you'll find them in nodes
+ `Local, `Localrec, `Forin, `Fornum, `Function. The binders are visited just
+ before the variable's scope begins, i.e. before traversing a loop or a
+ function's body, after traversing a `Local's right-hand side, before
+ traversing a `Localrec's right-hand side. \\
+ Notice that separate up() and down() visitors wouldn't make sense for
+  binders, since they're leaf nodes.
+\item Visitor functions don't only take the visited node as parameter: they also
+ take the list of all expr, stat and block nodes above it, up to the AST's
+ root. This allows a child node to act on its parent nodes.
+\end{itemize}
+
+\subsection{API}
+There are 3 main tree walkers: {\tt walk.expr()}, {\tt walk.stat()} and {\tt
+ walk.block()}, to walk through the corresponding kinds of ASTs. Each of these
+walkers takes as parameters a table {\tt cfg} containing the various visitor
+functions, and the AST to walk through. The configuration table {\tt cfg} can
+contain fields:
+\begin{itemize}
+\item {\tt cfg.stat.down(node, parent, grandparent...)}, which applies when
+ traversing a statement down, i.e. before its children nodes are parsed, and
+ can modify the tree, and return {\tt nil} or {\tt'break'}. The way children
+ are traversed is decided {\em after} the {\tt down()} visitor has been run:
+ this point matters when the visitor modifies its children nodes.
+\item {\tt cfg.stat.up(node, parent, grandparent...)}, which is applied on the
+ way back up. It is applied even if {\tt cfg.stat.down()} returned
+ {\tt'break'}, but in that case, the children have not been (and will not be)
+ traversed.
+\item {\tt cfg.expr.down()} and {\tt cfg.expr.up()}, which work just as their
+ {\tt stat} equivalent, but apply to expression nodes.\\
+ Notice that in Lua, function calls and method invocations can be used as
+  statements as well as expressions: in such cases, they are visited only by
+ the statement visitor, not by the expression visitor.
+\item {\tt cfg.block.down()} and {\tt cfg.block.up()} do the same for statements
+ blocks: loops, conditional and function bodies.
+\item {\tt cfg.binder(identifier, id\_parent, id\_grandparent...)}: this
+  is run on identifiers which create a new local variable, just before that
+ variable's scope begins.
+\end{itemize}
+
+Moreover, there is a {\tt walk.guess(cfg, ast)} walker which tries to guess the
+type of the AST it receives, and applies the appropriate walker. When an AST can
+be either an expression or a statement (nodes {\tt`Call} and {\tt`Invoke}), it
+is interpreted as an expression.
+
+\subsection{Examples}
+
+A bit of practice now. Let's build the simplest walker possible, that does
+nothing:
+
+\begin{verbatim}
+cfg = { }
+walker = |ast| walk.block(cfg, ast)
+\end{verbatim}
+
+Now, let's say we want to catch and remove all statement calls to function
+assert(). This can be done by removing its tag and content: an empty list is
+simply ignored in an AST. So we're only interested by `Call nodes, and within
+these nodes, we want the function to be `Id 'assert'. All of this is only
+relevant to stat nodes:
+
+\begin{verbatim}
+function cfg.stat.down (x)
+ match x with
+ | `Call{ `Id 'assert', ... } -> x.tag=nil; x <- { }
+ | _ -> -- not interested by this node, do nothing
+ end
+end
+\end{verbatim}
+
+You'll almost always want to use the 'match' extension to implement visitors.
+The imperative table overrider ({\tt x <- y} a.k.a. {\tt table.override(x, y)})
+also often comes handy to modify an AST.
+
+We'll now remove {\tt assert()} calls in non-statement contexts; we cannot
+replace an expression by nothing, so these nodes will simply be
+replaced by {\tt nil}:
+
+\begin{verbatim}
+function cfg.expr.down (x)
+ match x with
+ | `Call{ `Id 'assert', ... } -> x <- `Nil
+ | _ -> -- not interested by this node, do nothing
+ end
+end
+\end{verbatim}
+
+
+Here's a remark for functional programmers: this API is very imperative; you
+might cringe at seeing the `Call nodes transformed in-place. Well, I tend to
+agree but it's generally counter-productive to work against the grain of the
+wood: Lua is imperative at heart, and design attempts at doing this functionally
+sucked more than approaches that embraced imperativeness.
+
+\paragraph{Cuts}
+By making down() return 'break', you can prevent the traversal to go further
+down. This might be either because you're not interested by the subtrees, or
+because you want to traverse them in a special way. In that latter case, just do
+the traversal by yourself in the down() function, and cut the walking by
+returning 'break', so that nodes aren't re-traversed by the default walking
+algorithm. We'll see that in the next, more complex example, listing of free
+variables.
+
+This example is exclusively there for demonstration purposes. For actual work on
+identifiers that require awareness of an identifier's binder or freedom, there
+is a dedicated {\tt walk.id} library.
+
+We'll progressively build a walker that gathers all global variables used in a
+given AST. This involves keeping, at all times, a set of the identifiers
+currently bound by a "local" declaration, by function parameters, as for loop
+variables etc. Then, every time an identifier is found in the AST, its presence
+is checked in the current set of bound variables. If it isn't in it, then it's a
+free (global) identifier.
+
+The first thing we'll need is a scope handling system: something that keeps
+track of what identifiers are currently in scope. It must also allow to save the
+current scope (e.g. before we enter a new block) and restore it afterwards (e.g.
+after we leave the block). This is quite straightforward and unrelated to code
+walking; here is the code:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+require 'std'
+require 'walk'
+
+-{ extension 'match' }
+
+--------------------------------------------------------------------------------
+-- Scope handling: ':push()' saves the current scope, ':pop()'
+-- restores the previously saved one. ':add(identifiers_list)' adds
+-- identifiers to the current scope. Current scope is stored in
+-- '.current', as a string->boolean hashtable.
+--------------------------------------------------------------------------------
+
+local scope = { }
+scope.__index = scope
+
+function scope:new()
+ local ret = { current = { } }
+ ret.stack = { ret.current }
+ setmetatable (ret, self)
+ return ret
+end
+
+function scope:push()
+ table.insert (self.stack, table.shallow_copy (self.current))
+end
+
+function scope:pop()
+ self.current = table.remove (self.stack)
+end
+
+function scope:add (vars)
+ for id in values (vars) do
+ match id with `Id{ x } -> self.current[x] = true end
+ end
+end
+\end{Verbatim}
+
+(There is an improved version of that class in library {\tt walk.scope}; cf.
+its documentation for details).
+
+Now let's start designing the walker. We'll keep a scope object up to date, as
+well as a set of found free identifiers, gathered every time we find an `Id{ }
+node. To slightly simplify matters, we'll consider that the AST represents a
+block.
+
+\begin{Verbatim}[fontsize=\scriptsize]
+local function fv (term)
+ local freevars = { }
+ local scope = scope:new()
+ local cfg = { expr = { } }
+
+ function cfg.expr.down(x)
+ match x with
+ | `Id{ name } -> if not scope.current[name] then freevars[name] = true end
+ | _ -> -- pass
+ end
+ end
+
+ walk.guess(cfg, term)
+ return freevars
+end
+\end{Verbatim}
+
+Since we don't ever add any identifier to the scope, this will just list all the
+identifiers used in the AST, bound or not. Now let's start doing more
+interesting things:
+
+\begin{itemize}
+\item We'll save the scope's state before entering a new block, and restore it
+ when we leave it. That will be done by providing functions {\tt cfg.block.down()}
+ and {\tt cfg.block.up()}. Saving and restoring will be performed by methods
+ {\tt :push()} and {\tt :pop()}.
+\item Whenever we find a {\tt local} declaration, we'll add the list of
+ identifiers to the current scope, so that they won't be gathered when parsing
+ the {\tt `Id} expression nodes. Notice that the identifiers declared by the
+ 'local' statement only start being valid after the statement, so we'll add
+ them in the {\tt cfg.stat.up()} function rather than {\tt cfg.stat.down()}.
+\end{itemize}
+
+\begin{Verbatim}[fontsize=\scriptsize]
+local cfg = { expr = { },
+ stat = { },
+ block = { } }
+ [...]
+ function cfg.stat.up(x)
+ match x with
+ | `Local{ vars, ... } -> scope:add(vars)
+ | _ -> -- pass
+ end
+ end
+
+ -----------------------------------------------------------------------------
+ -- Create a separate scope for each block, close it when leaving.
+ -----------------------------------------------------------------------------
+ function cfg.block.down() scope:push() end
+ function cfg.block.up() scope:pop() end
+\end{Verbatim}
+
+This starts to be useful. We can also easily add the case for `Localrec{ } nodes
+(the ones generated by {\tt "local function foo() ... end"}), where the variable
+is already bound in the {\tt`Localrec} statement's right-hand side; so we do the
+same as for {\tt`Local}, but we do it in the {\tt down()} function rather than
+in the up() one.
+
+We'll also take care of {\tt`Function}, {\tt`Forin} and {\tt`Fornum} nodes,
+which introduce new bound identifiers as function parameters or loop variables.
+This is quite straightforward; the only thing to take care of is to save the
+scope before entering the function/loop body (in {\tt down()}), and restore it
+when leaving (in {\tt up()}). The code becomes:
+
+
+\begin{Verbatim}[fontsize=\scriptsize]
+local function fv (term)
+ local freevars = { }
+ local scope = scope:new()
+ local cfg = { expr = { },
+ stat = { },
+ block = { } }
+
+ -----------------------------------------------------------------------------
+ -- Check identifiers; add functions parameters to newly created scope.
+ -----------------------------------------------------------------------------
+ function cfg.expr.down(x)
+ match x with
+ | `Id{ name } -> if not scope.current[name] then freevars[name] = true end
+ | `Function{ params, _ } -> scope:push(); scope:add (params)
+ | _ -> -- pass
+ end
+ end
+
+ -----------------------------------------------------------------------------
+ -- Close the function scope opened by 'down()'.
+ -----------------------------------------------------------------------------
+ function cfg.expr.up(x)
+ match x with
+ | `Function{ ... } -> scope:pop()
+ | _ -> -- pass
+ end
+ end
+
+ -----------------------------------------------------------------------------
+ -- Create a new scope and register loop variable[s] in it
+ -----------------------------------------------------------------------------
+ function cfg.stat.down(x)
+ match x with
+ | `Forin{ vars, ... } -> scope:push(); scope:add(vars)
+ | `Fornum{ var, ... } -> scope:push(); scope:add{var}
+ | `Localrec{ vars, ... } -> scope:add(vars)
+ | `Local{ ... } -> -- pass
+ | _ -> -- pass
+ end
+ end
+
+ -----------------------------------------------------------------------------
+  -- Close the scopes opened by 'down()'
+ -----------------------------------------------------------------------------
+ function cfg.stat.up(x)
+ match x with
+ | `Forin{ ... } | `Fornum{ ... } -> scope:pop()
+ | `Local{ vars, ... } -> scope:add(vars)
+ | _ -> -- pass
+ end
+ end
+
+ -----------------------------------------------------------------------------
+ -- Create a separate scope for each block, close it when leaving.
+ -----------------------------------------------------------------------------
+ function cfg.block.down() scope:push() end
+ function cfg.block.up() scope:pop() end
+
+ walk.guess(cfg, term)
+ return freevars
+end
+\end{Verbatim}
+
+This is almost correct now. There's one last tricky point of Lua's semantics
+that we need to address: in {\tt repeat foo until bar} loops, "bar" is included
+in "foo"'s scope. For instance, if we write {\tt repeat local x=foo() until
+ x>3}, the "x" in the condition is the local variable "x" declared inside the
+body. This violates our way of handling block scopes: the scope must be kept
+alive after the block is finished. We'll fix this by providing a custom walking
+for the block inside `Repeat, and preventing the normal walking to happen by
+returning 'break':
+
+\begin{Verbatim}[fontsize=\scriptsize]
+ -----------------------------------------------------------------------------
+ -- Create a new scope and register loop variable[s] in it
+ -----------------------------------------------------------------------------
+ function cfg.stat.down(x)
+ match x with
+ | `Forin{ vars, ... } -> scope:push(); scope:add(vars)
+ | `Fornum{ var, ... } -> scope:push(); scope:add{var}
+ | `Localrec{ vars, ... } -> scope:add(vars)
+ | `Local{ ... } -> -- pass
+ | `Repeat{ block, cond } -> -- 'cond' is in the scope of 'block'
+ scope:push()
+ for s in values (block) do walk.stat(cfg)(s) end -- no new scope
+ walk.expr(cfg)(cond)
+ scope:pop()
+ return 'break' -- No automatic walking of subparts 'cond' and 'body'
+ | _ -> -- pass
+ end
+ end
+\end{Verbatim}
+
+That's it, we've now got a full free variables lister, and have demonstrated
+most APIs offered by the basic 'walk' library. If you want to walk through
+identifiers in a scope-aware way, though, you'll want to look at the {\tt
+ walk.id} library.
+
+\subsection{Library {\tt walk.id}, the scope-aware walker}
+
+This library walks an AST to gather information about the identifiers in it. It
+calls distinct visitor functions depending on whether an identifier is bound or
+free; moreover, when an identifier is bound, the visitor also receives its
+binder node as a parameter. For instance, in {\tt +\{function(x) print(x)
+ end\}}, the bound identifier walker will be called on the \verb|+{x}| in the
+\verb|print| call, and the visitor's second parameter will be the {\tt`Function}
+node which created the local variable {\tt x}.
+
+\paragraph{API}
+The library is loaded with \verb|require 'walk.id'|. The walkers provided are:
+\begin{itemize}
+\item {\tt walk\_id.expr()};
+\item {\tt walk\_id.stat()};
+\item {\tt walk\_id.block()};
+\item {\tt walk\_id.guess()}.
+\end{itemize}
+
+They take the same config tables as regular walkers, except that they also
+recognize the following entries:
+\begin{itemize}
+\item {\tt cfg.id.free(identifier, parent, grandparent...)}, which is run on
+ free variables;
+\item {\tt cfg.id.bound(identifier, binder, parent, grandparent...)}, which is
+ run on bound variables. The statement or expression which created this bound
+  variable's scope is passed as a second parameter, before the parent nodes.
+\end{itemize}
+
+\paragraph{Examples}
+Let's rewrite the free variables walker above, with the id walker:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+function fv (term)
+ local cfg = { id = { } }
+ local freevars = { }
+ function cfg.id.free(id)
+ local id_name = id[1]
+ freevars[id_name] = true
+ end
+ walk_id.guess (cfg, term)
+ return freevars
+end
+\end{Verbatim}
+
+Now, let's $\alpha$-rename all bound variables in a term. This is slightly
+trickier than one could think: we need to first walk the whole tree, then
+perform all the replacement. If we renamed binders as we went, they would stop
+binding their variables, and something messy would happen. For instance, if we
+took {\tt +\{function(x) print(x) end\}} and renamed the binder {\tt x} into
+{\tt foo}, we'd then start walking the function body on the tree {\tt
+ +\{function(foo) print(x) end\}}, where {\tt x} isn't bound anymore.
+
+\begin{Verbatim}[fontsize=\scriptsize]
+--------------------------------------------------------------------------------
+-- bound_vars keeps a binder node -> old_name -> new_name dictionary.
+-- It associates all instances of an identifier with the binder
+-- that created them
+--------------------------------------------------------------------------------
+local bound_vars = { }
+
+--------------------------------------------------------------------------------
+-- local_renames will keep all identifier nodes to rename as keys,
+-- and their new name as values. The renaming must happen after
+-- the whole tree has been visited, in order to avoid breaking captures.
+--------------------------------------------------------------------------------
+local local_renames = { }
+
+--------------------------------------------------------------------------------
+-- Associate a new name in bound_vars when a local variable is created.
+--------------------------------------------------------------------------------
+function cfg.binder (id, binder)
+ local old_name = id[1]
+ local binder_table = bound_vars[binder]
+ if not binder_table then
+ -- Create a new entry for this binder:
+ binder_table = { }
+ bound_vars[binder] = binder_table
+ end
+ local new_name = mlp.gensym(old_name)[1] -- generate a new name
+ binder_table[old_name] = new_name -- remember name for next instances
+ local_renames[id] = new_name -- add to the rename todo-list
+end
+
+--------------------------------------------------------------------------------
+-- Add a bound variable to the rename todo-list
+--------------------------------------------------------------------------------
+function cfg.id.bound (id, binder)
+ local old_name = id[1]
+ local new_name = bound_vars[binder][old_name]
+ local_renames[id] = new_name
+end
+
+-- walk the tree and fill local_renames:
+walk_id.guess(cfg, ast)
+
+-- perform the renaming actions:
+for id, new_name in pairs(local_renames) do id[1] = new_name end
+\end{Verbatim}
+
+\subsection{Library {\tt walk.scope}, the scope helper}
+This library makes it easy to store data, in an AST-walking function, in a
+scope-aware way. Cf. comments in the sources for details.
-$Id$\r
-\r
-This changelog is maintained as of version 2.0alpha1. \r
-Earlier versions are changelogged on the LuaForge site.\r
-\r
--- 2.0alpha1 --\r
-* Fixed all outstanding 5.0->5.1 conversion issues\r
-* Made heavier use of size_t in preference to int\r
-* Fixed GC/Upval issue (thanks to Eric Jacobs)\r
-\r
+$Id$
+
+This changelog is maintained as of version 2.0alpha1.
+Earlier versions are changelogged on the LuaForge site.
+
+-- 2.0alpha1 --
+* Fixed all outstanding 5.0->5.1 conversion issues
+* Made heavier use of size_t in preference to int
+* Fixed GC/Upval issue (thanks to Eric Jacobs)
+
-$Id$\r
-\r
-PLUTO - Heavy duty persistence for Lua\r
-\r
-Pluto is a library which allows users to write arbitrarily large portions\r
-of the "Lua universe" into a flat file, and later read them back into the\r
-same or a different Lua universe. Object references are appropriately\r
-handled, such that the file contains everything needed to recreate the\r
-objects in question.\r
-\r
-Pluto has the following major features: \r
-* Can persist any Lua function\r
-* Can persist threads \r
-* Works with any Lua chunkreader/chunkwriter \r
-* Support for "invariant" permanent objects, of all datatypes\r
-* Can invoke metafunctions for custom persistence of tables and userdata\r
-\r
-Pluto 2.0 requires Lua 5.1 or later. If you need to use Pluto with Lua\r
-5.0, please use version 1.2 of Pluto.\r
-\r
-Pluto may have bugs. Users are advised to define lua_assert in \r
-luaconf.h to something useful when compiling in debug mode, to catch\r
-assertions by Pluto and Lua.\r
-\r
-The Pluto library consists of two public functions.\r
-\r
-int pluto_persist(lua_State *L, lua_Chunkwriter writer, void *ud)\r
-\r
-This function recursively persists the Lua object in stack position 2\r
-and all other objects which are directly or indirectly referenced by\r
-it, except those referenced in the permanent object table. The data\r
-is written using the chunk-writer given, and that writer is passed\r
-the arbitrary pointer value ud.\r
-\r
-The Lua stack must contain exactly and only these two items, in order:\r
-\r
-1. A table of permanent objects, that should not be persisted. For each\r
-permanent object, the object itself should be the key, and a unique\r
-object of any type should be the value. Likely candidates for this table \r
-include Lua functions (including those in the Lua libraries) that are \r
-loaded at load-time. It must include all non-persistable objects that \r
-are referenced by the object to be persisted. The table is not modified \r
-by the function. Objects in this table are considered "opaque" and are \r
-not examined or descended into. Objects should not appear in the table \r
-multiple times; the result of doing this is undefined (though probably \r
-harmless). NOTE: If you are planning to persist threads, keep in mind \r
-that all yielded threads have coroutine.yield on the tops of their \r
-stacks. Since it's a C function, it should be put here. For complex \r
-permanents, it may be a good idea to use the __index meta-function of \r
-the permanents table to "search" for permanents.\r
-\r
-2. The single object to be persisted. In many cases, this will be the\r
-global table. For more flexibility, however, it may be something like a\r
-table built for the occasion, with various values to keep track of. The\r
-object may not be nil.\r
-\r
-\r
-int pluto_unpersist(lua_State *L, lua_Chunkreader reader, void *ud)\r
-\r
-This function loads in a Lua object and places it on top of the stack. All\r
-objects directly or indirectly referenced by it are also loaded.\r
-\r
-The Lua stack must contain, as its top value, a table of permanent\r
-objects. This table should be like the permanent object table used when\r
-persisting, but with the key and value of each pair reversed. These \r
-objects are used as substitutes for those referenced in their positions \r
-when persisting, and under most circumstances should be identical objects\r
-to those referenced in the permanents table used for persisting. It's \r
-okay for multiple keys to refer to the same object.\r
-\r
-\r
-RUNNING PLUTO FROM LUA:\r
-It is also possible to invoke pluto from a Lua script. The C function\r
-pluto_open() will register pluto.persist and pluto.unpersist, lua functions\r
-which operate on strings. The first takes a permanents table and a root \r
-object, and returns a string; the second takes a permanents table and a \r
-string, and returns the root object.\r
-\r
-An error will be raised if pluto.persist is called from a thread which is\r
-itself referenced by the root object.\r
-\r
-SPECIAL PERSISTENCE:\r
-Tables and userdata have special persistence semantics. These semantics are\r
-keyed to the value of the object's metatable's __persist member, if any. This\r
-member may be any of the following four values:\r
-1. Boolean "true": The table or userdata is persisted literally; tables are\r
-persisted member-by-member, and userdata are written out as literal data.\r
-2. Boolean "false": An error is returned, indicating that the object cannot\r
-be persisted.\r
-3. A function: This function should take one argument, the object in question,\r
-and return one result, a closure. This "fixup closure", in turn, will be \r
-persisted, and during unpersistence will be called. The closure will be \r
-responsible for recreating the object with the appropriate data, based on \r
-its upvalues.\r
-4. Nil, or no metatable. In the case of tables, the table is literally\r
-persisted. In the case of userdata, an error is returned.\r
-\r
-Here's an example of special persistence for a simple 3d vector object:\r
-\r
-vec = { x = 2, y = 1, z = 4 }\r
-setmetatable(vec, { __persist = function(oldtbl)\r
- local x = oldtbl.x\r
- local y = oldtbl.y\r
- local z = oldtbl.z\r
- local mt = getmetatable(oldtbl)\r
- return function()\r
- newtbl = {}\r
- newtbl.x = x\r
- newtbl.y = y\r
- newtbl.z = z\r
- setmetatable(newtbl, mt)\r
- return newtbl\r
- end\r
-end })\r
-\r
-Note how x, y, z, and the mt are explicitly pulled out of the table. It is \r
-important that the fixup closure returned not reference the original table \r
-directly, as that table would again be persisted as an upvalue, leading to an \r
-infinite loop. Also note that the object's metatable is NOT automatically \r
-persisted; it is necessary for the fixup closure to reset it, if it wants.\r
-\r
-LIMITATIONS/TODO: \r
-* Light userdata are persisted literally, as their pointer values. This \r
-may or may not be what you want. \r
-* Closures of C functions may not be persisted. Once it becomes possible\r
-to specify a C function "proto" as a permanent object, this restriction\r
-will be relaxed.\r
-\r
-BUGS: None known. Emphasis on the 'known'.\r
+$Id$
+
+PLUTO - Heavy duty persistence for Lua
+
+Pluto is a library which allows users to write arbitrarily large portions
+of the "Lua universe" into a flat file, and later read them back into the
+same or a different Lua universe. Object references are appropriately
+handled, such that the file contains everything needed to recreate the
+objects in question.
+
+Pluto has the following major features:
+* Can persist any Lua function
+* Can persist threads
+* Works with any Lua chunkreader/chunkwriter
+* Support for "invariant" permanent objects, of all datatypes
+* Can invoke metafunctions for custom persistence of tables and userdata
+
+Pluto 2.0 requires Lua 5.1 or later. If you need to use Pluto with Lua
+5.0, please use version 1.2 of Pluto.
+
+Pluto may have bugs. Users are advised to define lua_assert in
+luaconf.h to something useful when compiling in debug mode, to catch
+assertions by Pluto and Lua.
+
+The Pluto library consists of two public functions.
+
+int pluto_persist(lua_State *L, lua_Chunkwriter writer, void *ud)
+
+This function recursively persists the Lua object in stack position 2
+and all other objects which are directly or indirectly referenced by
+it, except those referenced in the permanent object table. The data
+is written using the chunk-writer given, and that writer is passed
+the arbitrary pointer value ud.
+
+The Lua stack must contain exactly and only these two items, in order:
+
+1. A table of permanent objects, that should not be persisted. For each
+permanent object, the object itself should be the key, and a unique
+object of any type should be the value. Likely candidates for this table
+include Lua functions (including those in the Lua libraries) that are
+loaded at load-time. It must include all non-persistable objects that
+are referenced by the object to be persisted. The table is not modified
+by the function. Objects in this table are considered "opaque" and are
+not examined or descended into. Objects should not appear in the table
+multiple times; the result of doing this is undefined (though probably
+harmless). NOTE: If you are planning to persist threads, keep in mind
+that all yielded threads have coroutine.yield on the tops of their
+stacks. Since it's a C function, it should be put here. For complex
+permanents, it may be a good idea to use the __index meta-function of
+the permanents table to "search" for permanents.
+
+2. The single object to be persisted. In many cases, this will be the
+global table. For more flexibility, however, it may be something like a
+table built for the occasion, with various values to keep track of. The
+object may not be nil.
+
+
+int pluto_unpersist(lua_State *L, lua_Chunkreader reader, void *ud)
+
+This function loads in a Lua object and places it on top of the stack. All
+objects directly or indirectly referenced by it are also loaded.
+
+The Lua stack must contain, as its top value, a table of permanent
+objects. This table should be like the permanent object table used when
+persisting, but with the key and value of each pair reversed. These
+objects are used as substitutes for those referenced in their positions
+when persisting, and under most circumstances should be identical objects
+to those referenced in the permanents table used for persisting. It's
+okay for multiple keys to refer to the same object.
+
+
+RUNNING PLUTO FROM LUA:
+It is also possible to invoke pluto from a Lua script. The C function
+pluto_open() will register pluto.persist and pluto.unpersist, lua functions
+which operate on strings. The first takes a permanents table and a root
+object, and returns a string; the second takes a permanents table and a
+string, and returns the root object.
+
+An error will be raised if pluto.persist is called from a thread which is
+itself referenced by the root object.
+
+SPECIAL PERSISTENCE:
+Tables and userdata have special persistence semantics. These semantics are
+keyed to the value of the object's metatable's __persist member, if any. This
+member may be any of the following four values:
+1. Boolean "true": The table or userdata is persisted literally; tables are
+persisted member-by-member, and userdata are written out as literal data.
+2. Boolean "false": An error is returned, indicating that the object cannot
+be persisted.
+3. A function: This function should take one argument, the object in question,
+and return one result, a closure. This "fixup closure", in turn, will be
+persisted, and during unpersistence will be called. The closure will be
+responsible for recreating the object with the appropriate data, based on
+its upvalues.
+4. Nil, or no metatable. In the case of tables, the table is literally
+persisted. In the case of userdata, an error is returned.
+
+Here's an example of special persistence for a simple 3d vector object:
+
+vec = { x = 2, y = 1, z = 4 }
+setmetatable(vec, { __persist = function(oldtbl)
+ local x = oldtbl.x
+ local y = oldtbl.y
+ local z = oldtbl.z
+ local mt = getmetatable(oldtbl)
+ return function()
+ newtbl = {}
+ newtbl.x = x
+ newtbl.y = y
+ newtbl.z = z
+ setmetatable(newtbl, mt)
+ return newtbl
+ end
+end })
+
+Note how x, y, z, and the mt are explicitly pulled out of the table. It is
+important that the fixup closure returned not reference the original table
+directly, as that table would again be persisted as an upvalue, leading to an
+infinite loop. Also note that the object's metatable is NOT automatically
+persisted; it is necessary for the fixup closure to reset it, if it wants.
+
+LIMITATIONS/TODO:
+* Light userdata are persisted literally, as their pointer values. This
+may or may not be what you want.
+* Closures of C functions may not be persisted. Once it becomes possible
+to specify a C function "proto" as a permanent object, this restriction
+will be relaxed.
+
+BUGS: None known. Emphasis on the 'known'.
-----------------------------------------------------------------------\r
--- Metalua: $Id$\r
---\r
--- Summary: Hygienic macro facility for Metalua\r
---\r
-----------------------------------------------------------------------\r
---\r
--- Copyright (c) 2006, Fabien Fleutot <metalua@gmail.com>.\r
---\r
--- This software is released under the MIT Licence, see licence.txt\r
--- for details.\r
---\r
---------------------------------------------------------------------------------\r
---\r
--- =============\r
--- W A R N I N G\r
--- =============\r
---\r
--- THIS IS AN OLD NAIVE IMPLEMENTATION. IT'S PARATIAL (NO HYGIENE WRT OUTSIDE)\r
--- AND WRITTEN FROM SCRATCH WITH PATTERN MATCHING. MUST BE DONE WITH A WALKER.\r
---\r
--- Traditional macros carry a well-known pitfall, called variable capture:\r
--- when pasting a piece of source code A into another code B, if B bonds some\r
--- variables used by A, then the meaning of A is modified in a way probably\r
--- not intended by the user.\r
---\r
--- Example:\r
--- A = +{ n = 5 }\r
--- B = +{ local n=3; -{ A } }\r
--- \r
--- In this example, [n] in [A] will be captured by the local variable declared\r
--- by [B], and this is probably a bug.\r
--- \r
--- Notice that this also exists in C. Typical example:\r
---\r
--- #define swap (type, a, b) do { type tmp=a; a=b; b=tmp } while(0)\r
--- void f() {\r
--- int tmp=1, a=2; \r
--- swap (int, tmp, a); // won't work, [tmp] is captured in the macro\r
--- }\r
---\r
--- We can fix this by making sure that all local variables and parameters\r
--- created by [B] have fresh names. [mlp.gensym()] produces guaranteed-to-be-unique\r
--- variable names; we use it to replace all local var names declarations and \r
--- occurences in [B] by such fresh names.\r
---\r
--- Such macros which are guaranteed not to capture any variable are called\r
--- hygienic macros. By extension, an AST guaranteed not to contain capturing\r
--- variables is called an hygienic AST.\r
---\r
--- We implement here some functions which make sure that an AST is hygienic:\r
--- \r
--- - [hygienize_stat (ast)] for statement AST;\r
--- - [hygienize_stat (ast)] for statement block AST;\r
--- - [hygienize_expr (ast)] for expression AST;\r
---\r
--- This sample deconstructs AST by structural pattern matching, which is\r
--- supported by Metalua extension "match.lua"\r
---\r
---------------------------------------------------------------------------------\r
-\r
--{ extension "match" }\r
-\r
-require "std"\r
-\r
-local clone_ctx = std.shallow_copy\r
-\r
---------------------------------------------------------------------------------\r
--- Tag tables: these allow [hygienize] to decide whether an AST is\r
--- an expression, a statement, or something which isn't changed by\r
--- alpha renaming.\r
---------------------------------------------------------------------------------\r
-local stat_tags = {\r
- Do = true, Let = true,\r
- While = true, Repeat = true,\r
- If = true, Fornum = true,\r
- Forin = true, Local = true,\r
- Localrec = true, Return = true }\r
-\r
-local expr_tags = {\r
- Function = true, Table = true,\r
- Op = true, Call = true,\r
- Method = true, Index = true }\r
-\r
-local neutral_tags = {\r
- String = true, Number = true,\r
- True = true, False = true,\r
- Dots = true, Break = true,\r
- Id = true }\r
-\r
---------------------------------------------------------------------------------\r
--- Choose the relevant [hygienize_xxx()] function according to the AST's tag\r
--- and the tables above.\r
---------------------------------------------------------------------------------\r
-function hygienize (ast)\r
- if not ast.tag then hygienize_block (ast)\r
- elseif neutral_tags[ast.tag] then -- pass\r
- elseif stat_tags[ast.tag] then hygienize_stat (ast) \r
- elseif expr_tags[ast.tag] then hygienize_expr (ast) \r
- else error "Unrecognized AST" end\r
- return ast\r
-end\r
-\r
-if mlp then\r
- -- Add hygienic parsers for quotes\r
- mlp.hexpr = hygienize `o` mlp.expr\r
- mlp.hstat = hygienize `o` mlp.stat\r
- mlp.hblock = hygienize `o` mlp.block\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- Make a statement AST hygienic. The optional [ctx] parameter is a\r
--- [old_name -> new_name] map, which holds variable name substitutions\r
--- to perform.\r
---------------------------------------------------------------------------------\r
-function hygienize_stat (ast, ctx)\r
- if not ctx then ctx = { } end\r
- match ast with\r
- | { ... } if not ast.tag -> hygienize_block (ast, ctx)\r
- | `Do{ ... } -> hygienize_block (ast, clone_ctx (ctx))\r
-\r
- | `Let{ vars, vals } -> \r
- hygienize_expr_list (vars, ctx)\r
- hygienize_expr_list (vals, ctx)\r
-\r
- | `While{ cond, block } ->\r
- hygienize_expr (cond, ctx)\r
- -- use a clone of [ctx], since the block has a separate scope\r
- hygienize_block (ast, clone_ctx (ctx))\r
-\r
- | `Repeat{ block, cond } ->\r
- -- use a clone of [ctx], since the block has a separate scope.\r
- -- Notice that the condition in [repeat ... until] is evaluated\r
- -- inside the block's scope, i.e. with [inner_ctx] rather than [ctx].\r
- local inner_ctx = clone_ctx (ctx)\r
- hygienize_block (ast, inner_ctx)\r
- hygienize (cond, inner_ctx)\r
-\r
- | `If{ ... } ->\r
- for i=1, #ast-1, 2 do\r
- hygienize_expr (ast[i], ctx) -- condtion\r
- -- each block has its own scope\r
- hygienize_block (ast[i+1], clone_ctx (ctx)) -- conditional block\r
- end\r
- if #ast % 2 == 1 then \r
- hygienize_block (ast[#ast], clone_ctx (ctx)) -- else block\r
- end\r
-\r
- | `Fornum{ var, ... } ->\r
- hygienize_expr (ast[i], ctx, 2, #ast-1) -- start, finish, step? exprs\r
- local inner_ctx = clone_ctx (ctx)\r
- alpha_rename (var, inner_ctx) -- rename local var [var] in [inner_ctx]\r
- hygienize_block (ast[#ast], inner_ctx)\r
- \r
- | `Forin{ vars, vals, block } ->\r
- hygienize_expr_list (vals, ctx)\r
- local inner_ctx = clone_ctx (ctx)\r
- alpha_rename_list (vars, inner_ctx) -- rename local vars [vars] in [inner_ctx]\r
- hygienize_block (block, inner_ctx) \r
- \r
- | `Local{ vars, vals } ->\r
- -- locals only enter in scope after their values are computed\r
- -- --> parse values first, then rename vars\r
- hygienize_expr_list (vals, ctx)\r
- alpha_rename_list (vars, ctx)\r
- \r
- | `Localrec{ vars, vals } ->\r
- -- As opposed to [`Local], vars are in scope during their values' \r
- -- computation --> rename before parsing values. \r
- alpha_rename_list (vars, ctx)\r
- hygienize_expr_list (vals, ctx)\r
-\r
- | `Call{ ... } | `Method{ ... } -> \r
- -- these are actually expr, delegate to [hygienize_expr]\r
- hygienize_expr (ast, ctx)\r
-\r
- | `Return{ ... } -> hygienize_expr_list (ast, ctx)\r
- | `Break -> \r
- | _ -> error ("Unknown statement "..ast.tag)\r
- end\r
-end\r
-\r
-\r
---------------------------------------------------------------------------------\r
--- Make an expression AST hygienic. The optional [ctx] parameter is a\r
--- [old_name -> new_name] map, which holds variable name substitutions\r
--- to perform.\r
---------------------------------------------------------------------------------\r
-function hygienize_expr (ast, ctx)\r
- if not ctx then ctx = { } end\r
- match ast with\r
- | `String{ _ } | `Number{ _ } | `True | `False | `Dots -> -- nothing\r
-\r
- | `Function{ params, block } ->\r
- local inner_ctx = clone_ctx (ctx)\r
- alpha_rename_list (params, inner_ctx)\r
- hygienize_block (block, inner_ctx)\r
- \r
- | `Table{ ... } ->\r
- for _, x in ipairs (ast) do\r
- match x with\r
- | `Key{ key, val } -> \r
- hygienize_expr (key, ctx)\r
- hygienize_expr (val, ctx)\r
- | _ -> hygienize (x, ctx)\r
- end\r
- end\r
-\r
- | `Id{ x } ->\r
- -- Check for substitutions to apply:\r
- local y = ctx[x]; if y then ast[1] = y end\r
-\r
- | `Op{ op, ... } ->\r
- hygienize_expr_list (ast, ctx, 2, #ast)\r
-\r
- -- Just dispatch to sub-expressions:\r
- | `Call{ func, ... }\r
- | `Method{ obj, `String{ name }, ... }\r
- | `Index{ table, key } ->\r
- hygienize_expr_list (ast, ctx)\r
- | _ -> error ("Unknown expression "..ast.tag)\r
- end\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- Make an statements block AST hygienic. The optional [ctx] parameter is a\r
--- [old_name -> new_name] map, which holds variable name substitutions\r
--- to perform.\r
---------------------------------------------------------------------------------\r
-function hygienize_block (ast, ctx)\r
- if not ctx then ctx = { } end\r
- table.iter ((|x| hygienize(x, ctx)), ast)\r
--- for i = 1, #ast do\r
--- hygienize_stat (ast[i], ctx)\r
--- end\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- Makes a shallow copy of a table. Used to make a copy of [ctx] substitution\r
--- tables, when entering a new scope.\r
---------------------------------------------------------------------------------\r
---[[\r
-function clone_ctx (ctx)\r
- local r = { }\r
- for k, v in pairs (ctx) do r[k] = v end\r
- return r\r
-end\r
-]]\r
-\r
---------------------------------------------------------------------------------\r
--- Make every expression from index [start] to [finish], in list\r
--- [ast], hygienic. The optional [ctx] parameter is a [old_name ->\r
--- new_name] map, which holds variable name substitutions to perform.\r
--- [start] defaults to 1, [finish] defaults to the list's size.\r
---------------------------------------------------------------------------------\r
-function hygienize_expr_list (ast, ctx, start, finish)\r
- for i = start or 1, finish or #ast do \r
- hygienize_expr (ast[i], ctx)\r
- end\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- Replace the identifier [var]'s name with a fresh one generated by\r
--- [mlp.gensym()], and store the new association in [ctx], so that the\r
--- calling function will be able to substitute identifier occurences with\r
--- its new name.\r
---------------------------------------------------------------------------------\r
-function alpha_rename (var, ctx)\r
- assert (var.tag == "Id")\r
- ctx[var[1]] = mlp.gensym()[1]\r
- var[1] = ctx[var[1]]\r
-end\r
-\r
---------------------------------------------------------------------------------\r
--- runs [alpha_rename] on a list of identifiers.\r
---------------------------------------------------------------------------------\r
-function alpha_rename_list (vars, ctx)\r
- for _, v in ipairs(vars) do alpha_rename (v, ctx) end\r
-end\r
+----------------------------------------------------------------------
+-- Metalua: $Id$
+--
+-- Summary: Hygienic macro facility for Metalua
+--
+----------------------------------------------------------------------
+--
+-- Copyright (c) 2006, Fabien Fleutot <metalua@gmail.com>.
+--
+-- This software is released under the MIT Licence, see licence.txt
+-- for details.
+--
+--------------------------------------------------------------------------------
+--
+-- =============
+-- W A R N I N G
+-- =============
+--
+-- THIS IS AN OLD NAIVE IMPLEMENTATION. IT'S PARTIAL (NO HYGIENE WRT OUTSIDE)
+-- AND WRITTEN FROM SCRATCH WITH PATTERN MATCHING. MUST BE DONE WITH A WALKER.
+--
+-- Traditional macros carry a well-known pitfall, called variable capture:
+-- when pasting a piece of source code A into another code B, if B binds some
+-- variables used by A, then the meaning of A is modified in a way probably
+-- not intended by the user.
+--
+-- Example:
+-- A = +{ n = 5 }
+-- B = +{ local n=3; -{ A } }
+--
+-- In this example, [n] in [A] will be captured by the local variable declared
+-- by [B], and this is probably a bug.
+--
+-- Notice that this also exists in C. Typical example:
+--
+-- #define swap (type, a, b) do { type tmp=a; a=b; b=tmp } while(0)
+-- void f() {
+-- int tmp=1, a=2;
+-- swap (int, tmp, a); // won't work, [tmp] is captured in the macro
+-- }
+--
+-- We can fix this by making sure that all local variables and parameters
+-- created by [B] have fresh names. [mlp.gensym()] produces guaranteed-to-be-unique
+-- variable names; we use it to replace all local var names declarations and
+-- occurrences in [B] by such fresh names.
+--
+-- Such macros which are guaranteed not to capture any variable are called
+-- hygienic macros. By extension, an AST guaranteed not to contain capturing
+-- variables is called an hygienic AST.
+--
+-- We implement here some functions which make sure that an AST is hygienic:
+--
+-- - [hygienize_stat (ast)] for statement AST;
+-- - [hygienize_block (ast)] for statement block AST;
+-- - [hygienize_expr (ast)] for expression AST;
+--
+-- This sample deconstructs AST by structural pattern matching, which is
+-- supported by Metalua extension "match.lua"
+--
+--------------------------------------------------------------------------------
+
+-{ extension "match" }
+
+require "std"
+
+local clone_ctx = std.shallow_copy
+
+--------------------------------------------------------------------------------
+-- Tag tables: these allow [hygienize] to decide whether an AST is
+-- an expression, a statement, or something which isn't changed by
+-- alpha renaming.
+--------------------------------------------------------------------------------
+local stat_tags = {
+ Do = true, Let = true,
+ While = true, Repeat = true,
+ If = true, Fornum = true,
+ Forin = true, Local = true,
+ Localrec = true, Return = true }
+
+local expr_tags = {
+ Function = true, Table = true,
+ Op = true, Call = true,
+ Method = true, Index = true }
+
+local neutral_tags = {
+ String = true, Number = true,
+ True = true, False = true,
+ Dots = true, Break = true,
+ Id = true }
+
+--------------------------------------------------------------------------------
+-- Choose the relevant [hygienize_xxx()] function according to the AST's tag
+-- and the tables above.
+--------------------------------------------------------------------------------
+function hygienize (ast)
+ if not ast.tag then hygienize_block (ast)
+ elseif neutral_tags[ast.tag] then -- pass
+ elseif stat_tags[ast.tag] then hygienize_stat (ast)
+ elseif expr_tags[ast.tag] then hygienize_expr (ast)
+ else error "Unrecognized AST" end
+ return ast
+end
+
+if mlp then
+ -- Add hygienic parsers for quotes
+ mlp.hexpr = hygienize `o` mlp.expr
+ mlp.hstat = hygienize `o` mlp.stat
+ mlp.hblock = hygienize `o` mlp.block
+end
+
+--------------------------------------------------------------------------------
+-- Make a statement AST hygienic. The optional [ctx] parameter is a
+-- [old_name -> new_name] map, which holds variable name substitutions
+-- to perform.
+--------------------------------------------------------------------------------
+function hygienize_stat (ast, ctx)
+ if not ctx then ctx = { } end
+ match ast with
+ | { ... } if not ast.tag -> hygienize_block (ast, ctx)
+ | `Do{ ... } -> hygienize_block (ast, clone_ctx (ctx))
+
+ | `Let{ vars, vals } ->
+ hygienize_expr_list (vars, ctx)
+ hygienize_expr_list (vals, ctx)
+
+ | `While{ cond, block } ->
+ hygienize_expr (cond, ctx)
+ -- use a clone of [ctx], since the block has a separate scope
+ hygienize_block (ast, clone_ctx (ctx))
+
+ | `Repeat{ block, cond } ->
+ -- use a clone of [ctx], since the block has a separate scope.
+ -- Notice that the condition in [repeat ... until] is evaluated
+ -- inside the block's scope, i.e. with [inner_ctx] rather than [ctx].
+ local inner_ctx = clone_ctx (ctx)
+ hygienize_block (ast, inner_ctx)
+ hygienize (cond, inner_ctx)
+
+ | `If{ ... } ->
+ for i=1, #ast-1, 2 do
+ hygienize_expr (ast[i], ctx) -- condition
+ -- each block has its own scope
+ hygienize_block (ast[i+1], clone_ctx (ctx)) -- conditional block
+ end
+ if #ast % 2 == 1 then
+ hygienize_block (ast[#ast], clone_ctx (ctx)) -- else block
+ end
+
+ | `Fornum{ var, ... } ->
+ hygienize_expr (ast[i], ctx, 2, #ast-1) -- start, finish, step? exprs
+ local inner_ctx = clone_ctx (ctx)
+ alpha_rename (var, inner_ctx) -- rename local var [var] in [inner_ctx]
+ hygienize_block (ast[#ast], inner_ctx)
+
+ | `Forin{ vars, vals, block } ->
+ hygienize_expr_list (vals, ctx)
+ local inner_ctx = clone_ctx (ctx)
+ alpha_rename_list (vars, inner_ctx) -- rename local vars [vars] in [inner_ctx]
+ hygienize_block (block, inner_ctx)
+
+ | `Local{ vars, vals } ->
+ -- locals only enter in scope after their values are computed
+ -- --> parse values first, then rename vars
+ hygienize_expr_list (vals, ctx)
+ alpha_rename_list (vars, ctx)
+
+ | `Localrec{ vars, vals } ->
+ -- As opposed to [`Local], vars are in scope during their values'
+ -- computation --> rename before parsing values.
+ alpha_rename_list (vars, ctx)
+ hygienize_expr_list (vals, ctx)
+
+ | `Call{ ... } | `Method{ ... } ->
+ -- these are actually expr, delegate to [hygienize_expr]
+ hygienize_expr (ast, ctx)
+
+ | `Return{ ... } -> hygienize_expr_list (ast, ctx)
+ | `Break ->
+ | _ -> error ("Unknown statement "..ast.tag)
+ end
+end
+
+
+--------------------------------------------------------------------------------
+-- Make an expression AST hygienic. The optional [ctx] parameter is a
+-- [old_name -> new_name] map, which holds variable name substitutions
+-- to perform.
+--------------------------------------------------------------------------------
+function hygienize_expr (ast, ctx)
+ if not ctx then ctx = { } end
+ match ast with
+ | `String{ _ } | `Number{ _ } | `True | `False | `Dots -> -- nothing
+
+ | `Function{ params, block } ->
+ local inner_ctx = clone_ctx (ctx)
+ alpha_rename_list (params, inner_ctx)
+ hygienize_block (block, inner_ctx)
+
+ | `Table{ ... } ->
+ for _, x in ipairs (ast) do
+ match x with
+ | `Key{ key, val } -> -- NOTE(review): the AST grammar defines `Pair{ expr expr }, not `Key -- confirm tag
+ hygienize_expr (key, ctx)
+ hygienize_expr (val, ctx)
+ | _ -> hygienize (x, ctx) -- NOTE(review): array entries are exprs, but hygienize dispatches stats -- confirm this shouldn't be hygienize_expr
+ end
+ end
+
+ | `Id{ x } ->
+ -- Check for substitutions to apply:
+ local y = ctx[x]; if y then ast[1] = y end
+
+ | `Op{ op, ... } ->
+ hygienize_expr_list (ast, ctx, 2, #ast)
+
+ -- Just dispatch to sub-expressions:
+ | `Call{ func, ... }
+ | `Method{ obj, `String{ name }, ... }
+ | `Index{ table, key } ->
+ hygienize_expr_list (ast, ctx)
+ | _ -> error ("Unknown expression "..ast.tag)
+ end
+end
+
+--------------------------------------------------------------------------------
+-- Make a statement block AST hygienic. The optional [ctx] parameter is a
+-- [old_name -> new_name] map, which holds variable name substitutions
+-- to perform.
+--------------------------------------------------------------------------------
+function hygienize_block (ast, ctx)
+ if not ctx then ctx = { } end
+ table.iter ((|x| hygienize(x, ctx)), ast)
+-- for i = 1, #ast do
+-- hygienize_stat (ast[i], ctx)
+-- end
+end
+
+--------------------------------------------------------------------------------
+-- Makes a shallow copy of a table. Used to make a copy of [ctx] substitution
+-- tables, when entering a new scope.
+--------------------------------------------------------------------------------
+--[[
+function clone_ctx (ctx)
+ local r = { }
+ for k, v in pairs (ctx) do r[k] = v end
+ return r
+end
+]] -- NOTE(review): clone_ctx is called by the hygienize functions above, yet this definition is commented out -- confirm a live definition exists elsewhere
+
+--------------------------------------------------------------------------------
+-- Make every expression from index [start] to [finish], in list
+-- [ast], hygienic. The optional [ctx] parameter is a [old_name ->
+-- new_name] map, which holds variable name substitutions to perform.
+-- [start] defaults to 1, [finish] defaults to the list's size.
+--------------------------------------------------------------------------------
+function hygienize_expr_list (ast, ctx, start, finish)
+ for i = start or 1, finish or #ast do
+ hygienize_expr (ast[i], ctx)
+ end
+end
+
+--------------------------------------------------------------------------------
+-- Replace the identifier [var]'s name with a fresh one generated by
+-- [mlp.gensym()], and store the new association in [ctx], so that the
+-- calling function will be able to substitute identifier occurrences with
+-- its new name.
+--------------------------------------------------------------------------------
+function alpha_rename (var, ctx)
+ assert (var.tag == "Id")
+ ctx[var[1]] = mlp.gensym()[1]
+ var[1] = ctx[var[1]]
+end
+
+--------------------------------------------------------------------------------
+-- runs [alpha_rename] on a list of identifiers.
+--------------------------------------------------------------------------------
+function alpha_rename_list (vars, ctx)
+ for _, v in ipairs(vars) do alpha_rename (v, ctx) end
+end
-----------------------------------------------------------------------\r
--- Metalua.\r
---\r
--- Summary: parser generator. Collection of higher order functors,\r
--- which allow to build and combine parsers. Relies on a lexer\r
--- that supports the same API as the one exposed in mll.lua.\r
---\r
-----------------------------------------------------------------------\r
---\r
--- Copyright (c) 2006-2008, Fabien Fleutot <metalua@gmail.com>.\r
---\r
--- This software is released under the MIT Licence, see licence.txt\r
--- for details.\r
---\r
-----------------------------------------------------------------------\r
-\r
---------------------------------------------------------------------------------\r
---\r
--- Exported API:\r
---\r
--- Parser generators:\r
--- * [gg.sequence()]\r
--- * [gg.multisequence()]\r
--- * [gg.expr()]\r
--- * [gg.list()]\r
--- * [gg.onkeyword()]\r
--- * [gg.optkeyword()]\r
---\r
--- Other functions: \r
--- * [gg.parse_error()]\r
--- * [gg.make_parser()]\r
--- * [gg.is_parser()]\r
---\r
---------------------------------------------------------------------------------\r
-\r
-module("gg", package.seeall)\r
-\r
--------------------------------------------------------------------------------\r
--- parser metatable, which maps __call to method parse, and adds some\r
--- error tracing boilerplate.\r
--------------------------------------------------------------------------------\r
-local parser_metatable = { }\r
-function parser_metatable.__call (parser, lx, ...)\r
- --printf ("Call parser %q of type %q", parser.name or "?", parser.kind)\r
- if mlc.metabugs then \r
- return parser:parse (lx, ...) \r
- --local x = parser:parse (lx, ...) \r
- --printf ("Result of parser %q: %s", \r
- -- parser.name or "?",\r
- -- _G.table.tostring(x, "nohash", 80))\r
- --return x\r
- else\r
- local li = lx:lineinfo_right() or { "?", "?", "?", "?" }\r
- local status, ast = pcall (parser.parse, parser, lx, ...) \r
- if status then return ast else\r
- error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s", \r
- ast:strmatch "gg.lua:%d+: (.*)" or ast,\r
- li[1], li[2], li[3], parser.name or parser.kind))\r
- end\r
- end\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Turn a table into a parser, mainly by setting the metatable.\r
--------------------------------------------------------------------------------\r
-function make_parser(kind, p)\r
- p.kind = kind\r
- if not p.transformers then p.transformers = { } end\r
- function p.transformers:add (x)\r
- table.insert (self, x)\r
- end\r
- setmetatable (p, parser_metatable)\r
- return p\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Return true iff [x] is a parser.\r
--- If it's a gg-generated parser, reutrn the name of its kind.\r
--------------------------------------------------------------------------------\r
-function is_parser (x)\r
- return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Parse a sequence, without applying builder nor transformers\r
--------------------------------------------------------------------------------\r
-local function raw_parse_sequence (lx, p)\r
- local r = { }\r
- for i=1, #p do\r
- e=p[i]\r
- if type(e) == "string" then \r
- if not lx:is_keyword (lx:next(), e) then\r
- parse_error (lx, "Keyword '%s' expected", e) end\r
- elseif is_parser (e) then\r
- table.insert (r, e (lx)) \r
- else \r
- gg.parse_error (lx,"Sequence `%s': element #%i is not a string "..\r
- "nor a parser: %s", \r
- p.name, i, table.tostring(e))\r
- end\r
- end\r
- return r\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Parse a multisequence, without applying multisequence transformers.\r
--- The sequences are completely parsed.\r
--------------------------------------------------------------------------------\r
-local function raw_parse_multisequence (lx, sequence_table, default)\r
- local seq_parser = sequence_table[lx:is_keyword(lx:peek())]\r
- if seq_parser then return seq_parser (lx)\r
- elseif default then return default (lx)\r
- else return false end\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Applies all transformers listed in parser on ast.\r
--------------------------------------------------------------------------------\r
-local function transform (ast, parser, fli, lli)\r
- if parser.transformers then\r
- for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end\r
- end\r
- if type(ast) == 'table'then\r
- local ali = ast.lineinfo\r
- if not ali or ali.first~=fli or ali.last~=lli then\r
- ast.lineinfo = { first = fli, last = lli }\r
- end\r
- end\r
- return ast\r
-end\r
-\r
--------------------------------------------------------------------------------\r
--- Generate a tracable parsing error (not implemented yet)\r
--------------------------------------------------------------------------------\r
-function parse_error(lx, fmt, ...)\r
- local li = lx:lineinfo_left() or {-1,-1,-1, "<unknown file>"}\r
- local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...) \r
- local src = lx.src\r
- if li[3]>0 and src then\r
- local i, j = li[3], li[3]\r
- while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end\r
- while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end \r
- local srcline = src:sub (i+1, j-1)\r
- local idx = string.rep (" ", li[2]).."^"\r
- msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)\r
- end\r
- error(msg)\r
-end\r
- \r
--------------------------------------------------------------------------------\r
---\r
--- Sequence parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- Input fields:\r
---\r
--- * [builder]: how to build an AST out of sequence parts. let [x] be the list\r
--- of subparser results (keywords are simply omitted). [builder] can be:\r
--- - [nil], in which case the result of parsing is simply [x]\r
--- - a string, which is then put as a tag on [x]\r
--- - a function, which takes [x] as a parameter and returns an AST.\r
---\r
--- * [name]: the name of the parser. Used for debug messages\r
---\r
--- * [transformers]: a list of AST->AST functions, applied in order on ASTs\r
--- returned by the parser.\r
---\r
--- * Table-part entries corresponds to keywords (strings) and subparsers \r
--- (function and callable objects).\r
---\r
--- After creation, the following fields are added:\r
--- * [parse] the parsing function lexer->AST\r
--- * [kind] == "sequence"\r
--- * [name] is set, if it wasn't in the input.\r
---\r
--------------------------------------------------------------------------------\r
-function sequence (p)\r
- make_parser ("sequence", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
- -- Raw parsing:\r
- local fli = lx:lineinfo_right()\r
- local seq = raw_parse_sequence (lx, self)\r
- local lli = lx:lineinfo_left()\r
-\r
- -- Builder application:\r
- local builder, tb = self.builder, type (self.builder)\r
- if tb == "string" then seq.tag = builder\r
- elseif tb == "function" or builder and builder.__call then seq = builder(seq)\r
- elseif builder == nil then -- nothing\r
- else error ("Invalid builder of type "..tb.." in sequence") end\r
- seq = transform (seq, self, fli, lli)\r
- assert (not seq or seq.lineinfo)\r
- return seq\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- -- Try to build a proper name\r
- if not p.name and type(p[1])=="string" then \r
- p.name = p[1].." ..." \r
- if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end\r
- else\r
- p.name = "<anonymous>"\r
- end\r
-\r
- return p\r
-end --</sequence>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Multiple, keyword-driven, sequence parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- in [p], useful fields are:\r
---\r
--- * [transformers]: as usual\r
---\r
--- * [name]: as usual\r
---\r
--- * Table-part entries must be sequence parsers, or tables which can\r
--- be turned into a sequence parser by [gg.sequence]. These\r
--- sequences must start with a keyword, and this initial keyword\r
--- must be different for each sequence. The table-part entries will\r
--- be removed after [gg.multisequence] returns.\r
---\r
--- * [default]: the parser to run if the next keyword in the lexer is\r
--- none of the registered initial keywords. If there's no default\r
--- parser and no suitable initial keyword, the multisequence parser\r
--- simply returns [false].\r
---\r
--- After creation, the following fields are added:\r
---\r
--- * [parse] the parsing function lexer->AST\r
---\r
--- * [sequences] the table of sequences, indexed by initial keywords.\r
---\r
--- * [add] method takes a sequence parser or a config table for\r
--- [gg.sequence], and adds/replaces the corresponding sequence\r
--- parser. If the keyword was already used, the former sequence is\r
--- removed and a warning is issued.\r
---\r
--- * [get] method returns a sequence by its initial keyword\r
---\r
--- * [kind] == "multisequence"\r
---\r
--------------------------------------------------------------------------------\r
-function multisequence (p) \r
- make_parser ("multisequence", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Add a sequence (might be just a config table for [gg.sequence])\r
- -------------------------------------------------------------------\r
- function p:add (s)\r
- -- compile if necessary:\r
- if not is_parser(s) then sequence(s) end\r
- if type(s[1]) ~= "string" then \r
- error "Invalid sequence for multiseq"\r
- elseif self.sequences[s[1]] then \r
- printf (" *** Warning: keyword %q overloaded in multisequence ***", s[1])\r
- end\r
- self.sequences[s[1]] = s\r
- end -- </multisequence.add>\r
-\r
- -------------------------------------------------------------------\r
- -- Get the sequence starting with this keyword. [kw :: string]\r
- -------------------------------------------------------------------\r
- function p:get (kw) return self.sequences [kw] end\r
-\r
- -------------------------------------------------------------------\r
- -- Remove the sequence starting with keyword [kw :: string]\r
- -------------------------------------------------------------------\r
- function p:del (kw) \r
- if not self.sequences[kw] then \r
- printf("*** Warning: trying to delete sequence starting "..\r
- "with %q from a multisequence having no such "..\r
- "entry ***", kw) end\r
- local removed = self.sequences[kw]\r
- self.sequences[kw] = nil \r
- return removed\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
- local fli = lx:lineinfo_right()\r
- local x = raw_parse_multisequence (lx, self.sequences, self.default)\r
- local lli = lx:lineinfo_left()\r
- return transform (x, self, fli, lli)\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- -- Register the sequences passed to the constructor. They're going\r
- -- from the array part of the parser to the hash part of field\r
- -- [sequences]\r
- p.sequences = { }\r
- for i=1, #p do p:add (p[i]); p[i] = nil end\r
-\r
- -- FIXME: why is this commented out?\r
- --if p.default and not is_parser(p.default) then sequence(p.default) end\r
- return p\r
-end --</multisequence>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Expression parser generator\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- Expression configuration relies on three tables: [prefix], [infix]\r
--- and [suffix]. Moreover, the primary parser can be replaced by a\r
--- table: in this case the [primary] table will be passed to\r
--- [gg.multisequence] to create a parser.\r
---\r
--- Each of these tables is a modified multisequence parser: the\r
--- differences with respect to regular multisequence config tables are:\r
---\r
--- * the builder takes specific parameters:\r
--- - for [prefix], it takes the result of the prefix sequence parser,\r
--- and the prefixed expression\r
--- - for [infix], it takes the left-hand-side expression, the results \r
--- of the infix sequence parser, and the right-hand-side expression.\r
--- - for [suffix], it takes the suffixed expression, and theresult \r
--- of the suffix sequence parser.\r
---\r
--- * the default field is a list, with parameters:\r
--- - [parser] the raw parsing function\r
--- - [transformers], as usual\r
--- - [prec], the operator's precedence\r
--- - [assoc] for [infix] table, the operator's associativity, which\r
--- can be "left", "right" or "flat" (default to left)\r
---\r
--- In [p], useful fields are:\r
--- * [transformers]: as usual\r
--- * [name]: as usual\r
--- * [primary]: the atomic expression parser, or a multisequence config \r
--- table (mandatory)\r
--- * [prefix]: prefix operators config table, see above.\r
--- * [infix]: infix operators config table, see above.\r
--- * [suffix]: suffix operators config table, see above.\r
---\r
--- After creation, these fields are added:\r
--- * [kind] == "expr"\r
--- * [parse] as usual\r
--- * each table is turned into a multisequence, and therefore has an \r
--- [add] method\r
---\r
--------------------------------------------------------------------------------\r
-function expr (p)\r
- make_parser ("expr", p)\r
-\r
- -------------------------------------------------------------------\r
- -- parser method.\r
- -- In addition to the lexer, it takes an optional precedence:\r
- -- it won't read expressions whose precedence is lower or equal\r
- -- to [prec].\r
- -------------------------------------------------------------------\r
- function p:parse (lx, prec)\r
- prec = prec or 0\r
-\r
- ------------------------------------------------------\r
- -- Extract the right parser and the corresponding\r
- -- options table, for (pre|in|suff)fix operators.\r
- -- Options include prec, assoc, transformers.\r
- ------------------------------------------------------\r
- local function get_parser_info (tab)\r
- local p2 = tab:get (lx:is_keyword (lx:peek()))\r
- if p2 then -- keyword-based sequence found\r
- local function parser(lx) return raw_parse_sequence(lx, p2) end\r
- return parser, p2\r
- else -- Got to use the default parser\r
- local d = tab.default\r
- if d then return d.parse or d.parser, d\r
- else return false, false end\r
- end\r
- end\r
-\r
- ------------------------------------------------------\r
- -- Look for a prefix sequence. Multiple prefixes are\r
- -- handled through the recursive [p.parse] call.\r
- -- Notice the double-transform: one for the primary\r
- -- expr, and one for the one with the prefix op.\r
- ------------------------------------------------------\r
- local function handle_prefix ()\r
- local fli = lx:lineinfo_right()\r
- local p2_func, p2 = get_parser_info (self.prefix)\r
- local op = p2_func and p2_func (lx)\r
- if op then -- Keyword-based sequence found\r
- local ili = lx:lineinfo_right() -- Intermediate LineInfo\r
- local e = p2.builder (op, self:parse (lx, p2.prec))\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e, p2, ili, lli), self, fli, lli)\r
- else -- No prefix found, get a primary expression \r
- local e = self.primary(lx)\r
- local lli = lx:lineinfo_left()\r
- return transform (e, self, fli, lli)\r
- end\r
- end --</expr.parse.handle_prefix>\r
-\r
- ------------------------------------------------------\r
- -- Look for an infix sequence+right-hand-side operand.\r
- -- Return the whole binary expression result,\r
- -- or false if no operator was found.\r
- ------------------------------------------------------\r
- local function handle_infix (e)\r
- local p2_func, p2 = get_parser_info (self.infix)\r
- if not p2 then return false end\r
-\r
- -----------------------------------------\r
- -- Handle flattening operators: gather all operands\r
- -- of the series in [list]; when a different operator \r
- -- is found, stop, build from [list], [transform] and\r
- -- return.\r
- -----------------------------------------\r
- if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then\r
- local fli = lx:lineinfo_right()\r
- local pflat, list = p2, { e }\r
- repeat\r
- local op = p2_func(lx)\r
- if not op then break end\r
- table.insert (list, self:parse (lx, p2.prec))\r
- local _ -- We only care about checking that p2==pflat\r
- _, p2 = get_parser_info (self.infix)\r
- until p2 ~= pflat\r
- local e2 = pflat.builder (list)\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e2, pflat, fli, lli), self, fli, lli)\r
- \r
- -----------------------------------------\r
- -- Handle regular infix operators: [e] the LHS is known,\r
- -- just gather the operator and [e2] the RHS.\r
- -- Result goes in [e3].\r
- -----------------------------------------\r
- elseif p2.prec and p2.prec>prec or \r
- p2.prec==prec and p2.assoc=="right" then\r
- local fli = e.lineinfo.first -- lx:lineinfo_right()\r
- local op = p2_func(lx)\r
- if not op then return false end\r
- local e2 = self:parse (lx, p2.prec)\r
- local e3 = p2.builder (e, op, e2)\r
- local lli = lx:lineinfo_left()\r
- return transform (transform (e3, p2, fli, lli), self, fli, lli)\r
-\r
- -----------------------------------------\r
- -- Check for non-associative operators, and complain if applicable. \r
- -----------------------------------------\r
- elseif p2.assoc=="none" and p2.prec==prec then\r
- parser_error (lx, "non-associative operator!")\r
-\r
- -----------------------------------------\r
- -- No infix operator suitable at that precedence\r
- -----------------------------------------\r
- else return false end\r
-\r
- end --</expr.parse.handle_infix>\r
-\r
- ------------------------------------------------------\r
- -- Look for a suffix sequence.\r
- -- Return the result of suffix operator on [e],\r
- -- or false if no operator was found.\r
- ------------------------------------------------------\r
- local function handle_suffix (e)\r
- -- FIXME bad fli, must take e.lineinfo.first\r
- local p2_func, p2 = get_parser_info (self.suffix)\r
- if not p2 then return false end\r
- if not p2.prec or p2.prec>=prec then\r
- --local fli = lx:lineinfo_right()\r
- local fli = e.lineinfo.first\r
- local op = p2_func(lx)\r
- if not op then return false end\r
- local lli = lx:lineinfo_left()\r
- e = p2.builder (e, op)\r
- e = transform (transform (e, p2, fli, lli), self, fli, lli)\r
- return e\r
- end\r
- return false\r
- end --</expr.parse.handle_suffix>\r
-\r
- ------------------------------------------------------\r
- -- Parser body: read suffix and (infix+operand) \r
- -- extensions as long as we're able to fetch more at\r
- -- this precedence level.\r
- ------------------------------------------------------\r
- local e = handle_prefix()\r
- repeat\r
- local x = handle_suffix (e); e = x or e\r
- local y = handle_infix (e); e = y or e\r
- until not (x or y)\r
-\r
- -- No transform: it already happened in operators handling\r
- return e\r
- end --</expr.parse>\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.primary then p.primary=p[1]; p[1]=nil end\r
- for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do\r
- if not p[t] then p[t] = { } end\r
- if not is_parser(p[t]) then multisequence(p[t]) end\r
- end\r
- function p:add(...) return self.primary:add(...) end\r
- return p\r
-end --</expr>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- List parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- In [p], the following fields can be provided in input:\r
---\r
--- * [builder]: takes list of subparser results, returns AST\r
--- * [transformers]: as usual\r
--- * [name]: as usual\r
---\r
--- * [terminators]: list of strings representing the keywords which\r
--- might mark the end of the list. When non-empty, the list is\r
--- allowed to be empty. A string is treated as a single-element\r
--- table, whose element is that string, e.g. ["do"] is the same as\r
--- [{"do"}].\r
---\r
--- * [separators]: list of strings representing the keywords which can\r
--- separate elements of the list. When non-empty, one of these\r
--- keyword has to be found between each element. Lack of a separator\r
--- indicates the end of the list. A string is treated as a\r
--- single-element table, whose element is that string, e.g. ["do"]\r
--- is the same as [{"do"}]. If [terminators] is empty/nil, then\r
--- [separators] has to be non-empty.\r
---\r
--- After creation, the following fields are added:\r
--- * [parse] the parsing function lexer->AST\r
--- * [kind] == "list"\r
---\r
--------------------------------------------------------------------------------\r
-function list (p)\r
- make_parser ("list", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse (lx)\r
-\r
- ------------------------------------------------------\r
- -- Used to quickly check whether there's a terminator \r
- -- or a separator immediately ahead\r
- ------------------------------------------------------\r
- local function peek_is_in (keywords) \r
- return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end\r
-\r
- local x = { }\r
- local fli = lx:lineinfo_right()\r
-\r
- -- if there's a terminator to start with, don't bother trying\r
- if not peek_is_in (self.terminators) then \r
- repeat table.insert (x, self.primary (lx)) -- read one element\r
- until\r
- -- First reason to stop: There's a separator list specified,\r
- -- and next token isn't one. Otherwise, consume it with [lx:next()]\r
- self.separators and not(peek_is_in (self.separators) and lx:next()) or\r
- -- Other reason to stop: terminator token ahead\r
- peek_is_in (self.terminators) or\r
- -- Last reason: end of file reached\r
- lx:peek().tag=="Eof"\r
- end\r
-\r
- local lli = lx:lineinfo_left()\r
- \r
- -- Apply the builder. It can be a string, or a callable value, \r
- -- or simply nothing.\r
- local b = self.builder\r
- if b then\r
- if type(b)=="string" then x.tag = b -- b is a string, use it as a tag\r
- elseif type(b)=="function" then x=b(x)\r
- else\r
- local bmt = getmetatable(b)\r
- if bmt and bmt.__call then x=b(x) end\r
- end\r
- end\r
- return transform (x, self, fli, lli)\r
- end --</list.parse>\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.primary then p.primary = p[1]; p[1] = nil end\r
- if type(p.terminators) == "string" then p.terminators = { p.terminators }\r
- elseif p.terminators and #p.terminators == 0 then p.terminators = nil end\r
- if type(p.separators) == "string" then p.separators = { p.separators }\r
- elseif p.separators and #p.separators == 0 then p.separators = nil end\r
-\r
- return p\r
-end --</list>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Keyword-conditionned parser generator\r
---\r
--------------------------------------------------------------------------------\r
--- \r
--- Only apply a parser if a given keyword is found. The result of\r
--- [gg.onkeyword] parser is the result of the subparser (modulo\r
--- [transformers] applications).\r
---\r
--- lineinfo: the keyword is *not* included in the boundaries of the\r
--- resulting lineinfo. A review of all usages of gg.onkeyword() in the\r
--- implementation of metalua has shown that it was the appropriate choice\r
--- in every case.\r
---\r
--- Input fields:\r
---\r
--- * [name]: as usual\r
---\r
--- * [transformers]: as usual\r
---\r
--- * [peek]: if non-nil, the conditionning keyword is left in the lexeme\r
--- stream instead of being consumed.\r
---\r
--- * [primary]: the subparser. \r
---\r
--- * [keywords]: list of strings representing triggering keywords.\r
---\r
--- * Table-part entries can contain strings, and/or exactly one parser.\r
--- Strings are put in [keywords], and the parser is put in [primary].\r
---\r
--- After the call, the following fields will be set:\r
--- \r
--- * [parse] the parsing method\r
--- * [kind] == "onkeyword"\r
--- * [primary]\r
--- * [keywords]\r
---\r
--------------------------------------------------------------------------------\r
-function onkeyword (p)\r
- make_parser ("onkeyword", p)\r
-\r
- -------------------------------------------------------------------\r
- -- Parsing method\r
- -------------------------------------------------------------------\r
- function p:parse(lx)\r
- if lx:is_keyword (lx:peek(), unpack(self.keywords)) then\r
- --local fli = lx:lineinfo_right()\r
- if not self.peek then lx:next() end\r
- local content = self.primary (lx)\r
- --local lli = lx:lineinfo_left()\r
- local fli, lli = content.lineinfo.first, content.lineinfo.last\r
- return transform (content, p, fli, lli)\r
- else return false end\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Construction\r
- -------------------------------------------------------------------\r
- if not p.keywords then p.keywords = { } end\r
- for _, x in ipairs(p) do\r
- if type(x)=="string" then table.insert (p.keywords, x)\r
- else assert (not p.primary and is_parser (x)); p.primary = x end\r
- end\r
- assert (p.primary, 'no primary parser in gg.onkeyword')\r
- return p\r
-end --</onkeyword>\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Optional keyword consummer pseudo-parser generator\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- This doesn't return a real parser, just a function. That function parses\r
--- one of the keywords passed as parameters, and returns it. It returns \r
--- [false] if no matching keyword is found.\r
---\r
--- Notice that tokens returned by lexer already carry lineinfo, therefore\r
--- there's no need to add them, as done usually through transform() calls.\r
--------------------------------------------------------------------------------\r
-function optkeyword (...)\r
- local args = {...}\r
- if type (args[1]) == "table" then \r
- assert (#args == 1)\r
- args = args[1]\r
- end\r
- for _, v in ipairs(args) do assert (type(v)=="string") end\r
- return function (lx)\r
- local x = lx:is_keyword (lx:peek(), unpack (args))\r
- if x then lx:next(); return x\r
- else return false end\r
- end\r
-end\r
-\r
-\r
--------------------------------------------------------------------------------\r
---\r
--- Run a parser with a special lexer\r
---\r
--------------------------------------------------------------------------------\r
---\r
--- This doesn't return a real parser, just a function.\r
--- First argument is the lexer class to be used with the parser,\r
--- 2nd is the parser itself.\r
--- The resulting parser returns whatever the argument parser does.\r
---\r
--------------------------------------------------------------------------------\r
-function with_lexer(new_lexer, parser)\r
-\r
- -------------------------------------------------------------------\r
- -- Most gg functions take their parameters in a table, so it's \r
- -- better to silently accept when with_lexer{ } is called with\r
- -- its arguments in a list:\r
- -------------------------------------------------------------------\r
- if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then\r
- return with_lexer(unpack(new_lexer))\r
- end\r
-\r
- -------------------------------------------------------------------\r
- -- Save the current lexer, switch it for the new one, run the parser,\r
- -- restore the previous lexer, even if the parser caused an error.\r
- -------------------------------------------------------------------\r
- return function (lx)\r
- local old_lexer = getmetatable(lx)\r
- lx:sync()\r
- setmetatable(lx, new_lexer)\r
- local status, result = pcall(parser, lx)\r
- lx:sync()\r
- setmetatable(lx, old_lexer)\r
- if status then return result else error(result) end\r
- end\r
-end\r
+----------------------------------------------------------------------
+-- Metalua.
+--
+-- Summary: parser generator. Collection of higher order functors,
+-- which allow to build and combine parsers. Relies on a lexer
+-- that supports the same API as the one exposed in mll.lua.
+--
+----------------------------------------------------------------------
+--
+-- Copyright (c) 2006-2008, Fabien Fleutot <metalua@gmail.com>.
+--
+-- This software is released under the MIT Licence, see licence.txt
+-- for details.
+--
+----------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+--
+-- Exported API:
+--
+-- Parser generators:
+-- * [gg.sequence()]
+-- * [gg.multisequence()]
+-- * [gg.expr()]
+-- * [gg.list()]
+-- * [gg.onkeyword()]
+-- * [gg.optkeyword()]
+--
+-- Other functions:
+-- * [gg.parse_error()]
+-- * [gg.make_parser()]
+-- * [gg.is_parser()]
+--
+--------------------------------------------------------------------------------
+
+module("gg", package.seeall)
+
+-------------------------------------------------------------------------------
+-- parser metatable, which maps __call to method parse, and adds some
+-- error tracing boilerplate.
+-------------------------------------------------------------------------------
+-- Shared metatable for all gg-generated parsers. Mapping [__call] to the
+-- [parse] method lets a parser table be invoked like a plain function.
+local parser_metatable = { }
+-- Invoke [parser:parse(lx, ...)], adding error-tracing boilerplate:
+-- unless the global debug flag [mlc.metabugs] is set, the call is wrapped
+-- in [pcall] and failures are re-raised with line/column/offset context.
+function parser_metatable.__call (parser, lx, ...)
+ --printf ("Call parser %q of type %q", parser.name or "?", parser.kind)
+ if mlc.metabugs then
+ -- Debug mode: call directly, so stack traces are not eaten by pcall.
+ return parser:parse (lx, ...)
+ --local x = parser:parse (lx, ...)
+ --printf ("Result of parser %q: %s",
+ -- parser.name or "?",
+ -- _G.table.tostring(x, "nohash", 80))
+ --return x
+ else
+ -- li = { line, column, offset, ... } -- assumed layout; TODO confirm
+ local li = lx:lineinfo_right() or { "?", "?", "?", "?" }
+ local status, ast = pcall (parser.parse, parser, lx, ...)
+ if status then return ast else
+ -- Strip the "gg.lua:NN:" prefix added by error(), if present, then
+ -- re-raise with position info and the failing parser's name/kind.
+ -- NOTE(review): [strmatch] is presumably a metalua string extension
+ -- (alias of string.match) -- confirm it is loaded before gg.
+ error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s",
+ ast:strmatch "gg.lua:%d+: (.*)" or ast,
+ li[1], li[2], li[3], parser.name or parser.kind))
+ end
+ end
+end
+
+-------------------------------------------------------------------------------
+-- Turn a table into a parser, mainly by setting the metatable.
+-------------------------------------------------------------------------------
+-- Promote table [p] into a parser of the given [kind]: tag it, guarantee
+-- a [transformers] list with an [add] method, and install the shared
+-- parser metatable so the table becomes callable. Returns [p] itself.
+function make_parser(kind, p)
+   p.kind = kind
+   -- Keep a caller-supplied transformer list; otherwise start empty.
+   p.transformers = p.transformers or { }
+   -- [p.transformers:add(f)] appends an AST->AST rewriting function.
+   function p.transformers:add (x)
+      table.insert (self, x)
+   end
+   setmetatable (p, parser_metatable)
+   return p
+end
+
+-------------------------------------------------------------------------------
+-- Return true iff [x] is a parser.
+-- If it's a gg-generated parser, return the name of its kind.
+-------------------------------------------------------------------------------
+-- Return a true-ish value iff [x] is usable as a parser: plain functions
+-- yield [true]; gg-generated parsers yield their kind string; anything
+-- else yields [false].
+function is_parser (x)
+   if type(x) == "function" then return true end
+   return getmetatable(x) == parser_metatable and x.kind
+end
+
+-------------------------------------------------------------------------------
+-- Parse a sequence, without applying builder nor transformers
+-------------------------------------------------------------------------------
+-- Parse one sequence [p] against lexer [lx], without applying builder or
+-- transformers. String entries are mandatory keywords (consumed, checked);
+-- parser entries each contribute one element to the returned result list.
+local function raw_parse_sequence (lx, p)
+   local r = { }
+   for i=1, #p do
+      -- FIX: [e] was assigned without [local], leaking it into the module
+      -- environment (module()+package.seeall makes it a field of gg).
+      local e = p[i]
+      if type(e) == "string" then
+         -- Keyword entry: the next lexeme must be exactly this keyword.
+         if not lx:is_keyword (lx:next(), e) then
+            parse_error (lx, "Keyword '%s' expected", e) end
+      elseif is_parser (e) then
+         -- Sub-parser entry: run it and collect its AST.
+         table.insert (r, e (lx))
+      else
+         -- Neither keyword nor parser: the sequence itself is misconfigured.
+         -- (Was [gg.parse_error]; bare [parse_error] is the same function
+         -- under module(), and matches the call above.)
+         parse_error (lx, "Sequence `%s': element #%i is not a string "..
+                      "nor a parser: %s",
+                      p.name, i, table.tostring(e))
+      end
+   end
+   return r
+end
+
+-------------------------------------------------------------------------------
+-- Parse a multisequence, without applying multisequence transformers.
+-- The sequences are completely parsed.
+-------------------------------------------------------------------------------
+-- Parse a multisequence, without applying multisequence transformers:
+-- dispatch on the upcoming keyword into [sequence_table]; when no entry
+-- matches, fall back on [default], and finally on returning [false].
+local function raw_parse_multisequence (lx, sequence_table, default)
+   local kw = lx:is_keyword (lx:peek())
+   local seq_parser = sequence_table[kw]
+   if seq_parser then
+      return seq_parser (lx)
+   elseif default then
+      return default (lx)
+   else
+      return false
+   end
+end
+
+-------------------------------------------------------------------------------
+-- Applies all transformers listed in parser on ast.
+-------------------------------------------------------------------------------
+-- Apply every transformer registered on [parser] to [ast], in order, then
+-- stamp the resulting AST with the source span [fli..lli].
+local function transform (ast, parser, fli, lli)
+ if parser.transformers then
+ -- A transformer may return a replacement AST, or nil/false to keep
+ -- the current one unchanged.
+ for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
+ end
+ if type(ast) == 'table'then
+ -- Refresh lineinfo in place unless it already matches [fli..lli];
+ -- non-table results (e.g. false) carry no lineinfo and are skipped.
+ local ali = ast.lineinfo
+ if not ali or ali.first~=fli or ali.last~=lli then
+ ast.lineinfo = { first = fli, last = lli }
+ end
+ end
+ return ast
+end
+
+-------------------------------------------------------------------------------
+-- Generate a traceable parsing error (not implemented yet)
+-------------------------------------------------------------------------------
+-- Raise a formatted syntax error annotated with the position of the lexeme
+-- just read and, when the raw source text is available on [lx.src], the
+-- offending line plus a caret marking the column.
+function parse_error(lx, fmt, ...)
+ -- li = { line, column, byte-offset, source-name } -- assumed layout of
+ -- lineinfo; TODO confirm against the lexer implementation.
+ local li = lx:lineinfo_left() or {-1,-1,-1, "<unknown file>"}
+ local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...)
+ local src = lx.src
+ if li[3]>0 and src then
+ -- Widen [i..j] outward from offset li[3] to the enclosing newlines
+ -- (or the ends of the buffer), then cut the line out between them.
+ local i, j = li[3], li[3]
+ while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end
+ while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
+ local srcline = src:sub (i+1, j-1)
+ -- Caret under the offending column; assumes li[2] counts from the
+ -- line start -- NOTE(review): verify 0- vs 1-based column here.
+ local idx = string.rep (" ", li[2]).."^"
+ msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
+ end
+ error(msg)
+end
+
+-------------------------------------------------------------------------------
+--
+-- Sequence parser generator
+--
+-------------------------------------------------------------------------------
+-- Input fields:
+--
+-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
+-- of subparser results (keywords are simply omitted). [builder] can be:
+-- - [nil], in which case the result of parsing is simply [x]
+-- - a string, which is then put as a tag on [x]
+-- - a function, which takes [x] as a parameter and returns an AST.
+--
+-- * [name]: the name of the parser. Used for debug messages
+--
+-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
+-- returned by the parser.
+--
+-- * Table-part entries corresponds to keywords (strings) and subparsers
+-- (function and callable objects).
+--
+-- After creation, the following fields are added:
+-- * [parse] the parsing function lexer->AST
+-- * [kind] == "sequence"
+-- * [name] is set, if it wasn't in the input.
+--
+-------------------------------------------------------------------------------
+-- Turn config table [p] into a sequence parser (see the banner above for
+-- the accepted input fields: array part = keywords/subparsers, plus
+-- [builder], [name], [transformers]). Returns [p], made callable.
+function sequence (p)
+ make_parser ("sequence", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method
+ -------------------------------------------------------------------
+ function p:parse (lx)
+ -- Raw parsing:
+ local fli = lx:lineinfo_right()
+ local seq = raw_parse_sequence (lx, self)
+ local lli = lx:lineinfo_left()
+
+ -- Builder application:
+ -- a string builder becomes the result's tag; a function (or callable
+ -- table) builder replaces the result; nil keeps the raw list.
+ -- NOTE(review): callable tables are detected via a [__call] field read
+ -- directly off the builder table, not via its metatable -- confirm
+ -- this is the intended protocol.
+ local builder, tb = self.builder, type (self.builder)
+ if tb == "string" then seq.tag = builder
+ elseif tb == "function" or builder and builder.__call then seq = builder(seq)
+ elseif builder == nil then -- nothing
+ else error ("Invalid builder of type "..tb.." in sequence") end
+ seq = transform (seq, self, fli, lli)
+ assert (not seq or seq.lineinfo)
+ return seq
+ end
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ -- Try to build a proper name
+ -- (first keyword, plus the last one when the sequence ends on one).
+ if not p.name and type(p[1])=="string" then
+ p.name = p[1].." ..."
+ if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end
+ else
+ p.name = "<anonymous>"
+ end
+
+ return p
+end --</sequence>
+
+
+-------------------------------------------------------------------------------
+--
+-- Multiple, keyword-driven, sequence parser generator
+--
+-------------------------------------------------------------------------------
+-- in [p], useful fields are:
+--
+-- * [transformers]: as usual
+--
+-- * [name]: as usual
+--
+-- * Table-part entries must be sequence parsers, or tables which can
+-- be turned into a sequence parser by [gg.sequence]. These
+-- sequences must start with a keyword, and this initial keyword
+-- must be different for each sequence. The table-part entries will
+-- be removed after [gg.multisequence] returns.
+--
+-- * [default]: the parser to run if the next keyword in the lexer is
+-- none of the registered initial keywords. If there's no default
+-- parser and no suitable initial keyword, the multisequence parser
+-- simply returns [false].
+--
+-- After creation, the following fields are added:
+--
+-- * [parse] the parsing function lexer->AST
+--
+-- * [sequences] the table of sequences, indexed by initial keywords.
+--
+-- * [add] method takes a sequence parser or a config table for
+-- [gg.sequence], and adds/replaces the corresponding sequence
+-- parser. If the keyword was already used, the former sequence is
+-- removed and a warning is issued.
+--
+-- * [get] method returns a sequence by its initial keyword
+--
+-- * [kind] == "multisequence"
+--
+-------------------------------------------------------------------------------
+-- Turn config table [p] into a keyword-driven multisequence parser (see
+-- the banner above for input fields). Array-part entries are moved into
+-- the [sequences] hash, keyed by each sequence's initial keyword.
+function multisequence (p)
+ make_parser ("multisequence", p)
+
+ -------------------------------------------------------------------
+ -- Add a sequence (might be just a config table for [gg.sequence])
+ -------------------------------------------------------------------
+ function p:add (s)
+ -- compile if necessary:
+ if not is_parser(s) then sequence(s) end
+ -- Each sequence must start with its dispatching keyword; replacing an
+ -- existing keyword only warns, it does not fail.
+ if type(s[1]) ~= "string" then
+ error "Invalid sequence for multiseq"
+ elseif self.sequences[s[1]] then
+ printf (" *** Warning: keyword %q overloaded in multisequence ***", s[1])
+ end
+ self.sequences[s[1]] = s
+ end -- </multisequence.add>
+
+ -------------------------------------------------------------------
+ -- Get the sequence starting with this keyword. [kw :: string]
+ -------------------------------------------------------------------
+ function p:get (kw) return self.sequences [kw] end
+
+ -------------------------------------------------------------------
+ -- Remove the sequence starting with keyword [kw :: string].
+ -- Returns the removed sequence (nil when there was none).
+ -------------------------------------------------------------------
+ function p:del (kw)
+ if not self.sequences[kw] then
+ printf("*** Warning: trying to delete sequence starting "..
+ "with %q from a multisequence having no such "..
+ "entry ***", kw) end
+ local removed = self.sequences[kw]
+ self.sequences[kw] = nil
+ return removed
+ end
+
+ -------------------------------------------------------------------
+ -- Parsing method: dispatch on the next keyword, then apply this
+ -- multisequence's own transformers to the result.
+ -------------------------------------------------------------------
+ function p:parse (lx)
+ local fli = lx:lineinfo_right()
+ local x = raw_parse_multisequence (lx, self.sequences, self.default)
+ local lli = lx:lineinfo_left()
+ return transform (x, self, fli, lli)
+ end
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ -- Register the sequences passed to the constructor. They're going
+ -- from the array part of the parser to the hash part of field
+ -- [sequences]
+ p.sequences = { }
+ for i=1, #p do p:add (p[i]); p[i] = nil end
+
+ -- FIXME: why is this commented out?
+ --if p.default and not is_parser(p.default) then sequence(p.default) end
+ return p
+end --</multisequence>
+
+
+-------------------------------------------------------------------------------
+--
+-- Expression parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Expression configuration relies on three tables: [prefix], [infix]
+-- and [suffix]. Moreover, the primary parser can be replaced by a
+-- table: in this case the [primary] table will be passed to
+-- [gg.multisequence] to create a parser.
+--
+-- Each of these tables is a modified multisequence parser: the
+-- differences with respect to regular multisequence config tables are:
+--
+-- * the builder takes specific parameters:
+-- - for [prefix], it takes the result of the prefix sequence parser,
+-- and the prefixed expression
+-- - for [infix], it takes the left-hand-side expression, the results
+-- of the infix sequence parser, and the right-hand-side expression.
+-- - for [suffix], it takes the suffixed expression, and the result
+-- of the suffix sequence parser.
+--
+-- * the default field is a list, with parameters:
+-- - [parser] the raw parsing function
+-- - [transformers], as usual
+-- - [prec], the operator's precedence
+-- - [assoc] for [infix] table, the operator's associativity, which
+-- can be "left", "right" or "flat" (default to left)
+--
+-- In [p], useful fields are:
+-- * [transformers]: as usual
+-- * [name]: as usual
+-- * [primary]: the atomic expression parser, or a multisequence config
+-- table (mandatory)
+-- * [prefix]: prefix operators config table, see above.
+-- * [infix]: infix operators config table, see above.
+-- * [suffix]: suffix operators config table, see above.
+--
+-- After creation, these fields are added:
+-- * [kind] == "expr"
+-- * [parse] as usual
+-- * each table is turned into a multisequence, and therefore has an
+-- [add] method
+--
+-------------------------------------------------------------------------------
+-- Turn config table [p] into an operator-precedence expression parser
+-- (see the banner above for the [primary]/[prefix]/[infix]/[suffix]
+-- configuration protocol). Returns [p], made callable.
+-- FIX: the non-associativity error path called the undefined global
+-- [parser_error]; the function defined in this module is [parse_error],
+-- so that branch crashed with "attempt to call a nil value".
+function expr (p)
+ make_parser ("expr", p)
+
+ -------------------------------------------------------------------
+ -- parser method.
+ -- In addition to the lexer, it takes an optional precedence:
+ -- it won't read expressions whose precedence is lower or equal
+ -- to [prec].
+ -------------------------------------------------------------------
+ function p:parse (lx, prec)
+ prec = prec or 0
+
+ ------------------------------------------------------
+ -- Extract the right parser and the corresponding
+ -- options table, for (pre|in|suff)fix operators.
+ -- Options include prec, assoc, transformers.
+ ------------------------------------------------------
+ local function get_parser_info (tab)
+ local p2 = tab:get (lx:is_keyword (lx:peek()))
+ if p2 then -- keyword-based sequence found
+ local function parser(lx) return raw_parse_sequence(lx, p2) end
+ return parser, p2
+ else -- Got to use the default parser
+ local d = tab.default
+ if d then return d.parse or d.parser, d
+ else return false, false end
+ end
+ end
+
+ ------------------------------------------------------
+ -- Look for a prefix sequence. Multiple prefixes are
+ -- handled through the recursive [p.parse] call.
+ -- Notice the double-transform: one for the primary
+ -- expr, and one for the one with the prefix op.
+ ------------------------------------------------------
+ local function handle_prefix ()
+ local fli = lx:lineinfo_right()
+ local p2_func, p2 = get_parser_info (self.prefix)
+ local op = p2_func and p2_func (lx)
+ if op then -- Keyword-based sequence found
+ local ili = lx:lineinfo_right() -- Intermediate LineInfo
+ local e = p2.builder (op, self:parse (lx, p2.prec))
+ local lli = lx:lineinfo_left()
+ return transform (transform (e, p2, ili, lli), self, fli, lli)
+ else -- No prefix found, get a primary expression
+ local e = self.primary(lx)
+ local lli = lx:lineinfo_left()
+ return transform (e, self, fli, lli)
+ end
+ end --</expr.parse.handle_prefix>
+
+ ------------------------------------------------------
+ -- Look for an infix sequence+right-hand-side operand.
+ -- Return the whole binary expression result,
+ -- or false if no operator was found.
+ ------------------------------------------------------
+ local function handle_infix (e)
+ local p2_func, p2 = get_parser_info (self.infix)
+ if not p2 then return false end
+
+ -----------------------------------------
+ -- Handle flattening operators: gather all operands
+ -- of the series in [list]; when a different operator
+ -- is found, stop, build from [list], [transform] and
+ -- return.
+ -----------------------------------------
+ if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
+ local fli = lx:lineinfo_right()
+ local pflat, list = p2, { e }
+ repeat
+ local op = p2_func(lx)
+ if not op then break end
+ table.insert (list, self:parse (lx, p2.prec))
+ local _ -- We only care about checking that p2==pflat
+ _, p2 = get_parser_info (self.infix)
+ until p2 ~= pflat
+ local e2 = pflat.builder (list)
+ local lli = lx:lineinfo_left()
+ return transform (transform (e2, pflat, fli, lli), self, fli, lli)
+
+ -----------------------------------------
+ -- Handle regular infix operators: [e] the LHS is known,
+ -- just gather the operator and [e2] the RHS.
+ -- Result goes in [e3].
+ -----------------------------------------
+ elseif p2.prec and p2.prec>prec or
+ p2.prec==prec and p2.assoc=="right" then
+ local fli = e.lineinfo.first -- lx:lineinfo_right()
+ local op = p2_func(lx)
+ if not op then return false end
+ local e2 = self:parse (lx, p2.prec)
+ local e3 = p2.builder (e, op, e2)
+ local lli = lx:lineinfo_left()
+ return transform (transform (e3, p2, fli, lli), self, fli, lli)
+
+ -----------------------------------------
+ -- Check for non-associative operators, and complain if applicable.
+ -----------------------------------------
+ elseif p2.assoc=="none" and p2.prec==prec then
+ -- FIX: was [parser_error], an undefined global.
+ parse_error (lx, "non-associative operator!")
+
+ -----------------------------------------
+ -- No infix operator suitable at that precedence
+ -----------------------------------------
+ else return false end
+
+ end --</expr.parse.handle_infix>
+
+ ------------------------------------------------------
+ -- Look for a suffix sequence.
+ -- Return the result of suffix operator on [e],
+ -- or false if no operator was found.
+ ------------------------------------------------------
+ local function handle_suffix (e)
+ -- FIXME bad fli, must take e.lineinfo.first
+ local p2_func, p2 = get_parser_info (self.suffix)
+ if not p2 then return false end
+ if not p2.prec or p2.prec>=prec then
+ --local fli = lx:lineinfo_right()
+ local fli = e.lineinfo.first
+ local op = p2_func(lx)
+ if not op then return false end
+ local lli = lx:lineinfo_left()
+ e = p2.builder (e, op)
+ e = transform (transform (e, p2, fli, lli), self, fli, lli)
+ return e
+ end
+ return false
+ end --</expr.parse.handle_suffix>
+
+ ------------------------------------------------------
+ -- Parser body: read suffix and (infix+operand)
+ -- extensions as long as we're able to fetch more at
+ -- this precedence level.
+ ------------------------------------------------------
+ local e = handle_prefix()
+ repeat
+ local x = handle_suffix (e); e = x or e
+ local y = handle_infix (e); e = y or e
+ until not (x or y)
+
+ -- No transform: it already happened in operators handling
+ return e
+ end --</expr.parse>
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ if not p.primary then p.primary=p[1]; p[1]=nil end
+ for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
+ if not p[t] then p[t] = { } end
+ if not is_parser(p[t]) then multisequence(p[t]) end
+ end
+ function p:add(...) return self.primary:add(...) end
+ return p
+end --</expr>
+
+
+-------------------------------------------------------------------------------
+--
+-- List parser generator
+--
+-------------------------------------------------------------------------------
+-- In [p], the following fields can be provided in input:
+--
+-- * [builder]: takes list of subparser results, returns AST
+-- * [transformers]: as usual
+-- * [name]: as usual
+--
+-- * [terminators]: list of strings representing the keywords which
+-- might mark the end of the list. When non-empty, the list is
+-- allowed to be empty. A string is treated as a single-element
+-- table, whose element is that string, e.g. ["do"] is the same as
+-- [{"do"}].
+--
+-- * [separators]: list of strings representing the keywords which can
+-- separate elements of the list. When non-empty, one of these
+-- keyword has to be found between each element. Lack of a separator
+-- indicates the end of the list. A string is treated as a
+-- single-element table, whose element is that string, e.g. ["do"]
+-- is the same as [{"do"}]. If [terminators] is empty/nil, then
+-- [separators] has to be non-empty.
+--
+-- After creation, the following fields are added:
+-- * [parse] the parsing function lexer->AST
+-- * [kind] == "list"
+--
+-------------------------------------------------------------------------------
+-- Turn config table [p] into a list parser (see the banner above for the
+-- [primary]/[separators]/[terminators]/[builder] protocol). Returns [p].
+function list (p)
+ make_parser ("list", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method
+ -------------------------------------------------------------------
+ function p:parse (lx)
+
+ ------------------------------------------------------
+ -- Used to quickly check whether there's a terminator
+ -- or a separator immediately ahead
+ ------------------------------------------------------
+ local function peek_is_in (keywords)
+ return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
+
+ local x = { }
+ local fli = lx:lineinfo_right()
+
+ -- if there's a terminator to start with, don't bother trying
+ -- (this is what allows empty lists when [terminators] is set).
+ if not peek_is_in (self.terminators) then
+ repeat table.insert (x, self.primary (lx)) -- read one element
+ until
+ -- First reason to stop: There's a separator list specified,
+ -- and next token isn't one. Otherwise, consume it with [lx:next()]
+ self.separators and not(peek_is_in (self.separators) and lx:next()) or
+ -- Other reason to stop: terminator token ahead
+ peek_is_in (self.terminators) or
+ -- Last reason: end of file reached
+ lx:peek().tag=="Eof"
+ end
+
+ local lli = lx:lineinfo_left()
+
+ -- Apply the builder. It can be a string, or a callable value,
+ -- or simply nothing.
+ local b = self.builder
+ if b then
+ if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
+ elseif type(b)=="function" then x=b(x)
+ else
+ -- Callable table: detected here through its metatable's [__call].
+ local bmt = getmetatable(b)
+ if bmt and bmt.__call then x=b(x) end
+ end
+ end
+ return transform (x, self, fli, lli)
+ end --</list.parse>
+
+ -------------------------------------------------------------------
+ -- Construction
+ -------------------------------------------------------------------
+ -- Normalize: bare strings become singleton lists, empty lists become nil.
+ if not p.primary then p.primary = p[1]; p[1] = nil end
+ if type(p.terminators) == "string" then p.terminators = { p.terminators }
+ elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
+ if type(p.separators) == "string" then p.separators = { p.separators }
+ elseif p.separators and #p.separators == 0 then p.separators = nil end
+
+ return p
+end --</list>
+
+
+-------------------------------------------------------------------------------
+--
+-- Keyword-conditioned parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Only apply a parser if a given keyword is found. The result of
+-- [gg.onkeyword] parser is the result of the subparser (modulo
+-- [transformers] applications).
+--
+-- lineinfo: the keyword is *not* included in the boundaries of the
+-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
+-- implementation of metalua has shown that it was the appropriate choice
+-- in every case.
+--
+-- Input fields:
+--
+-- * [name]: as usual
+--
+-- * [transformers]: as usual
+--
+-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
+-- stream instead of being consumed.
+--
+-- * [primary]: the subparser.
+--
+-- * [keywords]: list of strings representing triggering keywords.
+--
+-- * Table-part entries can contain strings, and/or exactly one parser.
+-- Strings are put in [keywords], and the parser is put in [primary].
+--
+-- After the call, the following fields will be set:
+--
+-- * [parse] the parsing method
+-- * [kind] == "onkeyword"
+-- * [primary]
+-- * [keywords]
+--
+-------------------------------------------------------------------------------
+-- Turn config table [p] into a keyword-conditioned parser: run [primary]
+-- only when one of [keywords] is ahead, otherwise return [false] without
+-- consuming anything (see the banner above for the full protocol).
+function onkeyword (p)
+ make_parser ("onkeyword", p)
+
+ -------------------------------------------------------------------
+ -- Parsing method
+ -------------------------------------------------------------------
+ function p:parse(lx)
+ if lx:is_keyword (lx:peek(), unpack(self.keywords)) then
+ --local fli = lx:lineinfo_right()
+ -- When [peek] is set the triggering keyword stays in the stream.
+ if not self.peek then lx:next() end
+ local content = self.primary (lx)
+ --local lli = lx:lineinfo_left()
+ -- The keyword is excluded from the result's lineinfo on purpose.
+ -- NOTE(review): assumes [primary] returned an AST table carrying
+ -- lineinfo -- a false/nil result would error here; TODO confirm.
+ local fli, lli = content.lineinfo.first, content.lineinfo.last
+ return transform (content, p, fli, lli)
+ else return false end
+ end
+
+ -------------------------------------------------------------------
+ -- Construction: split the array part into keyword strings and the
+ -- single [primary] subparser.
+ -------------------------------------------------------------------
+ if not p.keywords then p.keywords = { } end
+ for _, x in ipairs(p) do
+ if type(x)=="string" then table.insert (p.keywords, x)
+ else assert (not p.primary and is_parser (x)); p.primary = x end
+ end
+ assert (p.primary, 'no primary parser in gg.onkeyword')
+ return p
+end --</onkeyword>
+
+
+-------------------------------------------------------------------------------
+--
+-- Optional keyword consumer pseudo-parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function. That function parses
+-- one of the keywords passed as parameters, and returns it. It returns
+-- [false] if no matching keyword is found.
+--
+-- Notice that tokens returned by lexer already carry lineinfo, therefore
+-- there's no need to add them, as done usually through transform() calls.
+-------------------------------------------------------------------------------
+-- Build a pseudo-parser that optionally consumes one of the given
+-- keywords. Keywords may be passed as separate string arguments or as a
+-- single table of strings. The returned closure yields the matched
+-- keyword (and consumes it), or [false] leaving the stream untouched.
+function optkeyword (...)
+   local keywords = {...}
+   -- Single-table calling convention: optkeyword{ "a", "b" }.
+   if type (keywords[1]) == "table" then
+      assert (#keywords == 1)
+      keywords = keywords[1]
+   end
+   for _, kw in ipairs(keywords) do assert (type(kw)=="string") end
+   return function (lx)
+      local found = lx:is_keyword (lx:peek(), unpack (keywords))
+      if not found then return false end
+      lx:next()
+      return found
+   end
+end
+
+
+-------------------------------------------------------------------------------
+--
+-- Run a parser with a special lexer
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function.
+-- First argument is the lexer class to be used with the parser,
+-- 2nd is the parser itself.
+-- The resulting parser returns whatever the argument parser does.
+--
+-------------------------------------------------------------------------------
+-- Wrap [parser] so it runs under lexer class [new_lexer] (see the banner
+-- above). Returns a plain function, not a gg parser table.
+function with_lexer(new_lexer, parser)
+
+ -------------------------------------------------------------------
+ -- Most gg functions take their parameters in a table, so it's
+ -- better to silently accept when with_lexer{ } is called with
+ -- its arguments in a list:
+ -------------------------------------------------------------------
+ if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
+ return with_lexer(unpack(new_lexer))
+ end
+
+ -------------------------------------------------------------------
+ -- Save the current lexer, switch it for the new one, run the parser,
+ -- restore the previous lexer, even if the parser caused an error.
+ -------------------------------------------------------------------
+ return function (lx)
+ -- The lexer's behavior lives in its metatable, so switching lexer
+ -- classes is a metatable swap on the same stream object.
+ local old_lexer = getmetatable(lx)
+ -- sync() presumably flushes lookahead so the new lexer re-reads raw
+ -- text from the current position -- TODO confirm against mll.lua.
+ lx:sync()
+ setmetatable(lx, new_lexer)
+ -- pcall guarantees the original lexer is restored before any error
+ -- is re-raised to the caller.
+ local status, result = pcall(parser, lx)
+ lx:sync()
+ setmetatable(lx, old_lexer)
+ if status then return result else error(result) end
+ end
+end
--- static partial checkings for Lua.\r
---\r
--- This program checks some metalua or plain lua source code for most common\r
--- mistakes. Its design focuses on the ability to check plain lua code: there is\r
--- no need to load any extension in the module.\r
---\r
--- The current checkings include:\r
---\r
--- * Listing all free variables, and make sure they are declared.\r
--- * For free vars known as modules, check that indexings in them are also\r
--- declared.\r
--- * When the type of something is known, do some basic type checkings. These\r
--- checkings are by no means exhaustive; however, when a parameter function\r
--- is constant or statically declared, it's checked.\r
-\r
-\r
-\r
---[[\r
-Type grammar:\r
-\r
-t ::=\r
-| t and t\r
-| t or t\r
-| function (t, ...) return t, ... end\r
-| { (k=t)... }\r
-| table(t, t)\r
-| string\r
-| number\r
-| integer\r
-| boolean\r
-| userdata\r
-| nil\r
-| multi(t, ...)\r
-| _\r
-\r
---]]\r
-\r
-\r
-match function get_type\r
-| `Number{...} -> return +{number}\r
-| `String{...} -> return +{string}\r
-| `True|`False -> return +{boolean}\r
-| `Nil -> return +{nil}\r
-| `Dots -> return +{_}\r
-| `Stat{_,v} -> return get_type(v)\r
-| `Paren{t} -> return get_one_type(t)\r
-| `Call{f, ...} -> \r
- local ftype = get_type(f)\r
- match ftype with\r
- | `Function{ _, {`Return{result}} } -> return get_type(result)\r
- | `Function{ _, {`Return{...} == results} } ->\r
- local r2 = +{ multi() }\r
- for r in ivalues(results) table.insert(r2, get_type(r)) end\r
- return r2\r
- | `And{...} -> return +{_} -- not implemented\r
- | `Or{ a, b } -> match get_one_type(a), get_one_type(b) with\r
- | `Function{...}==f1, `Function{...}==f2 ->\r
- return `Op{ 'or', get_type(`Call{f1}), get_type(`Call{f2})}\r
- | `Function{...}==f, _ | _, `Function{...}==f ->\r
- return get_type(`Call{f})\r
- | _ -> return +{_}\r
- end\r
-| `Invoke{o, m, ... } == x -> return get_type(`Call{`Index{o, m}})\r
-| `Op{...}==o -> return get_op_type(o)\r
-| `Table{...}==t ->\r
- local r = `Table{ }\r
- for x in ivalues(t) do\r
- match x with\r
- | `Pair{ `String{...}==k, v } -> table.insert(r, `Pair{k, get_one_type(v)})\r
- | t -> table.insert(r, get_one_type(t))\r
- end\r
- end\r
- return r\r
-| `Function{...}==f ->\r
-| `Id{v} ->\r
-| `Index{t, k} -> match get_one_type(t), get_one_type(k) with\r
- | `Call{`Id 'table', tk, tv }, _ -> return tv\r
- | `Table{...}==tt, `Id 'string' ->\r
-\r
-\r
-\r
-local types_rt = require 'extension.types'\r
-\r
-\r
-\r
-\r
-function check_function(f, term)\r
- match get_type(term) with\r
- | `Function{ params, {`Return{...} == results}}, args ->\r
- | `And{ a, b }, args ->\r
- check_function(a, args)\r
- check_function(b, args)\r
- | `Or{ a, b }, args ->\r
- if not pcall(check_function, a, args) then check_function(b, args) end\r
- | `Id '_' -> -- pass\r
- | _ -> error ("Call to a non-function")\r
- end\r
-end\r
-\r
-function check_index(a, b, term)\r
- match get_type(term) with\r
- | `Table{}\r
-\r
-match function cfg.id.up\r
-| `Call{ f, ... } == x -> check_function (f, x)\r
-| `Index{ a, b } == x -> check_index (a, b, x)\r
-end \r
-\r
-\r
--- List free vars\r
+-- static partial checkings for Lua.
+--
+-- This program checks some metalua or plain lua source code for most common
+-- mistakes. Its design focuses on the ability to check plain lua code: there is
+-- no need to load any extension in the module.
+--
+-- The current checkings include:
+--
+-- * Listing all free variables, and make sure they are declared.
+-- * For free vars known as modules, check that indexings in them are also
+-- declared.
+-- * When the type of something is known, do some basic type checkings. These
+-- checkings are by no means exhaustive; however, when a parameter function
+-- is constant or statically declared, it's checked.
+
+
+
+--[[
+Type grammar:
+
+t ::=
+| t and t
+| t or t
+| function (t, ...) return t, ... end
+| { (k=t)... }
+| table(t, t)
+| string
+| number
+| integer
+| boolean
+| userdata
+| nil
+| multi(t, ...)
+| _
+
+--]]
+
+
+-- Infer a type term (per the grammar in the comment block above) for a
+-- metalua AST node. Written with metalua's [match function] extension.
+-- NOTE(review): draft code -- several arms (`Function, `Id, the last
+-- `Index sub-arm) have empty or unfinished bodies, and no closing [end]
+-- is visible for the outer match; this cannot compile as-is.
+match function get_type
+| `Number{...} -> return +{number}
+| `String{...} -> return +{string}
+| `True|`False -> return +{boolean}
+| `Nil -> return +{nil}
+| `Dots -> return +{_}
+| `Stat{_,v} -> return get_type(v)
+| `Paren{t} -> return get_one_type(t)
+| `Call{f, ...} ->
+   -- A call's type is derived from the callee's function type.
+   local ftype = get_type(f)
+   match ftype with
+   | `Function{ _, {`Return{result}} } -> return get_type(result)
+   | `Function{ _, {`Return{...} == results} } ->
+      -- Multiple returned values: collect each result's type in a
+      -- multi(...) term.
+      local r2 = +{ multi() }
+      for r in ivalues(results) table.insert(r2, get_type(r)) end
+      return r2
+   | `And{...} -> return +{_} -- not implemented
+   | `Or{ a, b } -> match get_one_type(a), get_one_type(b) with
+      | `Function{...}==f1, `Function{...}==f2 ->
+         return `Op{ 'or', get_type(`Call{f1}), get_type(`Call{f2})}
+      | `Function{...}==f, _ | _, `Function{...}==f ->
+         return get_type(`Call{f})
+      | _ -> return +{_}
+      end
+| `Invoke{o, m, ... } == x -> return get_type(`Call{`Index{o, m}})
+| `Op{...}==o -> return get_op_type(o)
+| `Table{...}==t ->
+   -- Rebuild the table constructor with each entry replaced by its type.
+   local r = `Table{ }
+   for x in ivalues(t) do
+      match x with
+      | `Pair{ `String{...}==k, v } -> table.insert(r, `Pair{k, get_one_type(v)})
+      | t -> table.insert(r, get_one_type(t))
+      end
+   end
+   return r
+| `Function{...}==f ->
+| `Id{v} ->
+| `Index{t, k} -> match get_one_type(t), get_one_type(k) with
+   | `Call{`Id 'table', tk, tv }, _ -> return tv
+   | `Table{...}==tt, `Id 'string' ->
+ | `Table{...}==tt, `Id 'string' ->
+
+
+
+local types_rt = require 'extension.types'
+
+
+
+
+-- Check that [term]'s inferred type allows it to be called as a function.
+-- NOTE(review): draft metalua code -- the `Function arm has an empty
+-- body, and the two-value patterns (..., args) are matched against a
+-- single scrutinee; TODO confirm the intended semantics.
+function check_function(f, term)
+   match get_type(term) with
+   | `Function{ params, {`Return{...} == results}}, args ->
+   | `And{ a, b }, args ->
+      -- An intersection type must be callable on both branches.
+      check_function(a, args)
+      check_function(b, args)
+   | `Or{ a, b }, args ->
+      -- A union type is callable if either branch is.
+      if not pcall(check_function, a, args) then check_function(b, args) end
+   | `Id '_' -> -- pass
+   | _ -> error ("Call to a non-function")
+   end
+end
+
+-- Check that indexing [a] with [b] is legal for [term]'s inferred type.
+-- NOTE(review): unfinished draft -- the [match] arm has no body and the
+-- closing [end]s are missing, so this cannot compile as-is.
+function check_index(a, b, term)
+   match get_type(term) with
+   | `Table{}
+
+-- AST walker hooks: type-check every call and every indexing met while
+-- walking identifiers upward.
+match function cfg.id.up
+| `Call{ f, ... } == x -> check_function (f, x)
+| `Index{ a, b } == x -> check_index (a, b, x)
+end
+
+
+-- List free vars
cfg.id.
\ No newline at end of file