From 87073d5f385ce724f0db6342724daa30b891a957 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 17 Dec 2008 11:04:24 +0100 Subject: [PATCH] fix CRLF --- doc/manual/ast.tex | 204 ++-- doc/manual/clopts-ref.tex | 14 +- doc/manual/declare-globals-ref.tex | 30 +- doc/manual/dollar-ref.tex | 44 +- doc/manual/hygiene-ref.tex | 336 +++---- doc/manual/match-ref.tex | 350 +++---- doc/manual/mlp-ref.tex | 230 ++--- doc/manual/reading-guide.tex | 180 ++-- doc/manual/sample-match.tex | 1266 ++++++++++++------------ doc/manual/springs-ref.tex | 84 +- doc/manual/trycatch-ref.tex | 174 ++-- doc/manual/walk-ref.tex | 988 +++++++++---------- doc/pluto/CHANGELOG | 20 +- doc/pluto/README | 258 ++--- junk/hygienic.lua | 558 +++++------ src/compiler/gg.lua | 1478 ++++++++++++++-------------- src/samples/typecheck.mlua | 220 ++--- 17 files changed, 3217 insertions(+), 3217 deletions(-) diff --git a/doc/manual/ast.tex b/doc/manual/ast.tex index 05a45ad..bf8b749 100755 --- a/doc/manual/ast.tex +++ b/doc/manual/ast.tex @@ -1,102 +1,102 @@ -% \documentclass{article} -% \title{MetaLua Abstract Syntax Trees} -% \setlength{\columnseprule}{1pt} -\def\T#1{{\bf`#1}} -\def\L#1{\{#1\}} -\def\P#1{{\it #1}} -\def\C#1{[\hspace{-.4em}[#1]\hspace{-.4em}]} -\def\V#1{{\it #1}} -\def\plus{\ensuremath{{}^{+}}} -\def\star{\ensuremath{{}^{*}}} -\def\TRANS{\ensuremath{\Longrightarrow}} -% \usepackage{alltt} -% \begin{document} -% \maketitle - -\section{Abstract Syntax Tree grammar} - -\begin{alltt} - -block: \L{ stat\star } - -stat: -| \T{Do}\L{ block } -| \T{Set}\L{ \L{lhs\plus} \L{expr\plus} } -| \T{While}\L{ expr block } -| \T{Repeat}\L{ block expr } -| \T{If}\L{ (expr block)\plus block? } -| \T{Fornum}\L{ ident expr expr expr? block } -| \T{Forin}\L{ \L{ident\plus} \L{expr\plus} block } -| \T{Local}\L{ \L{ident\plus} \L{expr\plus}? } -| \T{Localrec}\L{ \L{ident\plus} \L{expr\plus}? } -| \T{Goto}\L{\P{string}} -| \T{Label}\L{\P{string}} -| \T{Return} -| \T{Break} -| apply - -expr: -| \T{Nil} | \T{Dots} | \T{True} | \T{False} -| \T{Number}\L{ \P{number} } -| \T{String}\L{ \P{string} } -| \T{Function}\L{ \L{ ident\star \T{Dots}? } block } -| \T{Table}\L{ ( \T{Pair}\L{ expr expr } | expr )\star } -| \T{Op}\L{ binopid expr expr } | \T{Op}\L{ unopid expr } -| \T{Paren}\L{ expr } -| \T{Stat}\L{ block expr } -| apply -| lhs - -apply: -| \T{Call}\L{ expr expr\star } -| \T{Invoke}\L{ expr \T{String}\L{ \P{string} } expr\star } - -lhs: ident | \T{Index}\L{ expr expr } - -ident: \T{Id}\L{ \P{string} } - -binopid: "add" | "sub" | "mul" | "div" - | "mod" | "pow" | "concat" | "eq" - | "lt" | "le" | "and" | "or" - -unopid: "not" | "len" | "unm" -\end{alltt} - -% {\bf{}Relaxed forms:} - -% stat += -% | \L{ stat\star } -% | \T{Let}\L{ (\L{lhs\plus}|lhs) (\L{expr\plus}|expr) } -% | \T{Forin}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr) block } -% | \T{Local}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr)? } - -% lhs += \T{Index}\L{ expr expr\plus } - -% expr += -% | \T{Function}\L{ ident? 
block } -% | \P{number} -% | \P{string} - -% {\bf{}Translations:} -% \C{do \V{foo}; \V{bar} end} \TRANS \T{Do}\L{ \C{\V{foo}}, \C{\V{bar}}} -% \C{\V{v1},\V{v2},\V{v3} = \V{e1},\V{e2},\V{e3}} \TRANS \T{Let}\L{\L{\C{\V{v1}},\C{\V{v2}},\C{\V{v3}}}, \L{\C{\V{e1}},\C{\V{e2}},\C{\V{e3}}}} -% \C{\V{foo}(\V{bar1}, \V{bar2})} \TRANS \T{Call}\L{\C{\V{foo}},\C{\V{bar1}},\C{\V{bar2}}} -% \C{while \V{foo} do \V{bar}; \V{gna} end} \TRANS \T{While}\L{\C{\V{foo}},\T{Do}\L{\C{\V{bar}},\C{\V{gna}}}} -% \C{repeat \V{foo}; \V{bar} until \V{gna}} \TRANS \T{Repeat}\L{\T{Do}\L{\C{\V{foo}},\C{\V{bar}}},\C{\V{gna}}} - -% \end{alltt} - - -%\end{document} - -%stat: -%| \T{Splice}\L{ expr } - -%expr: -%| \T{Quote}\L{ \T{Exprr}, expr } -%| \T{Quote}\L{ \T{Stat}, stat } -%| \T{Quote}\L{ \T{Id}, ident } -%| \T{Splice}\L{ expr } - -%expr relaxed: -%| \T{Quote}\L{ expr } +% \documentclass{article} +% \title{MetaLua Abstract Syntax Trees} +% \setlength{\columnseprule}{1pt} +\def\T#1{{\bf`#1}} +\def\L#1{\{#1\}} +\def\P#1{{\it #1}} +\def\C#1{[\hspace{-.4em}[#1]\hspace{-.4em}]} +\def\V#1{{\it #1}} +\def\plus{\ensuremath{{}^{+}}} +\def\star{\ensuremath{{}^{*}}} +\def\TRANS{\ensuremath{\Longrightarrow}} +% \usepackage{alltt} +% \begin{document} +% \maketitle + +\section{Abstract Syntax Tree grammar} + +\begin{alltt} + +block: \L{ stat\star } + +stat: +| \T{Do}\L{ block } +| \T{Set}\L{ \L{lhs\plus} \L{expr\plus} } +| \T{While}\L{ expr block } +| \T{Repeat}\L{ block expr } +| \T{If}\L{ (expr block)\plus block? } +| \T{Fornum}\L{ ident expr expr expr? block } +| \T{Forin}\L{ \L{ident\plus} \L{expr\plus} block } +| \T{Local}\L{ \L{ident\plus} \L{expr\plus}? } +| \T{Localrec}\L{ \L{ident\plus} \L{expr\plus}? } +| \T{Goto}\L{\P{string}} +| \T{Label}\L{\P{string}} +| \T{Return} +| \T{Break} +| apply + +expr: +| \T{Nil} | \T{Dots} | \T{True} | \T{False} +| \T{Number}\L{ \P{number} } +| \T{String}\L{ \P{string} } +| \T{Function}\L{ \L{ ident\star \T{Dots}? } block } +| \T{Table}\L{ ( \T{Pair}\L{ expr expr } | expr )\star } +| \T{Op}\L{ binopid expr expr } | \T{Op}\L{ unopid expr } +| \T{Paren}\L{ expr } +| \T{Stat}\L{ block expr } +| apply +| lhs + +apply: +| \T{Call}\L{ expr expr\star } +| \T{Invoke}\L{ expr \T{String}\L{ \P{string} } expr\star } + +lhs: ident | \T{Index}\L{ expr expr } + +ident: \T{Id}\L{ \P{string} } + +binopid: "add" | "sub" | "mul" | "div" + | "mod" | "pow" | "concat" | "eq" + | "lt" | "le" | "and" | "or" + +unopid: "not" | "len" | "unm" +\end{alltt} + +% {\bf{}Relaxed forms:} + +% stat += +% | \L{ stat\star } +% | \T{Let}\L{ (\L{lhs\plus}|lhs) (\L{expr\plus}|expr) } +% | \T{Forin}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr) block } +% | \T{Local}\L{ (\L{ident\plus}|ident) (\L{expr\plus}|expr)? } + +% lhs += \T{Index}\L{ expr expr\plus } + +% expr += +% | \T{Function}\L{ ident? 
block } +% | \P{number} +% | \P{string} + +% {\bf{}Translations:} +% \C{do \V{foo}; \V{bar} end} \TRANS \T{Do}\L{ \C{\V{foo}}, \C{\V{bar}}} +% \C{\V{v1},\V{v2},\V{v3} = \V{e1},\V{e2},\V{e3}} \TRANS \T{Let}\L{\L{\C{\V{v1}},\C{\V{v2}},\C{\V{v3}}}, \L{\C{\V{e1}},\C{\V{e2}},\C{\V{e3}}}} +% \C{\V{foo}(\V{bar1}, \V{bar2})} \TRANS \T{Call}\L{\C{\V{foo}},\C{\V{bar1}},\C{\V{bar2}}} +% \C{while \V{foo} do \V{bar}; \V{gna} end} \TRANS \T{While}\L{\C{\V{foo}},\T{Do}\L{\C{\V{bar}},\C{\V{gna}}}} +% \C{repeat \V{foo}; \V{bar} until \V{gna}} \TRANS \T{Repeat}\L{\T{Do}\L{\C{\V{foo}},\C{\V{bar}}},\C{\V{gna}}} + +% \end{alltt} + + +%\end{document} + +%stat: +%| \T{Splice}\L{ expr } + +%expr: +%| \T{Quote}\L{ \T{Exprr}, expr } +%| \T{Quote}\L{ \T{Stat}, stat } +%| \T{Quote}\L{ \T{Id}, ident } +%| \T{Splice}\L{ expr } + +%expr relaxed: +%| \T{Quote}\L{ expr } diff --git a/doc/manual/clopts-ref.tex b/doc/manual/clopts-ref.tex index 05a1991..c95c6ae 100644 --- a/doc/manual/clopts-ref.tex +++ b/doc/manual/clopts-ref.tex @@ -1,7 +1,7 @@ -\section{{\tt clopts}: command line options parsing} -This library allows to parse command line options in a generic, standard and -reasonably powerful way. Using it for your programs helps ensure that they -behave in an unsurprizing way to users. the {\tt metalua} compiler parses its -parameters with clopts. - -FIXME: Rest of the doc to be written +\section{{\tt clopts}: command line options parsing} +This library allows to parse command line options in a generic, standard and +reasonably powerful way. Using it for your programs helps ensure that they +behave in an unsurprizing way to users. the {\tt metalua} compiler parses its +parameters with clopts. + +FIXME: Rest of the doc to be written diff --git a/doc/manual/declare-globals-ref.tex b/doc/manual/declare-globals-ref.tex index f60e3b8..2e19ea5 100644 --- a/doc/manual/declare-globals-ref.tex +++ b/doc/manual/declare-globals-ref.tex @@ -1,15 +1,15 @@ -\section{Extension {\tt declare\_globals}: mandatory declaration of global - variables} -There is a {\tt strict} library in Lua, also provided by metalua, which -complains when one tries to access a global variable which hasn't been -initialized, and when a global variable is initialized anywhere but at the -program's top-level. This catches many errors, especially typos on variable -names. - -However, with a macro-enabled language, we can do slightly better: we add a {\tt - global} keyword, which works as {\tt local}, allows to declare global -variables, which can subsequently be used freely. Undeclared global variables -cause an error when one tries to read or write them. - -FIXME: need a ``global function ...'' syntax; interactions with sub-modules is -not tested correctly. +\section{Extension {\tt declare\_globals}: mandatory declaration of global + variables} +There is a {\tt strict} library in Lua, also provided by metalua, which +complains when one tries to access a global variable which hasn't been +initialized, and when a global variable is initialized anywhere but at the +program's top-level. This catches many errors, especially typos on variable +names. + +However, with a macro-enabled language, we can do slightly better: we add a {\tt + global} keyword, which works as {\tt local}, allows to declare global +variables, which can subsequently be used freely. Undeclared global variables +cause an error when one tries to read or write them. + +FIXME: need a ``global function ...'' syntax; interactions with sub-modules is +not tested correctly. 
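To make the AST grammar of ast.tex above concrete, here is a small hand-written example (the statement and its tree are illustrative; they do not appear in the manual). The Lua statement

    local x = 2 + y

parses to the following tree, built from the `Local, `Id, `Op and `Number node types defined by the grammar:

    `Local{ { `Id 'x' },
            { `Op{ 'add', `Number 2, `Id 'y' } } }

Here `Op carries the binopid 'add' followed by its two operand expressions, and the optional second list of `Local holds the initialisation expressions.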
diff --git a/doc/manual/dollar-ref.tex b/doc/manual/dollar-ref.tex index 0f65ae5..bb5054a 100644 --- a/doc/manual/dollar-ref.tex +++ b/doc/manual/dollar-ref.tex @@ -1,22 +1,22 @@ -\section[Dollar extension]{{\tt dollar}: generic syntax for macros} -When you write a short-lived macro which takes reasonably short arguments, you -generally don't want to write a supporting syntax for it. The dollar library -allows you to call it in a generic way: if you store your macro in the table -{\tt mlp.macros}, say as function {\tt mlp.macros.foobar}, then you can call it -in your code as {\tt\$foobar(arg1, arg2...)}: it will receive as parameters the -ASTs of the pseudo-call's arguments. - -\paragraph{Example}~ -\begin{verbatim} --{ block: - require 'dollar' - function doller.LOG(id) - match id with `Id{ id_name } -> - return +{ printf("%s = %s", id_name, - table.tostring(-{id})) } - end - end } - -local x = { 1, 2, 3 } -$LOG(x) -- prints "x = { 1, 2, 3 }" when executed -\end{verbatim} +\section[Dollar extension]{{\tt dollar}: generic syntax for macros} +When you write a short-lived macro which takes reasonably short arguments, you +generally don't want to write a supporting syntax for it. The dollar library +allows you to call it in a generic way: if you store your macro in the table +{\tt mlp.macros}, say as function {\tt mlp.macros.foobar}, then you can call it +in your code as {\tt\$foobar(arg1, arg2...)}: it will receive as parameters the +ASTs of the pseudo-call's arguments. + +\paragraph{Example}~ +\begin{verbatim} +-{ block: + require 'dollar' + function doller.LOG(id) + match id with `Id{ id_name } -> + return +{ printf("%s = %s", id_name, + table.tostring(-{id})) } + end + end } + +local x = { 1, 2, 3 } +$LOG(x) -- prints "x = { 1, 2, 3 }" when executed +\end{verbatim} diff --git a/doc/manual/hygiene-ref.tex b/doc/manual/hygiene-ref.tex index 9def0f8..3fa3d73 100644 --- a/doc/manual/hygiene-ref.tex +++ b/doc/manual/hygiene-ref.tex @@ -1,168 +1,168 @@ -\section{Library {\tt H}: hygienic macros} - -\paragraph{Warning} This hygienic macro system is quite new and -experimental. Its API is likely to evolve over the next versions of -metalua. Feedbacks are especially welcome. - -A common problem with meta-programming tools is variable capture, and -the art of automatically avoiding capture is called hygiene. the {\tt - H} library automates as much as possible the handling of hygiene. - -The design of H tries to respect metalua's core principles: -\begin{itemize} -\item Nothing taboo under the hood: the underlying mechanisms of the - language must remain simple enough to be intelligible to their - intended users. Black magic should be banned from the desing. This - rules out hygienic macros as a primitive: these either rely on very - advanced and hard to predict mechanisms, or severely limit the - manipulation tools available to the macro authors. -\item Simple by default: advanced users should know what happens under - the hood, but more casual users should be able to simply turn the - ignition and drive. It should be possible to use H, for regular - macros, without much understanding of its advanced principles and - implementation. -\item Everything's a regular program: again, most macro systems - offering hygiene limit macro manipulations to a term rewriting - framework, which might be Turing complete, but makes many advanced - stuff cumbersome to write. AST are regular data, which must be - manipulable by regular programs. 
-\item Extension: metalua tries to offer the most extensible possible - language to its users. If the author of the language couldn't - implement a feature as a regular extension, it would probably - outline a severe limitation of the system. -\end{itemize} - -\paragraph{Inside captures} -There are two kind of captures, inside a macro and outside a -macro. People often think about inside captures, in parts because the -C preprocessor is subject to it. It happens when a macro inserts user -code in a quote, and the quote declares a local variable that shadows -a user one: - -\begin{verbatim} --{ block: - require 'dollar' - function dollar.TRICOND(cond, iftrue, iffalse) - local code = +{ block: - local tmp, result - tmp = -{cond} - if tmp then result = -{iftrue} - else result = -{iffalse} end } - return `Stat{ code, +{result} } - end } - -local tmp = 5 -local foo = $TRICOND(tmp%2==0, "even", "odd") -\end{verbatim} - -Here, the \verb|tmp| local variable used in the macro code captures -the user's one, and cause a failure (an attempt to get the modulus of -\verb|nil|). The expanded code looks like: -\begin{Verbatim} -local tmp = 5 -local foo = -{ `Stat{ +{ block: - local tmp, result - tmp = tmp%2==0 - if tmp then result = "even" - else result = "odd" end -}, +{result} } } -\end{Verbatim} - -This is fixed by renaming automatically all local variables in the -macro with fresh names. H provides an AST walker which does -that. However, it needs to rename only macro code, not user-provided -code; therefore the macro writer has to somehow mark the user -code. This is done with the ``!'' prefix operator: sub-trees marked -with ``!'' won't experience any renaming. The following version is -therefore safe w.r.t. inside variable capture: - -\begin{verbatim} --{ block: - require 'dollar' - function dollar.TRICOND(cond, iftrue, iffalse) - local code = +{ block: - local tmp, result - tmp = -{!cond} - if tmp then result = -{!iftrue} - else result = -{!iffalse} end } - return H(`Stat{ code, +{result} }) - end } - -local tmp = 5 -local foo = $TRICOND(tmp%2==0, "even", "odd") -\end{verbatim} - -It expands to: - -\begin{Verbatim} -local tmp = 5 -local foo = -{ `Stat{ +{ block: - local -{`Id '.1.L.tmp'}, -{`Id '.2.L.result'} -- new fresh names - -{`Id '.1.L.tmp'} = tmp%2==0 -- no capture! - if -{`Id '.1.L.tmp'} then -{`Id '.2.L.result'} = "even" - else -{`Id '.2.L.result'} = "odd" end -}, `Id '.2.L.result' } } -\end{Verbatim} - -\paragraph{Outside captures} -We've seen that macros can capture the user's variables; but the -opposite can also happen: the user can capture the variables required -by a macro: - -\begin{Verbatim} --{ block: - require 'dollar' - dollar.log = |x| +{ printf("%s = %s", - -{table.tostring(x)}, - table.tostring(-{x})) } } -local x = { 1, 2, 3 } -\$log(x) -- prints "`Id 'x' = { 1, 2, 3 }" -\end{Verbatim} - -This expands to: -\begin{Verbatim} -printf("%s = %s", "`Id 'x'", table.tostring(x)) -\end{Verbatim} - -Now, replace "x" with "table", and you get an outside capture: "local -table = { 1, 2, 3 } shadows the table module from which the macro -tries to get table.tostring(). - -The most widespread language which supports non-hygienic macros, -Common Lisp, deals with that issue by being a Lisp-2: it has separate -namespaces for functions and ``normal'' variables. This happens to -remove many common capture cases. - -H fixes this by renaming the free variables in hygienized -macros. 
After this, a "local new\_names = old\_names" statement is -generated, which re-establishes the correspondance between -names. Let's make the examples above hygienic: - -\begin{Verbatim} --{ block: - require 'dollar' - dollar.log = |x| H+{ printf("%s = %s", - -{!table.tostring(x)}, - table.tostring(-{!x})) } } -local table = { 1, 2, 3 } -\$log(table) -- prints "`Id 'table' = { 1, 2, 3 }" -\end{Verbatim} - -The code above expands into: - -\begin{Verbatim} -local table = { 1, 2, 3 } -(-{`Id '.1.X.printf'}) ("%s = %s", - "`Id 'table'", - (-{`Id '.2.X.table'}.tostring(table))) -\end{Verbatim} - -To make this work, we need to introduce, somewhere where no variable -is captured, the following local statement: -\begin{Verbatim} -local -{`Id '.1.X.printf'}, -{`Id '.2.X.table'} = printf, table -\end{Verbatim} - -The correct way would be to let the user decide where this statement -should go. - +\section{Library {\tt H}: hygienic macros} + +\paragraph{Warning} This hygienic macro system is quite new and +experimental. Its API is likely to evolve over the next versions of +metalua. Feedbacks are especially welcome. + +A common problem with meta-programming tools is variable capture, and +the art of automatically avoiding capture is called hygiene. the {\tt + H} library automates as much as possible the handling of hygiene. + +The design of H tries to respect metalua's core principles: +\begin{itemize} +\item Nothing taboo under the hood: the underlying mechanisms of the + language must remain simple enough to be intelligible to their + intended users. Black magic should be banned from the desing. This + rules out hygienic macros as a primitive: these either rely on very + advanced and hard to predict mechanisms, or severely limit the + manipulation tools available to the macro authors. +\item Simple by default: advanced users should know what happens under + the hood, but more casual users should be able to simply turn the + ignition and drive. It should be possible to use H, for regular + macros, without much understanding of its advanced principles and + implementation. +\item Everything's a regular program: again, most macro systems + offering hygiene limit macro manipulations to a term rewriting + framework, which might be Turing complete, but makes many advanced + stuff cumbersome to write. AST are regular data, which must be + manipulable by regular programs. +\item Extension: metalua tries to offer the most extensible possible + language to its users. If the author of the language couldn't + implement a feature as a regular extension, it would probably + outline a severe limitation of the system. +\end{itemize} + +\paragraph{Inside captures} +There are two kind of captures, inside a macro and outside a +macro. People often think about inside captures, in parts because the +C preprocessor is subject to it. It happens when a macro inserts user +code in a quote, and the quote declares a local variable that shadows +a user one: + +\begin{verbatim} +-{ block: + require 'dollar' + function dollar.TRICOND(cond, iftrue, iffalse) + local code = +{ block: + local tmp, result + tmp = -{cond} + if tmp then result = -{iftrue} + else result = -{iffalse} end } + return `Stat{ code, +{result} } + end } + +local tmp = 5 +local foo = $TRICOND(tmp%2==0, "even", "odd") +\end{verbatim} + +Here, the \verb|tmp| local variable used in the macro code captures +the user's one, and cause a failure (an attempt to get the modulus of +\verb|nil|). 
The expanded code looks like: +\begin{Verbatim} +local tmp = 5 +local foo = -{ `Stat{ +{ block: + local tmp, result + tmp = tmp%2==0 + if tmp then result = "even" + else result = "odd" end +}, +{result} } } +\end{Verbatim} + +This is fixed by renaming automatically all local variables in the +macro with fresh names. H provides an AST walker which does +that. However, it needs to rename only macro code, not user-provided +code; therefore the macro writer has to somehow mark the user +code. This is done with the ``!'' prefix operator: sub-trees marked +with ``!'' won't experience any renaming. The following version is +therefore safe w.r.t. inside variable capture: + +\begin{verbatim} +-{ block: + require 'dollar' + function dollar.TRICOND(cond, iftrue, iffalse) + local code = +{ block: + local tmp, result + tmp = -{!cond} + if tmp then result = -{!iftrue} + else result = -{!iffalse} end } + return H(`Stat{ code, +{result} }) + end } + +local tmp = 5 +local foo = $TRICOND(tmp%2==0, "even", "odd") +\end{verbatim} + +It expands to: + +\begin{Verbatim} +local tmp = 5 +local foo = -{ `Stat{ +{ block: + local -{`Id '.1.L.tmp'}, -{`Id '.2.L.result'} -- new fresh names + -{`Id '.1.L.tmp'} = tmp%2==0 -- no capture! + if -{`Id '.1.L.tmp'} then -{`Id '.2.L.result'} = "even" + else -{`Id '.2.L.result'} = "odd" end +}, `Id '.2.L.result' } } +\end{Verbatim} + +\paragraph{Outside captures} +We've seen that macros can capture the user's variables; but the +opposite can also happen: the user can capture the variables required +by a macro: + +\begin{Verbatim} +-{ block: + require 'dollar' + dollar.log = |x| +{ printf("%s = %s", + -{table.tostring(x)}, + table.tostring(-{x})) } } +local x = { 1, 2, 3 } +\$log(x) -- prints "`Id 'x' = { 1, 2, 3 }" +\end{Verbatim} + +This expands to: +\begin{Verbatim} +printf("%s = %s", "`Id 'x'", table.tostring(x)) +\end{Verbatim} + +Now, replace "x" with "table", and you get an outside capture: "local +table = { 1, 2, 3 } shadows the table module from which the macro +tries to get table.tostring(). + +The most widespread language which supports non-hygienic macros, +Common Lisp, deals with that issue by being a Lisp-2: it has separate +namespaces for functions and ``normal'' variables. This happens to +remove many common capture cases. + +H fixes this by renaming the free variables in hygienized +macros. After this, a "local new\_names = old\_names" statement is +generated, which re-establishes the correspondance between +names. Let's make the examples above hygienic: + +\begin{Verbatim} +-{ block: + require 'dollar' + dollar.log = |x| H+{ printf("%s = %s", + -{!table.tostring(x)}, + table.tostring(-{!x})) } } +local table = { 1, 2, 3 } +\$log(table) -- prints "`Id 'table' = { 1, 2, 3 }" +\end{Verbatim} + +The code above expands into: + +\begin{Verbatim} +local table = { 1, 2, 3 } +(-{`Id '.1.X.printf'}) ("%s = %s", + "`Id 'table'", + (-{`Id '.2.X.table'}.tostring(table))) +\end{Verbatim} + +To make this work, we need to introduce, somewhere where no variable +is captured, the following local statement: +\begin{Verbatim} +local -{`Id '.1.X.printf'}, -{`Id '.2.X.table'} = printf, table +\end{Verbatim} + +The correct way would be to let the user decide where this statement +should go. 
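One workable placement, sketched purely from the fragments above (the emission point is indeed left to the user, as just said), is to put the rebinding statement at the very top of the expanded unit, before any user local can shadow the free variables:

    local -{`Id '.1.X.printf'}, -{`Id '.2.X.table'} = printf, table
    local table = { 1, 2, 3 }   -- the user's shadowing local, now harmless
    (-{`Id '.1.X.printf'}) ("%s = %s",
                            "`Id 'table'",
                            (-{`Id '.2.X.table'}.tostring(table)))

At the top of the unit, printf and table still denote the globals, so the fresh names are bound to the intended values before the user's local table takes effect.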
+ diff --git a/doc/manual/match-ref.tex b/doc/manual/match-ref.tex index 97aad75..388887c 100644 --- a/doc/manual/match-ref.tex +++ b/doc/manual/match-ref.tex @@ -1,175 +1,175 @@ -\section{Extension {\tt match}: structural pattern matching} -Pattern matching is an extremely pleasant and powerful way to handle tree-like -structures, such as ASTs. Unsurprisingly, it's a feature found in most -ML-inspired languages, which excel at compilation-related tasks. There is a -pattern matching extension for metalua, which is extremely useful for most -meta-programming purposes. - -\subsection{Purpose} - -First, to clear a common misconception: structural pattern matching has -absolutely nothing to do with regular expresssions matching on strings: it works -on arbitrary structured data. - -When manipulating trees, you want to check whether they have a certain structure -(e.g. a `Local{ } node with as first child a list of variables whose tags are -`Id{ }); when you've found a data that matches a certain pattern, you want to -name the interesting sub-parts of it, so that you can manipulate them easily; -finally, most of the time, you have a series of different possible patterns, and -you want to apply the one that matches a given data. These are the needs -addressed by pattern matching: it lets you give a list of ({\tt pattern -> - code\_to\_execute\_if\_match}) associations, selects the first matching -pattern, and executes the corresponding code. Patterns both describe the -expected structures and bind local variables to interesting parts of the data. -Those variables' scope is obviously the code to execute upon matching success. - -\paragraph{Match statement} -A match statement has the form: - -\begin{verbatim} -match with -| -> -| -> -... -| -> -end -\end{verbatim} - -The first vertical bar after the "with" is optional; moreover, a pattern can -actually be a list of patterns, separated by bars. In this case, it's enough for -one of them to match, to get the block to be executed: - -\begin{verbatim} -match with -| | -> -... -end -\end{verbatim} - -When the match statement is executed, the first pattern which matches -{\tt} is selected, the corresponding block is executed, and all -other patterns and blocks are ignored. If no pattern matches, an error -{\tt"mismatch"} is raised. However, we'll see it's easy to add a catch-all -pattern at the end of the match, when we want it to be failproof. - -\subsection{Patterns definition} - -\paragraph{Atomic litterals} -Syntactically, a pattern is mostly identical to the values it matches: numbers, -booleans and strings, when used as patterns, match identical values. - -\begin{verbatim} -match x with -| 1 -> print 'x is one' -| 2 -> print 'x is two' -end -\end{verbatim} - -\paragraph{Tables} -Tables as patterns match tables with the same number of array-part elements, if -each pattern field matches the corresponding value field. For instance, \{1, 2, -3\} as a pattern matches \{1, 2, 3\} as a value. Pattern \{1, 2, 3\} matches -value \{1, 2, 3, foo=4\}, but pattern \{1, 2, 3, foo=4\} doesn't match value -\{1, 2, 3\}: there can be extra hash-part fields in the value, not in the -pattern. Notice that field 'tag' is a regular hash-part field, therefore \{1, 2, -3\} matches `Foo\{1, 2, 3\} (but not the other way around). Of course, table -patterns can be nested. The table keys must currently be integers or strings. -It's not difficult to add more, but the need hasn't yet emerged. - -If you want to match tables of arbitrary array-part size, you can add a "..." 
as -the pattern's final element. For instance, pattern \{1, 2, ...\} will match all -table with at least two array-part elements whose two first elements are 1 and -2. - -\paragraph{Identifiers} -The other fundamental kind of patterns are identifiers: they match everything, -and bind whatever they match to themselves. For instance, pattern {1, 2, x} will -match value {1, 2, 3}, and in the corresponding block, local variable x will be -set to 3. By mixing tables and identifiers, we can already do interesting -things, such as getting the identifiers list out of a local statement, as -mentionned above: - -\begin{verbatim} -match stat with -| `Local{ identifiers, values } -> - table.foreach(identifiers, |x| print(x[1]) -... -- other cases -end -\end{verbatim} - -When a variable appears several times in a single pattern, all the elements they -match must be equal, in the sense of the "==" operator. Fore instance, pattern \{ - x, x \} will match value \{ 1, 1 \}, but not \{ 1, 2 \}. Both values would be -matched by pattern \{ x, y \}, though. A special identifier is "\_", which doesn't -bind its content. Even if it appears more than once in the pattern, metched -value parts aren't required to be equal. The pattern "\_" is therefore the -simplest catch-all one, and a match statement with a "{\tt| \_ ->}" final -statement will never throw a "mismatch" error. - -\paragraph{Guards} - -Some tests can't be performed by pattern matching. For these cases, the pattern -can be followed by an "if" keyword, followed by a condition. - -\begin{verbatim} -match x with -| n if n%2 == 0 -> print 'odd' -| _ -> print 'even' -end -\end{verbatim} - -Notice that the identifiers bound by the pattern are available in the guard -condition. Moreover, the guard can apply to several patterns: - -\begin{verbatim} -match x with -| n | {n} if n%2 == 0 -> print 'odd' -| _ -> print 'even' -end -\end{verbatim} - -\paragraph{Multi-match} - -If you want to match several values, let's say 'a' and 'b', there's an easy way: - -\begin{verbatim} -match {a,b} with -| {pattern_for_a, pattern_for_b} -> block -... -end -\end{verbatim} - -However, it introduces quite a lot of useless tables and checks. Since this kind -of multiple matches are fairly common, they are supported natively: - -\begin{verbatim} -match a, b with -| pattern_for_a, pattern_for_b -> block -... -end -\end{verbatim} - -This will save some useless tests and computation, and the compiler will -complain if the number of patterns doesn't match the number of values. - -\paragraph{String regular expressions} -There is a way to use Lua's regular exressions with match, through the division -operator ``/'': the left operand is expected to be a literal string, interpreted -as a regular expression. The variables it captures are stored in a table, which -is matched as a value against the right-hand-side operand. For instance, the -following case succeeds when {\tt foo} is a string composed of 3 words separated -by spaces. In case of success, these words are bound to variables {\tt w1}, {\tt - w2} and {\tt w3} in the executed block: - -\begin{verbatim} -match foo with -| "^(%w+) +(%w+) +(%w+)$"/{ w1, w2, w3 } -> - do_stuff (w1, w2, w3) -end -\end{verbatim} - -\subsection{Examples} -There are quite a lot of samples using match in the metalua distribution, and -more will come in the next one. Dig in the samples for fairly simple usages, or -in the standard libs for more advanced ones. Look for instance at examples -provided with the {\tt walk} library. 
+\section{Extension {\tt match}: structural pattern matching} +Pattern matching is an extremely pleasant and powerful way to handle tree-like +structures, such as ASTs. Unsurprisingly, it's a feature found in most +ML-inspired languages, which excel at compilation-related tasks. There is a +pattern matching extension for metalua, which is extremely useful for most +meta-programming purposes. + +\subsection{Purpose} + +First, to clear a common misconception: structural pattern matching has +absolutely nothing to do with regular expresssions matching on strings: it works +on arbitrary structured data. + +When manipulating trees, you want to check whether they have a certain structure +(e.g. a `Local{ } node with as first child a list of variables whose tags are +`Id{ }); when you've found a data that matches a certain pattern, you want to +name the interesting sub-parts of it, so that you can manipulate them easily; +finally, most of the time, you have a series of different possible patterns, and +you want to apply the one that matches a given data. These are the needs +addressed by pattern matching: it lets you give a list of ({\tt pattern -> + code\_to\_execute\_if\_match}) associations, selects the first matching +pattern, and executes the corresponding code. Patterns both describe the +expected structures and bind local variables to interesting parts of the data. +Those variables' scope is obviously the code to execute upon matching success. + +\paragraph{Match statement} +A match statement has the form: + +\begin{verbatim} +match with +| -> +| -> +... +| -> +end +\end{verbatim} + +The first vertical bar after the "with" is optional; moreover, a pattern can +actually be a list of patterns, separated by bars. In this case, it's enough for +one of them to match, to get the block to be executed: + +\begin{verbatim} +match with +| | -> +... +end +\end{verbatim} + +When the match statement is executed, the first pattern which matches +{\tt} is selected, the corresponding block is executed, and all +other patterns and blocks are ignored. If no pattern matches, an error +{\tt"mismatch"} is raised. However, we'll see it's easy to add a catch-all +pattern at the end of the match, when we want it to be failproof. + +\subsection{Patterns definition} + +\paragraph{Atomic litterals} +Syntactically, a pattern is mostly identical to the values it matches: numbers, +booleans and strings, when used as patterns, match identical values. + +\begin{verbatim} +match x with +| 1 -> print 'x is one' +| 2 -> print 'x is two' +end +\end{verbatim} + +\paragraph{Tables} +Tables as patterns match tables with the same number of array-part elements, if +each pattern field matches the corresponding value field. For instance, \{1, 2, +3\} as a pattern matches \{1, 2, 3\} as a value. Pattern \{1, 2, 3\} matches +value \{1, 2, 3, foo=4\}, but pattern \{1, 2, 3, foo=4\} doesn't match value +\{1, 2, 3\}: there can be extra hash-part fields in the value, not in the +pattern. Notice that field 'tag' is a regular hash-part field, therefore \{1, 2, +3\} matches `Foo\{1, 2, 3\} (but not the other way around). Of course, table +patterns can be nested. The table keys must currently be integers or strings. +It's not difficult to add more, but the need hasn't yet emerged. + +If you want to match tables of arbitrary array-part size, you can add a "..." as +the pattern's final element. For instance, pattern \{1, 2, ...\} will match all +table with at least two array-part elements whose two first elements are 1 and +2. 
+ +\paragraph{Identifiers} +The other fundamental kind of patterns are identifiers: they match everything, +and bind whatever they match to themselves. For instance, pattern {1, 2, x} will +match value {1, 2, 3}, and in the corresponding block, local variable x will be +set to 3. By mixing tables and identifiers, we can already do interesting +things, such as getting the identifiers list out of a local statement, as +mentionned above: + +\begin{verbatim} +match stat with +| `Local{ identifiers, values } -> + table.foreach(identifiers, |x| print(x[1]) +... -- other cases +end +\end{verbatim} + +When a variable appears several times in a single pattern, all the elements they +match must be equal, in the sense of the "==" operator. Fore instance, pattern \{ + x, x \} will match value \{ 1, 1 \}, but not \{ 1, 2 \}. Both values would be +matched by pattern \{ x, y \}, though. A special identifier is "\_", which doesn't +bind its content. Even if it appears more than once in the pattern, metched +value parts aren't required to be equal. The pattern "\_" is therefore the +simplest catch-all one, and a match statement with a "{\tt| \_ ->}" final +statement will never throw a "mismatch" error. + +\paragraph{Guards} + +Some tests can't be performed by pattern matching. For these cases, the pattern +can be followed by an "if" keyword, followed by a condition. + +\begin{verbatim} +match x with +| n if n%2 == 0 -> print 'odd' +| _ -> print 'even' +end +\end{verbatim} + +Notice that the identifiers bound by the pattern are available in the guard +condition. Moreover, the guard can apply to several patterns: + +\begin{verbatim} +match x with +| n | {n} if n%2 == 0 -> print 'odd' +| _ -> print 'even' +end +\end{verbatim} + +\paragraph{Multi-match} + +If you want to match several values, let's say 'a' and 'b', there's an easy way: + +\begin{verbatim} +match {a,b} with +| {pattern_for_a, pattern_for_b} -> block +... +end +\end{verbatim} + +However, it introduces quite a lot of useless tables and checks. Since this kind +of multiple matches are fairly common, they are supported natively: + +\begin{verbatim} +match a, b with +| pattern_for_a, pattern_for_b -> block +... +end +\end{verbatim} + +This will save some useless tests and computation, and the compiler will +complain if the number of patterns doesn't match the number of values. + +\paragraph{String regular expressions} +There is a way to use Lua's regular exressions with match, through the division +operator ``/'': the left operand is expected to be a literal string, interpreted +as a regular expression. The variables it captures are stored in a table, which +is matched as a value against the right-hand-side operand. For instance, the +following case succeeds when {\tt foo} is a string composed of 3 words separated +by spaces. In case of success, these words are bound to variables {\tt w1}, {\tt + w2} and {\tt w3} in the executed block: + +\begin{verbatim} +match foo with +| "^(%w+) +(%w+) +(%w+)$"/{ w1, w2, w3 } -> + do_stuff (w1, w2, w3) +end +\end{verbatim} + +\subsection{Examples} +There are quite a lot of samples using match in the metalua distribution, and +more will come in the next one. Dig in the samples for fairly simple usages, or +in the standard libs for more advanced ones. Look for instance at examples +provided with the {\tt walk} library. 
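As a complement to the reference above, here is a small failproof match combining a tag pattern, a guard and the "_" catch-all discussed earlier; the get_status function and the `Ok / `Error tags are invented for the example:

    match get_status() with
    | `Ok{ value } if value > 0 -> print("positive: " .. value)
    | `Ok{ value }              -> print("non-positive: " .. value)
    | `Error{ msg }             -> print("failure: " .. msg)
    | _                         -> print("unexpected shape")
    end

Because of the final "_" case, this statement can never raise the "mismatch" error, whatever get_status returns.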
diff --git a/doc/manual/mlp-ref.tex b/doc/manual/mlp-ref.tex index 0055008..091a123 100644 --- a/doc/manual/mlp-ref.tex +++ b/doc/manual/mlp-ref.tex @@ -1,115 +1,115 @@ -\def\tableHeader{\begin{tabular}{|c|c|p{5cm}|}\hline -\bf name & \bf type & \multicolumn{1}{c|}{\bf description} \\\hline} -\def\entry#1#2#3{{#1} & {\tt#2} & {#3} \\\hline} -\def\tableFooter{\hline\end{tabular}} - -\section{{\tt mlp}, the metalua parser} - -Metalua parser is built on top of \verb|gg|, and cannot be understood -without some knowledge of it. Basically, \verb|gg| allows not only to -build parsers, but to build {\em extensible} parsers. Depending on a -parser's type (sequence, sequence set, list, expression\ldots), -different extension methods are available, which are documented in -\verb|gg| reference. The current section will give the information -needed to extend Metalua syntax: -\begin{itemize} -\item what \verb|mlp| entries are accessible for extension; -\item what do they parse; -\item what is the underlying parser type (and therefore, what - extension methods are supported) -\end{itemize} - -\vfill\pagebreak - -\subsection{Parsing expressions} -\tableHeader - -\entry{mlp.expr}{gg.expr}{Top-level expression parser, and the main - extension point for Metalua expression. Supports all of the methods - defined by {\tt gg.expr}.} - -\entry{mlp.func\_val}{gg.sequence}{Read a function definition, - from the arguments' openning parenthesis to the final {\tt end}, but - excluding the initial {\tt function} keyword, so that it can be used - both for anonymous functions, for {\tt function some\_name(...) end} - and for {\tt local function some\_name(...) end}.} - -% \entry{mlp.func\_params\_content}{gg.list}{Read a potentially empty -% (``{\tt)}''- or ``{\tt|}''-terminated) list of function definition -% parameters, i.e. identifiers or ``{\tt ...}'' varargs. Surrounding -% parentheses are excluded. Don't get confused between parameters and -% arguments: parameters are the variable names used in a function -% definition; arguments are the values passed in a function call.} - -% \entry{mlp.func\_args\_content}{gg.list}{Read a potentially emtpy list -% of function call arguments. Surrounding parentheses are excluded.} - -% \entry{mlp.func\_args}{gg.sequence\_set}{Read function arguments: a -% list of expressions between parenthses, or a litteral table, or a -% litteral string.} - -%\entry{mlp.func\_params}{}{} -\entry{mlp.expr\_list}{}{} - -%\entry{mlp.adt}{\rm custom function}{Read an algebraic datatype -% without its leading backquote.} - -\entry{mlp.table\_content}{gg.list}{Read the content of a table, - excluding the surrounding braces} - -\entry{mlp.table}{gg.sequence}{Read a litteral table, - including the surrounding braces} - -\entry{mlp.table\_field}{\rm custom function}{Read a table entry: {\tt - [foo]=bar}, {\tt foo=bar} or {\tt bar}.} - -\entry{mlp.opt\_id}{\rm custom function}{Try to read an identifier, or - an identifier splice. On failure, returns false.} - -\entry{mlp.id}{\rm custom function}{Read an identifier, or - an identifier splice. Cause an error if there is no identifier.} - -\tableFooter - -\vfill\pagebreak - -\subsection{Parsing statements} -\tableHeader -\entry{mlp.block}{gg.list}{Read a sequence of statements, optionally - separated by semicolons. 
When introducing syntax extensions, it's - often necessary to add block terminators with {\tt - mlp.block.terminators:add().}} -\entry{mlp.for\_header}{\rm custom function}{Read a {\tt for} header, -from just after the ``{\tt for}'' to just before the ``{\tt do}''.} -\entry{mlp.stat}{gg.multisequence}{Read a single statement.} -\tableFooter - -Actually, {\tt mlp.stat} is an extended version of a multisequence: it -supports easy addition of new assignment operator. It has a field {\tt -assignments}, whose keys are assignment keywords, and values are -assignment builders taking left-hand-side and right-hand-side as -parameters. for instance, C's ``+='' operator could be added as: -\begin{verbatim} -mlp.lexer:add "+=" -mlp.stat.assignments["+="] = function (lhs, rhs) - assert(#lhs==1 and #rhs==1) - local a, b = lhs[1], rhs[1] - return +{stat: (-{a}) = -{a} + -{b} } -end -\end{verbatim} - -\subsection{Other useful functions and variables} - -\begin{itemize} -\item{\tt mlp.gensym()} generates a unique identifier. The uniqueness - is guaranteed, therefore this identifier cannot capture another - variable; it is useful to write hygienic\footnote{Hygienic macros - are macros which take care not to use names that might interfere - with user-provided names. The typical non-hygienic macro in C - is {\tt \#define SWAP( a, b) \{ int c=a; a=b; b=c; \}}: this macro - will misearbly fail if you ever call it with a parameter named - {\tt c}. There are well-known techniques to automatically make a - macro hygienic. Without them, you'd have to generate a unique name - for the temporary variable, if you had a {\tt gensym()} operator - in C's preprocessor} macros. -\end{itemize} +\def\tableHeader{\begin{tabular}{|c|c|p{5cm}|}\hline +\bf name & \bf type & \multicolumn{1}{c|}{\bf description} \\\hline} +\def\entry#1#2#3{{#1} & {\tt#2} & {#3} \\\hline} +\def\tableFooter{\hline\end{tabular}} + +\section{{\tt mlp}, the metalua parser} + +Metalua parser is built on top of \verb|gg|, and cannot be understood +without some knowledge of it. Basically, \verb|gg| allows not only to +build parsers, but to build {\em extensible} parsers. Depending on a +parser's type (sequence, sequence set, list, expression\ldots), +different extension methods are available, which are documented in +\verb|gg| reference. The current section will give the information +needed to extend Metalua syntax: +\begin{itemize} +\item what \verb|mlp| entries are accessible for extension; +\item what do they parse; +\item what is the underlying parser type (and therefore, what + extension methods are supported) +\end{itemize} + +\vfill\pagebreak + +\subsection{Parsing expressions} +\tableHeader + +\entry{mlp.expr}{gg.expr}{Top-level expression parser, and the main + extension point for Metalua expression. Supports all of the methods + defined by {\tt gg.expr}.} + +\entry{mlp.func\_val}{gg.sequence}{Read a function definition, + from the arguments' openning parenthesis to the final {\tt end}, but + excluding the initial {\tt function} keyword, so that it can be used + both for anonymous functions, for {\tt function some\_name(...) end} + and for {\tt local function some\_name(...) end}.} + +% \entry{mlp.func\_params\_content}{gg.list}{Read a potentially empty +% (``{\tt)}''- or ``{\tt|}''-terminated) list of function definition +% parameters, i.e. identifiers or ``{\tt ...}'' varargs. Surrounding +% parentheses are excluded. 
Don't get confused between parameters and +% arguments: parameters are the variable names used in a function +% definition; arguments are the values passed in a function call.} + +% \entry{mlp.func\_args\_content}{gg.list}{Read a potentially emtpy list +% of function call arguments. Surrounding parentheses are excluded.} + +% \entry{mlp.func\_args}{gg.sequence\_set}{Read function arguments: a +% list of expressions between parenthses, or a litteral table, or a +% litteral string.} + +%\entry{mlp.func\_params}{}{} +\entry{mlp.expr\_list}{}{} + +%\entry{mlp.adt}{\rm custom function}{Read an algebraic datatype +% without its leading backquote.} + +\entry{mlp.table\_content}{gg.list}{Read the content of a table, + excluding the surrounding braces} + +\entry{mlp.table}{gg.sequence}{Read a litteral table, + including the surrounding braces} + +\entry{mlp.table\_field}{\rm custom function}{Read a table entry: {\tt + [foo]=bar}, {\tt foo=bar} or {\tt bar}.} + +\entry{mlp.opt\_id}{\rm custom function}{Try to read an identifier, or + an identifier splice. On failure, returns false.} + +\entry{mlp.id}{\rm custom function}{Read an identifier, or + an identifier splice. Cause an error if there is no identifier.} + +\tableFooter + +\vfill\pagebreak + +\subsection{Parsing statements} +\tableHeader +\entry{mlp.block}{gg.list}{Read a sequence of statements, optionally + separated by semicolons. When introducing syntax extensions, it's + often necessary to add block terminators with {\tt + mlp.block.terminators:add().}} +\entry{mlp.for\_header}{\rm custom function}{Read a {\tt for} header, +from just after the ``{\tt for}'' to just before the ``{\tt do}''.} +\entry{mlp.stat}{gg.multisequence}{Read a single statement.} +\tableFooter + +Actually, {\tt mlp.stat} is an extended version of a multisequence: it +supports easy addition of new assignment operator. It has a field {\tt +assignments}, whose keys are assignment keywords, and values are +assignment builders taking left-hand-side and right-hand-side as +parameters. for instance, C's ``+='' operator could be added as: +\begin{verbatim} +mlp.lexer:add "+=" +mlp.stat.assignments["+="] = function (lhs, rhs) + assert(#lhs==1 and #rhs==1) + local a, b = lhs[1], rhs[1] + return +{stat: (-{a}) = -{a} + -{b} } +end +\end{verbatim} + +\subsection{Other useful functions and variables} + +\begin{itemize} +\item{\tt mlp.gensym()} generates a unique identifier. The uniqueness + is guaranteed, therefore this identifier cannot capture another + variable; it is useful to write hygienic\footnote{Hygienic macros + are macros which take care not to use names that might interfere + with user-provided names. The typical non-hygienic macro in C + is {\tt \#define SWAP( a, b) \{ int c=a; a=b; b=c; \}}: this macro + will misearbly fail if you ever call it with a parameter named + {\tt c}. There are well-known techniques to automatically make a + macro hygienic. Without them, you'd have to generate a unique name + for the temporary variable, if you had a {\tt gensym()} operator + in C's preprocessor} macros. +\end{itemize} diff --git a/doc/manual/reading-guide.tex b/doc/manual/reading-guide.tex index a75f0a9..995f12d 100755 --- a/doc/manual/reading-guide.tex +++ b/doc/manual/reading-guide.tex @@ -1,90 +1,90 @@ -\section*{Reading guide} -This manual tries to be as comprehensive as possible; however, you don't -necessarily have to read all of it before starting to do interesting stuff with -metalua. 
Here's a brief summary of what the different parts of the manual -address, and why you might want to read them immediately---or not. - -\begin{itemize} -\item{\bf Before reading this manual:} metalua is based on Lua, so - you'll need a minimal level of Lua proficiency. You can probably get - away without knowing much about metatables, environments or - coroutines, but you need to be at ease with basic flow control, - scoping rules, first-class functions, and the whole - everything-is-a-table approach. -\item{\bf Meta-programming in metalua:} this chapter exposes the generic principles - of static meta-programming: meta-levels in sources, AST representation of - code, meta-operators. You need to read this carefully if you plan to write any - non-trivial meta-programming code and you've never used languages like, Common - Lisp, camlp4 or Converge. If you're familiar with one of these, a cursory look - over this chapter might be enough for you. -\item{\bf Standard meta-programming libraries:} these are the tools that will allow - you to manipulate code effectively; the more advanced an extension you want to - write the more of these you'll want to know. - \begin{itemize} - \item{\bf mlp} is the dynamically extensible metalua parser. You need to know it - if you want to change or extend the language's syntax - \item{\bf gg} is the grammar generator, the library which lets you manipulate - dynamic parsers. You need to know it in order to do anything useful with - mlp. - \item{\bf match} is an extension supporting structural pattern matching (which has - almost nothing to do with regular expressions on strings). It's a construct - taken from the ML language familly, which lets you manipulate advanced data - structures in vrey powerful ways. It's extremely helpful, among others, when - working with AST, i.e. for most interesting meta-programs. - \item{\bf walk} is a code walker generator: smomething akin to a visitor pattern, - which will help you to write code analysers or transformers. Whenever you - want to find and transform all return statements in an AST, rename some - conflicting local variables, check for the presence of nested for loops - etc., you'll have to write a code walker, and walk will get you there much - faster. - \item{\bf hygiene} offers hygienic macros, i.e. protects you from accidental - variable captures. As opposed to e.g. Scheme, macro writing is not limited - to a term rewriting system in metalua, which lets more power to the - programmer, but prevents from completely automating macro hygienization. If - you wrote an extension and you want to raise it to production-quality, - you'll need among others to protect its users from variable captures, and - you'll need to hygienize it. If you don't feel like cluttering your code - with dozens of {\tt gensym} calls, you'll want to use the macro hygienizer. - \item{\bf dollar:} if you wrote a macro, but don't feel the need to give it a - dedicated syntax extension, this library will let you call this macro as a - regular function call, except that it will be prefixed with a ``{\tt\$}''. - \end{itemize} - \item{\bf General purpose libraries:} Lua strives at staying minimalist, and does - not come with batteries included; you're expected to grab them separately, - currently from luaforge, and eventually from a Lua Rocks repository. Metalua - needs quite some support to run, and relies on a number of imported and - custom-built libraries. 
Most of them can be reused for many other purposes - including yours.\\ - A whole category of metalua users, who want to use third party libraries - rather than reinventing their own wheels, will be primarily interested by - these. - \begin{itemize} - \item{\bf metalua.runtime:} extensions to Lua core libraries: base, table, - string. - \item{\bf metalua.compiler:} mlc offers a consistent interface to metalua - compilation and code representation transformers. 'package', 'loadstring', - 'dostring', 'loadfile' and 'dofile' are also updated to handle metalua - source files. - \item{\bf clopts} simplifies and unifies the handling of command line options - for metalua programs. - \item{\bf springs} brings together Lua Ring's handling of separate Lua universes - with Pluto's communication capabilities. - \item{\bf clist} offers an extended tables-as-list interface: lists by - comprehension {\em \`a la} Haskell or Python, list chunks etc. - \item{\bf xglobal} makes global variables declaration mandatory, for safer - programming, with almost no runtime overhead, and a syntax consistant qith - local variables declaration. - \item{\bf anaphoric} introduces anaphoric control structures, akin to Common - Lisp's {\tt aif}-familly macros. - \item{\bf trycatch} provides a proper exception system, with reliable finally - blocks and exception catching by structural pattern matching. - \item{\bf log} eases the terminal logging of variables, mainly for those from - the Printf-based School of Debugging. - \item{\bf types} offers dynamic type checking to metalua programs. It supports - variable typing as opposed to value typing, and advanced type system - features (polymorphism, dependant types etc.). - \end{itemize} - \item{\bf Examples and tutorials}: this chapter lists a series of tiny - meta-programs whose main purpose is didactic, and walks through the detailed - implementation of a couple of non-trivial extensions. -\end{itemize} +\section*{Reading guide} +This manual tries to be as comprehensive as possible; however, you don't +necessarily have to read all of it before starting to do interesting stuff with +metalua. Here's a brief summary of what the different parts of the manual +address, and why you might want to read them immediately---or not. + +\begin{itemize} +\item{\bf Before reading this manual:} metalua is based on Lua, so + you'll need a minimal level of Lua proficiency. You can probably get + away without knowing much about metatables, environments or + coroutines, but you need to be at ease with basic flow control, + scoping rules, first-class functions, and the whole + everything-is-a-table approach. +\item{\bf Meta-programming in metalua:} this chapter exposes the generic principles + of static meta-programming: meta-levels in sources, AST representation of + code, meta-operators. You need to read this carefully if you plan to write any + non-trivial meta-programming code and you've never used languages like, Common + Lisp, camlp4 or Converge. If you're familiar with one of these, a cursory look + over this chapter might be enough for you. +\item{\bf Standard meta-programming libraries:} these are the tools that will allow + you to manipulate code effectively; the more advanced an extension you want to + write the more of these you'll want to know. + \begin{itemize} + \item{\bf mlp} is the dynamically extensible metalua parser. 
You need to know it + if you want to change or extend the language's syntax + \item{\bf gg} is the grammar generator, the library which lets you manipulate + dynamic parsers. You need to know it in order to do anything useful with + mlp. + \item{\bf match} is an extension supporting structural pattern matching (which has + almost nothing to do with regular expressions on strings). It's a construct + taken from the ML language familly, which lets you manipulate advanced data + structures in vrey powerful ways. It's extremely helpful, among others, when + working with AST, i.e. for most interesting meta-programs. + \item{\bf walk} is a code walker generator: smomething akin to a visitor pattern, + which will help you to write code analysers or transformers. Whenever you + want to find and transform all return statements in an AST, rename some + conflicting local variables, check for the presence of nested for loops + etc., you'll have to write a code walker, and walk will get you there much + faster. + \item{\bf hygiene} offers hygienic macros, i.e. protects you from accidental + variable captures. As opposed to e.g. Scheme, macro writing is not limited + to a term rewriting system in metalua, which lets more power to the + programmer, but prevents from completely automating macro hygienization. If + you wrote an extension and you want to raise it to production-quality, + you'll need among others to protect its users from variable captures, and + you'll need to hygienize it. If you don't feel like cluttering your code + with dozens of {\tt gensym} calls, you'll want to use the macro hygienizer. + \item{\bf dollar:} if you wrote a macro, but don't feel the need to give it a + dedicated syntax extension, this library will let you call this macro as a + regular function call, except that it will be prefixed with a ``{\tt\$}''. + \end{itemize} + \item{\bf General purpose libraries:} Lua strives at staying minimalist, and does + not come with batteries included; you're expected to grab them separately, + currently from luaforge, and eventually from a Lua Rocks repository. Metalua + needs quite some support to run, and relies on a number of imported and + custom-built libraries. Most of them can be reused for many other purposes + including yours.\\ + A whole category of metalua users, who want to use third party libraries + rather than reinventing their own wheels, will be primarily interested by + these. + \begin{itemize} + \item{\bf metalua.runtime:} extensions to Lua core libraries: base, table, + string. + \item{\bf metalua.compiler:} mlc offers a consistent interface to metalua + compilation and code representation transformers. 'package', 'loadstring', + 'dostring', 'loadfile' and 'dofile' are also updated to handle metalua + source files. + \item{\bf clopts} simplifies and unifies the handling of command line options + for metalua programs. + \item{\bf springs} brings together Lua Ring's handling of separate Lua universes + with Pluto's communication capabilities. + \item{\bf clist} offers an extended tables-as-list interface: lists by + comprehension {\em \`a la} Haskell or Python, list chunks etc. + \item{\bf xglobal} makes global variables declaration mandatory, for safer + programming, with almost no runtime overhead, and a syntax consistant qith + local variables declaration. + \item{\bf anaphoric} introduces anaphoric control structures, akin to Common + Lisp's {\tt aif}-familly macros. 
+    \item{\bf trycatch} provides a proper exception system, with reliable finally
+      blocks and exception catching by structural pattern matching.
+    \item{\bf log} eases the terminal logging of variables, mainly for those from
+      the Printf-based School of Debugging.
+    \item{\bf types} offers dynamic type checking to metalua programs. It supports
+      variable typing as opposed to value typing, and advanced type system
+      features (polymorphism, dependent types etc.).
+    \end{itemize}
+  \item{\bf Examples and tutorials}: this chapter lists a series of tiny
+    meta-programs whose main purpose is didactic, and walks through the detailed
+    implementation of a couple of non-trivial extensions.
+\end{itemize}
diff --git a/doc/manual/sample-match.tex b/doc/manual/sample-match.tex
index 73c9eba..a6244c3 100644
--- a/doc/manual/sample-match.tex
+++ b/doc/manual/sample-match.tex
@@ -1,633 +1,633 @@
-\subsection{Structural pattern matching}
-
-FIXME: refer to the official match extension instead of re-explaining
-what pattern matching is about.
-
-\subsubsection{Basic principles}
-In many languages, including Lua, ``pattern matching'' generally
-refers to the ability to:
-\begin{itemize}
-\item Analyse the general shape of a string;
-\item capture specified parts of it into temporary variables;
-\item do some computation when the string matches a certain pattern,
-  generally using the temporary captured variables.
-\end{itemize}
-
-In languages derived from ML\footnote{for instance OCaml, SML, Haskell
-  or Erlang.}, pattern matching can be done on arbitrary data, not
-only strings. One can write patterns which describe the shape of a
-data structure, bind some interesting sub-parts of it to variables,
-and execute some statements associated with a given pattern whenever
-the data matches.
-
-This sample aims to implement this capability in metalua. It will
-discuss:
-\begin{itemize}
-\item How to describe data patterns (it turns out that we'll hijack
-  metalua's expression parser, which is a great time-saving trick when
-  it can be pulled).
-\item How to compile them into equivalent code.
-\item How to wrap all this up into a working compiled statement
-\item What improvements can be made to the proposed design.
-\end{itemize}
-
-\subsubsection{Patterns}
-A match statement consists of a tested term, and a series of (pattern,
-block) pairs. At execution, the first pair whose pattern accurately
-describes the tested term is selected, and its corresponding block is
-evaluated. Other blocks are ignored.
-
-We will keep the implementation of patterns as simple as possible. To
-do that, we will put a first constraint: the AST of a pattern must be
-a legal expression AST. This way, we save most of the syntax handling
-trouble. More specifically:
-
-\begin{itemize}
-\item numbers are valid patterns, which only match identical numbers;
-\item strings are valid patterns, which only match identical strings;
-\item tables are valid patterns if all of their keys are integers or
-  strings, and all of their values are valid patterns. A table pattern
-  matches a tested term if:
-  \begin{itemize}
-  \item the tested term is a table;
-  \item every key that appears in the pattern also appears in the
-    tested term;
-  \item for every key, the value associated to this key in the tested
-    term is matched by the sub-pattern associated to this key in the
-    pattern;
-  \end{itemize}
-\item variables are valid patterns, and match everything. Moreover,
-  such a variable captures the term it matches, i.e.
in the - corresponding block, that variable is set to the - matched term. -\end{itemize} - -Notice that since \verb|tag| is a regular field in metalua (albeit -with an optional dedicated syntax), it doesn't need a special case in -our pattern definition. - -\paragraph{Example} Let's consider implementing an evaluator for -Metalua expressions. We will restrict ourselves to binary operators, -variables and numbers; we will also consider that we have a -\verb|values| table mapping variable names to their value, and -\verb|binopfuncs| which associate an operator name with the corresponding -function. The evaluator would be written: - -\begin{Verbatim}[fontsize=\scriptsize] - -function eval(t) - match t with - | `Op{ op, a, b } -> return binopfuncs[op](eval(a), eval(b)) - | `Number{ n } -> return n - | `Variable{ v } -> return values[v] - end -end -\end{Verbatim} - -\subsubsection{Pattern compilation} - -A pattern in a case will translate into: -\begin{itemize} -\item tests: testing that the type of the tested case is the same as - the pattern's type for numbers, strings and tables; -\item local declarations: a variable must be bound to the - tested term's value; -\item nested tests for every pattern key/value pair, when the pattern - is a table. Moreover, since there might be multiple tests to run - against the tested term's field, it should be put in a temporary - variable. -\end{itemize} - -For instance, consider the following pattern: - -\begin{Verbatim}[fontsize=\scriptsize] - -{ tag = "Foo", 10, { 20 }, { x = a }, b } -\end{Verbatim} - -It corresponds to the series of tests and assignments on the left: - -\begin{minipage}{6cm} -\begin{Verbatim}[fontsize=\scriptsize] - -let v1 = -type(v1) == "table" -let v2 = v1.tag -v2 ~= nil -type(v2) == "string" -v2 == "Foo" -let v2 = v1[1] -v2 ~= nil -type(v2) == "number" -v2 == 10 -let v2 = v1[2] -v2 ~= nil -type(v2) == "table" -let v3 = v2[1] -v3 ~= nil -type(v3) == "number" -v3 == 20 -let v2 = v1[3] -v2 ~= nil -type(v2) == "table" -let v3 = v2.x -v3 ~= nil -local a = v3 -let v2 = v1[4] -v2 ~= nil -local b = v2 - - -\end{Verbatim} -\end{minipage} -\begin{minipage}{6cm} -\begin{Verbatim}[fontsize=\scriptsize] - -let v1 = tested_term -if type(v1) == "table" then - let v2 = v1.tag - if v2 ~= nil then - if type(v2) == "string" then - if v2 == "Foo" then - let v2 = v1[1] - if v2 ~= nil then - if type(v2) == "number" then - if v2 == 10 then - let v2 = v1[2] - if v2 ~= nil then - if type(v2) == "table" then - let v3 = v2[1] - if v3 ~= nil then - if type(v3) == "number" then - if v3 == 20 then - let v2 = v1[3] - if v2 ~= nil then - if type(v2) == "table" then - let v3 = v2.x - if v3 ~= nil then - local a = v3 - let v2 = v1[4] - if v2 ~= nil then - local b = v2 - -end ... end -- (16 times) -\end{Verbatim} -\end{minipage} -~\\~\\ - -Notice that the relative order of tests and assignments is meaningful: -we cannot put all assignments on one side, and all tests on an -other, e.g. \verb|v2 = v1.tag| on line 3 doesn't make sense if -\verb|type(v1) == table| on line 2 fails. - -We will compile patterns in several steps: first, accumulate all the -tests and assignments in order; then, we will collapse them in a -single big nesting of ``if'' statements. At first, we won't try -to optimize the form of the final term. The list above left would be -collapsed into the single compound statement above on the right. 
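-
-The compilation code below relies on two tiny helpers, {\tt var()} and
-{\tt accumulate()}. Here is one possible implementation of them (a
-sketch, not the only option, consistent with their description given
-after the {\tt parse\_pattern()} listing):
-
-\begin{Verbatim}[fontsize=\scriptsize]
-
- local acc, max_n = { }, 0
-
- -- store an additional code snippet into the accumulated list
- local function accumulate (x) table.insert (acc, x) end
-
- -- AST of the variable storing the tested term at depth [n];
- -- also remember the biggest [n] ever used, in [max_n]
- local function var (n)
-    max_n = math.max (max_n, n)
-    return `Id{ "v" .. n }
- end
-\end{Verbatim}
-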
- -\paragraph{Accumulating constraints and tests} -This is done by a \verb|parse_pattern()| function, which does just what -is described in the bullet list above: - -\begin{Verbatim}[fontsize=\scriptsize] - - ------------------------------------------------------------------- - -- Turn a pattern into a list of conditions and assignations, - -- stored into [acc]. [n] is the depth of the subpattern into the - -- toplevel pattern; [tested_term] is the AST of the term to be - -- tested; [pattern] is the AST of a pattern, or a subtree of that - -- pattern when [n>0]. - ------------------------------------------------------------------- - local function parse_pattern (n, pattern) - local v = var(n) - if "Number" == pattern.tag or "String" == pattern.tag then - accumulate (+{ -{v} == -{pattern} }) - elseif "Id" == pattern.tag then - accumulate (+{stat: local -{pattern} = -{v} }) - elseif "Table" == pattern.tag then - accumulate (+{ type( -{v} ) == "table" } ) - local idx = 1 - for _, x in ipairs (pattern) do - local w = var(n+1) - local key, sub_pattern - if x.tag=="Key" - then key = x[1]; sub_pattern = x[2] - else key = `Number{ idx }; sub_pattern = x; idx=idx+1 end - accumulate (+{stat: (-{w}) = -{v} [-{key}] }) - accumulate (+{ -{w} ~= nil }) - parse_pattern (n+1, sub_pattern) - end - else error "Invalid pattern type" end - end - ------------------------------------------------------------------- -\end{Verbatim} - -This function relies on the following helper functions: -\begin{itemize} -\item {\tt var($n$)} generates the variable name ``\verb|v|$n$'', which - is used to store the tested term at depth level $n$. Indeed, - sub-patterns in table fields are matched against sub-terms of the - tested term. It also remembers of the biggest $n$ it ever received, - and stores it into \verb|max_n| (this will be used to know which - local vars have to be generated, see below); -\item {\tt accumulate()} just stores an additional code snippet in a - dedicated list; -\end{itemize} - -In the quotes, you might notice the parentheses around the variable in -``\verb|(-{w}) = -{v} [-{key}]|'': they're here to let the compiler -know that what's in \verb|-{...}| is an expression, not a -statement. Without them, it would expect {\tt w} to be the AST of a -statement (since we're in a statement context at this point), then -choke on the unexpected ``='' symbol. This is a common idiom, which -you should think about everytime you generate an assignment to an -anti-quoted identifier. - -\paragraph{Collapsing the accumulated quotes} -As written above, our collapsing function will be kept as simple -as possible, and will not try to minimize the amount of generated -code. It takes as parameters {\tt n}, the index of the quote currently -collapsed in the accumulator, and {\tt inner\_term} the statement -block to put inside the innermost part of the test. It calls itself -recursively, so that the collapsed term is built inside out (generally -speaking, working with trees, including AST, involves a lot of -recursive functions). \verb|acc| is the list filled by the -\verb|accumulate()| function: - -\begin{Verbatim}[fontsize=\scriptsize] - - ------------------------------------------------------------------- - -- Turn a list of tests and assignations into [acc] into a - -- single term of nested conditionals and assignments. - -- [inner_term] is the AST of a term to be put into the innermost - -- conditionnal, after all assignments. [n] is the index in [acc] - -- of the term currently parsed. 
- -- - -- This is a recursive function, which builds the inner part of - -- the statement first, then surrounds it with nested - -- [if ... then ... end], [local ... = ...] and [let ... = ...] - -- statements. - ------------------------------------------------------------------- - local function collapse (n, inner_term) - assert (not inner_term.tag, "collapse inner term must be a block") - if n > #acc then return inner_term end - local it = acc[n] - local inside = collapse (n+1, inner_term) - assert (not inside.tag, "collapse must produce a block") - if "Op" == it.tag then - -- [it] is a test, put it in an [if ... then .. end] statement - return +{block: if -{it} then -{inside} end } - else - -- [it] is a statement, just add it at the result's beginning. - assert ("Let" == it.tag or "Local" == it.tag) - return { it, unpack (inside) } - end - end -\end{Verbatim} - -To fully understand this function, one must remember that test -operations are translated into {\tt`Op\{ , , \}} -AST. That's why we didn't have to put an explicit reminder in the -accumulator, remembering whether a quote was a test or a statement: -we'll just check for \verb|`Op|'s presence. - -\subsubsection{Match statement compilation} -We already know how to translate a single pattern into a Lua test -statement. This statement will execute a given block, with all pattern -variables bound appropriately, if and only if the tested term matches -the pattern. - -To build a complete match statement, we need to test all patterns in -sequence. When one of them succeeds, we must skip all the following -cases. We could do that with some complex ``\verb|else|'' parts in the -tests, but there is a simpler way to jump out of some nested blocks: -the ``\verb|break|'' statement. Therefore, we will enclose the cases -into a loop that executes exactly once, so that a ``\verb|break|'' -will jump just after the last case. Then, we will add a -``\verb|break|'' at the end of every statement that doesn't already -finish with a ``\verb|return|'', so that other cases are skipped upon -successful matching. To sum up, we will translate this: - -\begin{Verbatim}[fontsize=\scriptsize] - -match with -| -> -| -> - ... -| -> -end -\end{Verbatim} - -\noindent into this: - -\begin{Verbatim}[fontsize=\scriptsize] - -repeat - local v1, v2, ... vx -- variables used to store subpatterns - let v1 = - if ... then x1; break end - if ... then x2; break end - ... - if ... then xn; break end -until true -\end{Verbatim} - -First, we add final \verb|break| statements where required, and we -compile all (pattern, block) pairs: - -\begin{Verbatim}[fontsize=\scriptsize] - - ------------------------------------------------------------------- - -- parse all [pattern ==> block] pairs. Result goes in [body]. 
- ------------------------------------------------------------------- - local body = { } - for _, case in ipairs (cases) do - acc = { } -- reset the accumulator - parse_pattern (1, case[1], var(1)) -- fill [acc] with conds and lets - local last_stat = case[2][#case[2]] - if last_stat and last_stat.tag ~= "Break" and - last_stat.tag ~= "Return" then - table.insert (case[2], `Break) -- to skip other cases on success - end - local compiled_case = collapse (1, case[2]) - for _, x in ipairs (compiled_case) do table.insert (body, x) end - end -\end{Verbatim} - -\noindent Then, we can just splice it into the appropriate quote: - -\begin{Verbatim}[fontsize=\scriptsize] - - ------------------------------------------------------------------- - local local_vars = { } - for i = 1, max_n do table.insert (local_vars, var(i)) end - - ------------------------------------------------------------------- - -- cases are put inside a [repeat until true], so that the [break] - -- inserted after the value will jump after the last case on success. - ------------------------------------------------------------------- - local result = +{ stat: - repeat - -{ `Local{ local_vars, { } } } - (-{var(1)}) = -{tested_term} - -{ body } - until true } - return result - ------------------------------------------------------------------- -\end{Verbatim} - -There is one point to notice in this quote: \verb|body| is used where -a statement is expected, although it contains a {\em list} if -statements rather than a single statement. Metalua is designed to -accept this, i.e. if {\tt a, b, c, d} are statements, AST {\tt -`Do\{ a, b, c, d \}} and {\tt`Do\{ a, \{ b, c \}, d\} } are -equivalent. This feature partially replaces Lisp's \verb|@(...)| -operator. - -\subsubsection{Syntax extension} -To use this, we provide a syntax inspired by OCaml\footnote{It is - actually the same syntax as OCaml's, except that we introduced an - explicit {\tt end} terminator, to stay homogeneous with Lua.}: - -\begin{Verbatim}[fontsize=\scriptsize] - -match with - -> block -| -> block - ... -| -> block -end -\end{Verbatim} - -For this, we need to declare new keywords \verb|match|, -\verb|with| and \verb|->|. Then, we build the (pattern, block) parser -with \verb|gg.sequence{ }|, and read a list of them, separated with -``\verb+|+'', thanks to \verb|gg.list{ }|. Moreover, we accept an -optional ``\verb+|+'' before the first case, so that all cases line up -nicely: - -\begin{Verbatim}[fontsize=\scriptsize] - ----------------------------------------------------------------------- -mlp.lexer:add{ "match", "with", "->" } - -mlp.stat:add{ "match", mlp.expr, "with", gg.optkeyword "|", - gg.list{ gg.sequence{ mlp.expr, "->", mlp.block }, - separators = "|", - terminators = "end" }, - "end", - builder = |x| match_parser (x[1], x[3]) } ----------------------------------------------------------------------- -\end{Verbatim} - -\noindent Now, if you try this\ldots\ it won't work! Indeed, Metalua -needs to know what keywords might terminate a block of statements. In -this case, it doesn't know that ``\verb+|+'' can terminate a block. We -need therefore to add the following statement: - -\begin{Verbatim}[fontsize=\scriptsize] - -mlp.block.terminators:add "|" -\end{Verbatim} - -\noindent Finally that's it, we have implemented a working pattern -matching system in 75 lines of code! - -\subsubsection{Possible improvements} -Here are a couple of suggestions to further improve the pattern -matching system presented above. 
Some of these proposals can be -implemented very quickly, some others more complex; all of them -present some practical interest. - -The code of the basic version, as presented here, is available at -\url{http://metalua.luaforge.net/FIXME}. - -\paragraph{Booleans} Boolean constants aren't handled in the system -above, neither as table keys nor as patterns. They're easy to add, and -doing it will help you get gently into the code. - -\paragraph{Gotos considered beneficial} Gotos might be harmful in -hand-written programs, but they're a bliss for machine-generated -code. They would slightly simplify the code of pattern matching as -presented above; but for many extension proposals listed below, they -will make reasonnably easy some things which would otherwise be -awfully contrived. Exercice: simplify the implementation above as much -as possible by using gotos. - -Labels and gotos in metalua ASTs are represented as {\tt`Label\{ id - \}} and {\tt`Goto\{ id \}} respectively, with {\tt id} an -identifier, typically generated by {\tt mlp.gensym()}. It is always -safe to jump out of a block; jumping into a block is not guaranteed -against weird interactions with local variables and upvalues. - -\paragraph{{\tt collapse()} optimization} Instead of nesting if -statements systematically, two nested {\tt if}s without {\tt else} -branches can be simplified in a single branch with an {\tt and} -operator. Not sure it would change the bytecode's efficiency, but -that's a good exercice of AST manipulation. - -\paragraph{Superfluous assignments} When parsing a table entry, we -assign it to a variable, then recursively call {\tt parse\_pattern()} -on it; in the frequent case where the entry was simply a variable, it -re-assigns it again. This could be optimized away. - -\paragraph{Checking table arity} In many cases, it is practical to -check the number of elements in the array-part of the table. Here is a -simple modification proposal: by default, a table pattern matches a -table term only if they have the same number of array-part -elements. However, if the last element of the pattern is {\tt`Dots} -(a.k.a. {\tt+\{...\}}), then the term simply has to have -{\it at least} as many array-part elements as the pattern. - -\paragraph{Adding guards} -It is sometimes desirable to add arbitrary conditions for a pattern to -match, conditions which might no be expressed by a pattern. OCaml -allows to add them with a ``\verb|when|'' keyword: -\begin{Verbatim}[fontsize=\scriptsize] - -match n with -| 0 -> print "zero" -| n when n%2 == 0 -> print "even number" -| _ -> print "odd number" -end -\end{Verbatim} -I'd advise you to prefer {\tt if} as a dedicated keyword, rather than -{\tt when}: it's unambiguous in this context, and saves a keyword -reservation. - -\paragraph{More bindings} -The way pattern matching is currently implemented, one can either bind -a subterm to a variable, or check its structure against a sub-pattern, -not both simultaneously. OCaml provides an ``\verb|as|'' operator, -which allows to do both (Haskell calls it ``\verb|@|''). For instance, -in the following example, any ADT whose tag is \verb|"RepeatMe"| will -be replaced by two occurrences of itself, while others will remain -unchanged: -\begin{Verbatim}[fontsize=\scriptsize] - -match something with -| `RepeatMe{ ... } as r -> { r, r } -| x -> x -end -\end{Verbatim} -``\verb|as|'' will have to be declared as an infix operator, whose -meaning will remain undefined in expressions which are not patterns. 
- -As an alternative, you can reuse an existing infix operator, thus -avoiding to mess the expression parser. For instance, use {\tt *} -instead of {\tt as}. You can go further, and implement {\tt +} as an -``or'' operator ({\tt pattern1 + pattern2} would match if either -of the patterns matches), although this would significantly complicate -the implementation of {\tt parse\_pattern()}. - -The {\tt+} operator might prove tricky to implement, if you don't -convert your code generator to gotos and labels first. - -\paragraph{Linear bindings} -We should check, when compiling a pattern, that it is left-linear, -i.e. that variables don't appear more than once in the pattern. People -might be tempted to write things like this to check whether a tree is -symmetric: -\begin{Verbatim}[fontsize=\scriptsize] - -match t with -| `Tree{ x, x } -> print "Symmetric!" -| `Tree{ x, y } -> print "Not symmetric" -| `Leaf{ _ } -> print "Symmetric!" -end -\end{Verbatim} -However, this would work in Prolog but not with our pattern matching, -as two occurences of the same variable in the pattern don't cause an -equality test to be added. We should detect such non-linear variables, -and implement a suitable reaction: -\begin{itemize} -\item throw an error, or at least a warning; -\item or add an equality test between the terms matched by the - non-linear variable; -\item or offer a syntax extension that lets the user provide his own - equality predicate. -\end{itemize} - -Notice that the latter choice would drive us towards a Prolog -unification algorithm, which opens interesting opportunities. - -You might offer an exception for variable ``{\tt\_}'', which is often -intended as a dummy, unused variable. Non-linear occurences of it -should probably be silently accepted, without even performing the -corresponding binding. - -\paragraph{Generalized assignments} -Yet another OCaml-inspired feature: assignments such as -``\verb|foo = bar|'', is almost a special -case of pattern matching with only one case: the left-hand side is -the pattern, and the right-hand side is the ``raw'' ``\verb|foo=bar|'' -assignment. Seen this way, it allows to write things such as -``\verb|`If{ cond, block } = some_ast }|'' to assign \verb|cond| and -\verb|block| to the subparts of \verb|some_ast| (if we know that -\verb|some_ast| is the AST of an \verb|if| statement). - -If you go this way, however, make sure that the code generated for -simple {\tt let}s is as efficient as before! Moreover, there is an (easy) -scoping issue: the variables assigned belong to the scope of the -surrounding block. - -\paragraph{Pattern matchings as expressions} -Pattern matching are currently statements, and take statements as -right-hand sides of cases. We could allow pattern matchings where -expressions are expected: these would take expressions instead of -statements as right-hand sides. Two ways to implement this: the dirty -one (hack with functions to change match statements into expressions), -and the clean one (refactoring existing code, so that it is agnostic -about its right-hand side type, and provide two specialized -versions of it for statements and expressions). - -\paragraph{Bootstrap it} -That's something language designers love to do, for largely mystic -reasons: writing a language's compiler in the language itself. Here, -the idea is to re-implement the pattern matching extension by using -pattern matching, and compile it with the older version. 
Comparing the -firsrt and second versions of the code will give you an idea of how -much code clarification is brought to you by the pattern matching -extension. - -\paragraph{Pattern conjunction} Another feature to take from OCaml is -multiple patterns for a single block. Instead of associating one -block with one pattern, cases associate a block with a (non-empty) -list of patterns. All of these patterns have to bond the same -variables, except for {\tt\_}. The first pattern in the list to match -the tested term does the binding. Patterns are separated by -``\verb+|+''. Example: -\begin{Verbatim}[fontsize=\scriptsize] - -match x with -| 1 | 2 | 3 -> print(x) -| n -> print "more than 3" -end -\end{Verbatim} -(Hint: put the block in a local function. $2^{\mathrm{nd}}$ hint: sort -bound variables, e.g. by lexicographic order. Or much simpler and -more effective: convert your code generator to gotos+labels first). - -\paragraph{XML munching} Ever tried to transform some XML document through -XSLT? Did you feel that this was even more kludgy than XML itself? Here -is a challenging proposal: -\begin{itemize} -\item Realize, if you didn't already, that Metalua's ADT are - isomorphic to XML, if you identify string-keys in tables with - attributes, and limit there content to strings and number. For - instance, ``{\tt eek}'' easily maps to ``{\tt - `foo\{ bar=3, `baz, "eek" \}}''; -\item compare what ML-style pattern matching does with what XSLT - does (and with what you'd like it to do); -\item design, implement, publish. You might want to google - ``CDuce''\footnote{\url{http://www.cduce.org}} for neat ideas. -\end{itemize} - -If you do this, I'd be really interested to put back your contribution -in the next version of Metalua! - -\subsubsection{Correction} -Most of the improvements proposed here are actually implemented in the -{\tt match} library provided with metalua. Check its (commented) -sources! +\subsection{Structural pattern matching} + +FIXME: refer to the official match extension instead of re-explaining +what pattern matching is about. + +\subsubsection{Basic principles} +In many languages, including Lua, ``pattern matching'' generally +refers to the ability to: +\begin{itemize} +\item Analyse the general shape of a string; +\item capture specified parts of it into temporary variables +\item do some computation when the string matches a certain pattern, + generally using the temporary captured variables. +\end{itemize} + +In languages derived from ML\footnote{for instance OCaml, SML, Haskell + or Erlang.}, pattern matching can be done on arbitrary data, not +only strings. One can write patterns which describe the shape of a +data structure, bind some interesting sub-parts of it to variables, +and execute some statements associated with a given pattern whenever +the data matches. + +This sample aims to implement this capability into metalua. It will +discuss: +\begin{itemize} +\item How to describe data patterns (it turns out that we'll hijack + metalua's expression parser, which is a great time-saving trick when + it can be pulled). +\item How to compile them into equivalent code. +\item How to wrap all this up into a working compiled statement +\item What improvements can be done to the proposed design. +\end{itemize} + +\subsubsection{Patterns} +A match statement consists of a tested term, and a series of (pattern, +block) pairs. At execution, the first pair whose pattern accurately +describes the tested term is selected, and its corresponding block is +evaluated. 
Other blocks are ignored.
+
+We will keep the implementation of patterns as simple as possible. To
+do that, we will put a first constraint: the AST of a pattern must be
+a legal expression AST. This way, we save most of the syntax handling
+trouble. More specifically:
+
+\begin{itemize}
+\item numbers are valid patterns, which only match identical numbers;
+\item strings are valid patterns, which only match identical strings;
+\item tables are valid patterns if all of their keys are integers or
+  strings, and all of their values are valid patterns. A table pattern
+  matches a tested term if:
+  \begin{itemize}
+  \item the tested term is a table;
+  \item every key that appears in the pattern also appears in the
+    tested term;
+  \item for every key, the value associated to this key in the tested
+    term is matched by the sub-pattern associated to this key in the
+    pattern;
+  \end{itemize}
+\item variables are valid patterns, and match everything. Moreover,
+  such a variable captures the term it matches, i.e. in the
+  corresponding block, that variable is set to the
+  matched term.
+\end{itemize}
+
+Notice that since \verb|tag| is a regular field in metalua (albeit
+with an optional dedicated syntax), it doesn't need a special case in
+our pattern definition.
+
+\paragraph{Example} Let's consider implementing an evaluator for
+Metalua expressions. We will restrict ourselves to binary operators,
+variables and numbers; we will also consider that we have a
+\verb|values| table mapping variable names to their value, and
+\verb|binopfuncs| which associates an operator name with the corresponding
+function. The evaluator would be written:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+function eval(t)
+   match t with
+   | `Op{ op, a, b } -> return binopfuncs[op](eval(a), eval(b))
+   | `Number{ n }    -> return n
+   | `Variable{ v }  -> return values[v]
+   end
+end
+\end{Verbatim}
+
+\subsubsection{Pattern compilation}
+
+A pattern in a case will translate into:
+\begin{itemize}
+\item tests: checking that the type of the tested term is the same as
+  the pattern's type, for numbers, strings and tables;
+\item local declarations: a variable must be bound to the
+  tested term's value;
+\item nested tests for every pattern key/value pair, when the pattern
+  is a table. Moreover, since there might be multiple tests to run
+  against the tested term's field, it should be put in a temporary
+  variable.
+\end{itemize} + +For instance, consider the following pattern: + +\begin{Verbatim}[fontsize=\scriptsize] + +{ tag = "Foo", 10, { 20 }, { x = a }, b } +\end{Verbatim} + +It corresponds to the series of tests and assignments on the left: + +\begin{minipage}{6cm} +\begin{Verbatim}[fontsize=\scriptsize] + +let v1 = +type(v1) == "table" +let v2 = v1.tag +v2 ~= nil +type(v2) == "string" +v2 == "Foo" +let v2 = v1[1] +v2 ~= nil +type(v2) == "number" +v2 == 10 +let v2 = v1[2] +v2 ~= nil +type(v2) == "table" +let v3 = v2[1] +v3 ~= nil +type(v3) == "number" +v3 == 20 +let v2 = v1[3] +v2 ~= nil +type(v2) == "table" +let v3 = v2.x +v3 ~= nil +local a = v3 +let v2 = v1[4] +v2 ~= nil +local b = v2 + + +\end{Verbatim} +\end{minipage} +\begin{minipage}{6cm} +\begin{Verbatim}[fontsize=\scriptsize] + +let v1 = tested_term +if type(v1) == "table" then + let v2 = v1.tag + if v2 ~= nil then + if type(v2) == "string" then + if v2 == "Foo" then + let v2 = v1[1] + if v2 ~= nil then + if type(v2) == "number" then + if v2 == 10 then + let v2 = v1[2] + if v2 ~= nil then + if type(v2) == "table" then + let v3 = v2[1] + if v3 ~= nil then + if type(v3) == "number" then + if v3 == 20 then + let v2 = v1[3] + if v2 ~= nil then + if type(v2) == "table" then + let v3 = v2.x + if v3 ~= nil then + local a = v3 + let v2 = v1[4] + if v2 ~= nil then + local b = v2 + +end ... end -- (16 times) +\end{Verbatim} +\end{minipage} +~\\~\\ + +Notice that the relative order of tests and assignments is meaningful: +we cannot put all assignments on one side, and all tests on an +other, e.g. \verb|v2 = v1.tag| on line 3 doesn't make sense if +\verb|type(v1) == table| on line 2 fails. + +We will compile patterns in several steps: first, accumulate all the +tests and assignments in order; then, we will collapse them in a +single big nesting of ``if'' statements. At first, we won't try +to optimize the form of the final term. The list above left would be +collapsed into the single compound statement above on the right. + +\paragraph{Accumulating constraints and tests} +This is done by a \verb|parse_pattern()| function, which does just what +is described in the bullet list above: + +\begin{Verbatim}[fontsize=\scriptsize] + + ------------------------------------------------------------------- + -- Turn a pattern into a list of conditions and assignations, + -- stored into [acc]. [n] is the depth of the subpattern into the + -- toplevel pattern; [tested_term] is the AST of the term to be + -- tested; [pattern] is the AST of a pattern, or a subtree of that + -- pattern when [n>0]. 
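+  --
+  -- For instance, matching the pattern [{ 10, x }] against [v1]
+  -- accumulates, in order:
+  --   type(v1) == "table"     -- test on the table itself
+  --   (v2) = v1[1]            -- `Let: fetch the first array slot
+  --   v2 ~= nil               -- test
+  --   v2 == 10                -- test against the number pattern
+  --   (v2) = v1[2]            -- `Let: fetch the second array slot
+  --   v2 ~= nil               -- test
+  --   local x = v2            -- `Local: bind the captured variable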
+  -------------------------------------------------------------------
+  local function parse_pattern (n, pattern)
+     local v = var(n)
+     if "Number" == pattern.tag or "String" == pattern.tag then
+        accumulate (+{ -{v} == -{pattern} })
+     elseif "Id" == pattern.tag then
+        accumulate (+{stat: local -{pattern} = -{v} })
+     elseif "Table" == pattern.tag then
+        accumulate (+{ type( -{v} ) == "table" } )
+        local idx = 1
+        for _, x in ipairs (pattern) do
+           local w = var(n+1)
+           local key, sub_pattern
+           if x.tag=="Key"
+           then key = x[1]; sub_pattern = x[2]
+           else key = `Number{ idx }; sub_pattern = x; idx=idx+1 end
+           accumulate (+{stat: (-{w}) = -{v} [-{key}] })
+           accumulate (+{ -{w} ~= nil })
+           parse_pattern (n+1, sub_pattern)
+        end
+     else error "Invalid pattern type" end
+  end
+  -------------------------------------------------------------------
+\end{Verbatim}
+
+This function relies on the following helper functions:
+\begin{itemize}
+\item {\tt var($n$)} generates the variable name ``\verb|v|$n$'', which
+  is used to store the tested term at depth level $n$. Indeed,
+  sub-patterns in table fields are matched against sub-terms of the
+  tested term. It also remembers the biggest $n$ it has ever received,
+  and stores it into \verb|max_n| (this will be used to know which
+  local vars have to be generated, see below);
+\item {\tt accumulate()} just stores an additional code snippet in a
+  dedicated list;
+\end{itemize}
+
+In the quotes, you might notice the parentheses around the variable in
+``\verb|(-{w}) = -{v} [-{key}]|'': they're here to let the compiler
+know that what's in \verb|-{...}| is an expression, not a
+statement. Without them, it would expect {\tt w} to be the AST of a
+statement (since we're in a statement context at this point), then
+choke on the unexpected ``='' symbol. This is a common idiom, which
+you should think about every time you generate an assignment to an
+anti-quoted identifier.
+
+\paragraph{Collapsing the accumulated quotes}
+As written above, our collapsing function will be kept as simple
+as possible, and will not try to minimize the amount of generated
+code. It takes as parameters {\tt n}, the index of the quote currently
+collapsed in the accumulator, and {\tt inner\_term}, the statement
+block to put inside the innermost part of the test. It calls itself
+recursively, so that the collapsed term is built inside out (generally
+speaking, working with trees, including ASTs, involves a lot of
+recursive functions). \verb|acc| is the list filled by the
+\verb|accumulate()| function:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+  -------------------------------------------------------------------
+  -- Turn the list of tests and assignments in [acc] into a
+  -- single term of nested conditionals and assignments.
+  -- [inner_term] is the AST of a term to be put into the innermost
+  -- conditional, after all assignments. [n] is the index in [acc]
+  -- of the term currently parsed.
+  --
+  -- This is a recursive function, which builds the inner part of
+  -- the statement first, then surrounds it with nested
+  -- [if ... then ... end], [local ... = ...] and [let ... = ...]
+  -- statements.
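+  --
+  -- For instance, collapsing the [acc] built for [{ 10, x }] in the
+  -- previous example around an inner block [b] yields:
+  --   if type(v1) == "table" then
+  --      (v2) = v1[1]
+  --      if v2 ~= nil then
+  --         if v2 == 10 then
+  --            (v2) = v1[2]
+  --            if v2 ~= nil then
+  --               local x = v2
+  --               b
+  --            end end end end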
+ ------------------------------------------------------------------- + local function collapse (n, inner_term) + assert (not inner_term.tag, "collapse inner term must be a block") + if n > #acc then return inner_term end + local it = acc[n] + local inside = collapse (n+1, inner_term) + assert (not inside.tag, "collapse must produce a block") + if "Op" == it.tag then + -- [it] is a test, put it in an [if ... then .. end] statement + return +{block: if -{it} then -{inside} end } + else + -- [it] is a statement, just add it at the result's beginning. + assert ("Let" == it.tag or "Local" == it.tag) + return { it, unpack (inside) } + end + end +\end{Verbatim} + +To fully understand this function, one must remember that test +operations are translated into {\tt`Op\{ , , \}} +AST. That's why we didn't have to put an explicit reminder in the +accumulator, remembering whether a quote was a test or a statement: +we'll just check for \verb|`Op|'s presence. + +\subsubsection{Match statement compilation} +We already know how to translate a single pattern into a Lua test +statement. This statement will execute a given block, with all pattern +variables bound appropriately, if and only if the tested term matches +the pattern. + +To build a complete match statement, we need to test all patterns in +sequence. When one of them succeeds, we must skip all the following +cases. We could do that with some complex ``\verb|else|'' parts in the +tests, but there is a simpler way to jump out of some nested blocks: +the ``\verb|break|'' statement. Therefore, we will enclose the cases +into a loop that executes exactly once, so that a ``\verb|break|'' +will jump just after the last case. Then, we will add a +``\verb|break|'' at the end of every statement that doesn't already +finish with a ``\verb|return|'', so that other cases are skipped upon +successful matching. To sum up, we will translate this: + +\begin{Verbatim}[fontsize=\scriptsize] + +match with +| -> +| -> + ... +| -> +end +\end{Verbatim} + +\noindent into this: + +\begin{Verbatim}[fontsize=\scriptsize] + +repeat + local v1, v2, ... vx -- variables used to store subpatterns + let v1 = + if ... then x1; break end + if ... then x2; break end + ... + if ... then xn; break end +until true +\end{Verbatim} + +First, we add final \verb|break| statements where required, and we +compile all (pattern, block) pairs: + +\begin{Verbatim}[fontsize=\scriptsize] + + ------------------------------------------------------------------- + -- parse all [pattern ==> block] pairs. Result goes in [body]. 
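+  -- [cases] is assumed to be the list built by the parser shown in
+  -- the next subsection: each [case] is a pair, [case[1]] being a
+  -- pattern expression AST and [case[2]] the corresponding block.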
+  -------------------------------------------------------------------
+  local body = { }
+  for _, case in ipairs (cases) do
+     acc = { } -- reset the accumulator
+     parse_pattern (1, case[1], var(1)) -- fill [acc] with conds and lets
+     local last_stat = case[2][#case[2]]
+     if last_stat and last_stat.tag ~= "Break" and
+        last_stat.tag ~= "Return" then
+        table.insert (case[2], `Break) -- to skip other cases on success
+     end
+     local compiled_case = collapse (1, case[2])
+     for _, x in ipairs (compiled_case) do table.insert (body, x) end
+  end
+\end{Verbatim}
+
+\noindent Then, we can just splice it into the appropriate quote:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+  -------------------------------------------------------------------
+  local local_vars = { }
+  for i = 1, max_n do table.insert (local_vars, var(i)) end
+
+  -------------------------------------------------------------------
+  -- cases are put inside a [repeat until true], so that the [break]
+  -- inserted after the value will jump after the last case on success.
+  -------------------------------------------------------------------
+  local result = +{ stat:
+     repeat
+        -{ `Local{ local_vars, { } } }
+        (-{var(1)}) = -{tested_term}
+        -{ body }
+     until true }
+  return result
+  -------------------------------------------------------------------
+\end{Verbatim}
+
+There is one point to notice in this quote: \verb|body| is used where
+a statement is expected, although it contains a {\em list} of
+statements rather than a single statement. Metalua is designed to
+accept this, i.e. if {\tt a, b, c, d} are statements, the ASTs {\tt
+`Do\{ a, b, c, d \}} and {\tt`Do\{ a, \{ b, c \}, d\} } are
+equivalent. This feature partially replaces Lisp's \verb|@(...)|
+operator.
+
+\subsubsection{Syntax extension}
+To use this, we provide a syntax inspired by OCaml\footnote{It is
+  actually the same syntax as OCaml's, except that we introduced an
+  explicit {\tt end} terminator, to stay homogeneous with Lua.}:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match with
+ -> block
+| -> block
+ ...
+| -> block
+end
+\end{Verbatim}
+
+For this, we need to declare the new keywords \verb|match|,
+\verb|with| and \verb|->|. Then, we build the (pattern, block) parser
+with \verb|gg.sequence{ }|, and read a list of them, separated with
+``\verb+|+'', thanks to \verb|gg.list{ }|. Moreover, we accept an
+optional ``\verb+|+'' before the first case, so that all cases line up
+nicely:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+----------------------------------------------------------------------
+mlp.lexer:add{ "match", "with", "->" }
+
+mlp.stat:add{ "match", mlp.expr, "with", gg.optkeyword "|",
+              gg.list{ gg.sequence{ mlp.expr, "->", mlp.block },
+                       separators  = "|",
+                       terminators = "end" },
+              "end",
+              builder = |x| match_parser (x[1], x[3]) }
+----------------------------------------------------------------------
+\end{Verbatim}
+
+\noindent Now, if you try this\ldots\ it won't work! Indeed, Metalua
+needs to know what keywords might terminate a block of statements. In
+this case, it doesn't know that ``\verb+|+'' can terminate a block. We
+therefore need to add the following statement:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+mlp.block.terminators:add "|"
+\end{Verbatim}
+
+\noindent And that's it: we have implemented a working pattern
+matching system in 75 lines of code!
+
+\subsubsection{Possible improvements}
+Here are a couple of suggestions to further improve the pattern
+matching system presented above.
Some of these proposals can be
+implemented very quickly, some others are more complex; all of them
+present some practical interest.
+
+The code of the basic version, as presented here, is available at
+\url{http://metalua.luaforge.net/FIXME}.
+
+\paragraph{Booleans} Boolean constants aren't handled in the system
+above, neither as table keys nor as patterns. They're easy to add, and
+doing it will help you get gently into the code.
+
+\paragraph{Gotos considered beneficial} Gotos might be harmful in
+hand-written programs, but they're a bliss for machine-generated
+code. They would slightly simplify the code of pattern matching as
+presented above; and for many extension proposals listed below, they
+will make reasonably easy some things which would otherwise be
+awfully contrived. Exercise: simplify the implementation above as much
+as possible by using gotos.
+
+Labels and gotos in metalua ASTs are represented as {\tt`Label\{ id
+  \}} and {\tt`Goto\{ id \}} respectively, with {\tt id} an
+identifier, typically generated by {\tt mlp.gensym()}. It is always
+safe to jump out of a block; jumping into a block is not guaranteed
+against weird interactions with local variables and upvalues.
+
+\paragraph{{\tt collapse()} optimization} Instead of nesting if
+statements systematically, two nested {\tt if}s without {\tt else}
+branches can be merged into a single branch with an {\tt and}
+operator. It is not clear whether this would change the bytecode's
+efficiency, but it's a good exercise in AST manipulation.
+
+\paragraph{Superfluous assignments} When parsing a table entry, we
+assign it to a variable, then recursively call {\tt parse\_pattern()}
+on it; in the frequent case where the entry was simply a variable, it
+gets re-assigned again. This could be optimized away.
+
+\paragraph{Checking table arity} In many cases, it is practical to
+check the number of elements in the array-part of the table. Here is a
+simple modification proposal: by default, a table pattern matches a
+table term only if they have the same number of array-part
+elements. However, if the last element of the pattern is {\tt`Dots}
+(a.k.a. {\tt+\{...\}}), then the term simply has to have
+{\it at least} as many array-part elements as the pattern.
+
+\paragraph{Adding guards}
+It is sometimes desirable to add arbitrary conditions for a pattern to
+match, conditions which might not be expressible by a pattern. OCaml
+lets you add them with a ``\verb|when|'' keyword:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match n with
+| 0 -> print "zero"
+| n when n%2 == 0 -> print "even number"
+| _ -> print "odd number"
+end
+\end{Verbatim}
+I'd advise you to prefer {\tt if} as a dedicated keyword, rather than
+{\tt when}: it's unambiguous in this context, and saves a keyword
+reservation.
+
+\paragraph{More bindings}
+The way pattern matching is currently implemented, one can either bind
+a subterm to a variable, or check its structure against a sub-pattern,
+not both simultaneously. OCaml provides an ``\verb|as|'' operator,
+which allows both (Haskell calls it ``\verb|@|''). For instance,
+in the following example, any ADT whose tag is \verb|"RepeatMe"| will
+be replaced by two occurrences of itself, while others will remain
+unchanged:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match something with
+| `RepeatMe{ ... } as r -> { r, r }
+| x -> x
+end
+\end{Verbatim}
+``\verb|as|'' will have to be declared as an infix operator, whose
+meaning will remain undefined in expressions which are not patterns.
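+
+As a sketch of the compilation side (assuming the new infix operator
+is declared so that it builds an AST of the hypothetical shape
+{\tt`Op\{ "as", pattern, variable \}}), one extra clause in
+{\tt parse\_pattern()} would suffice: bind the whole tested term,
+then match it against the sub-pattern.
+
+\begin{Verbatim}[fontsize=\scriptsize]
+
+  elseif "Op" == pattern.tag and "as" == pattern[1] then
+     local sub_pattern, id = pattern[2], pattern[3]
+     accumulate (+{stat: local -{id} = -{v} }) -- capture the whole term
+     parse_pattern (n, sub_pattern)            -- then check its shape
+\end{Verbatim}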
+
+As an alternative, you can reuse an existing infix operator, thus
+avoiding messing with the expression parser. For instance, use {\tt *}
+instead of {\tt as}. You can go further, and implement {\tt +} as an
+``or'' operator ({\tt pattern1 + pattern2} would match if either
+of the patterns matches), although this would significantly complicate
+the implementation of {\tt parse\_pattern()}.
+
+The {\tt+} operator might prove tricky to implement, if you don't
+convert your code generator to gotos and labels first.
+
+\paragraph{Linear bindings}
+We should check, when compiling a pattern, that it is left-linear,
+i.e. that variables don't appear more than once in the pattern. People
+might be tempted to write things like this to check whether a tree is
+symmetric:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match t with
+| `Tree{ x, x } -> print "Symmetric!"
+| `Tree{ x, y } -> print "Not symmetric"
+| `Leaf{ _ }    -> print "Symmetric!"
+end
+\end{Verbatim}
+However, this would work in Prolog but not with our pattern matching,
+as two occurrences of the same variable in the pattern don't cause an
+equality test to be added. We should detect such non-linear variables,
+and implement a suitable reaction:
+\begin{itemize}
+\item throw an error, or at least a warning;
+\item or add an equality test between the terms matched by the
+  non-linear variable;
+\item or offer a syntax extension that lets the user provide their own
+  equality predicate.
+\end{itemize}
+
+Notice that the latter choice would drive us towards a Prolog
+unification algorithm, which opens interesting opportunities.
+
+You might offer an exception for the variable ``{\tt\_}'', which is often
+intended as a dummy, unused variable. Non-linear occurrences of it
+should probably be silently accepted, without even performing the
+corresponding binding.
+
+\paragraph{Generalized assignments}
+Yet another OCaml-inspired feature: an assignment such as
+``\verb|foo = bar|'' is almost a special
+case of pattern matching with only one case: the left-hand side is
+the pattern, and the right-hand side is the ``raw'' ``\verb|foo=bar|''
+assignment. Seen this way, it allows writing things such as
+``\verb|`If{ cond, block } = some_ast|'' to assign \verb|cond| and
+\verb|block| to the subparts of \verb|some_ast| (if we know that
+\verb|some_ast| is the AST of an \verb|if| statement).
+
+If you go this way, however, make sure that the code generated for
+simple {\tt let}s is as efficient as before! Moreover, there is an (easy)
+scoping issue: the variables assigned belong to the scope of the
+surrounding block.
+
+\paragraph{Pattern matchings as expressions}
+Pattern matchings are currently statements, and take statements as
+right-hand sides of cases. We could allow pattern matchings where
+expressions are expected: these would take expressions instead of
+statements as right-hand sides. There are two ways to implement this:
+the dirty one (hack with functions to change match statements into
+expressions), and the clean one (refactoring the existing code, so
+that it is agnostic about its right-hand side type, and providing two
+specialized versions of it for statements and expressions).
+
+\paragraph{Bootstrap it}
+That's something language designers love to do, for largely mystic
+reasons: writing a language's compiler in the language itself. Here,
+the idea is to re-implement the pattern matching extension by using
+pattern matching, and compile it with the older version.
Comparing the
+first and second versions of the code will give you an idea of how
+much code clarification is brought to you by the pattern matching
+extension.
+
+\paragraph{Pattern conjunction} Another feature to take from OCaml is
+multiple patterns for a single block. Instead of associating one
+block with one pattern, cases associate a block with a (non-empty)
+list of patterns. All of these patterns have to bind the same
+variables, except for {\tt\_}. The first pattern in the list to match
+the tested term does the binding. Patterns are separated by
+``\verb+|+''. Example:
+\begin{Verbatim}[fontsize=\scriptsize]
+
+match x with
+| 1 | 2 | 3 -> print(x)
+| n -> print "more than 3"
+end
+\end{Verbatim}
+(Hint: put the block in a local function. $2^{\mathrm{nd}}$ hint: sort
+bound variables, e.g. by lexicographic order. Or much simpler and
+more effective: convert your code generator to gotos+labels first.)
+
+\paragraph{XML munching} Ever tried to transform some XML document through
+XSLT? Did you feel that this was even more kludgy than XML itself? Here
+is a challenging proposal:
+\begin{itemize}
+\item Realize, if you didn't already, that Metalua's ADTs are
+  isomorphic to XML, if you identify string-keys in tables with
+  attributes, and limit their content to strings and numbers. For
+  instance, ``{\tt <foo bar=3><baz/>eek</foo>}'' easily maps to ``{\tt
+    `foo\{ bar=3, `baz, "eek" \}}'';
+\item compare what ML-style pattern matching does with what XSLT
+  does (and with what you'd like it to do);
+\item design, implement, publish. You might want to google
+  ``CDuce''\footnote{\url{http://www.cduce.org}} for neat ideas.
+\end{itemize}
+
+If you do this, I'd be really interested in putting your contribution
+back into the next version of Metalua!
+
+\subsubsection{Correction}
+Most of the improvements proposed here are actually implemented in the
+{\tt match} library provided with metalua. Check its (commented)
+sources!
diff --git a/doc/manual/springs-ref.tex b/doc/manual/springs-ref.tex
index 01c62e9..9b26635 100644
--- a/doc/manual/springs-ref.tex
+++ b/doc/manual/springs-ref.tex
@@ -1,42 +1,42 @@
-\section{{\tt springs}: separate universes for Lua}
-
-\subsection{Origins and purpose}
-Springs (Serialization through Pluto for RINGS) is an extension of Lua Rings and
-Pluto: Lua Rings allows the creation of new Lua states from within Lua, but offers
-limited communication between them: a master universe can only send instructions
-to a slave universe through a ``{\tt dostring}'', and the slave universe can
-only send back strings, integers and booleans as results. Since Pluto allows
-serializing pretty much any Lua value as a string, it's used to create powerful
-bidirectional communications between universes.
-
-Springs is used internally by metalua to prevent different files' compile time
-actions from interfering with each other: each file is compiled on a fresh clean
-single-use slate.
-
-The underlying projects can be found on the web:
-\begin{itemize}
-\item \verb||
-\item \verb||
-\end{itemize}
-Notice however that the Pluto version used in metalua has been significantly
-patched and debugged by Ivko Stanilov.
-
-\subsection{API}
-Go to the Lua Rings web site for a reference on its original API. This API is
-extended by springs with:
-\begin{itemize}
-\item function {\tt springs.new()} which creates a new universe ready for Pluto
-  communication;
-\item ({\tt:dostring()} works as usual)
-\item {\tt :pcall(f, arg1, ..., argn)} works as the standard function pcall(),
-  except that execution occurs in the sub-state.
Arguments are passed and
-  results are returned transparently across universes. Moreover, 'f' can also
-  be a string, rather than a function. If it's a string, it must evaluate to a
-  function in the substate's context. This allows standard functions to be
-  passed easily. For instance:\\
-  \verb|r:pcall('table.concat', {'a', 'b', 'c'}, ',')|
-\item {\tt :call()} is similar to :pcall(), except that in case of error, it
-  actually throws the error in the sender universe's context. Therefore, it
-  doesn't return a success status as does pcall(). For instance: \\
-  \verb|assert('xxx' == r:call('string.rep', 'x', 3))|
-\end{itemize}
+\section{{\tt springs}: separate universes for Lua}
+
+\subsection{Origins and purpose}
+Springs (Serialization through Pluto for RINGS) is an extension of Lua Rings and
+Pluto: Lua Rings allows the creation of new Lua states from within Lua, but offers
+limited communication between them: a master universe can only send instructions
+to a slave universe through a ``{\tt dostring}'', and the slave universe can
+only send back strings, integers and booleans as results. Since Pluto allows
+serializing pretty much any Lua value as a string, it's used to create powerful
+bidirectional communications between universes.
+
+Springs is used internally by metalua to prevent different files' compile time
+actions from interfering with each other: each file is compiled on a fresh clean
+single-use slate.
+
+The underlying projects can be found on the web:
+\begin{itemize}
+\item \verb||
+\item \verb||
+\end{itemize}
+Notice however that the Pluto version used in metalua has been significantly
+patched and debugged by Ivko Stanilov.
+
+\subsection{API}
+Go to the Lua Rings web site for a reference on its original API. This API is
+extended by springs with:
+\begin{itemize}
+\item function {\tt springs.new()} which creates a new universe ready for Pluto
+  communication;
+\item ({\tt:dostring()} works as usual)
+\item {\tt :pcall(f, arg1, ..., argn)} works as the standard function pcall(),
+  except that execution occurs in the sub-state. Arguments are passed and
+  results are returned transparently across universes. Moreover, 'f' can also
+  be a string, rather than a function. If it's a string, it must evaluate to a
+  function in the substate's context. This allows standard functions to be
+  passed easily. For instance:\\
+  \verb|r:pcall('table.concat', {'a', 'b', 'c'}, ',')|
+\item {\tt :call()} is similar to :pcall(), except that in case of error, it
+  actually throws the error in the sender universe's context. Therefore, it
+  doesn't return a success status as does pcall(). For instance: \\
+  \verb|assert('xxx' == r:call('string.rep', 'x', 3))|
+\end{itemize}
diff --git a/doc/manual/trycatch-ref.tex b/doc/manual/trycatch-ref.tex
index dcdb9fb..32375ea 100644
--- a/doc/manual/trycatch-ref.tex
+++ b/doc/manual/trycatch-ref.tex
@@ -1,87 +1,87 @@
-\section{Extension {\tt trywith}: exceptions and finalization}
-Lua offers the error handling primitives \verb+pcall()+ and
-\verb+xpcall()+. However, they are pretty low level, and their syntax
-is cumbersome to use and read back. This extension offers a proper
-syntax for the handling of such exceptions.
-
-\subsection{Syntax}
-An error handling statement has the following form:
-
-\begin{verbatim}
-try
-
-catch then
-
-catch then
-
-   ...
-catch then
-
-finally
-
-end
-\end{verbatim}
-
-\subsection{Semantics}
-When such a statement is executed:
-\begin{itemize}
-\item the protected code is executed;
-\item if its execution causes an error, the error message is matched
-  against all exception patterns, until one matches or the last one is
-  reached (see the \verb+match+ extension for patterns
-  semantics). Patterns can include guard clauses. The block
-  corresponding to the first matching pattern is executed. If no
-  pattern matches, the error will be rethrown.
-\item the block following the \verb+finally+ keyword will be executed
-  no matter what: if the protected block executes successfully, if it
-  raises an error, if it causes a \verb+return+, if an error occurs in
-  an exception handling block, whether an error is caught or
-  not... The only reason why the finalization block might not be run
-  is a sudden death of the process (a call to {\tt os.exit()}
-  or a core dump).
-\end{itemize}
-
-The finally block can be omitted, or there can be no error catching
-case. The following examples are legal:
-
-\begin{verbatim}
-try
-   f = io.open ('file.txt', 'r')
-   num_char = #f:read '*a'
-finally
-   f:close()
-end
-
-try
-   do_stuff()
-catch "mismatch" then
-   print "match statement failure"
-catch "[Ee]of"/_ then -- regexp pattern
-   print "An end-of-file error seems to have happened"
-catch x if type(x)=='table' then
-   print "The error is a table, not a string!"
-end
-\end{verbatim}
-
-\subsection{RAII}
-RAII, or ``Resource Acquisition Is Initialization'', is a common
-programming pattern in which an object's lifetime is strictly
-associated with a given lexical scope. For instance, if a file is
-opened in a given scope, it must be closed as soon as this scope is
-left, even if it's left due to a {\tt return} or an error. The
-``finally'' block allows this, but since it's a very common use case,
-there is a dedicated extension ``with/do'': you initialize some resource
-behind the ``with'' keyword, and it will be closed after the ``do''
-block is left. The only constraint is that the resources must have a
-{\tt:close()} method which releases them. Here is a usage example:
-
-\begin{verbatim}
--{ extension 'withdo' }
-
-with f1, f2 = io.open 'file1.txt', io.open 'file2.txt' do
-   local t1 = f1:read '*a'
-   local t2 = f2:read '*a'
-   printf("The files contain %i and %i chars respectively", t1, t2)
-end
-\end{verbatim}
+\section{Extension {\tt trywith}: exceptions and finalization}
+Lua offers the error handling primitives \verb+pcall()+ and
+\verb+xpcall()+. However, they are pretty low level, and their syntax
+is cumbersome to use and read back. This extension offers a proper
+syntax for the handling of such exceptions.
+
+\subsection{Syntax}
+An error handling statement has the following form:
+
+\begin{verbatim}
+try
+
+catch then
+
+catch then
+
+   ...
+catch then
+
+finally
+
+end
+\end{verbatim}
+
+\subsection{Semantics}
+When such a statement is executed:
+\begin{itemize}
+\item the protected code is executed;
+\item if its execution causes an error, the error message is matched
+  against all exception patterns, until one matches or the last one is
+  reached (see the \verb+match+ extension for patterns
+  semantics). Patterns can include guard clauses. The block
+  corresponding to the first matching pattern is executed. If no
+  pattern matches, the error will be rethrown.
+\item the block following the \verb+finally+ keyword will be executed
+ no matter what: if the protected block executes successfully, if it
+ raises an error, if it causes a \verb+return+, if an error occurs in
+ an exception handling block, whether an error is caught or
+ not... The only reason why the finalization block might not be run
+ is a sudden death of the process (a call to {\tt os.exit()}
+ or a core dump).
+\end{itemize}
+
+The finally block can be omitted, or there can be no error catching
+case. The following examples are legal:
+
+\begin{verbatim}
+try
+   f = io.open ('file.txt', 'r')
+   num_char = #f:read '*a'
+finally
+   f:close()
+end
+
+try
+   do_stuff()
+catch "mismatch" then
+   print "match statement failure"
+catch "[Ee]of"/_ then -- regexp pattern
+   print "An end-of-file error seems to have happened"
+catch x if type(x)=='table' then
+   print "The error is a table, not a string!"
+end
+\end{verbatim}
+
+ \subsection{RAII}
+ RAII, or ``Resource Acquisition Is Initialization'', is a common
+ programming pattern in which an object's lifetime is strictly
+ associated with a given lexical scope. For instance, if a file is
+ opened in a given scope, it must be closed as soon as this scope is
+ left, even if it's left due to a {\tt return} or an error. The
+ ``finally'' block allows this, but since it's a very common use case,
+ there is a dedicated extension ``with/do'': you initialize some resource
+ behind the ``with'' keyword, and it will be closed after the ``do''
+ block is left. The only constraint is that the resources must have a
+ {\tt:close()} method which releases them. Here is a usage example:
+
+\begin{verbatim}
+-{ extension 'withdo' }
+
+with f1, f2 = io.open 'file1.txt', io.open 'file2.txt' do
+   local t1 = f1:read '*a'
+   local t2 = f2:read '*a'
+   printf("The files contain %i and %i chars respectively", #t1, #t2)
+end
+\end{verbatim}
+
diff --git a/doc/manual/walk-ref.tex b/doc/manual/walk-ref.tex
index d736231..c76a7ea 100644
--- a/doc/manual/walk-ref.tex
+++ b/doc/manual/walk-ref.tex
@@ -1,494 +1,494 @@
-\section{{\tt walk}, the code walker}
-
-When you write advanced macros, or when you're analyzing some code to check for
-some property, you often need to design a function that walks through an
-arbitrary piece of code, and does complex stuff on it. Such a function is called
-a code walker. Code walkers can be used for some one-off adjustments, e.g.
-changing a function's {\tt return} statements into something else, or checking
-that a loop performs no {\tt break}, up to pretty advanced transformations, such
-as CPS transformation (a way to encode full continuations into a language that
-doesn't support them natively; see Paul Graham's On Lisp for an accessible
-description of how it works), lazy semantics...
-
-Anyway, code walkers are tricky to write, can involve a lot of boilerplate code,
-and are generally brittle. To ease things as much as possible, Metalua comes
-with a walk library, which is intended to speed up code walker implementation.
-Since code walking is intrinsically tricky, the lib won't magically make it
-trivial, but at least it will save you a lot of time and code, when compared to
-writing all walkers from scratch. Moreover, other people who took the time to
-learn the walker generator's API will find their way into your code much
-faster.
-
-\subsection{Principles}
-
-Code walking is about traversing a tree, first from root to leaves, then from
-leaves back to the root.
This tree is not uniform: some nodes are expressions, -some others statements, some others blocks; and each of these node kinds is -subdivided in several sub-cases (addition, numeric for loop...). The basic code -walker just goes from root to leaves and back to root without doing anything. -Then it's up to you to plug some action callbacks in that walker, so that it -does interesting things for you. - -Without entering into the details of AST structure, here is a simplified version -of the walking algorithm, pretending to work on a generic tree: - -\begin{verbatim} -function traverse(node) - local down_result = down(node) - if down_result ~= 'break' then - for c in children(node) do - traverse(node) - end - end - up(node) -end -\end{verbatim} - -The algorithm behind 'walk' is almost as simple as the one above, except that -it's specialized for AST trees. You can essentially specialize it by providing -your own up() and down() functions. These visitor functions perform whatever -action you want on the AST; moreover, down() has the option to return 'break': -in that case, the sub-nodes are not traversed. It allows the user to shun parts -of the tree, or to provide his own special traversal method in the down() -visitor. - -The real walker generator is only marginally more complex than that: -\begin{itemize} -\item It lets you define your up() and down(), and down() can return 'break' to - cut the tree traversal; however, these up() and down() functions are - specialized by node kind: there can be separate callbacks for expr nodes, stat - nodes, block nodes. -\item There's also a binder() visitor for identifier binders. Binders are - variables which declare a new local variable; you'll find them in nodes - `Local, `Localrec, `Forin, `Fornum, `Function. The binders are visited just - before the variable's scope begins, i.e. before traversing a loop or a - function's body, after traversing a `Local's right-hand side, before - traversing a `Localrec's right-hand side. \\ - Notice that separate up() and down() visitors wouldn't make sense for - binders, since they're leave nodes. -\item Visitor functions don't only take the visited node as parameter: they also - take the list of all expr, stat and block nodes above it, up to the AST's - root. This allows a child node to act on its parent nodes. -\end{itemize} - -\subsection{API} -There are 3 main tree walkers: {\tt walk.expr()}, {\tt walk.stat()} and {\tt - walk.block()}, to walk through the corresponding kinds of ASTs. Each of these -walker take as parameters a table {\tt cfg} containing the various visitor -functions, and the AST to walk throuhg. the configuration table {\tt cfg} can -contain fields: -\begin{itemize} -\item {\tt cfg.stat.down(node, parent, grandparent...)}, which applies when - traversing a statement down, i.e. before its children nodes are parsed, and - can modify the tree, and return {\tt nil} or {\tt'break'}. The way children - are traversed is decided {\em after} the {\tt down()} visitor has been run: - this point matters when the visitor modifies its children nodes. -\item {\tt cfg.stat.up(node, parent, grandparent...)}, which is applies on the - way back up. It is applied even if {\tt cfg.stat.down()} returned - {\tt'break'}, but in that case, the children have not been (and will not be) - traversed. 
-\item {\tt cfg.expr.down()} and {\tt cfg.expr.up()}, which work just as their - {\tt stat} equivalent, but apply to expression nodes.\\ - Notice that in Lua, function calls and method invocations can be used as - statements as well as as espressions: in such cases, they are visited only by - the statement visitor, not by the expression visitor. -\item {\tt cfg.block.down()} and {\tt cfg.block.up()} do the same for statements - blocks: loops, conditional and function bodies. -\item {\tt cfg.binder(identifier, id\_parent, id\_grandparent...)}: this - is run on identifiers which create a new local variable, jsut before that - variable's scope begins. -\end{itemize} - -Moreover, there is a {\tt walk.guess(cfg, ast)} walker which tries to guess the -type of the AST it receives, and applies the appropriate walker. When an AST can -be either an expression or a statement (nodes {\tt`Call} and {\tt`Invoke}), it -is interpreted as an expression. - -\subsection{Examples} - -A bit of practice now. Let's build the simplest walker possible, that does -nothing: - -\begin{verbatim} -cfg = { } -walker = |ast| walk.block(cfg, ast) -\end{verbatim} - -Now, let's say we want to catch and remove all statement calls to function -assert(). This can be done by removing its tag and content: an empty list is -simply ignored in an AST. So we're only interested by `Call nodes, and within -these nodes, we want the function to be `Id 'assert'. All of this is only -relevant to stat nodes: - -\begin{verbatim} -function cfg.stat.down (x) - match x with - | `Call{ `Id 'assert', ... } -> x.tag=nil; x <- { } - | _ -> -- not interested by this node, do nothing - end -end -\end{verbatim} - -You'll almost always want to use the 'match' extension to implement visitors. -The imperative table overrider ({\tt x <- y} a.k.a. {\tt table.override(x, y)} -also often comes handy to modify an AST. - -We'll now remove {\tt assert()} calls in non-statement; we cannot replace an -expression by nothing, so we'll replace these nodes by these will simply be -replaced by {\tt nil}: - -\begin{verbatim} -function cfg.expr.down (x) - match x with - | `Call{ `Id 'assert', ... } -> x <- `Nil - | _ -> -- not interested by this node, do nothing - end -end -\end{verbatim} - - -Here's a remark for functional programmers: this API is very imperative; you -might cringe at seeing the `Call nodes transformed in-place. Well, I tend to -agree but it's generally counter-productive to work against the grain of the -wood: Lua is imperative at heart, and design attempts at doing this functionally -sucked more than approaches that embraced imperativeness. - -\paragraph{Cuts} -By making down() return 'break', you can prevent the traversal to go further -down. This might be either because you're not interested by the subtrees, or -because you want to traverse them in a special way. In that later case, just do -the traversal by yourself in the down() function, and cut the walking by -returning 'break', so that nodes aren't re-traversed by the default walking -algorithm. We'll see that in the next, more complex example, listing of free -variables. - -This example is exclusively there for demonstration purposes. For actual work on -identifiers that require awareness of an identifier's binder of freedom, there -is a dedicated {\tt walk.id} library. - -We'll progressively build a walker that gathers all global variables used in a -given AST. 
This involves keeping, at all times, a set of the identifiers -currently bound by a "local" declaration, by function parameters, as for loop -variables etc. Then, every time an identifier is found in the AST, its presence -is checked in the current set of bound variables. If it isn't in it, then it's a -free (global) identifier. - -The first thing we'll need is a scope handling system: something that keeps -track of what identifiers are currently in scope. It must also allow to save the -current scope (e.g. before we enter a new block) and restore it afterwards (e.g. -after we leave the block). This is quite straightforward and unrelated to code -walking; here is the code: - -\begin{Verbatim}[fontsize=\scriptsize] -require 'std' -require 'walk' - --{ extension 'match' } - --------------------------------------------------------------------------------- --- Scope handling: ':push()' saves the current scope, ':pop()' --- restores the previously saved one. ':add(identifiers_list)' adds --- identifiers to the current scope. Current scope is stored in --- '.current', as a string->boolean hashtable. --------------------------------------------------------------------------------- - -local scope = { } -scope.__index = scope - -function scope:new() - local ret = { current = { } } - ret.stack = { ret.current } - setmetatable (ret, self) - return ret -end - -function scope:push() - table.insert (self.stack, table.shallow_copy (self.current)) -end - -function scope:pop() - self.current = table.remove (self.stack) -end - -function scope:add (vars) - for id in values (vars) do - match id with `Id{ x } -> self.current[x] = true end - end -end -\end{Verbatim} - -(There is an improved version of that class in library {\tt walk.scope}; cf. -its documentation for details). - -Now let's start designing the walker. We'll keep a scope object up to date, as -well as a set of found free identifiers, gathered every time we find an `Id{ } -node. To slightly simplify matter, we'll consider that the AST represent a -block. - -\begin{Verbatim}[fontsize=\scriptsize] -local function fv (term) - local freevars = { } - local scope = scope:new() - local cfg = { expr = { } } - - function cfg.expr.down(x) - match x with - | `Id{ name } -> if not scope.current[name] then freevars[name] = true end - | _ -> -- pass - end - end - - walk.guess(cfg, term) - return freevars -end -\end{Verbatim} - -Since we don't ever add any identifier to the scope, this will just list all the -identifiers used in the AST, bound or not. Now let's start doing more -interesting things: - -\begin{itemize} -\item We'll save the scope's state before entering a new block, and restore it - when we leave it. That will be done by providing functions {\tt cfg.block.down()} - and {\tt cfg.block.up()}. Saving and restoring will be performed by methods - {\tt :push()} and {\tt :pop()}. -\item Whenever we find a {\tt local} declaration, we'll add the list of - identifiers to the current scope, so that they won't be gathered when parsing - the {\tt `Id} expression nodes. Notice that the identifiers declared by the - 'local' statement only start being valid after the statement, so we'll add - them in the {\tt cfg.stat.up()} function rather than {\tt cfg.stat.down()}. -\end{itemize} - -\begin{Verbatim}[fontsize=\scriptsize] -local cfg = { expr = { }, - stat = { }, - block = { } } - [...] - function cfg.stat.up(x) - match x with - | `Local{ vars, ... 
} -> scope:add(vars) - | _ -> -- pass - end - end - - ----------------------------------------------------------------------------- - -- Create a separate scope for each block, close it when leaving. - ----------------------------------------------------------------------------- - function cfg.block.down() scope:push() end - function cfg.block.up() scope:pop() end -\end{Verbatim} - -This starts to be useful. We can also easily add the case for `Localrec{ } nodes -(the ones generated by {\tt "local function foo() ... end"}), where the variable -is already bound in the {\tt`Localrec} statement's right-hand side; so we do the -same as for {\tt`Local}, but we do it in the {\tt down()} function rather than -in the up() one. - -We'll also take care of {\tt`Function}, {\tt`Forin} and {\tt`Fornum} nodes, -which introduce new bound identifiers as function parameters or loop variables. -This is quite straightforward; the only thing to take care of is to save the -scope before entering the function/loop body (in {\tt down()}), and restore it -when leaving (in {\tt up()}). The code becomes: - - -\begin{Verbatim}[fontsize=\scriptsize] -local function fv (term) - local freevars = { } - local scope = scope:new() - local cfg = { expr = { }, - stat = { }, - block = { } } - - ----------------------------------------------------------------------------- - -- Check identifiers; add functions parameters to newly created scope. - ----------------------------------------------------------------------------- - function cfg.expr.down(x) - match x with - | `Id{ name } -> if not scope.current[name] then freevars[name] = true end - | `Function{ params, _ } -> scope:push(); scope:add (params) - | _ -> -- pass - end - end - - ----------------------------------------------------------------------------- - -- Close the function scope opened by 'down()'. - ----------------------------------------------------------------------------- - function cfg.expr.up(x) - match x with - | `Function{ ... } -> scope:pop() - | _ -> -- pass - end - end - - ----------------------------------------------------------------------------- - -- Create a new scope and register loop variable[s] in it - ----------------------------------------------------------------------------- - function cfg.stat.down(x) - match x with - | `Forin{ vars, ... } -> scope:push(); scope:add(vars) - | `Fornum{ var, ... } -> scope:push(); scope:add{var} - | `Localrec{ vars, ... } -> scope:add(vars) - | `Local{ ... } -> -- pass - | _ -> -- pass - end - end - - ----------------------------------------------------------------------------- - -- Close the scopes opened by 'up()' - ----------------------------------------------------------------------------- - function cfg.stat.up(x) - match x with - | `Forin{ ... } | `Fornum{ ... } -> scope:pop() - | `Local{ vars, ... } -> scope:add(vars) - | _ -> -- pass - end - end - - ----------------------------------------------------------------------------- - -- Create a separate scope for each block, close it when leaving. - ----------------------------------------------------------------------------- - function cfg.block.down() scope:push() end - function cfg.block.up() scope:pop() end - - walk.guess(cfg, term) - return freevars -end -\end{Verbatim} - -This is almost correct now. There's one last tricky point of Lua's semantics -that we need to address: in {\tt repeat foo until bar} loops, "bar" is included -in "foo"'s scope. 
For instance, if we write {\tt repeat local x=foo() until - x>3}, the "x" in the condition is the local variable "x" declared inside the -body. This violates our way of handling block scopes: the scope must be kept -alive after the block is finished. We'll fix this by providing a custom walking -for the block inside `Repeat, and preventing the normal walking to happen by -returning 'break': - -\begin{Verbatim}[fontsize=\scriptsize] - ----------------------------------------------------------------------------- - -- Create a new scope and register loop variable[s] in it - ----------------------------------------------------------------------------- - function cfg.stat.down(x) - match x with - | `Forin{ vars, ... } -> scope:push(); scope:add(vars) - | `Fornum{ var, ... } -> scope:push(); scope:add{var} - | `Localrec{ vars, ... } -> scope:add(vars) - | `Local{ ... } -> -- pass - | `Repeat{ block, cond } -> -- 'cond' is in the scope of 'block' - scope:push() - for s in values (block) do walk.stat(cfg)(s) end -- no new scope - walk.expr(cfg)(cond) - scope:pop() - return 'break' -- No automatic walking of subparts 'cond' and 'body' - | _ -> -- pass - end - end -\end{Verbatim} - -That's it, we've now got a full free variables lister, and have demonstrated -most APIs offered by the basic 'walk' library. If you want to walk through -identifiers in a scope-aware way, though, you'll want to look at the {\tt - walk.id} library. - -\subsection{Library {\tt walk.id}, the scope-aware walker} - -This library walks AST to gather information about the identifiers in it. It -call distinct visitor functions depending on whether an identifier is bound or -free; moreover, when an identifier is bound, the visitor also receives its -binder node as a parameter. For instance, in {\tt +\{function(x) print(x) - end\}}, the bound identifier walker will be called on the \verb|+{x}| in the -\verb|print| call, and the visitor's second parameter will be the {\tt`Function} -node which created the local variable {\tt x}. - -\paragraph{API} -The library is loaded with \verb|require 'walk.id'|. The walkers provided are: -\begin{itemize} -\item {\tt walk\_id.expr()}; -\item {\tt walk\_id.stat()}; -\item {\tt walk\_id.block()}; -\item {\tt walk\_id.guess()}. -\end{itemize} - -They take the same config tables as regular walkers, except that they also -recognize the following entries: -\begin{itemize} -\item {\tt cfg.id.free(identifier, parent, grandparent...)}, which is run on - free variables; -\item {\tt cfg.id.bound(identifier, binder, parent, grandparent...)}, which is - run on bound variables. The statement or expression which created this bound - veriable's scope is passed as a second parameter, before the parent nodes. -\end{itemize} - -\paragraph{Examples} -Let's rewrite the free variables walker above, with the id walker: - -\begin{Verbatim}[fontsize=\scriptsize] -function fv (term) - local cfg = { id = { } } - local freevars = { } - function cfg.id.free(id) - local id_name = id[1] - freevars[id_name] = true - end - walk_id.guess (cfg, term) - return freevars -end -\end{Verbatim} - -Now, let's $\alpha$-rename all bound variables in a term. This is slightly -trickier than one could think: we need to first walk the whole tree, then -perform all the replacement. If we renamed binders as we went, they would stop -binding their variables, and something messy would happen. 
For instance, if we
-took {\tt +\{function(x) print(x) end\}} and renamed the binder {\tt x} into
-{\tt foo}, we'd then start walking the function body on the tree {\tt
-  +\{function(foo) print(x) end\}}, where {\tt x} isn't bound anymore.
-
-\begin{Verbatim}[fontsize=\scriptsize]
---------------------------------------------------------------------------------
--- bound_vars keeps a binder node -> old_name -> new_name dictionary.
--- It associates all instances of an identifier together
--- with the binder that created them.
---------------------------------------------------------------------------------
-local bound_vars = { }
-
---------------------------------------------------------------------------------
--- local_renames will keep all identifier nodes to rename as keys,
--- and their new name as values. The renaming must happen after
--- the whole tree has been visited, in order to avoid breaking captures.
---------------------------------------------------------------------------------
-local local_renames = { }
-
---------------------------------------------------------------------------------
--- Associate a new name in bound_vars when a local variable is created.
---------------------------------------------------------------------------------
-function cfg.binder (id, binder)
-   local old_name = id[1]
-   local binder_table = bound_vars[binder]
-   if not binder_table then
-      -- Create a new entry for this binder:
-      binder_table = { }
-      bound_vars[binder] = binder_table
-   end
-   local new_name = mlp.gensym(old_name)[1] -- generate a new name
-   binder_table[old_name] = new_name -- remember the name for next instances
-   local_renames[id] = new_name      -- add to the rename todo-list
-end
-
---------------------------------------------------------------------------------
--- Add a bound variable to the rename todo-list.
---------------------------------------------------------------------------------
-function cfg.id.bound (id, binder)
-   local old_name = id[1]
-   local new_name = bound_vars[binder][old_name]
-   local_renames[id] = new_name
-end
-
--- walk the tree and fill local_renames:
-walk_id.guess(cfg, ast)
-
--- perform the renaming actions:
-for id, new_name in pairs(local_renames) do id[1] = new_name end
-\end{Verbatim}
-
-\subsection{Library {\tt walk.scope}, the scope helper}
-This library makes it easy to store data, in an AST walking function, in a
-scope-aware way. Cf. comments in the sources for details.
+\section{{\tt walk}, the code walker}
+
+When you write advanced macros, or when you're analyzing some code to check for
+some property, you often need to design a function that walks through an
+arbitrary piece of code, and does complex stuff on it. Such a function is called
+a code walker. Code walkers can be used for some one-off adjustments, e.g.
+changing a function's {\tt return} statements into something else, or checking
+that a loop performs no {\tt break}, up to pretty advanced transformations, such
+as CPS transformation (a way to encode full continuations into a language that
+doesn't support them natively; see Paul Graham's On Lisp for an accessible
+description of how it works), lazy semantics...
+
+Anyway, code walkers are tricky to write, can involve a lot of boilerplate code,
+and are generally brittle. To ease things as much as possible, Metalua comes
+with a walk library, which is intended to speed up code walker implementation.
+
+Since code walking is intrinsically tricky, the lib won't magically make it
+trivial, but at least it will save you a lot of time and code, when compared to
+writing all walkers from scratch. Moreover, other people who took the time to
+learn the walker generator's API will find their way into your code much
+faster.
+
+\subsection{Principles}
+
+Code walking is about traversing a tree, first from root to leaves, then from
+leaves back to the root. This tree is not uniform: some nodes are expressions,
+some others statements, some others blocks; and each of these node kinds is
+subdivided in several sub-cases (addition, numeric for loop...). The basic code
+walker just goes from root to leaves and back to root without doing anything.
+Then it's up to you to plug some action callbacks in that walker, so that it
+does interesting things for you.
+
+Without entering into the details of AST structure, here is a simplified version
+of the walking algorithm, pretending to work on a generic tree:
+
+\begin{verbatim}
+function traverse(node)
+   local down_result = down(node)
+   if down_result ~= 'break' then
+      for c in children(node) do
+         traverse(c)
+      end
+   end
+   up(node)
+end
+\end{verbatim}
+
+The algorithm behind 'walk' is almost as simple as the one above, except that
+it's specialized for ASTs. You essentially specialize it by providing
+your own up() and down() functions. These visitor functions perform whatever
+action you want on the AST; moreover, down() has the option to return 'break':
+in that case, the sub-nodes are not traversed. This lets the user skip parts
+of the tree, or provide their own special traversal method in the down()
+visitor.
+
+The real walker generator is only marginally more complex than that:
+\begin{itemize}
+\item It lets you define your up() and down(), and down() can return 'break' to
+ cut the tree traversal; however, these up() and down() functions are
+ specialized by node kind: there can be separate callbacks for expr nodes, stat
+ nodes, block nodes.
+\item There's also a binder() visitor for identifier binders. Binders are
+ identifiers which declare a new local variable; you'll find them in nodes
+ `Local, `Localrec, `Forin, `Fornum, `Function. The binders are visited just
+ before the variable's scope begins, i.e. before traversing a loop or a
+ function's body, after traversing a `Local's right-hand side, before
+ traversing a `Localrec's right-hand side. \\
+ Notice that separate up() and down() visitors wouldn't make sense for
+ binders, since they're leaf nodes.
+\item Visitor functions don't only take the visited node as parameter: they also
+ take the list of all expr, stat and block nodes above it, up to the AST's
+ root. This allows a child node to act on its parent nodes.
+\end{itemize}
+
+\subsection{API}
+There are 3 main tree walkers: {\tt walk.expr()}, {\tt walk.stat()} and {\tt
+  walk.block()}, to walk through the corresponding kinds of ASTs. Each of these
+walkers takes as parameters a table {\tt cfg} containing the various visitor
+functions, and the AST to walk through. The configuration table {\tt cfg} can
+contain the fields:
+\begin{itemize}
+\item {\tt cfg.stat.down(node, parent, grandparent...)}, which applies when
+ traversing a statement down, i.e. before its children nodes are traversed;
+ it can modify the tree, and return {\tt nil} or {\tt'break'}. The way children
+ are traversed is decided {\em after} the {\tt down()} visitor has been run:
+ this point matters when the visitor modifies its children nodes.
+\item {\tt cfg.stat.up(node, parent, grandparent...)}, which applies on the
+ way back up. It is applied even if {\tt cfg.stat.down()} returned
+ {\tt'break'}, but in that case, the children have not been (and will not be)
+ traversed.
+\item {\tt cfg.expr.down()} and {\tt cfg.expr.up()}, which work just as their
+ {\tt stat} equivalents, but apply to expression nodes.\\
+ Notice that in Lua, function calls and method invocations can be used as
+ statements as well as expressions: in such cases, they are visited only by
+ the statement visitor, not by the expression visitor.
+\item {\tt cfg.block.down()} and {\tt cfg.block.up()} do the same for statement
+ blocks: loop, conditional and function bodies.
+\item {\tt cfg.binder(identifier, id\_parent, id\_grandparent...)}: this
+ is run on identifiers which create a new local variable, just before that
+ variable's scope begins.
+\end{itemize}
+
+Moreover, there is a {\tt walk.guess(cfg, ast)} walker which tries to guess the
+type of the AST it receives, and applies the appropriate walker. When an AST can
+be either an expression or a statement (nodes {\tt`Call} and {\tt`Invoke}), it
+is interpreted as an expression.
+
+\subsection{Examples}
+
+A bit of practice now. Let's build the simplest walker possible, one that does
+nothing:
+
+\begin{verbatim}
+cfg = { }
+walker = |ast| walk.block(cfg, ast)
+\end{verbatim}
+
+Now, let's say we want to catch and remove all statement calls to the function
+assert(). This can be done by removing its tag and content: an empty list is
+simply ignored in an AST. So we're only interested in `Call nodes, and within
+these nodes, we want the function to be `Id 'assert'. All of this is only
+relevant to stat nodes:
+
+\begin{verbatim}
+function cfg.stat.down (x)
+   match x with
+   | `Call{ `Id 'assert', ... } -> x.tag=nil; x <- { }
+   | _ -> -- not interested in this node, do nothing
+   end
+end
+\end{verbatim}
+
+You'll almost always want to use the 'match' extension to implement visitors.
+The imperative table overrider ({\tt x <- y}, a.k.a. {\tt table.override(x, y)})
+also often comes in handy to modify an AST.
+
+We'll now remove {\tt assert()} calls in non-statement positions; we cannot
+replace an expression with nothing, so these nodes will simply be replaced by
+{\tt nil}:
+
+\begin{verbatim}
+function cfg.expr.down (x)
+   match x with
+   | `Call{ `Id 'assert', ... } -> x <- `Nil
+   | _ -> -- not interested in this node, do nothing
+   end
+end
+\end{verbatim}
+
+
+Here's a remark for functional programmers: this API is very imperative; you
+might cringe at seeing the `Call nodes transformed in-place. Well, I tend to
+agree, but it's generally counter-productive to work against the grain of the
+wood: Lua is imperative at heart, and design attempts at doing this functionally
+sucked more than approaches that embraced imperativeness.
+
+\paragraph{Cuts}
+By making down() return 'break', you can prevent the traversal from going
+further down. This might be either because you're not interested in the
+subtrees, or because you want to traverse them in a special way. In the latter
+case, just do the traversal yourself in the down() function, and cut the
+walking by returning 'break', so that nodes aren't re-traversed by the default
+walking algorithm. We'll see that in the next, more complex example: listing
+free variables.
+
+This example is exclusively there for demonstration purposes.
For actual work on +identifiers that require awareness of an identifier's binder of freedom, there +is a dedicated {\tt walk.id} library. + +We'll progressively build a walker that gathers all global variables used in a +given AST. This involves keeping, at all times, a set of the identifiers +currently bound by a "local" declaration, by function parameters, as for loop +variables etc. Then, every time an identifier is found in the AST, its presence +is checked in the current set of bound variables. If it isn't in it, then it's a +free (global) identifier. + +The first thing we'll need is a scope handling system: something that keeps +track of what identifiers are currently in scope. It must also allow to save the +current scope (e.g. before we enter a new block) and restore it afterwards (e.g. +after we leave the block). This is quite straightforward and unrelated to code +walking; here is the code: + +\begin{Verbatim}[fontsize=\scriptsize] +require 'std' +require 'walk' + +-{ extension 'match' } + +-------------------------------------------------------------------------------- +-- Scope handling: ':push()' saves the current scope, ':pop()' +-- restores the previously saved one. ':add(identifiers_list)' adds +-- identifiers to the current scope. Current scope is stored in +-- '.current', as a string->boolean hashtable. +-------------------------------------------------------------------------------- + +local scope = { } +scope.__index = scope + +function scope:new() + local ret = { current = { } } + ret.stack = { ret.current } + setmetatable (ret, self) + return ret +end + +function scope:push() + table.insert (self.stack, table.shallow_copy (self.current)) +end + +function scope:pop() + self.current = table.remove (self.stack) +end + +function scope:add (vars) + for id in values (vars) do + match id with `Id{ x } -> self.current[x] = true end + end +end +\end{Verbatim} + +(There is an improved version of that class in library {\tt walk.scope}; cf. +its documentation for details). + +Now let's start designing the walker. We'll keep a scope object up to date, as +well as a set of found free identifiers, gathered every time we find an `Id{ } +node. To slightly simplify matter, we'll consider that the AST represent a +block. + +\begin{Verbatim}[fontsize=\scriptsize] +local function fv (term) + local freevars = { } + local scope = scope:new() + local cfg = { expr = { } } + + function cfg.expr.down(x) + match x with + | `Id{ name } -> if not scope.current[name] then freevars[name] = true end + | _ -> -- pass + end + end + + walk.guess(cfg, term) + return freevars +end +\end{Verbatim} + +Since we don't ever add any identifier to the scope, this will just list all the +identifiers used in the AST, bound or not. Now let's start doing more +interesting things: + +\begin{itemize} +\item We'll save the scope's state before entering a new block, and restore it + when we leave it. That will be done by providing functions {\tt cfg.block.down()} + and {\tt cfg.block.up()}. Saving and restoring will be performed by methods + {\tt :push()} and {\tt :pop()}. +\item Whenever we find a {\tt local} declaration, we'll add the list of + identifiers to the current scope, so that they won't be gathered when parsing + the {\tt `Id} expression nodes. Notice that the identifiers declared by the + 'local' statement only start being valid after the statement, so we'll add + them in the {\tt cfg.stat.up()} function rather than {\tt cfg.stat.down()}. 
+\end{itemize} + +\begin{Verbatim}[fontsize=\scriptsize] +local cfg = { expr = { }, + stat = { }, + block = { } } + [...] + function cfg.stat.up(x) + match x with + | `Local{ vars, ... } -> scope:add(vars) + | _ -> -- pass + end + end + + ----------------------------------------------------------------------------- + -- Create a separate scope for each block, close it when leaving. + ----------------------------------------------------------------------------- + function cfg.block.down() scope:push() end + function cfg.block.up() scope:pop() end +\end{Verbatim} + +This starts to be useful. We can also easily add the case for `Localrec{ } nodes +(the ones generated by {\tt "local function foo() ... end"}), where the variable +is already bound in the {\tt`Localrec} statement's right-hand side; so we do the +same as for {\tt`Local}, but we do it in the {\tt down()} function rather than +in the up() one. + +We'll also take care of {\tt`Function}, {\tt`Forin} and {\tt`Fornum} nodes, +which introduce new bound identifiers as function parameters or loop variables. +This is quite straightforward; the only thing to take care of is to save the +scope before entering the function/loop body (in {\tt down()}), and restore it +when leaving (in {\tt up()}). The code becomes: + + +\begin{Verbatim}[fontsize=\scriptsize] +local function fv (term) + local freevars = { } + local scope = scope:new() + local cfg = { expr = { }, + stat = { }, + block = { } } + + ----------------------------------------------------------------------------- + -- Check identifiers; add functions parameters to newly created scope. + ----------------------------------------------------------------------------- + function cfg.expr.down(x) + match x with + | `Id{ name } -> if not scope.current[name] then freevars[name] = true end + | `Function{ params, _ } -> scope:push(); scope:add (params) + | _ -> -- pass + end + end + + ----------------------------------------------------------------------------- + -- Close the function scope opened by 'down()'. + ----------------------------------------------------------------------------- + function cfg.expr.up(x) + match x with + | `Function{ ... } -> scope:pop() + | _ -> -- pass + end + end + + ----------------------------------------------------------------------------- + -- Create a new scope and register loop variable[s] in it + ----------------------------------------------------------------------------- + function cfg.stat.down(x) + match x with + | `Forin{ vars, ... } -> scope:push(); scope:add(vars) + | `Fornum{ var, ... } -> scope:push(); scope:add{var} + | `Localrec{ vars, ... } -> scope:add(vars) + | `Local{ ... } -> -- pass + | _ -> -- pass + end + end + + ----------------------------------------------------------------------------- + -- Close the scopes opened by 'up()' + ----------------------------------------------------------------------------- + function cfg.stat.up(x) + match x with + | `Forin{ ... } | `Fornum{ ... } -> scope:pop() + | `Local{ vars, ... } -> scope:add(vars) + | _ -> -- pass + end + end + + ----------------------------------------------------------------------------- + -- Create a separate scope for each block, close it when leaving. + ----------------------------------------------------------------------------- + function cfg.block.down() scope:push() end + function cfg.block.up() scope:pop() end + + walk.guess(cfg, term) + return freevars +end +\end{Verbatim} + +This is almost correct now. 
There's one last tricky point of Lua's semantics
+that we need to address: in {\tt repeat foo until bar} loops, "bar" is included
+in "foo"'s scope. For instance, if we write {\tt repeat local x=foo() until
+  x>3}, the "x" in the condition is the local variable "x" declared inside the
+body. This violates our way of handling block scopes: the scope must be kept
+alive after the block is finished. We'll fix this by providing a custom walking
+for the block inside `Repeat, and preventing the normal walking from happening
+by returning 'break':
+
+\begin{Verbatim}[fontsize=\scriptsize]
+   -----------------------------------------------------------------------------
+   -- Create a new scope and register loop variable[s] in it
+   -----------------------------------------------------------------------------
+   function cfg.stat.down(x)
+      match x with
+      | `Forin{ vars, ... }    -> scope:push(); scope:add(vars)
+      | `Fornum{ var, ... }    -> scope:push(); scope:add{var}
+      | `Localrec{ vars, ... } -> scope:add(vars)
+      | `Local{ ... }          -> -- pass
+      | `Repeat{ block, cond } -> -- 'cond' is in the scope of 'block'
+         scope:push()
+         for s in values (block) do walk.stat(cfg)(s) end -- no new scope
+         walk.expr(cfg)(cond)
+         scope:pop()
+         return 'break' -- no automatic walking of subparts 'cond' and 'block'
+      | _ -> -- pass
+      end
+   end
+\end{Verbatim}
+
+That's it, we've now got a full free variables lister, and have demonstrated
+most APIs offered by the basic 'walk' library. If you want to walk through
+identifiers in a scope-aware way, though, you'll want to look at the {\tt
+  walk.id} library.
+
+\subsection{Library {\tt walk.id}, the scope-aware walker}
+
+This library walks an AST to gather information about the identifiers in it. It
+calls distinct visitor functions depending on whether an identifier is bound or
+free; moreover, when an identifier is bound, the visitor also receives its
+binder node as a parameter. For instance, in {\tt +\{function(x) print(x)
+  end\}}, the bound identifier walker will be called on the \verb|+{x}| in the
+\verb|print| call, and the visitor's second parameter will be the {\tt`Function}
+node which created the local variable {\tt x}.
+
+\paragraph{API}
+The library is loaded with \verb|require 'walk.id'|. The walkers provided are:
+\begin{itemize}
+\item {\tt walk\_id.expr()};
+\item {\tt walk\_id.stat()};
+\item {\tt walk\_id.block()};
+\item {\tt walk\_id.guess()}.
+\end{itemize}
+
+They take the same config tables as regular walkers, except that they also
+recognize the following entries:
+\begin{itemize}
+\item {\tt cfg.id.free(identifier, parent, grandparent...)}, which is run on
+ free variables;
+\item {\tt cfg.id.bound(identifier, binder, parent, grandparent...)}, which is
+ run on bound variables. The statement or expression which created this bound
+ variable's scope is passed as a second parameter, before the parent nodes.
+\end{itemize}
+
+\paragraph{Examples}
+Let's rewrite the free variables walker above, with the id walker:
+
+\begin{Verbatim}[fontsize=\scriptsize]
+function fv (term)
+   local cfg = { id = { } }
+   local freevars = { }
+   function cfg.id.free(id)
+      local id_name = id[1]
+      freevars[id_name] = true
+   end
+   walk_id.guess (cfg, term)
+   return freevars
+end
+\end{Verbatim}
+
+Now, let's $\alpha$-rename all bound variables in a term. This is slightly
+trickier than one might think: we need to first walk the whole tree, then
+perform all the replacements. If we renamed binders as we went, they would stop
+binding their variables, and something messy would happen.
For instance, if we
+took {\tt +\{function(x) print(x) end\}} and renamed the binder {\tt x} into
+{\tt foo}, we'd then start walking the function body on the tree {\tt
+  +\{function(foo) print(x) end\}}, where {\tt x} isn't bound anymore.
+
+\begin{Verbatim}[fontsize=\scriptsize]
+--------------------------------------------------------------------------------
+-- bound_vars keeps a binder node -> old_name -> new_name dictionary.
+-- It associates all instances of an identifier together
+-- with the binder that created them.
+--------------------------------------------------------------------------------
+local bound_vars = { }
+
+--------------------------------------------------------------------------------
+-- local_renames will keep all identifier nodes to rename as keys,
+-- and their new name as values. The renaming must happen after
+-- the whole tree has been visited, in order to avoid breaking captures.
+--------------------------------------------------------------------------------
+local local_renames = { }
+
+--------------------------------------------------------------------------------
+-- Associate a new name in bound_vars when a local variable is created.
+--------------------------------------------------------------------------------
+function cfg.binder (id, binder)
+   local old_name = id[1]
+   local binder_table = bound_vars[binder]
+   if not binder_table then
+      -- Create a new entry for this binder:
+      binder_table = { }
+      bound_vars[binder] = binder_table
+   end
+   local new_name = mlp.gensym(old_name)[1] -- generate a new name
+   binder_table[old_name] = new_name -- remember the name for next instances
+   local_renames[id] = new_name      -- add to the rename todo-list
+end
+
+--------------------------------------------------------------------------------
+-- Add a bound variable to the rename todo-list.
+--------------------------------------------------------------------------------
+function cfg.id.bound (id, binder)
+   local old_name = id[1]
+   local new_name = bound_vars[binder][old_name]
+   local_renames[id] = new_name
+end
+
+-- walk the tree and fill local_renames:
+walk_id.guess(cfg, ast)
+
+-- perform the renaming actions:
+for id, new_name in pairs(local_renames) do id[1] = new_name end
+\end{Verbatim}
+
+\subsection{Library {\tt walk.scope}, the scope helper}
+This library makes it easy to store data, in an AST walking function, in a
+scope-aware way. Cf. comments in the sources for details.
diff --git a/doc/pluto/CHANGELOG b/doc/pluto/CHANGELOG
index dd17e64..9ec73aa 100755
--- a/doc/pluto/CHANGELOG
+++ b/doc/pluto/CHANGELOG
@@ -1,10 +1,10 @@
-$Id$
-
-This changelog is maintained as of version 2.0alpha1.
-Earlier versions are changelogged on the LuaForge site.
-
--- 2.0alpha1 --
-* Fixed all outstanding 5.0->5.1 conversion issues
-* Made heavier use of size_t in preference to int
-* Fixed GC/Upval issue (thanks to Eric Jacobs)
-
+$Id$
+
+This changelog is maintained as of version 2.0alpha1.
+Earlier versions are changelogged on the LuaForge site.
+ +-- 2.0alpha1 -- +* Fixed all outstanding 5.0->5.1 conversion issues +* Made heavier use of size_t in preference to int +* Fixed GC/Upval issue (thanks to Eric Jacobs) + diff --git a/doc/pluto/README b/doc/pluto/README index 7a4dd1e..e341a81 100755 --- a/doc/pluto/README +++ b/doc/pluto/README @@ -1,129 +1,129 @@ -$Id$ - -PLUTO - Heavy duty persistence for Lua - -Pluto is a library which allows users to write arbitrarily large portions -of the "Lua universe" into a flat file, and later read them back into the -same or a different Lua universe. Object references are appropriately -handled, such that the file contains everything needed to recreate the -objects in question. - -Pluto has the following major features: -* Can persist any Lua function -* Can persist threads -* Works with any Lua chunkreader/chunkwriter -* Support for "invariant" permanent objects, of all datatypes -* Can invoke metafunctions for custom persistence of tables and userdata - -Pluto 2.0 requires Lua 5.1 or later. If you need to use Pluto with Lua -5.0, please use version 1.2 of Pluto. - -Pluto may have bugs. Users are advised to define lua_assert in -luaconf.h to something useful when compiling in debug mode, to catch -assertions by Pluto and Lua. - -The Pluto library consists of two public functions. - -int pluto_persist(lua_State *L, lua_Chunkwriter writer, void *ud) - -This function recursively persists the Lua object in stack position 2 -and all other objects which are directly or indirectly referenced by -it, except those referenced in the permanent object table. The data -is written using the chunk-writer given, and that writer is passed -the arbitrary pointer value ud. - -The Lua stack must contain exactly and only these two items, in order: - -1. A table of permanent objects, that should not be persisted. For each -permanent object, the object itself should be the key, and a unique -object of any type should be the value. Likely candidates for this table -include Lua functions (including those in the Lua libraries) that are -loaded at load-time. It must include all non-persistable objects that -are referenced by the object to be persisted. The table is not modified -by the function. Objects in this table are considered "opaque" and are -not examined or descended into. Objects should not appear in the table -multiple times; the result of doing this is undefined (though probably -harmless). NOTE: If you are planning to persist threads, keep in mind -that all yielded threads have coroutine.yield on the tops of their -stacks. Since it's a C function, it should be put here. For complex -permanents, it may be a good idea to use the __index meta-function of -the permanents table to "search" for permanents. - -2. The single object to be persisted. In many cases, this will be the -global table. For more flexibility, however, it may be something like a -table built for the occasion, with various values to keep track of. The -object may not be nil. - - -int pluto_unpersist(lua_State *L, lua_Chunkreader reader, void *ud) - -This function loads in a Lua object and places it on top of the stack. All -objects directly or indirectly referenced by it are also loaded. - -The Lua stack must contain, as its top value, a table of permanent -objects. This table should be like the permanent object table used when -persisting, but with the key and value of each pair reversed. 
These -objects are used as substitutes for those referenced in their positions -when persisting, and under most circumstances should be identical objects -to those referenced in the permanents table used for persisting. It's -okay for multiple keys to refer to the same object. - - -RUNNING PLUTO FROM LUA: -It is also possible to invoke pluto from a Lua script. The C function -pluto_open() will register pluto.persist and pluto.unpersist, lua functions -which operate on strings. The first takes a permanents table and a root -object, and returns a string; the second takes a permanents table and a -string, and returns the root object. - -An error will be raised if pluto.persist is called from a thread which is -itself referenced by the root object. - -SPECIAL PERSISTENCE: -Tables and userdata have special persistence semantics. These semantics are -keyed to the value of the object's metatable's __persist member, if any. This -member may be any of the following four values: -1. Boolean "true": The table or userdata is persisted literally; tables are -persisted member-by-member, and userdata are written out as literal data. -2. Boolean "false": An error is returned, indicating that the object cannot -be persisted. -3. A function: This function should take one argument, the object in question, -and return one result, a closure. This "fixup closure", in turn, will be -persisted, and during unpersistence will be called. The closure will be -responsible for recreating the object with the appropriate data, based on -its upvalues. -4. Nil, or no metatable. In the case of tables, the table is literally -persisted. In the case of userdata, an error is returned. - -Here's an example of special persistence for a simple 3d vector object: - -vec = { x = 2, y = 1, z = 4 } -setmetatable(vec, { __persist = function(oldtbl) - local x = oldtbl.x - local y = oldtbl.y - local z = oldtbl.z - local mt = getmetatable(oldtbl) - return function() - newtbl = {} - newtbl.x = x - newtbl.y = y - newtbl.z = z - setmetatable(newtbl, mt) - return newtbl - end -end }) - -Note how x, y, z, and the mt are explicitly pulled out of the table. It is -important that the fixup closure returned not reference the original table -directly, as that table would again be persisted as an upvalue, leading to an -infinite loop. Also note that the object's metatable is NOT automatically -persisted; it is necessary for the fixup closure to reset it, if it wants. - -LIMITATIONS/TODO: -* Light userdata are persisted literally, as their pointer values. This -may or may not be what you want. -* Closures of C functions may not be persisted. Once it becomes possible -to specify a C function "proto" as a permanent object, this restriction -will be relaxed. - -BUGS: None known. Emphasis on the 'known'. +$Id$ + +PLUTO - Heavy duty persistence for Lua + +Pluto is a library which allows users to write arbitrarily large portions +of the "Lua universe" into a flat file, and later read them back into the +same or a different Lua universe. Object references are appropriately +handled, such that the file contains everything needed to recreate the +objects in question. + +Pluto has the following major features: +* Can persist any Lua function +* Can persist threads +* Works with any Lua chunkreader/chunkwriter +* Support for "invariant" permanent objects, of all datatypes +* Can invoke metafunctions for custom persistence of tables and userdata + +Pluto 2.0 requires Lua 5.1 or later. If you need to use Pluto with Lua +5.0, please use version 1.2 of Pluto. 
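For a concrete picture of the Lua-facing API described under "RUNNING PLUTO
FROM LUA" below, here is an untested sketch of a round-trip; the table
contents and variable names are made up for the example, and it assumes the
host application has called pluto_open() to register the pluto functions:

perms = { [coroutine.yield] = 1 } -- objects that must not be persisted
root = { greeting = "hello", count = 42 }

flat = pluto.persist(perms, root) -- permanents table + root object -> string

-- To unpersist, reverse each key/value pair of the permanents table:
iperms = { [1] = coroutine.yield }
root2 = pluto.unpersist(iperms, flat)
assert(root2.greeting == "hello" and root2.count == 42)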
+ +Pluto may have bugs. Users are advised to define lua_assert in +luaconf.h to something useful when compiling in debug mode, to catch +assertions by Pluto and Lua. + +The Pluto library consists of two public functions. + +int pluto_persist(lua_State *L, lua_Chunkwriter writer, void *ud) + +This function recursively persists the Lua object in stack position 2 +and all other objects which are directly or indirectly referenced by +it, except those referenced in the permanent object table. The data +is written using the chunk-writer given, and that writer is passed +the arbitrary pointer value ud. + +The Lua stack must contain exactly and only these two items, in order: + +1. A table of permanent objects, that should not be persisted. For each +permanent object, the object itself should be the key, and a unique +object of any type should be the value. Likely candidates for this table +include Lua functions (including those in the Lua libraries) that are +loaded at load-time. It must include all non-persistable objects that +are referenced by the object to be persisted. The table is not modified +by the function. Objects in this table are considered "opaque" and are +not examined or descended into. Objects should not appear in the table +multiple times; the result of doing this is undefined (though probably +harmless). NOTE: If you are planning to persist threads, keep in mind +that all yielded threads have coroutine.yield on the tops of their +stacks. Since it's a C function, it should be put here. For complex +permanents, it may be a good idea to use the __index meta-function of +the permanents table to "search" for permanents. + +2. The single object to be persisted. In many cases, this will be the +global table. For more flexibility, however, it may be something like a +table built for the occasion, with various values to keep track of. The +object may not be nil. + + +int pluto_unpersist(lua_State *L, lua_Chunkreader reader, void *ud) + +This function loads in a Lua object and places it on top of the stack. All +objects directly or indirectly referenced by it are also loaded. + +The Lua stack must contain, as its top value, a table of permanent +objects. This table should be like the permanent object table used when +persisting, but with the key and value of each pair reversed. These +objects are used as substitutes for those referenced in their positions +when persisting, and under most circumstances should be identical objects +to those referenced in the permanents table used for persisting. It's +okay for multiple keys to refer to the same object. + + +RUNNING PLUTO FROM LUA: +It is also possible to invoke pluto from a Lua script. The C function +pluto_open() will register pluto.persist and pluto.unpersist, lua functions +which operate on strings. The first takes a permanents table and a root +object, and returns a string; the second takes a permanents table and a +string, and returns the root object. + +An error will be raised if pluto.persist is called from a thread which is +itself referenced by the root object. + +SPECIAL PERSISTENCE: +Tables and userdata have special persistence semantics. These semantics are +keyed to the value of the object's metatable's __persist member, if any. This +member may be any of the following four values: +1. Boolean "true": The table or userdata is persisted literally; tables are +persisted member-by-member, and userdata are written out as literal data. +2. Boolean "false": An error is returned, indicating that the object cannot +be persisted. +3. 
A function: This function should take one argument, the object in question, +and return one result, a closure. This "fixup closure", in turn, will be +persisted, and during unpersistence will be called. The closure will be +responsible for recreating the object with the appropriate data, based on +its upvalues. +4. Nil, or no metatable. In the case of tables, the table is literally +persisted. In the case of userdata, an error is returned. + +Here's an example of special persistence for a simple 3d vector object: + +vec = { x = 2, y = 1, z = 4 } +setmetatable(vec, { __persist = function(oldtbl) + local x = oldtbl.x + local y = oldtbl.y + local z = oldtbl.z + local mt = getmetatable(oldtbl) + return function() + newtbl = {} + newtbl.x = x + newtbl.y = y + newtbl.z = z + setmetatable(newtbl, mt) + return newtbl + end +end }) + +Note how x, y, z, and the mt are explicitly pulled out of the table. It is +important that the fixup closure returned not reference the original table +directly, as that table would again be persisted as an upvalue, leading to an +infinite loop. Also note that the object's metatable is NOT automatically +persisted; it is necessary for the fixup closure to reset it, if it wants. + +LIMITATIONS/TODO: +* Light userdata are persisted literally, as their pointer values. This +may or may not be what you want. +* Closures of C functions may not be persisted. Once it becomes possible +to specify a C function "proto" as a permanent object, this restriction +will be relaxed. + +BUGS: None known. Emphasis on the 'known'. diff --git a/junk/hygienic.lua b/junk/hygienic.lua index dac0ce7..ea4adeb 100755 --- a/junk/hygienic.lua +++ b/junk/hygienic.lua @@ -1,279 +1,279 @@ ----------------------------------------------------------------------- --- Metalua: $Id$ --- --- Summary: Hygienic macro facility for Metalua --- ----------------------------------------------------------------------- --- --- Copyright (c) 2006, Fabien Fleutot . --- --- This software is released under the MIT Licence, see licence.txt --- for details. --- --------------------------------------------------------------------------------- --- --- ============= --- W A R N I N G --- ============= --- --- THIS IS AN OLD NAIVE IMPLEMENTATION. IT'S PARATIAL (NO HYGIENE WRT OUTSIDE) --- AND WRITTEN FROM SCRATCH WITH PATTERN MATCHING. MUST BE DONE WITH A WALKER. --- --- Traditional macros carry a well-known pitfall, called variable capture: --- when pasting a piece of source code A into another code B, if B bonds some --- variables used by A, then the meaning of A is modified in a way probably --- not intended by the user. --- --- Example: --- A = +{ n = 5 } --- B = +{ local n=3; -{ A } } --- --- In this example, [n] in [A] will be captured by the local variable declared --- by [B], and this is probably a bug. --- --- Notice that this also exists in C. Typical example: --- --- #define swap (type, a, b) do { type tmp=a; a=b; b=tmp } while(0) --- void f() { --- int tmp=1, a=2; --- swap (int, tmp, a); // won't work, [tmp] is captured in the macro --- } --- --- We can fix this by making sure that all local variables and parameters --- created by [B] have fresh names. [mlp.gensym()] produces guaranteed-to-be-unique --- variable names; we use it to replace all local var names declarations and --- occurences in [B] by such fresh names. --- --- Such macros which are guaranteed not to capture any variable are called --- hygienic macros. By extension, an AST guaranteed not to contain capturing --- variables is called an hygienic AST. 
--- --- We implement here some functions which make sure that an AST is hygienic: --- --- - [hygienize_stat (ast)] for statement AST; --- - [hygienize_stat (ast)] for statement block AST; --- - [hygienize_expr (ast)] for expression AST; --- --- This sample deconstructs AST by structural pattern matching, which is --- supported by Metalua extension "match.lua" --- --------------------------------------------------------------------------------- - --{ extension "match" } - -require "std" - -local clone_ctx = std.shallow_copy - --------------------------------------------------------------------------------- --- Tag tables: these allow [hygienize] to decide whether an AST is --- an expression, a statement, or something which isn't changed by --- alpha renaming. --------------------------------------------------------------------------------- -local stat_tags = { - Do = true, Let = true, - While = true, Repeat = true, - If = true, Fornum = true, - Forin = true, Local = true, - Localrec = true, Return = true } - -local expr_tags = { - Function = true, Table = true, - Op = true, Call = true, - Method = true, Index = true } - -local neutral_tags = { - String = true, Number = true, - True = true, False = true, - Dots = true, Break = true, - Id = true } - --------------------------------------------------------------------------------- --- Choose the relevant [hygienize_xxx()] function according to the AST's tag --- and the tables above. --------------------------------------------------------------------------------- -function hygienize (ast) - if not ast.tag then hygienize_block (ast) - elseif neutral_tags[ast.tag] then -- pass - elseif stat_tags[ast.tag] then hygienize_stat (ast) - elseif expr_tags[ast.tag] then hygienize_expr (ast) - else error "Unrecognized AST" end - return ast -end - -if mlp then - -- Add hygienic parsers for quotes - mlp.hexpr = hygienize `o` mlp.expr - mlp.hstat = hygienize `o` mlp.stat - mlp.hblock = hygienize `o` mlp.block -end - --------------------------------------------------------------------------------- --- Make a statement AST hygienic. The optional [ctx] parameter is a --- [old_name -> new_name] map, which holds variable name substitutions --- to perform. --------------------------------------------------------------------------------- -function hygienize_stat (ast, ctx) - if not ctx then ctx = { } end - match ast with - | { ... } if not ast.tag -> hygienize_block (ast, ctx) - | `Do{ ... } -> hygienize_block (ast, clone_ctx (ctx)) - - | `Let{ vars, vals } -> - hygienize_expr_list (vars, ctx) - hygienize_expr_list (vals, ctx) - - | `While{ cond, block } -> - hygienize_expr (cond, ctx) - -- use a clone of [ctx], since the block has a separate scope - hygienize_block (ast, clone_ctx (ctx)) - - | `Repeat{ block, cond } -> - -- use a clone of [ctx], since the block has a separate scope. - -- Notice that the condition in [repeat ... until] is evaluated - -- inside the block's scope, i.e. with [inner_ctx] rather than [ctx]. - local inner_ctx = clone_ctx (ctx) - hygienize_block (ast, inner_ctx) - hygienize (cond, inner_ctx) - - | `If{ ... } -> - for i=1, #ast-1, 2 do - hygienize_expr (ast[i], ctx) -- condtion - -- each block has its own scope - hygienize_block (ast[i+1], clone_ctx (ctx)) -- conditional block - end - if #ast % 2 == 1 then - hygienize_block (ast[#ast], clone_ctx (ctx)) -- else block - end - - | `Fornum{ var, ... } -> - hygienize_expr (ast[i], ctx, 2, #ast-1) -- start, finish, step? 
exprs - local inner_ctx = clone_ctx (ctx) - alpha_rename (var, inner_ctx) -- rename local var [var] in [inner_ctx] - hygienize_block (ast[#ast], inner_ctx) - - | `Forin{ vars, vals, block } -> - hygienize_expr_list (vals, ctx) - local inner_ctx = clone_ctx (ctx) - alpha_rename_list (vars, inner_ctx) -- rename local vars [vars] in [inner_ctx] - hygienize_block (block, inner_ctx) - - | `Local{ vars, vals } -> - -- locals only enter in scope after their values are computed - -- --> parse values first, then rename vars - hygienize_expr_list (vals, ctx) - alpha_rename_list (vars, ctx) - - | `Localrec{ vars, vals } -> - -- As opposed to [`Local], vars are in scope during their values' - -- computation --> rename before parsing values. - alpha_rename_list (vars, ctx) - hygienize_expr_list (vals, ctx) - - | `Call{ ... } | `Method{ ... } -> - -- these are actually expr, delegate to [hygienize_expr] - hygienize_expr (ast, ctx) - - | `Return{ ... } -> hygienize_expr_list (ast, ctx) - | `Break -> - | _ -> error ("Unknown statement "..ast.tag) - end -end - - --------------------------------------------------------------------------------- --- Make an expression AST hygienic. The optional [ctx] parameter is a --- [old_name -> new_name] map, which holds variable name substitutions --- to perform. --------------------------------------------------------------------------------- -function hygienize_expr (ast, ctx) - if not ctx then ctx = { } end - match ast with - | `String{ _ } | `Number{ _ } | `True | `False | `Dots -> -- nothing - - | `Function{ params, block } -> - local inner_ctx = clone_ctx (ctx) - alpha_rename_list (params, inner_ctx) - hygienize_block (block, inner_ctx) - - | `Table{ ... } -> - for _, x in ipairs (ast) do - match x with - | `Key{ key, val } -> - hygienize_expr (key, ctx) - hygienize_expr (val, ctx) - | _ -> hygienize (x, ctx) - end - end - - | `Id{ x } -> - -- Check for substitutions to apply: - local y = ctx[x]; if y then ast[1] = y end - - | `Op{ op, ... } -> - hygienize_expr_list (ast, ctx, 2, #ast) - - -- Just dispatch to sub-expressions: - | `Call{ func, ... } - | `Method{ obj, `String{ name }, ... } - | `Index{ table, key } -> - hygienize_expr_list (ast, ctx) - | _ -> error ("Unknown expression "..ast.tag) - end -end - --------------------------------------------------------------------------------- --- Make an statements block AST hygienic. The optional [ctx] parameter is a --- [old_name -> new_name] map, which holds variable name substitutions --- to perform. --------------------------------------------------------------------------------- -function hygienize_block (ast, ctx) - if not ctx then ctx = { } end - table.iter ((|x| hygienize(x, ctx)), ast) --- for i = 1, #ast do --- hygienize_stat (ast[i], ctx) --- end -end - --------------------------------------------------------------------------------- --- Makes a shallow copy of a table. Used to make a copy of [ctx] substitution --- tables, when entering a new scope. --------------------------------------------------------------------------------- ---[[ -function clone_ctx (ctx) - local r = { } - for k, v in pairs (ctx) do r[k] = v end - return r -end -]] - --------------------------------------------------------------------------------- --- Make every expression from index [start] to [finish], in list --- [ast], hygienic. The optional [ctx] parameter is a [old_name -> --- new_name] map, which holds variable name substitutions to perform. --- [start] defaults to 1, [finish] defaults to the list's size. 
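-- For instance, [hygienize_expr_list (ast, ctx, 2, #ast)] processes
-- every child of [ast] except the first one; this is how the `Op{ ... }
-- case above skips the operator name.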
---------------------------------------------------------------------------------
-function hygienize_expr_list (ast, ctx, start, finish)
-   for i = start or 1, finish or #ast do
-      hygienize_expr (ast[i], ctx)
-   end
-end
-
--------------------------------------------------------------------------------
--- Replace the identifier [var]'s name with a fresh one generated by
--- [mlp.gensym()], and store the new association in [ctx], so that the
--- calling function will be able to substitute identifier occurrences with
--- its new name.
--------------------------------------------------------------------------------
-function alpha_rename (var, ctx)
-   assert (var.tag == "Id")
-   ctx[var[1]] = mlp.gensym()[1]
-   var[1] = ctx[var[1]]
-end
-
--------------------------------------------------------------------------------
--- Runs [alpha_rename] on a list of identifiers.
--------------------------------------------------------------------------------
-function alpha_rename_list (vars, ctx)
-   for _, v in ipairs(vars) do alpha_rename (v, ctx) end
-end
+----------------------------------------------------------------------
+-- Metalua: $Id$
+--
+-- Summary: Hygienic macro facility for Metalua
+--
+----------------------------------------------------------------------
+--
+-- Copyright (c) 2006, Fabien Fleutot .
+--
+-- This software is released under the MIT Licence, see licence.txt
+-- for details.
+--
+--------------------------------------------------------------------------------
+--
+-- =============
+-- W A R N I N G
+-- =============
+--
+-- THIS IS AN OLD NAIVE IMPLEMENTATION. IT'S PARTIAL (NO HYGIENE WRT OUTSIDE)
+-- AND WRITTEN FROM SCRATCH WITH PATTERN MATCHING. MUST BE DONE WITH A WALKER.
+--
+-- Traditional macros carry a well-known pitfall, called variable capture:
+-- when pasting a piece of source code A into another code B, if B binds some
+-- variables used by A, then the meaning of A is modified in a way probably
+-- not intended by the user.
+--
+-- Example:
+--    A = +{ n = 5 }
+--    B = +{ local n=3; -{ A } }
+--
+-- In this example, [n] in [A] will be captured by the local variable declared
+-- by [B], and this is probably a bug.
+--
+-- Notice that this also exists in C. Typical example:
+--
+-- #define swap(type, a, b) do { type tmp=a; a=b; b=tmp; } while(0)
+-- void f() {
+--    int tmp=1, a=2;
+--    swap (int, tmp, a); // won't work, [tmp] is captured in the macro
+-- }
+--
+-- We can fix this by making sure that all local variables and parameters
+-- created by [B] have fresh names. [mlp.gensym()] produces
+-- guaranteed-to-be-unique variable names; we use it to replace all local
+-- var name declarations and occurrences in [B] by such fresh names.
+--
+-- Such macros which are guaranteed not to capture any variable are called
+-- hygienic macros. By extension, an AST guaranteed not to contain capturing
+-- variables is called a hygienic AST.
+-- +-- We implement here some functions which make sure that an AST is hygienic: +-- +-- - [hygienize_stat (ast)] for statement AST; +-- - [hygienize_stat (ast)] for statement block AST; +-- - [hygienize_expr (ast)] for expression AST; +-- +-- This sample deconstructs AST by structural pattern matching, which is +-- supported by Metalua extension "match.lua" +-- +-------------------------------------------------------------------------------- + +-{ extension "match" } + +require "std" + +local clone_ctx = std.shallow_copy + +-------------------------------------------------------------------------------- +-- Tag tables: these allow [hygienize] to decide whether an AST is +-- an expression, a statement, or something which isn't changed by +-- alpha renaming. +-------------------------------------------------------------------------------- +local stat_tags = { + Do = true, Let = true, + While = true, Repeat = true, + If = true, Fornum = true, + Forin = true, Local = true, + Localrec = true, Return = true } + +local expr_tags = { + Function = true, Table = true, + Op = true, Call = true, + Method = true, Index = true } + +local neutral_tags = { + String = true, Number = true, + True = true, False = true, + Dots = true, Break = true, + Id = true } + +-------------------------------------------------------------------------------- +-- Choose the relevant [hygienize_xxx()] function according to the AST's tag +-- and the tables above. +-------------------------------------------------------------------------------- +function hygienize (ast) + if not ast.tag then hygienize_block (ast) + elseif neutral_tags[ast.tag] then -- pass + elseif stat_tags[ast.tag] then hygienize_stat (ast) + elseif expr_tags[ast.tag] then hygienize_expr (ast) + else error "Unrecognized AST" end + return ast +end + +if mlp then + -- Add hygienic parsers for quotes + mlp.hexpr = hygienize `o` mlp.expr + mlp.hstat = hygienize `o` mlp.stat + mlp.hblock = hygienize `o` mlp.block +end + +-------------------------------------------------------------------------------- +-- Make a statement AST hygienic. The optional [ctx] parameter is a +-- [old_name -> new_name] map, which holds variable name substitutions +-- to perform. +-------------------------------------------------------------------------------- +function hygienize_stat (ast, ctx) + if not ctx then ctx = { } end + match ast with + | { ... } if not ast.tag -> hygienize_block (ast, ctx) + | `Do{ ... } -> hygienize_block (ast, clone_ctx (ctx)) + + | `Let{ vars, vals } -> + hygienize_expr_list (vars, ctx) + hygienize_expr_list (vals, ctx) + + | `While{ cond, block } -> + hygienize_expr (cond, ctx) + -- use a clone of [ctx], since the block has a separate scope + hygienize_block (ast, clone_ctx (ctx)) + + | `Repeat{ block, cond } -> + -- use a clone of [ctx], since the block has a separate scope. + -- Notice that the condition in [repeat ... until] is evaluated + -- inside the block's scope, i.e. with [inner_ctx] rather than [ctx]. + local inner_ctx = clone_ctx (ctx) + hygienize_block (ast, inner_ctx) + hygienize (cond, inner_ctx) + + | `If{ ... } -> + for i=1, #ast-1, 2 do + hygienize_expr (ast[i], ctx) -- condtion + -- each block has its own scope + hygienize_block (ast[i+1], clone_ctx (ctx)) -- conditional block + end + if #ast % 2 == 1 then + hygienize_block (ast[#ast], clone_ctx (ctx)) -- else block + end + + | `Fornum{ var, ... } -> + hygienize_expr (ast[i], ctx, 2, #ast-1) -- start, finish, step? 
exprs + local inner_ctx = clone_ctx (ctx) + alpha_rename (var, inner_ctx) -- rename local var [var] in [inner_ctx] + hygienize_block (ast[#ast], inner_ctx) + + | `Forin{ vars, vals, block } -> + hygienize_expr_list (vals, ctx) + local inner_ctx = clone_ctx (ctx) + alpha_rename_list (vars, inner_ctx) -- rename local vars [vars] in [inner_ctx] + hygienize_block (block, inner_ctx) + + | `Local{ vars, vals } -> + -- locals only enter in scope after their values are computed + -- --> parse values first, then rename vars + hygienize_expr_list (vals, ctx) + alpha_rename_list (vars, ctx) + + | `Localrec{ vars, vals } -> + -- As opposed to [`Local], vars are in scope during their values' + -- computation --> rename before parsing values. + alpha_rename_list (vars, ctx) + hygienize_expr_list (vals, ctx) + + | `Call{ ... } | `Method{ ... } -> + -- these are actually expr, delegate to [hygienize_expr] + hygienize_expr (ast, ctx) + + | `Return{ ... } -> hygienize_expr_list (ast, ctx) + | `Break -> + | _ -> error ("Unknown statement "..ast.tag) + end +end + + +-------------------------------------------------------------------------------- +-- Make an expression AST hygienic. The optional [ctx] parameter is a +-- [old_name -> new_name] map, which holds variable name substitutions +-- to perform. +-------------------------------------------------------------------------------- +function hygienize_expr (ast, ctx) + if not ctx then ctx = { } end + match ast with + | `String{ _ } | `Number{ _ } | `True | `False | `Dots -> -- nothing + + | `Function{ params, block } -> + local inner_ctx = clone_ctx (ctx) + alpha_rename_list (params, inner_ctx) + hygienize_block (block, inner_ctx) + + | `Table{ ... } -> + for _, x in ipairs (ast) do + match x with + | `Key{ key, val } -> + hygienize_expr (key, ctx) + hygienize_expr (val, ctx) + | _ -> hygienize (x, ctx) + end + end + + | `Id{ x } -> + -- Check for substitutions to apply: + local y = ctx[x]; if y then ast[1] = y end + + | `Op{ op, ... } -> + hygienize_expr_list (ast, ctx, 2, #ast) + + -- Just dispatch to sub-expressions: + | `Call{ func, ... } + | `Method{ obj, `String{ name }, ... } + | `Index{ table, key } -> + hygienize_expr_list (ast, ctx) + | _ -> error ("Unknown expression "..ast.tag) + end +end + +-------------------------------------------------------------------------------- +-- Make an statements block AST hygienic. The optional [ctx] parameter is a +-- [old_name -> new_name] map, which holds variable name substitutions +-- to perform. +-------------------------------------------------------------------------------- +function hygienize_block (ast, ctx) + if not ctx then ctx = { } end + table.iter ((|x| hygienize(x, ctx)), ast) +-- for i = 1, #ast do +-- hygienize_stat (ast[i], ctx) +-- end +end + +-------------------------------------------------------------------------------- +-- Makes a shallow copy of a table. Used to make a copy of [ctx] substitution +-- tables, when entering a new scope. +-------------------------------------------------------------------------------- +--[[ +function clone_ctx (ctx) + local r = { } + for k, v in pairs (ctx) do r[k] = v end + return r +end +]] + +-------------------------------------------------------------------------------- +-- Make every expression from index [start] to [finish], in list +-- [ast], hygienic. The optional [ctx] parameter is a [old_name -> +-- new_name] map, which holds variable name substitutions to perform. +-- [start] defaults to 1, [finish] defaults to the list's size. 
+-------------------------------------------------------------------------------- +function hygienize_expr_list (ast, ctx, start, finish) + for i = start or 1, finish or #ast do + hygienize_expr (ast[i], ctx) + end +end + +-------------------------------------------------------------------------------- +-- Replace the identifier [var]'s name with a fresh one generated by +-- [mlp.gensym()], and store the new association in [ctx], so that the +-- calling function will be able to substitute identifier occurences with +-- its new name. +-------------------------------------------------------------------------------- +function alpha_rename (var, ctx) + assert (var.tag == "Id") + ctx[var[1]] = mlp.gensym()[1] + var[1] = ctx[var[1]] +end + +-------------------------------------------------------------------------------- +-- runs [alpha_rename] on a list of identifiers. +-------------------------------------------------------------------------------- +function alpha_rename_list (vars, ctx) + for _, v in ipairs(vars) do alpha_rename (v, ctx) end +end diff --git a/src/compiler/gg.lua b/src/compiler/gg.lua index 7d29002..7f5b702 100644 --- a/src/compiler/gg.lua +++ b/src/compiler/gg.lua @@ -1,739 +1,739 @@ ----------------------------------------------------------------------- --- Metalua. --- --- Summary: parser generator. Collection of higher order functors, --- which allow to build and combine parsers. Relies on a lexer --- that supports the same API as the one exposed in mll.lua. --- ----------------------------------------------------------------------- --- --- Copyright (c) 2006-2008, Fabien Fleutot . --- --- This software is released under the MIT Licence, see licence.txt --- for details. --- ----------------------------------------------------------------------- - --------------------------------------------------------------------------------- --- --- Exported API: --- --- Parser generators: --- * [gg.sequence()] --- * [gg.multisequence()] --- * [gg.expr()] --- * [gg.list()] --- * [gg.onkeyword()] --- * [gg.optkeyword()] --- --- Other functions: --- * [gg.parse_error()] --- * [gg.make_parser()] --- * [gg.is_parser()] --- --------------------------------------------------------------------------------- - -module("gg", package.seeall) - -------------------------------------------------------------------------------- --- parser metatable, which maps __call to method parse, and adds some --- error tracing boilerplate. -------------------------------------------------------------------------------- -local parser_metatable = { } -function parser_metatable.__call (parser, lx, ...) - --printf ("Call parser %q of type %q", parser.name or "?", parser.kind) - if mlc.metabugs then - return parser:parse (lx, ...) - --local x = parser:parse (lx, ...) - --printf ("Result of parser %q: %s", - -- parser.name or "?", - -- _G.table.tostring(x, "nohash", 80)) - --return x - else - local li = lx:lineinfo_right() or { "?", "?", "?", "?" } - local status, ast = pcall (parser.parse, parser, lx, ...) - if status then return ast else - error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s", - ast:strmatch "gg.lua:%d+: (.*)" or ast, - li[1], li[2], li[3], parser.name or parser.kind)) - end - end -end - -------------------------------------------------------------------------------- --- Turn a table into a parser, mainly by setting the metatable. 
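--
-- A usage sketch (the parser below is hypothetical): after the call,
-- [p] is callable, [p (lx)] dispatches to [p:parse (lx)] through the
-- metatable, and [p.transformers:add (f)] registers an AST->AST
-- post-processor.
--
--    local p = make_parser ("custom", { name = "nop" })
--    function p:parse (lx) return { tag = "Nop" } end
--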
-------------------------------------------------------------------------------- -function make_parser(kind, p) - p.kind = kind - if not p.transformers then p.transformers = { } end - function p.transformers:add (x) - table.insert (self, x) - end - setmetatable (p, parser_metatable) - return p -end - -------------------------------------------------------------------------------- --- Return true iff [x] is a parser. --- If it's a gg-generated parser, reutrn the name of its kind. -------------------------------------------------------------------------------- -function is_parser (x) - return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind -end - -------------------------------------------------------------------------------- --- Parse a sequence, without applying builder nor transformers -------------------------------------------------------------------------------- -local function raw_parse_sequence (lx, p) - local r = { } - for i=1, #p do - e=p[i] - if type(e) == "string" then - if not lx:is_keyword (lx:next(), e) then - parse_error (lx, "Keyword '%s' expected", e) end - elseif is_parser (e) then - table.insert (r, e (lx)) - else - gg.parse_error (lx,"Sequence `%s': element #%i is not a string ".. - "nor a parser: %s", - p.name, i, table.tostring(e)) - end - end - return r -end - -------------------------------------------------------------------------------- --- Parse a multisequence, without applying multisequence transformers. --- The sequences are completely parsed. -------------------------------------------------------------------------------- -local function raw_parse_multisequence (lx, sequence_table, default) - local seq_parser = sequence_table[lx:is_keyword(lx:peek())] - if seq_parser then return seq_parser (lx) - elseif default then return default (lx) - else return false end -end - -------------------------------------------------------------------------------- --- Applies all transformers listed in parser on ast. -------------------------------------------------------------------------------- -local function transform (ast, parser, fli, lli) - if parser.transformers then - for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end - end - if type(ast) == 'table'then - local ali = ast.lineinfo - if not ali or ali.first~=fli or ali.last~=lli then - ast.lineinfo = { first = fli, last = lli } - end - end - return ast -end - -------------------------------------------------------------------------------- --- Generate a tracable parsing error (not implemented yet) -------------------------------------------------------------------------------- -function parse_error(lx, fmt, ...) - local li = lx:lineinfo_left() or {-1,-1,-1, ""} - local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...) - local src = lx.src - if li[3]>0 and src then - local i, j = li[3], li[3] - while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end - while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end - local srcline = src:sub (i+1, j-1) - local idx = string.rep (" ", li[2]).."^" - msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx) - end - error(msg) -end - -------------------------------------------------------------------------------- --- --- Sequence parser generator --- -------------------------------------------------------------------------------- --- Input fields: --- --- * [builder]: how to build an AST out of sequence parts. let [x] be the list --- of subparser results (keywords are simply omitted). 
[builder] can be: --- - [nil], in which case the result of parsing is simply [x] --- - a string, which is then put as a tag on [x] --- - a function, which takes [x] as a parameter and returns an AST. --- --- * [name]: the name of the parser. Used for debug messages --- --- * [transformers]: a list of AST->AST functions, applied in order on ASTs --- returned by the parser. --- --- * Table-part entries corresponds to keywords (strings) and subparsers --- (function and callable objects). --- --- After creation, the following fields are added: --- * [parse] the parsing function lexer->AST --- * [kind] == "sequence" --- * [name] is set, if it wasn't in the input. --- -------------------------------------------------------------------------------- -function sequence (p) - make_parser ("sequence", p) - - ------------------------------------------------------------------- - -- Parsing method - ------------------------------------------------------------------- - function p:parse (lx) - -- Raw parsing: - local fli = lx:lineinfo_right() - local seq = raw_parse_sequence (lx, self) - local lli = lx:lineinfo_left() - - -- Builder application: - local builder, tb = self.builder, type (self.builder) - if tb == "string" then seq.tag = builder - elseif tb == "function" or builder and builder.__call then seq = builder(seq) - elseif builder == nil then -- nothing - else error ("Invalid builder of type "..tb.." in sequence") end - seq = transform (seq, self, fli, lli) - assert (not seq or seq.lineinfo) - return seq - end - - ------------------------------------------------------------------- - -- Construction - ------------------------------------------------------------------- - -- Try to build a proper name - if not p.name and type(p[1])=="string" then - p.name = p[1].." ..." - if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end - else - p.name = "" - end - - return p -end -- - - -------------------------------------------------------------------------------- --- --- Multiple, keyword-driven, sequence parser generator --- -------------------------------------------------------------------------------- --- in [p], useful fields are: --- --- * [transformers]: as usual --- --- * [name]: as usual --- --- * Table-part entries must be sequence parsers, or tables which can --- be turned into a sequence parser by [gg.sequence]. These --- sequences must start with a keyword, and this initial keyword --- must be different for each sequence. The table-part entries will --- be removed after [gg.multisequence] returns. --- --- * [default]: the parser to run if the next keyword in the lexer is --- none of the registered initial keywords. If there's no default --- parser and no suitable initial keyword, the multisequence parser --- simply returns [false]. --- --- After creation, the following fields are added: --- --- * [parse] the parsing function lexer->AST --- --- * [sequences] the table of sequences, indexed by initial keywords. --- --- * [add] method takes a sequence parser or a config table for --- [gg.sequence], and adds/replaces the corresponding sequence --- parser. If the keyword was already used, the former sequence is --- removed and a warning is issued. 
--- --- * [get] method returns a sequence by its initial keyword --- --- * [kind] == "multisequence" --- -------------------------------------------------------------------------------- -function multisequence (p) - make_parser ("multisequence", p) - - ------------------------------------------------------------------- - -- Add a sequence (might be just a config table for [gg.sequence]) - ------------------------------------------------------------------- - function p:add (s) - -- compile if necessary: - if not is_parser(s) then sequence(s) end - if type(s[1]) ~= "string" then - error "Invalid sequence for multiseq" - elseif self.sequences[s[1]] then - printf (" *** Warning: keyword %q overloaded in multisequence ***", s[1]) - end - self.sequences[s[1]] = s - end -- - - ------------------------------------------------------------------- - -- Get the sequence starting with this keyword. [kw :: string] - ------------------------------------------------------------------- - function p:get (kw) return self.sequences [kw] end - - ------------------------------------------------------------------- - -- Remove the sequence starting with keyword [kw :: string] - ------------------------------------------------------------------- - function p:del (kw) - if not self.sequences[kw] then - printf("*** Warning: trying to delete sequence starting ".. - "with %q from a multisequence having no such ".. - "entry ***", kw) end - local removed = self.sequences[kw] - self.sequences[kw] = nil - return removed - end - - ------------------------------------------------------------------- - -- Parsing method - ------------------------------------------------------------------- - function p:parse (lx) - local fli = lx:lineinfo_right() - local x = raw_parse_multisequence (lx, self.sequences, self.default) - local lli = lx:lineinfo_left() - return transform (x, self, fli, lli) - end - - ------------------------------------------------------------------- - -- Construction - ------------------------------------------------------------------- - -- Register the sequences passed to the constructor. They're going - -- from the array part of the parser to the hash part of field - -- [sequences] - p.sequences = { } - for i=1, #p do p:add (p[i]); p[i] = nil end - - -- FIXME: why is this commented out? - --if p.default and not is_parser(p.default) then sequence(p.default) end - return p -end -- - - -------------------------------------------------------------------------------- --- --- Expression parser generator --- -------------------------------------------------------------------------------- --- --- Expression configuration relies on three tables: [prefix], [infix] --- and [suffix]. Moreover, the primary parser can be replaced by a --- table: in this case the [primary] table will be passed to --- [gg.multisequence] to create a parser. --- --- Each of these tables is a modified multisequence parser: the --- differences with respect to regular multisequence config tables are: --- --- * the builder takes specific parameters: --- - for [prefix], it takes the result of the prefix sequence parser, --- and the prefixed expression --- - for [infix], it takes the left-hand-side expression, the results --- of the infix sequence parser, and the right-hand-side expression. --- - for [suffix], it takes the suffixed expression, and theresult --- of the suffix sequence parser. 
--- --- * the default field is a list, with parameters: --- - [parser] the raw parsing function --- - [transformers], as usual --- - [prec], the operator's precedence --- - [assoc] for [infix] table, the operator's associativity, which --- can be "left", "right" or "flat" (default to left) --- --- In [p], useful fields are: --- * [transformers]: as usual --- * [name]: as usual --- * [primary]: the atomic expression parser, or a multisequence config --- table (mandatory) --- * [prefix]: prefix operators config table, see above. --- * [infix]: infix operators config table, see above. --- * [suffix]: suffix operators config table, see above. --- --- After creation, these fields are added: --- * [kind] == "expr" --- * [parse] as usual --- * each table is turned into a multisequence, and therefore has an --- [add] method --- -------------------------------------------------------------------------------- -function expr (p) - make_parser ("expr", p) - - ------------------------------------------------------------------- - -- parser method. - -- In addition to the lexer, it takes an optional precedence: - -- it won't read expressions whose precedence is lower or equal - -- to [prec]. - ------------------------------------------------------------------- - function p:parse (lx, prec) - prec = prec or 0 - - ------------------------------------------------------ - -- Extract the right parser and the corresponding - -- options table, for (pre|in|suff)fix operators. - -- Options include prec, assoc, transformers. - ------------------------------------------------------ - local function get_parser_info (tab) - local p2 = tab:get (lx:is_keyword (lx:peek())) - if p2 then -- keyword-based sequence found - local function parser(lx) return raw_parse_sequence(lx, p2) end - return parser, p2 - else -- Got to use the default parser - local d = tab.default - if d then return d.parse or d.parser, d - else return false, false end - end - end - - ------------------------------------------------------ - -- Look for a prefix sequence. Multiple prefixes are - -- handled through the recursive [p.parse] call. - -- Notice the double-transform: one for the primary - -- expr, and one for the one with the prefix op. - ------------------------------------------------------ - local function handle_prefix () - local fli = lx:lineinfo_right() - local p2_func, p2 = get_parser_info (self.prefix) - local op = p2_func and p2_func (lx) - if op then -- Keyword-based sequence found - local ili = lx:lineinfo_right() -- Intermediate LineInfo - local e = p2.builder (op, self:parse (lx, p2.prec)) - local lli = lx:lineinfo_left() - return transform (transform (e, p2, ili, lli), self, fli, lli) - else -- No prefix found, get a primary expression - local e = self.primary(lx) - local lli = lx:lineinfo_left() - return transform (e, self, fli, lli) - end - end -- - - ------------------------------------------------------ - -- Look for an infix sequence+right-hand-side operand. - -- Return the whole binary expression result, - -- or false if no operator was found. - ------------------------------------------------------ - local function handle_infix (e) - local p2_func, p2 = get_parser_info (self.infix) - if not p2 then return false end - - ----------------------------------------- - -- Handle flattening operators: gather all operands - -- of the series in [list]; when a different operator - -- is found, stop, build from [list], [transform] and - -- return. 
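      -- (For example, with a "flat" operator such as a chainable [..],
      -- [a .. b .. c] yields a single [builder { a, b, c }] call
      -- instead of nested binary nodes.)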
- ----------------------------------------- - if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then - local fli = lx:lineinfo_right() - local pflat, list = p2, { e } - repeat - local op = p2_func(lx) - if not op then break end - table.insert (list, self:parse (lx, p2.prec)) - local _ -- We only care about checking that p2==pflat - _, p2 = get_parser_info (self.infix) - until p2 ~= pflat - local e2 = pflat.builder (list) - local lli = lx:lineinfo_left() - return transform (transform (e2, pflat, fli, lli), self, fli, lli) - - ----------------------------------------- - -- Handle regular infix operators: [e] the LHS is known, - -- just gather the operator and [e2] the RHS. - -- Result goes in [e3]. - ----------------------------------------- - elseif p2.prec and p2.prec>prec or - p2.prec==prec and p2.assoc=="right" then - local fli = e.lineinfo.first -- lx:lineinfo_right() - local op = p2_func(lx) - if not op then return false end - local e2 = self:parse (lx, p2.prec) - local e3 = p2.builder (e, op, e2) - local lli = lx:lineinfo_left() - return transform (transform (e3, p2, fli, lli), self, fli, lli) - - ----------------------------------------- - -- Check for non-associative operators, and complain if applicable. - ----------------------------------------- - elseif p2.assoc=="none" and p2.prec==prec then - parser_error (lx, "non-associative operator!") - - ----------------------------------------- - -- No infix operator suitable at that precedence - ----------------------------------------- - else return false end - - end -- - - ------------------------------------------------------ - -- Look for a suffix sequence. - -- Return the result of suffix operator on [e], - -- or false if no operator was found. - ------------------------------------------------------ - local function handle_suffix (e) - -- FIXME bad fli, must take e.lineinfo.first - local p2_func, p2 = get_parser_info (self.suffix) - if not p2 then return false end - if not p2.prec or p2.prec>=prec then - --local fli = lx:lineinfo_right() - local fli = e.lineinfo.first - local op = p2_func(lx) - if not op then return false end - local lli = lx:lineinfo_left() - e = p2.builder (e, op) - e = transform (transform (e, p2, fli, lli), self, fli, lli) - return e - end - return false - end -- - - ------------------------------------------------------ - -- Parser body: read suffix and (infix+operand) - -- extensions as long as we're able to fetch more at - -- this precedence level. - ------------------------------------------------------ - local e = handle_prefix() - repeat - local x = handle_suffix (e); e = x or e - local y = handle_infix (e); e = y or e - until not (x or y) - - -- No transform: it already happened in operators handling - return e - end -- - - ------------------------------------------------------------------- - -- Construction - ------------------------------------------------------------------- - if not p.primary then p.primary=p[1]; p[1]=nil end - for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do - if not p[t] then p[t] = { } end - if not is_parser(p[t]) then multisequence(p[t]) end - end - function p:add(...) return self.primary:add(...) 
end - return p -end -- - - -------------------------------------------------------------------------------- --- --- List parser generator --- -------------------------------------------------------------------------------- --- In [p], the following fields can be provided in input: --- --- * [builder]: takes list of subparser results, returns AST --- * [transformers]: as usual --- * [name]: as usual --- --- * [terminators]: list of strings representing the keywords which --- might mark the end of the list. When non-empty, the list is --- allowed to be empty. A string is treated as a single-element --- table, whose element is that string, e.g. ["do"] is the same as --- [{"do"}]. --- --- * [separators]: list of strings representing the keywords which can --- separate elements of the list. When non-empty, one of these --- keyword has to be found between each element. Lack of a separator --- indicates the end of the list. A string is treated as a --- single-element table, whose element is that string, e.g. ["do"] --- is the same as [{"do"}]. If [terminators] is empty/nil, then --- [separators] has to be non-empty. --- --- After creation, the following fields are added: --- * [parse] the parsing function lexer->AST --- * [kind] == "list" --- -------------------------------------------------------------------------------- -function list (p) - make_parser ("list", p) - - ------------------------------------------------------------------- - -- Parsing method - ------------------------------------------------------------------- - function p:parse (lx) - - ------------------------------------------------------ - -- Used to quickly check whether there's a terminator - -- or a separator immediately ahead - ------------------------------------------------------ - local function peek_is_in (keywords) - return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end - - local x = { } - local fli = lx:lineinfo_right() - - -- if there's a terminator to start with, don't bother trying - if not peek_is_in (self.terminators) then - repeat table.insert (x, self.primary (lx)) -- read one element - until - -- First reason to stop: There's a separator list specified, - -- and next token isn't one. Otherwise, consume it with [lx:next()] - self.separators and not(peek_is_in (self.separators) and lx:next()) or - -- Other reason to stop: terminator token ahead - peek_is_in (self.terminators) or - -- Last reason: end of file reached - lx:peek().tag=="Eof" - end - - local lli = lx:lineinfo_left() - - -- Apply the builder. It can be a string, or a callable value, - -- or simply nothing. 
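      -- (For instance, [builder = "Table"] would tag the collected
      -- elements as a `Table{ ... } node, while a hypothetical
      -- [builder = function (x) return x[1] end] would keep only the
      -- first element.)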
- local b = self.builder - if b then - if type(b)=="string" then x.tag = b -- b is a string, use it as a tag - elseif type(b)=="function" then x=b(x) - else - local bmt = getmetatable(b) - if bmt and bmt.__call then x=b(x) end - end - end - return transform (x, self, fli, lli) - end -- - - ------------------------------------------------------------------- - -- Construction - ------------------------------------------------------------------- - if not p.primary then p.primary = p[1]; p[1] = nil end - if type(p.terminators) == "string" then p.terminators = { p.terminators } - elseif p.terminators and #p.terminators == 0 then p.terminators = nil end - if type(p.separators) == "string" then p.separators = { p.separators } - elseif p.separators and #p.separators == 0 then p.separators = nil end - - return p -end -- - - -------------------------------------------------------------------------------- --- --- Keyword-conditionned parser generator --- -------------------------------------------------------------------------------- --- --- Only apply a parser if a given keyword is found. The result of --- [gg.onkeyword] parser is the result of the subparser (modulo --- [transformers] applications). --- --- lineinfo: the keyword is *not* included in the boundaries of the --- resulting lineinfo. A review of all usages of gg.onkeyword() in the --- implementation of metalua has shown that it was the appropriate choice --- in every case. --- --- Input fields: --- --- * [name]: as usual --- --- * [transformers]: as usual --- --- * [peek]: if non-nil, the conditionning keyword is left in the lexeme --- stream instead of being consumed. --- --- * [primary]: the subparser. --- --- * [keywords]: list of strings representing triggering keywords. --- --- * Table-part entries can contain strings, and/or exactly one parser. --- Strings are put in [keywords], and the parser is put in [primary]. --- --- After the call, the following fields will be set: --- --- * [parse] the parsing method --- * [kind] == "onkeyword" --- * [primary] --- * [keywords] --- -------------------------------------------------------------------------------- -function onkeyword (p) - make_parser ("onkeyword", p) - - ------------------------------------------------------------------- - -- Parsing method - ------------------------------------------------------------------- - function p:parse(lx) - if lx:is_keyword (lx:peek(), unpack(self.keywords)) then - --local fli = lx:lineinfo_right() - if not self.peek then lx:next() end - local content = self.primary (lx) - --local lli = lx:lineinfo_left() - local fli, lli = content.lineinfo.first, content.lineinfo.last - return transform (content, p, fli, lli) - else return false end - end - - ------------------------------------------------------------------- - -- Construction - ------------------------------------------------------------------- - if not p.keywords then p.keywords = { } end - for _, x in ipairs(p) do - if type(x)=="string" then table.insert (p.keywords, x) - else assert (not p.primary and is_parser (x)); p.primary = x end - end - assert (p.primary, 'no primary parser in gg.onkeyword') - return p -end -- - - -------------------------------------------------------------------------------- --- --- Optional keyword consummer pseudo-parser generator --- -------------------------------------------------------------------------------- --- --- This doesn't return a real parser, just a function. That function parses --- one of the keywords passed as parameters, and returns it. 
It returns --- [false] if no matching keyword is found. --- --- Notice that tokens returned by lexer already carry lineinfo, therefore --- there's no need to add them, as done usually through transform() calls. -------------------------------------------------------------------------------- -function optkeyword (...) - local args = {...} - if type (args[1]) == "table" then - assert (#args == 1) - args = args[1] - end - for _, v in ipairs(args) do assert (type(v)=="string") end - return function (lx) - local x = lx:is_keyword (lx:peek(), unpack (args)) - if x then lx:next(); return x - else return false end - end -end - - -------------------------------------------------------------------------------- --- --- Run a parser with a special lexer --- -------------------------------------------------------------------------------- --- --- This doesn't return a real parser, just a function. --- First argument is the lexer class to be used with the parser, --- 2nd is the parser itself. --- The resulting parser returns whatever the argument parser does. --- -------------------------------------------------------------------------------- -function with_lexer(new_lexer, parser) - - ------------------------------------------------------------------- - -- Most gg functions take their parameters in a table, so it's - -- better to silently accept when with_lexer{ } is called with - -- its arguments in a list: - ------------------------------------------------------------------- - if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then - return with_lexer(unpack(new_lexer)) - end - - ------------------------------------------------------------------- - -- Save the current lexer, switch it for the new one, run the parser, - -- restore the previous lexer, even if the parser caused an error. - ------------------------------------------------------------------- - return function (lx) - local old_lexer = getmetatable(lx) - lx:sync() - setmetatable(lx, new_lexer) - local status, result = pcall(parser, lx) - lx:sync() - setmetatable(lx, old_lexer) - if status then return result else error(result) end - end -end +---------------------------------------------------------------------- +-- Metalua. +-- +-- Summary: parser generator. Collection of higher order functors, +-- which allow to build and combine parsers. Relies on a lexer +-- that supports the same API as the one exposed in mll.lua. +-- +---------------------------------------------------------------------- +-- +-- Copyright (c) 2006-2008, Fabien Fleutot . +-- +-- This software is released under the MIT Licence, see licence.txt +-- for details. +-- +---------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +-- +-- Exported API: +-- +-- Parser generators: +-- * [gg.sequence()] +-- * [gg.multisequence()] +-- * [gg.expr()] +-- * [gg.list()] +-- * [gg.onkeyword()] +-- * [gg.optkeyword()] +-- +-- Other functions: +-- * [gg.parse_error()] +-- * [gg.make_parser()] +-- * [gg.is_parser()] +-- +-------------------------------------------------------------------------------- + +module("gg", package.seeall) + +------------------------------------------------------------------------------- +-- parser metatable, which maps __call to method parse, and adds some +-- error tracing boilerplate. +------------------------------------------------------------------------------- +local parser_metatable = { } +function parser_metatable.__call (parser, lx, ...) 
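   -- Dispatch to the [parse] method. Under [mlc.metabugs] the call is
   -- direct, which gives cleaner stack traces when debugging; otherwise
   -- it is wrapped in [pcall] so the error can be re-thrown annotated
   -- with the position and the parser's name or kind.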
+ --printf ("Call parser %q of type %q", parser.name or "?", parser.kind) + if mlc.metabugs then + return parser:parse (lx, ...) + --local x = parser:parse (lx, ...) + --printf ("Result of parser %q: %s", + -- parser.name or "?", + -- _G.table.tostring(x, "nohash", 80)) + --return x + else + local li = lx:lineinfo_right() or { "?", "?", "?", "?" } + local status, ast = pcall (parser.parse, parser, lx, ...) + if status then return ast else + error (string.format ("%s\n - (l.%s, c.%s, k.%s) in parser %s", + ast:strmatch "gg.lua:%d+: (.*)" or ast, + li[1], li[2], li[3], parser.name or parser.kind)) + end + end +end + +------------------------------------------------------------------------------- +-- Turn a table into a parser, mainly by setting the metatable. +------------------------------------------------------------------------------- +function make_parser(kind, p) + p.kind = kind + if not p.transformers then p.transformers = { } end + function p.transformers:add (x) + table.insert (self, x) + end + setmetatable (p, parser_metatable) + return p +end + +------------------------------------------------------------------------------- +-- Return true iff [x] is a parser. +-- If it's a gg-generated parser, reutrn the name of its kind. +------------------------------------------------------------------------------- +function is_parser (x) + return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind +end + +------------------------------------------------------------------------------- +-- Parse a sequence, without applying builder nor transformers +------------------------------------------------------------------------------- +local function raw_parse_sequence (lx, p) + local r = { } + for i=1, #p do + e=p[i] + if type(e) == "string" then + if not lx:is_keyword (lx:next(), e) then + parse_error (lx, "Keyword '%s' expected", e) end + elseif is_parser (e) then + table.insert (r, e (lx)) + else + gg.parse_error (lx,"Sequence `%s': element #%i is not a string ".. + "nor a parser: %s", + p.name, i, table.tostring(e)) + end + end + return r +end + +------------------------------------------------------------------------------- +-- Parse a multisequence, without applying multisequence transformers. +-- The sequences are completely parsed. +------------------------------------------------------------------------------- +local function raw_parse_multisequence (lx, sequence_table, default) + local seq_parser = sequence_table[lx:is_keyword(lx:peek())] + if seq_parser then return seq_parser (lx) + elseif default then return default (lx) + else return false end +end + +------------------------------------------------------------------------------- +-- Applies all transformers listed in parser on ast. +------------------------------------------------------------------------------- +local function transform (ast, parser, fli, lli) + if parser.transformers then + for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end + end + if type(ast) == 'table'then + local ali = ast.lineinfo + if not ali or ali.first~=fli or ali.last~=lli then + ast.lineinfo = { first = fli, last = lli } + end + end + return ast +end + +------------------------------------------------------------------------------- +-- Generate a tracable parsing error (not implemented yet) +------------------------------------------------------------------------------- +function parse_error(lx, fmt, ...) 
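   -- Build a "line %i, char %i" message; when the raw source is
   -- available, also quote the offending line and point at the error
   -- column with a caret.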
+ local li = lx:lineinfo_left() or {-1,-1,-1, ""} + local msg = string.format("line %i, char %i: "..fmt, li[1], li[2], ...) + local src = lx.src + if li[3]>0 and src then + local i, j = li[3], li[3] + while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end + while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end + local srcline = src:sub (i+1, j-1) + local idx = string.rep (" ", li[2]).."^" + msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx) + end + error(msg) +end + +------------------------------------------------------------------------------- +-- +-- Sequence parser generator +-- +------------------------------------------------------------------------------- +-- Input fields: +-- +-- * [builder]: how to build an AST out of sequence parts. let [x] be the list +-- of subparser results (keywords are simply omitted). [builder] can be: +-- - [nil], in which case the result of parsing is simply [x] +-- - a string, which is then put as a tag on [x] +-- - a function, which takes [x] as a parameter and returns an AST. +-- +-- * [name]: the name of the parser. Used for debug messages +-- +-- * [transformers]: a list of AST->AST functions, applied in order on ASTs +-- returned by the parser. +-- +-- * Table-part entries corresponds to keywords (strings) and subparsers +-- (function and callable objects). +-- +-- After creation, the following fields are added: +-- * [parse] the parsing function lexer->AST +-- * [kind] == "sequence" +-- * [name] is set, if it wasn't in the input. +-- +------------------------------------------------------------------------------- +function sequence (p) + make_parser ("sequence", p) + + ------------------------------------------------------------------- + -- Parsing method + ------------------------------------------------------------------- + function p:parse (lx) + -- Raw parsing: + local fli = lx:lineinfo_right() + local seq = raw_parse_sequence (lx, self) + local lli = lx:lineinfo_left() + + -- Builder application: + local builder, tb = self.builder, type (self.builder) + if tb == "string" then seq.tag = builder + elseif tb == "function" or builder and builder.__call then seq = builder(seq) + elseif builder == nil then -- nothing + else error ("Invalid builder of type "..tb.." in sequence") end + seq = transform (seq, self, fli, lli) + assert (not seq or seq.lineinfo) + return seq + end + + ------------------------------------------------------------------- + -- Construction + ------------------------------------------------------------------- + -- Try to build a proper name + if not p.name and type(p[1])=="string" then + p.name = p[1].." ..." + if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end + else + p.name = "" + end + + return p +end -- + + +------------------------------------------------------------------------------- +-- +-- Multiple, keyword-driven, sequence parser generator +-- +------------------------------------------------------------------------------- +-- in [p], useful fields are: +-- +-- * [transformers]: as usual +-- +-- * [name]: as usual +-- +-- * Table-part entries must be sequence parsers, or tables which can +-- be turned into a sequence parser by [gg.sequence]. These +-- sequences must start with a keyword, and this initial keyword +-- must be different for each sequence. The table-part entries will +-- be removed after [gg.multisequence] returns. +-- +-- * [default]: the parser to run if the next keyword in the lexer is +-- none of the registered initial keywords. 
If there's no default +-- parser and no suitable initial keyword, the multisequence parser +-- simply returns [false]. +-- +-- After creation, the following fields are added: +-- +-- * [parse] the parsing function lexer->AST +-- +-- * [sequences] the table of sequences, indexed by initial keywords. +-- +-- * [add] method takes a sequence parser or a config table for +-- [gg.sequence], and adds/replaces the corresponding sequence +-- parser. If the keyword was already used, the former sequence is +-- removed and a warning is issued. +-- +-- * [get] method returns a sequence by its initial keyword +-- +-- * [kind] == "multisequence" +-- +------------------------------------------------------------------------------- +function multisequence (p) + make_parser ("multisequence", p) + + ------------------------------------------------------------------- + -- Add a sequence (might be just a config table for [gg.sequence]) + ------------------------------------------------------------------- + function p:add (s) + -- compile if necessary: + if not is_parser(s) then sequence(s) end + if type(s[1]) ~= "string" then + error "Invalid sequence for multiseq" + elseif self.sequences[s[1]] then + printf (" *** Warning: keyword %q overloaded in multisequence ***", s[1]) + end + self.sequences[s[1]] = s + end -- + + ------------------------------------------------------------------- + -- Get the sequence starting with this keyword. [kw :: string] + ------------------------------------------------------------------- + function p:get (kw) return self.sequences [kw] end + + ------------------------------------------------------------------- + -- Remove the sequence starting with keyword [kw :: string] + ------------------------------------------------------------------- + function p:del (kw) + if not self.sequences[kw] then + printf("*** Warning: trying to delete sequence starting ".. + "with %q from a multisequence having no such ".. + "entry ***", kw) end + local removed = self.sequences[kw] + self.sequences[kw] = nil + return removed + end + + ------------------------------------------------------------------- + -- Parsing method + ------------------------------------------------------------------- + function p:parse (lx) + local fli = lx:lineinfo_right() + local x = raw_parse_multisequence (lx, self.sequences, self.default) + local lli = lx:lineinfo_left() + return transform (x, self, fli, lli) + end + + ------------------------------------------------------------------- + -- Construction + ------------------------------------------------------------------- + -- Register the sequences passed to the constructor. They're going + -- from the array part of the parser to the hash part of field + -- [sequences] + p.sequences = { } + for i=1, #p do p:add (p[i]); p[i] = nil end + + -- FIXME: why is this commented out? + --if p.default and not is_parser(p.default) then sequence(p.default) end + return p +end -- + + +------------------------------------------------------------------------------- +-- +-- Expression parser generator +-- +------------------------------------------------------------------------------- +-- +-- Expression configuration relies on three tables: [prefix], [infix] +-- and [suffix]. Moreover, the primary parser can be replaced by a +-- table: in this case the [primary] table will be passed to +-- [gg.multisequence] to create a parser. 
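--
-- A configuration sketch (assuming some [atom] primary parser; the
-- operator entry and its builder are illustrative, not part of gg):
--
--    local my_expr = gg.expr { primary = atom,
--       infix = { { "+", prec = 60, assoc = "left",
--                   builder = function (lhs, op, rhs)
--                      return { tag = "Op", "add", lhs, rhs }
--                   end } } }
--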
+-------------------------------------------------------------------------------
+--
+-- Expression parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Expression configuration relies on three tables: [prefix], [infix]
+-- and [suffix]. Moreover, the primary parser can be replaced by a
+-- table: in this case the [primary] table will be passed to
+-- [gg.multisequence] to create a parser.
+--
+-- Each of these tables is a modified multisequence parser: the
+-- differences with respect to regular multisequence config tables are:
+--
+-- * the builder takes specific parameters:
+--   - for [prefix], it takes the result of the prefix sequence parser,
+--     and the prefixed expression;
+--   - for [infix], it takes the left-hand-side expression, the results
+--     of the infix sequence parser, and the right-hand-side expression;
+--   - for [suffix], it takes the suffixed expression, and the result
+--     of the suffix sequence parser.
+--
+-- * the default field is a table, with fields:
+--   - [parser] the raw parsing function
+--   - [transformers], as usual
+--   - [prec], the operator's precedence
+--   - [assoc] for the [infix] table, the operator's associativity,
+--     which can be "left", "right" or "flat" (defaults to left)
+--
+-- In [p], useful fields are:
+-- * [transformers]: as usual
+-- * [name]: as usual
+-- * [primary]: the atomic expression parser, or a multisequence config
+--   table (mandatory)
+-- * [prefix]: prefix operators config table, see above.
+-- * [infix]: infix operators config table, see above.
+-- * [suffix]: suffix operators config table, see above.
+--
+-- After creation, these fields are added:
+-- * [kind] == "expr"
+-- * [parse] as usual
+-- * each table is turned into a multisequence, and therefore has an
+--   [add] method
+--
+-------------------------------------------------------------------------------
+function expr (p)
+   make_parser ("expr", p)
+
+   -------------------------------------------------------------------
+   -- Parser method.
+   -- In addition to the lexer, it takes an optional precedence:
+   -- it won't read expressions whose precedence is lower than or
+   -- equal to [prec].
+   -------------------------------------------------------------------
+   function p:parse (lx, prec)
+      prec = prec or 0
+
+      ------------------------------------------------------
+      -- Extract the right parser and the corresponding
+      -- options table, for (pre|in|suf)fix operators.
+      -- Options include prec, assoc, transformers.
+      ------------------------------------------------------
+      local function get_parser_info (tab)
+         local p2 = tab:get (lx:is_keyword (lx:peek()))
+         if p2 then -- keyword-based sequence found
+            local function parser(lx) return raw_parse_sequence(lx, p2) end
+            return parser, p2
+         else -- Got to use the default parser
+            local d = tab.default
+            if d then return d.parse or d.parser, d
+            else return false, false end
+         end
+      end
+
+      ------------------------------------------------------
+      -- Look for a prefix sequence. Multiple prefixes are
+      -- handled through the recursive [p.parse] call.
+      -- Notice the double-transform: one for the primary
+      -- expr, and one for the one with the prefix op.
+      ------------------------------------------------------
+      local function handle_prefix ()
+         local fli = lx:lineinfo_right()
+         local p2_func, p2 = get_parser_info (self.prefix)
+         local op = p2_func and p2_func (lx)
+         if op then -- Keyword-based sequence found
+            local ili = lx:lineinfo_right() -- Intermediate LineInfo
+            local e = p2.builder (op, self:parse (lx, p2.prec))
+            local lli = lx:lineinfo_left()
+            return transform (transform (e, p2, ili, lli), self, fli, lli)
+         else -- No prefix found, get a primary expression
+            local e = self.primary(lx)
+            local lli = lx:lineinfo_left()
+            return transform (e, self, fli, lli)
+         end
+      end --
+
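+      ------------------------------------------------------
+      -- Worked example (illustrative): parsing "1 + 2 * 3"
+      -- with "+" at prec 10 and "*" at prec 20, both left-
+      -- associative. The outer call runs at prec 0 and reads
+      -- [1]; handle_infix accepts "+" (10 > 0) and recurses
+      -- at prec 10 for the RHS. That recursive call reads
+      -- [2], accepts "*" (20 > 10) and absorbs "2 * 3", so
+      -- the result groups as 1 + (2 * 3). A further "+"
+      -- fails the 10 > 10 test inside the recursion (assoc
+      -- is "left", not "right") and is handled by the outer
+      -- call's loop instead: that is what makes "+"
+      -- left-associative.
+      ------------------------------------------------------
+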
+      ------------------------------------------------------
+      -- Look for an infix sequence + right-hand-side operand.
+      -- Return the whole binary expression result,
+      -- or false if no operator was found.
+      ------------------------------------------------------
+      local function handle_infix (e)
+         local p2_func, p2 = get_parser_info (self.infix)
+         if not p2 then return false end
+
+         -----------------------------------------
+         -- Handle flattening operators: gather all operands
+         -- of the series in [list]; when a different operator
+         -- is found, stop, build from [list], [transform] and
+         -- return.
+         -----------------------------------------
+         if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
+            local fli = lx:lineinfo_right()
+            local pflat, list = p2, { e }
+            repeat
+               local op = p2_func(lx)
+               if not op then break end
+               table.insert (list, self:parse (lx, p2.prec))
+               local _ -- We only care about checking that p2==pflat
+               _, p2 = get_parser_info (self.infix)
+            until p2 ~= pflat
+            local e2 = pflat.builder (list)
+            local lli = lx:lineinfo_left()
+            return transform (transform (e2, pflat, fli, lli), self, fli, lli)
+
+         -----------------------------------------
+         -- Handle regular infix operators: [e] the LHS is known,
+         -- just gather the operator and [e2] the RHS.
+         -- Result goes in [e3].
+         -----------------------------------------
+         elseif p2.prec and p2.prec>prec or
+                p2.prec==prec and p2.assoc=="right" then
+            local fli = e.lineinfo.first -- lx:lineinfo_right()
+            local op = p2_func(lx)
+            if not op then return false end
+            local e2 = self:parse (lx, p2.prec)
+            local e3 = p2.builder (e, op, e2)
+            local lli = lx:lineinfo_left()
+            return transform (transform (e3, p2, fli, lli), self, fli, lli)
+
+         -----------------------------------------
+         -- Check for non-associative operators, and complain if applicable.
+         -----------------------------------------
+         elseif p2.assoc=="none" and p2.prec==prec then
+            parser_error (lx, "non-associative operator!")
+
+         -----------------------------------------
+         -- No infix operator suitable at that precedence
+         -----------------------------------------
+         else return false end
+
+      end --
+
+      ------------------------------------------------------
+      -- Look for a suffix sequence.
+      -- Return the result of suffix operator on [e],
+      -- or false if no operator was found.
+      ------------------------------------------------------
+      local function handle_suffix (e)
+         local p2_func, p2 = get_parser_info (self.suffix)
+         if not p2 then return false end
+         if not p2.prec or p2.prec>=prec then
+            --local fli = lx:lineinfo_right()
+            -- [fli] is taken from [e] so that the resulting lineinfo
+            -- covers the whole suffixed expression:
+            local fli = e.lineinfo.first
+            local op = p2_func(lx)
+            if not op then return false end
+            local lli = lx:lineinfo_left()
+            e = p2.builder (e, op)
+            e = transform (transform (e, p2, fli, lli), self, fli, lli)
+            return e
+         end
+         return false
+      end --
+
+      ------------------------------------------------------
+      -- Parser body: read suffix and (infix+operand)
+      -- extensions as long as we're able to fetch more at
+      -- this precedence level.
+      ------------------------------------------------------
+      local e = handle_prefix()
+      repeat
+         local x = handle_suffix (e); e = x or e
+         local y = handle_infix (e); e = y or e
+      until not (x or y)
+
+      -- No transform: it already happened in operators handling
+      return e
+   end --
+
+   -------------------------------------------------------------------
+   -- Construction
+   -------------------------------------------------------------------
+   if not p.primary then p.primary=p[1]; p[1]=nil end
+   for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
+      if not p[t] then p[t] = { } end
+      if not is_parser(p[t]) then multisequence(p[t]) end
+   end
+   function p:add(...) return self.primary:add(...) end
+   return p
+end --
+
+
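+-------------------------------------------------------------------------------
+-- Usage sketch (illustrative only): a minimal arithmetic parser in plain
+-- Lua; [number_parser] is a placeholder for an atomic parser turning a
+-- number token into an AST node.
+--
+--    local arith = gg.expr{
+--       primary = number_parser,
+--       infix = {
+--          { "+", prec=10, assoc="left",
+--            builder = function(a, _, b) return {tag="Op", "add", a, b} end },
+--          { "*", prec=20, assoc="left",
+--            builder = function(a, _, b) return {tag="Op", "mul", a, b} end } } }
+--    -- On "1+2*3", arith:parse(lx) builds add(1, mul(2, 3)), as in the
+--    -- worked example above.
+-------------------------------------------------------------------------------
+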
+-------------------------------------------------------------------------------
+--
+-- List parser generator
+--
+-------------------------------------------------------------------------------
+-- In [p], the following fields can be provided in input:
+--
+-- * [builder]: takes the list of subparser results, returns an AST
+-- * [transformers]: as usual
+-- * [name]: as usual
+--
+-- * [terminators]: list of strings representing the keywords which
+--   might mark the end of the list. When non-empty, the list is
+--   allowed to be empty (a terminator can occur right away). A string
+--   is treated as a single-element table, whose element is that
+--   string, e.g. ["do"] is the same as [{"do"}].
+--
+-- * [separators]: list of strings representing the keywords which can
+--   separate elements of the list. When non-empty, one of these
+--   keywords has to be found between any two consecutive elements.
+--   Lack of a separator indicates the end of the list. A string is
+--   treated as a single-element table, whose element is that string,
+--   e.g. ["do"] is the same as [{"do"}]. If [terminators] is
+--   empty/nil, then [separators] has to be non-empty.
+--
+-- After creation, the following fields are added:
+-- * [parse] the parsing function lexer->AST
+-- * [kind] == "list"
+--
+-------------------------------------------------------------------------------
+function list (p)
+   make_parser ("list", p)
+
+   -------------------------------------------------------------------
+   -- Parsing method
+   -------------------------------------------------------------------
+   function p:parse (lx)
+
+      ------------------------------------------------------
+      -- Used to quickly check whether there's a terminator
+      -- or a separator immediately ahead
+      ------------------------------------------------------
+      local function peek_is_in (keywords)
+         return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
+
+      local x = { }
+      local fli = lx:lineinfo_right()
+
+      -- If there's a terminator to start with, don't bother trying
+      if not peek_is_in (self.terminators) then
+         repeat table.insert (x, self.primary (lx)) -- read one element
+         until
+            -- First reason to stop: there's a separator list specified,
+            -- and the next token isn't one. Otherwise, consume it with [lx:next()]
+            self.separators and not(peek_is_in (self.separators) and lx:next()) or
+            -- Other reason to stop: terminator token ahead
+            peek_is_in (self.terminators) or
+            -- Last reason: end of file reached
+            lx:peek().tag=="Eof"
+      end
+
+      local lli = lx:lineinfo_left()
+
+      -- Apply the builder. It can be a string, or a callable value,
+      -- or simply nothing.
+      local b = self.builder
+      if b then
+         if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
+         elseif type(b)=="function" then x=b(x)
+         else
+            local bmt = getmetatable(b)
+            if bmt and bmt.__call then x=b(x) end
+         end
+      end
+      return transform (x, self, fli, lli)
+   end --
+
+   -------------------------------------------------------------------
+   -- Construction
+   -------------------------------------------------------------------
+   if not p.primary then p.primary = p[1]; p[1] = nil end
+   if type(p.terminators) == "string" then p.terminators = { p.terminators }
+   elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
+   if type(p.separators) == "string" then p.separators = { p.separators }
+   elseif p.separators and #p.separators == 0 then p.separators = nil end
+
+   return p
+end --
+
+
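+-------------------------------------------------------------------------------
+-- Usage sketch (illustrative only): a comma-separated expression list,
+-- possibly empty, closed by "end" or "else"; [mlp.expr] is a placeholder.
+--
+--    local expr_list = gg.list{
+--       primary     = mlp.expr,
+--       separators  = ",",               -- promoted to { "," }
+--       terminators = { "end", "else" }, -- non-empty, so the list may be empty
+--       builder     = "ExprList" }
+--    -- expr_list:parse(lx) returns { tag="ExprList", <expr>, <expr>, ... }
+-------------------------------------------------------------------------------
+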
+-------------------------------------------------------------------------------
+--
+-- Keyword-conditioned parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- Only apply a parser if a given keyword is found. The result of
+-- a [gg.onkeyword] parser is the result of the subparser (modulo
+-- [transformers] applications).
+--
+-- lineinfo: the keyword is *not* included in the boundaries of the
+-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
+-- implementation of metalua has shown that it was the appropriate choice
+-- in every case.
+--
+-- Input fields:
+--
+-- * [name]: as usual
+--
+-- * [transformers]: as usual
+--
+-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
+--   stream instead of being consumed.
+--
+-- * [primary]: the subparser.
+--
+-- * [keywords]: list of strings representing triggering keywords.
+--
+-- * Table-part entries can contain strings, and/or exactly one parser.
+--   Strings are put in [keywords], and the parser is put in [primary].
+--
+-- After the call, the following fields will be set:
+--
+-- * [parse] the parsing method
+-- * [kind] == "onkeyword"
+-- * [primary]
+-- * [keywords]
+--
+-------------------------------------------------------------------------------
+function onkeyword (p)
+   make_parser ("onkeyword", p)
+
+   -------------------------------------------------------------------
+   -- Parsing method
+   -------------------------------------------------------------------
+   function p:parse(lx)
+      if lx:is_keyword (lx:peek(), unpack(self.keywords)) then
+         --local fli = lx:lineinfo_right()
+         if not self.peek then lx:next() end
+         local content = self.primary (lx)
+         --local lli = lx:lineinfo_left()
+         local fli, lli = content.lineinfo.first, content.lineinfo.last
+         return transform (content, p, fli, lli)
+      else return false end
+   end
+
+   -------------------------------------------------------------------
+   -- Construction
+   -------------------------------------------------------------------
+   if not p.keywords then p.keywords = { } end
+   for _, x in ipairs(p) do
+      if type(x)=="string" then table.insert (p.keywords, x)
+      else assert (not p.primary and is_parser (x)); p.primary = x end
+   end
+   assert (p.primary, 'no primary parser in gg.onkeyword')
+   return p
+end --
+
+
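+-------------------------------------------------------------------------------
+-- Usage sketch (illustrative only): an optional "else" clause; [mlp.block]
+-- is a placeholder block parser.
+--
+--    local else_clause = gg.onkeyword{ "else", mlp.block }
+--    local b = else_clause(lx)
+--    -- If the next token is "else", it is consumed and [b] is the parsed
+--    -- block; otherwise [b] is false and the lexeme stream is untouched.
+-------------------------------------------------------------------------------
+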
+-------------------------------------------------------------------------------
+--
+-- Optional keyword consumer pseudo-parser generator
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function. That function parses
+-- one of the keywords passed as parameters, and returns it. It returns
+-- [false] if no matching keyword is found.
+--
+-- Notice that tokens returned by the lexer already carry lineinfo, so
+-- there's no need to add it, as is usually done through transform() calls.
+-------------------------------------------------------------------------------
+function optkeyword (...)
+   local args = {...}
+   if type (args[1]) == "table" then
+      assert (#args == 1)
+      args = args[1]
+   end
+   for _, v in ipairs(args) do assert (type(v)=="string") end
+   return function (lx)
+      local x = lx:is_keyword (lx:peek(), unpack (args))
+      if x then lx:next(); return x
+      else return false end
+   end
+end
+
+
+-------------------------------------------------------------------------------
+--
+-- Run a parser with a special lexer
+--
+-------------------------------------------------------------------------------
+--
+-- This doesn't return a real parser, just a function.
+-- The first argument is the lexer class to be used with the parser,
+-- the second is the parser itself.
+-- The resulting parser returns whatever the argument parser does.
+--
+-------------------------------------------------------------------------------
+function with_lexer(new_lexer, parser)
+
+   -------------------------------------------------------------------
+   -- Most gg functions take their parameters in a table, so it's
+   -- better to silently accept when with_lexer{ } is called with
+   -- its arguments in a list:
+   -------------------------------------------------------------------
+   if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
+      return with_lexer(unpack(new_lexer))
+   end
+
+   -------------------------------------------------------------------
+   -- Save the current lexer, switch it for the new one, run the parser,
+   -- and restore the previous lexer, even if the parser caused an error.
+   -------------------------------------------------------------------
+   return function (lx)
+      local old_lexer = getmetatable(lx)
+      lx:sync()
+      setmetatable(lx, new_lexer)
+      local status, result = pcall(parser, lx)
+      lx:sync()
+      setmetatable(lx, old_lexer)
+      if status then return result else error(result) end
+   end
+end
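+-------------------------------------------------------------------------------
+-- Usage sketches (illustrative only; [my_lexer] and [some_parser] are
+-- placeholders):
+--
+--    -- Consume "const" or "global" if one is ahead, otherwise get false:
+--    local opt_scope = gg.optkeyword("const", "global")
+--    local scope = opt_scope(lx)
+--
+--    -- Run [some_parser] with the lexer class [my_lexer], restoring the
+--    -- original lexer afterwards, even on error:
+--    local wrapped = gg.with_lexer(my_lexer, some_parser)
+--    local ast = wrapped(lx)
+-------------------------------------------------------------------------------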
diff --git a/src/samples/typecheck.mlua b/src/samples/typecheck.mlua
index 631f672..e9d6b75 100644
--- a/src/samples/typecheck.mlua
+++ b/src/samples/typecheck.mlua
@@ -1,111 +1,111 @@
--- static partial checkings for Lua.
---
--- This program checks some metalua or plain lua source code for most common
--- mistakes. Its design focuses on the ability to check plain lua code: there is
--- no need to load any extension in the module.
---
--- The current checkings include:
---
--- * Listing all free variables, and make sure they are declared.
--- * For free vars known as modules, check that indexings in them are also
---   declared.
--- * When the type of something is known, do some basic type checkings. These
---   checkings are by no means exhaustive; however, when a parameter function
---   is constant or statically declared, it's checked.
-
-
-
---[[
-Type grammar:
-
-t ::=
-| t and t
-| t or t
-| function (t, ...) return t, ... end
-| { (k=t)... }
-| table(t, t)
-| string
-| number
-| integer
-| boolean
-| userdata
-| nil
-| multi(t, ...)
-| _
-
---]]
-
-
-match function get_type
-| `Number{...} -> return +{number}
-| `String{...} -> return +{string}
-| `True|`False -> return +{boolean}
-| `Nil -> return +{nil}
-| `Dots -> return +{_}
-| `Stat{_,v} -> return get_type(v)
-| `Paren{t} -> return get_one_type(t)
-| `Call{f, ...} ->
-   local ftype = get_type(f)
-   match ftype with
-   | `Function{ _, {`Return{result}} } -> return get_type(result)
-   | `Function{ _, {`Return{...} == results} } ->
-      local r2 = +{ multi() }
-      for r in ivalues(results) table.insert(r2, get_type(r)) end
-      return r2
-   | `And{...} -> return +{_} -- not implemented
-   | `Or{ a, b } -> match get_one_type(a), get_one_type(b) with
-      | `Function{...}==f1, `Function{...}==f2 ->
-         return `Op{ 'or', get_type(`Call{f1}), get_type(`Call{f2})}
-      | `Function{...}==f, _ | _, `Function{...}==f ->
-         return get_type(`Call{f})
-      | _ -> return +{_}
-      end
-| `Invoke{o, m, ... } == x -> return get_type(`Call{`Index{o, m}})
-| `Op{...}==o -> return get_op_type(o)
-| `Table{...}==t ->
-   local r = `Table{ }
-   for x in ivalues(t) do
-      match x with
-      | `Pair{ `String{...}==k, v } -> table.insert(r, `Pair{k, get_one_type(v)})
-      | t -> table.insert(r, get_one_type(t))
-      end
-   end
-   return r
-| `Function{...}==f ->
-| `Id{v} ->
-| `Index{t, k} -> match get_one_type(t), get_one_type(k) with
-   | `Call{`Id 'table', tk, tv }, _ -> return tv
-   | `Table{...}==tt, `Id 'string' ->
-
-
-
-local types_rt = require 'extension.types'
-
-
-
-
-function check_function(f, term)
-   match get_type(term) with
-   | `Function{ params, {`Return{...} == results}}, args ->
-   | `And{ a, b }, args ->
-      check_function(a, args)
-      check_function(b, args)
-   | `Or{ a, b }, args ->
-      if not pcall(check_function, a, args) then check_function(b, args) end
-   | `Id '_' -> -- pass
-   | _ -> error ("Call to a non-function")
-   end
-end
-
-function check_index(a, b, term)
-   match get_type(term) with
-   | `Table{}
-
-match function cfg.id.up
-| `Call{ f, ... } == x -> check_function (f, x)
-| `Index{ a, b } == x -> check_index (a, b, x)
-end
-
-
--- List free vars
+-- Static partial checking for Lua.
+--
+-- This program checks metalua or plain Lua source code for the most common
+-- mistakes. Its design focuses on the ability to check plain Lua code: there
+-- is no need to load any extension into the checked module.
+--
+-- The current checks include:
+--
+-- * Listing all free variables, and making sure they are declared.
+-- * For free vars known as modules, checking that indexings into them are
+--   also declared.
+-- * When the type of something is known, doing some basic type checks. These
+--   checks are by no means exhaustive; however, when a parameter function
+--   is constant or statically declared, it's checked.
+
+
+
+--[[
+Type grammar:
+
+t ::=
+| t and t
+| t or t
+| function (t, ...) return t, ... end
+| { (k=t)... }
+| table(t, t)
+| string
+| number
+| integer
+| boolean
+| userdata
+| nil
+| multi(t, ...)
+| _
+
+--]]
+
+
+match function get_type
+| `Number{...} -> return +{number}
+| `String{...} -> return +{string}
+| `True|`False -> return +{boolean}
+| `Nil -> return +{nil}
+| `Dots -> return +{_}
+| `Stat{_,v} -> return get_type(v)
+| `Paren{t} -> return get_one_type(t)
+| `Call{f, ...} ->
+   local ftype = get_type(f)
+   match ftype with
+   | `Function{ _, {`Return{result}} } -> return get_type(result)
+   | `Function{ _, {`Return{...} == results} } ->
+      local r2 = +{ multi() }
+      for r in ivalues(results) table.insert(r2, get_type(r)) end
+      return r2
+   | `And{...} -> return +{_} -- not implemented
+   | `Or{ a, b } -> match get_one_type(a), get_one_type(b) with
+      | `Function{...}==f1, `Function{...}==f2 ->
+         return `Op{ 'or', get_type(`Call{f1}), get_type(`Call{f2})}
+      | `Function{...}==f, _ | _, `Function{...}==f ->
+         return get_type(`Call{f})
+      | _ -> return +{_}
+      end
+| `Invoke{o, m, ... } == x -> return get_type(`Call{`Index{o, m}})
+| `Op{...}==o -> return get_op_type(o)
+| `Table{...}==t ->
+   local r = `Table{ }
+   for x in ivalues(t) do
+      match x with
+      | `Pair{ `String{...}==k, v } -> table.insert(r, `Pair{k, get_one_type(v)})
+      | t -> table.insert(r, get_one_type(t))
+      end
+   end
+   return r
+| `Function{...}==f ->
+| `Id{v} ->
+| `Index{t, k} -> match get_one_type(t), get_one_type(k) with
+   | `Call{`Id 'table', tk, tv }, _ -> return tv
+   | `Table{...}==tt, `Id 'string' ->
+
+
+
+local types_rt = require 'extension.types'
+
+
+
+
+function check_function(f, term)
+   match get_type(term) with
+   | `Function{ params, {`Return{...} == results}}, args ->
+   | `And{ a, b }, args ->
+      check_function(a, args)
+      check_function(b, args)
+   | `Or{ a, b }, args ->
+      if not pcall(check_function, a, args) then check_function(b, args) end
+   | `Id '_' -> -- pass
+   | _ -> error ("Call to a non-function")
+   end
+end
+
+function check_index(a, b, term)
+   match get_type(term) with
+   | `Table{}
+
+match function cfg.id.up
+| `Call{ f, ... } == x -> check_function (f, x)
+| `Index{ a, b } == x -> check_index (a, b, x)
+end
+
+
+-- List free vars
 cfg.id.
\ No newline at end of file
-- 
2.44.0