1 ----------------------------------------------------------------------
2 -- Metalua samples: $Id$
4 -- Summary: Structural pattern matching for metalua ADT.
6 ----------------------------------------------------------------------
8 -- Copyright (c) 2006-2008, Fabien Fleutot <metalua@gmail.com>.
10 -- This software is released under the MIT Licence, see licence.txt
13 --------------------------------------------------------------------------------
17 -- * term_seq: the tested stuff, a sequence of terms
18 -- * pattern_element: might match one term of a term seq. Represented
19 -- as expression ASTs.
20 -- * pattern_seq: might match a term_seq
21 -- * pattern_group: several pattern seqs, one of them might match
23 -- * case: pattern_group * guard option * block
24 -- * match_statement: tested term_seq * case list
26 -- Hence a complete match statement is a:
28 -- { list(expr), list{ list(list(expr)), expr or false, block } }
30 -- Implementation hints
31 -- ====================
33 -- The implementation is made as modular as possible, so that parts
34 -- can be reused in other extensions. The priviledged way to share
35 -- contextual information across functions is through the 'cfg' table
36 -- argument. Its fields include:
38 -- * code: code generated from pattern. A pattern_(element|seq|group)
39 -- is compiled as a sequence of instructions which will jump to
40 -- label [cfg.on_failure] if the tested term doesn't match.
42 -- * on_failure: name of the label where the code will jump if the
43 -- pattern doesn't match
45 -- * locals: names of local variables used by the pattern. This
46 -- includes bound variables, and temporary variables used to
47 -- destructure tables. Names are stored as keys of the table,
48 -- values are meaningless.
50 -- * after_success: label where the code must jump after a pattern
51 -- succeeded to capture a term, and the guard suceeded if there is
52 -- any, and the conditional block has run.
54 -- * ntmp: number of temporary variables used to destructurate table
55 -- in the current case.
57 -- Code generation is performed by acc_xxx() functions, which accumulate
60 -- * acc_test(test, cfg) will generate a jump to cfg.on_failure
61 -- *when the test returns TRUE*
63 -- * acc_stat accumulates a statement
65 -- * acc_assign accumulate an assignment statement, and makes sure that
66 -- the LHS variable the registered as local in cfg.locals.
68 ----------------------------------------------------------------------
70 -- TODO: hygiene wrt type()
71 -- TODO: cfg.ntmp isn't reset as often as it could. I'm not even sure
72 -- the corresponding locals are declared.
74 module ('spmatch', package.seeall)
76 ----------------------------------------------------------------------
77 -- This would have been best done through library 'metalua.walk',
78 -- but walk depends on match, so we have to break the dependency.
79 -- It replaces all instances of `...' in `ast' with `term', unless
80 -- it appears in a function.
81 ----------------------------------------------------------------------
82 function replace_dots (ast, term)
83 local function rec (x)
84 if type(x) == 'table' then
86 if term=='ambiguous' then
87 error ("You can't use `...' on the right of a match case when it appears "..
88 "more than once on the left")
92 elseif x.tag=='Function' then return
93 else for y in ivalues (x) do rec (y) end end
99 tmpvar_base = mlp.gensym 'submatch.' [1]
100 function next_tmpvar(cfg)
101 assert (cfg.ntmp, "No cfg.ntmp imbrication level in the match compiler")
102 cfg.ntmp = cfg.ntmp+1
103 return `Id{ tmpvar_base .. cfg.ntmp }
107 acc_stat = |x,cfg| table.insert (cfg.code, x)
108 acc_test = |x,cfg| acc_stat(+{stat: if -{x} then -{`Goto{cfg.on_failure}} end}, cfg)
109 -- lhs :: `Id{ string }
111 function acc_assign (lhs, rhs, cfg)
112 assert(lhs.tag=='Id')
113 cfg.locals[lhs[1]] = true
114 acc_stat (`Set{ {lhs}, {rhs} }, cfg)
117 literal_tags = table.transpose{ 'String', 'Number', 'True', 'False', 'Nil' }
119 -- pattern :: `Id{ string }
121 function id_pattern_element_builder (pattern, term, cfg)
122 assert (pattern.tag == "Id")
123 if pattern[1] == "_" then
124 -- "_" is used as a dummy var ==> no assignment, no == checking
126 elseif cfg.locals[pattern[1]] then
127 -- This var is already bound ==> test for equality
128 acc_test (+{ -{term} ~= -{pattern} }, cfg)
130 -- Free var ==> bind it, and remember it for latter linearity checking
131 acc_assign (pattern, term, cfg)
132 cfg.locals[pattern[1]] = true
136 -- Concatenate code in [cfg.code], that will jump to label
137 -- [cfg.on_failure] if [pattern] doesn't match [term]. [pattern]
138 -- should be an identifier, or at least cheap to compute and
139 -- side-effects free.
141 -- pattern :: pattern_element
143 function pattern_element_builder (pattern, term, cfg)
144 if literal_tags[pattern.tag] then
145 acc_test (+{ -{term} ~= -{pattern} }, cfg)
146 elseif "Id" == pattern.tag then
147 id_pattern_element_builder (pattern, term, cfg)
148 elseif "Op" == pattern.tag and "div" == pattern[1] then
149 regexp_pattern_element_builder (pattern, term, cfg)
150 elseif "Op" == pattern.tag and "eq" == pattern[1] then
151 eq_pattern_element_builder (pattern, term, cfg)
152 elseif "Table" == pattern.tag then
153 table_pattern_element_builder (pattern, term, cfg)
155 error ("Invalid pattern: "..table.tostring(pattern, "nohash"))
159 function eq_pattern_element_builder (pattern, term, cfg)
160 local _, pat1, pat2 = unpack (pattern)
161 local ntmp_save = cfg.ntmp
162 pattern_element_builder (pat1, term, cfg)
164 pattern_element_builder (pat2, term, cfg)
167 -- pattern :: `Op{ 'div', string, list{`Id string} or `Id{ string }}
169 function regexp_pattern_element_builder (pattern, term, cfg)
170 local op, regexp, sub_pattern = unpack(pattern)
173 assert (op=='div', "Don't know what to do with that op in a pattern")
174 assert (regexp.tag=="String",
175 "Left hand side operand for '/' in a pattern must be "..
176 "a literal string representing a regular expression")
177 assert (sub_pattern.tag=="Table",
178 "Right hand side operand for '/' in a pattern must be "..
179 "an identifier or a list of identifiers")
180 for x in ivalues(sub_pattern) do
181 assert (x.tag=="Id" or x.tag=='Dots',
182 "Right hand side operand for '/' in a pattern must be "..
183 "a list of identifiers")
186 -- Regexp patterns can only match strings
187 acc_test (+{ type(-{term}) ~= 'string' }, cfg)
188 -- put all captures in a list
189 local capt_list = +{ { string.strmatch(-{term}, -{regexp}) } }
190 -- save them in a var_n for recursive decomposition
191 local v2 = next_tmpvar(cfg)
192 acc_stat (+{stat: local -{v2} = -{capt_list} }, cfg)
193 -- was capture successful?
194 acc_test (+{ not next (-{v2}) }, cfg)
195 pattern_element_builder (sub_pattern, v2, cfg)
198 -- pattern :: pattern and `Table{ }
200 function table_pattern_element_builder (pattern, term, cfg)
201 local seen_dots, len = false, 0
202 acc_test (+{ type( -{term} ) ~= "table" }, cfg)
203 for i = 1, #pattern do
204 local key, sub_pattern
205 if pattern[i].tag=="Pair" then -- Explicit key/value pair
206 key, sub_pattern = unpack (pattern[i])
207 assert (literal_tags[key.tag], "Invalid key")
209 len, key, sub_pattern = len+1, `Number{ len+1 }, pattern[i]
212 -- '...' can only appear in final position
213 -- Could be fixed actually...
214 assert (not seen_dots, "Wrongly placed `...' ")
216 if sub_pattern.tag == "Id" then
217 -- Optimization: save a useless [ v(n+1)=v(n).key ]
218 id_pattern_element_builder (sub_pattern, `Index{ term, key }, cfg)
219 if sub_pattern[1] ~= "_" then
220 acc_test (+{ -{sub_pattern} == nil }, cfg)
222 elseif sub_pattern.tag == "Dots" then
223 -- Remember where the capture is, and thatt arity checking shouldn't occur
226 -- Business as usual:
227 local v2 = next_tmpvar(cfg)
228 acc_assign (v2, `Index{ term, key }, cfg)
229 pattern_element_builder (sub_pattern, v2, cfg)
230 -- TODO: restore ntmp?
233 if seen_dots then -- remember how to retrieve `...'
234 -- FIXME: check, but there might be cases where the variable -{term}
235 -- will be overridden in contrieved tables.
236 -- ==> save it now, and clean the setting statement if unused
237 if cfg.dots_replacement then cfg.dots_replacement = 'ambiguous'
238 else cfg.dots_replacement = +{ select (-{`Number{len}}, unpack(-{term})) } end
240 acc_test (+{ #-{term} ~= -{`Number{len}} }, cfg)
244 -- Jumps to [cfg.on_faliure] if pattern_seq doesn't match
246 function pattern_seq_builder (pattern_seq, term_seq, cfg)
247 if #pattern_seq ~= #term_seq then error ("Bad seq arity") end
248 cfg.locals = { } -- reset bound variables between alternatives
249 for i=1, #pattern_seq do
250 cfg.ntmp = 1 -- reset the tmp var generator
251 pattern_element_builder(pattern_seq[i], term_seq[i], cfg)
255 --------------------------------------------------
257 -- pattern_seq_builder_i:
258 -- * on failure, go to on_failure_i
259 -- * on success, go to on_success
262 -- goto after_success
263 -- label on_failure_i
264 --------------------------------------------------
265 function case_builder (case, term_seq, cfg)
266 local patterns_group, guard, block = unpack(case)
267 local on_success = mlp.gensym 'on_success' [1]
268 for i = 1, #patterns_group do
269 local pattern_seq = patterns_group[i]
270 cfg.on_failure = mlp.gensym 'match_fail' [1]
271 cfg.dots_replacement = false
272 pattern_seq_builder (pattern_seq, term_seq, cfg)
273 if i<#patterns_group then
274 acc_stat (`Goto{on_success}, cfg)
275 acc_stat (`Label{cfg.on_failure}, cfg)
278 acc_stat (`Label{on_success}, cfg)
279 if guard then acc_test (+{not -{guard}}, cfg) end
280 if cfg.dots_replacement then
281 eprintf ("Dots replacement required in a match")
282 replace_dots (block, cfg.dots_replacement)
285 acc_stat (block, cfg)
286 acc_stat (`Goto{cfg.after_success}, cfg)
287 acc_stat (`Label{cfg.on_failure}, cfg)
290 function match_builder (x)
291 local term_seq, cases = unpack(x)
294 after_success = mlp.gensym "_after_success" }
298 -- Make sure that all tested terms are variables or literals
299 for i=1, #term_seq do
300 local t = term_seq[i]
301 -- Capture problem: the following would compile wrongly:
302 -- `match x with x -> end'
303 -- Temporary workaround: suppress the condition, so that
304 -- all external variables are copied into unique names.
305 --if t.tag ~= 'Id' and not literal_tags[t.tag] then
306 local v = mlp.gensym 'v'
307 if not match_locals then match_locals = `Local{ {v}, {t} } else
308 table.insert(match_locals[1], v)
309 table.insert(match_locals[2], t)
315 if match_locals then acc_stat(match_locals, cfg) end
319 after_success = cfg.after_success,
321 -- locals = { } -- unnecessary, done by pattern_seq_builder
323 case_builder (cases[i], term_seq, case_cfg)
324 if next (case_cfg.locals) then
325 local case_locals = { }
326 table.insert (case_cfg.code, 1, `Local{ case_locals, { } })
327 for v in keys (case_cfg.locals) do
328 table.insert (case_locals, `Id{ v })
331 acc_stat(case_cfg.code, cfg)
333 acc_stat(+{error 'mismatch'}, cfg)
334 acc_stat(`Label{cfg.after_success}, cfg)
338 ----------------------------------------------------------------------
339 -- Syntactical front-end
340 ----------------------------------------------------------------------
342 mlp.lexer:add{ "match", "with", "->" }
343 mlp.block.terminators:add "|"
345 match_cases_list_parser = gg.list{ name = "match cases list",
346 gg.sequence{ name = "match case",
347 gg.list{ name = "match case patterns list",
348 primary = mlp.expr_list,
350 terminators = { "->", "if" } },
351 gg.onkeyword{ "if", mlp.expr, consume = true },
355 terminators = "end" }
357 mlp.stat:add{ name = "match statement",
360 "with", gg.optkeyword "|",
361 match_cases_list_parser,
363 builder = |x| match_builder{ x[1], x[3] } }