long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
number_mantissa = {
"^%d+%.?%d*()",
- "^%d*%d%.%d+()" },
+ "^%d*%.%d+()" },
number_exponant = "^[eE][%+%-]?%d+()",
+ number_hex = "^0[xX]%x+()",
word = "^([%a_][%w_]*)()"
}
-
-
----------------------------------------------------------------------
-- unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
-- Turn the digits of an escape sequence into the corresponding
-- character, e.g. [unesc_digits("123") == string.char(123)].
-- Turn the digits of a numeric escape sequence into the corresponding
-- character, e.g. unesc_digits("123") == string.char(123).
-- A leading "\\" means the backslash itself was escaped, so the digits
-- are literal text and the argument is returned untouched.
-- Raises an error when the decoded code is outside [0..255].
local function unesc_digits (x)
   if x:sub(1,1)=="\\" then return x end -- Hack to parse correctly "\\123"
   -- Read up to three digit bytes, least significant digit first.
   local k, j, i = x:reverse():byte(1, 3)
   local z = _G.string.byte "0"
   local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
   if code > 255 then
      error ("Illegal escape sequence '\\"..x.."' in string: ASCII codes must be in [0..255]")
   end
   -- Fix: reuse the already-computed, range-checked code instead of
   -- recomputing the digit sum a second time.
   return _G.string.char (code)
end
local t = {
a = "\a", b = "\b", f = "\f",
n = "\n", r = "\r", t = "\t", v = "\v",
- ["\\"] = "\\", ["'"] = "'", ['"'] = '"' }
- return t[x] or error("Unknown escape sequence \\"..x)
+ ["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
+ return t[x] or error([[Unknown escape sequence '\]]..x..[[']])
end
return s
- :gsub ("\\([0-9]+)", unesc_digits)
- :gsub ("\\(.)",unesc_letter)
+ :gsub ("\\(%D)",unesc_letter)
+ :gsub ("\\(\\?[0-9][0-9]?[0-9]?)", unesc_digits)
end
-- List of token-extraction methods the tokenizer tries in order.
-- NOTE(review): this hunk only shows commented-out __tostring leftovers
-- inside the table; the actual extractor method names are presumably in
-- the unshown part of the file — confirm against the full source.
lexer.extractors = {
-- __tostring = function(a)
-- return string.format ("`%s{'%s'}",a.tag, a[1])
-- end
- }
+}
+
+-- Metatable shared by lineinfo records; each record is a quadruplet
+-- { line, column, char-offset, filename } (layout built in build_token).
+lexer.lineinfo_metatable = { }
+--[[
+-- The presence of this function prevents serialization by Pluto,
+-- I can't figure out why :(
+-- Kept disabled: it pretty-printed a lineinfo as "file:char(line,col)"
+-- plus any attached comments.
+function lexer.lineinfo_metatable:__tostring()
+   local txt = string.format("%s:%i(%i,%i)", self[4], self[3], self[1], self[2])
+   if self.comments then
+      acc = { }
+      for comment in ivalues(self.comments) do
+         local content, loc1, loc2, kind = unpack(comment)
+         table.insert (acc, string.format ("%s@%i..%i:%q", kind, loc1, loc2, content))
+      end
+      txt = txt.."["..table.concat(acc,"; ").."]"
+   end
+   return txt
+end
+--]]
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- Put line info, comments and metatable around the tag and content
-- provided by extractors, thus returning a complete lexer token.
+ -- first_line: line # at the beginning of token
+ -- first_column_offset: char # of the last '\n' before beginning of token
+ -- i: scans from beginning of prefix spaces/comments to end of token.
-- Assemble a complete token from a tag and its content: attach lineinfo
-- (first/last positions), any accumulated comments, and the stream's
-- token metatable.
-- NOTE(review): 'previous_i', 'loc' and 'self' are upvalues captured from
-- the enclosing lexer method — confirm their exact meaning against the
-- full file.
local function build_token (tag, content)
assert (tag and content)
- local i, first_line, first_column_offset =
- previous_i, self.line, self.column_offset
+ local i, first_line, first_column_offset, previous_line_length =
+ previous_i, self.line, self.column_offset, nil
+
-- update self.line and first_line. i := indexes of '\n' chars
while true do
i = self.src :find ("\n", i+1, true)
- if not i then break end
- if loc and i <= loc then
+ if not i or i>self.i then break end -- no more '\n' until end of token
+ previous_line_length = i - self.column_offset
+ if loc and i <= loc then -- '\n' before beginning of token
first_column_offset = i
first_line = first_line+1
end
+ -- every '\n' up to self.i advances the stream's current line
self.line = self.line+1
self.column_offset = i
end
- local a = { --char = loc, line = self.line,
- tag = tag,
- lineinfo = {
- first = { first_line, loc - first_column_offset, loc },
- last = { self.line, self.i - self.column_offset, self.i } },
- content }
+
+ -- lineinfo entries: [1]=line, [2]=column, [3]=char, [4]=filename
+ local fli = { first_line, loc-first_column_offset, loc, self.src_name }
+ local lli = { self.line, self.i-self.column_offset-1, self.i-1, self.src_name }
+ --Pluto barfs when the metatable is set :(
+ setmetatable(fli, lexer.lineinfo_metatable)
+ setmetatable(lli, lexer.lineinfo_metatable)
+ local a = { tag = tag, lineinfo = { first=fli, last=lli }, content }
+ -- lli[2]==-1 means the token's last char offset falls on a '\n' itself;
+ -- back the 'last' position up onto the previous line.
+ if lli[2]==-1 then lli[1], lli[2] = lli[1]-1, previous_line_length-1 end
if #self.attached_comments > 0 then
-- Attach pending comments both to this token's first lineinfo and to
-- the previously returned token's last lineinfo.
a.lineinfo.comments = self.attached_comments
- self.attached_comments = nil
+ fli.comments = self.attached_comments
+ if self.lineinfo_last then
+ self.lineinfo_last.comments = self.attached_comments
+ end
end
+ self.attached_comments = { }
return setmetatable (a, self.token_metatable)
end --</function build_token>
- self.attached_comments = { }
-
for ext_idx, extractor in ipairs(self.extractors) do
-- printf("method = %s", method)
local tag, content = self [extractor] (self)
----------------------------------------------------------------------
function lexer:skip_whitespaces_and_comments()
local table_insert = _G.table.insert
- local attached_comments = { }
repeat -- loop as long as a space or comment chunk is found
local _, j
local again = false
-- skip a short comment if any
last_comment_content, j = self.src:match (self.patterns.short_comment, self.i)
if j then
- table_insert(attached_comments,
+ table_insert(self.attached_comments,
{last_comment_content, self.i, j, "short"})
self.i=j; again=true
end
-- j = position after interesting char
-- y = char just after x
local x, y
- x, j, y = self.src :match ("([\\\r\n"..k.."])()(.)", j)
- if x == '\\' then j=j+1 -- don't parse escaped char
+ x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j)
+ if x == '\\' then j=j+1 -- don't parse escaped char
elseif x == k then break -- unescaped end of string
- else -- end of source or \r/\n before end of string
+ else -- eof or '\r' or '\n' reached before end of string
assert (not x or x=="\r" or x=="\n")
error "Unterminated string"
end
----------------------------------------------------------------------
function lexer:extract_number()
-- Number: try hexadecimal ("0x.../0X...") first, then a decimal mantissa
-- with an optional exponent. Each pattern ends with '()', so on success
-- j is the first position after the matched number; nil means no number
-- starts at self.i.
- local j = self.src:match (self.patterns.number_mantissa[1], self.i) or
- self.src:match (self.patterns.number_mantissa[2], self.i)
- if j then
- j = self.src:match (self.patterns.number_exponant, j) or j;
- local n = tonumber (self.src:sub (self.i, j-1))
- self.i = j
- return "Number", n
+ local j = self.src:match(self.patterns.number_hex, self.i)
+ if not j then
+ j = self.src:match (self.patterns.number_mantissa[1], self.i) or
+ self.src:match (self.patterns.number_mantissa[2], self.i)
+ if j then
+ j = self.src:match (self.patterns.number_exponant, j) or j;
+ end
+ end
+ if not j then return end
+ -- Number found, interpret with tonumber() and return it
+ local n = tonumber (self.src:sub (self.i, j-1))
+ self.i = j
+ return "Number", n
end
----------------------------------------------------------------------
-- token is returned.
----------------------------------------------------------------------
function lexer:peek (n)
- assert(self)
if not n then n=1 end
if n > #self.peeked then
for i = #self.peeked+1, n do
-- stream, an EOF token is returned.
----------------------------------------------------------------------
-- Consume and return the n-th token (default 1), discarding those before
-- it. Also records the consumed token's last line / last lineinfo, used
-- by :lineinfo_left() and for error reporting.
function lexer:next (n)
- if not n then n=1 end
+ n = n or 1
self:peek (n)
local a
for i=1,n do
a = _G.table.remove (self.peeked, 1)
if a then
- debugf ("lexer:next() ==> %s",
- table.tostring(a))
+ --debugf ("lexer:next() ==> %s %s",
+ -- table.tostring(a), tostring(a))
end
-- NOTE(review): if peek(n) can leave fewer than n entries in self.peeked,
-- 'a' is nil here and the next line errors — verify peek() always pads
-- the queue (e.g. with eof tokens).
self.lastline = a.lineinfo.last[1]
end
+ self.lineinfo_last = a.lineinfo.last
return a or eof_token
end
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
+-- FIXME there are more fields than that to save
-- NOTE(review): _G.table.cat is a Metalua library extension, not stock
-- Lua; presumably it produces a copy of the peeked-token list — verify.
function lexer:save () return { self.i; _G.table.cat(self.peeked) } end
----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
+-- FIXME there are more fields than that to restore
-- NOTE(review): s[2] was built by table.cat in :save(); assigning it back
-- to self.peeked assumes the same list shape — verify the round-trip.
function lexer:restore (s) self.i=s[1]; self.peeked=s[2] end
----------------------------------------------------------------------
local p1 = self.peeked[1]
if p1 then
li = p1.lineinfo.first
- self.line, self.column_offset, self.i, self.peeked =
- li[1], li[2], li[3], { }
+ self.line, self.i = li[1], li[3]
+ self.column_offset = self.i - li[2]
+ self.peeked = { }
+ self.attached_comments = p1.lineinfo.first.comments or { }
end
end
----------------------------------------------------------------------
-- Adopt another stream's scanning state wholesale, after flushing our
-- own peeked tokens back with :sync(). Returns self for chaining.
function lexer:takeover(old)
   self:sync()
   for _, field in ipairs{ "line", "column_offset", "i", "src", "attached_comments" } do
      self[field] = old[field]
   end
   return self
end
-function lexer:lineinfo()
- if self.peeked[1] then return self.peeked[1].lineinfo.first
- else return { self.line, self.i-self.column_offset, self.i } end
+-- function lexer:lineinfo()
+-- if self.peeked[1] then return self.peeked[1].lineinfo.first
+-- else return { self.line, self.i-self.column_offset, self.i } end
+-- end
+
+
+----------------------------------------------------------------------
+-- Return the current position in the sources. This position is between
+-- two tokens, and can be within a space / comment area, and therefore
+-- have a non-null width. :lineinfo_left() returns the beginning of the
+-- separation area, :lineinfo_right() returns the end of that area.
+--
+-- ____ last consumed token ____ first unconsumed token
+-- / /
+-- XXXXX <spaces and comments> YYYYY
+-- \____ \____
+-- :lineinfo_left() :lineinfo_right()
+----------------------------------------------------------------------
-- Beginning of the first not-yet-consumed token, i.e. the right edge of
-- the current spaces/comments separation area (see diagram above).
function lexer:lineinfo_right()
   local next_token = self:peek (1)
   return next_token.lineinfo.first
end
+
+function lexer:lineinfo_left()
+ -- End of the last token consumed by :next(); left edge of the current
+ -- spaces/comments separation area.
+ return self.lineinfo_last
end
----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
-function lexer:newstream (src_or_stream)
+function lexer:newstream (src_or_stream, name)
+ name = name or "?"
if type(src_or_stream)=='table' then -- it's a stream
return setmetatable ({ }, self) :takeover (src_or_stream)
elseif type(src_or_stream)=='string' then -- it's a source string
local src = src_or_stream
local stream = {
- src = src; -- The source, as a single string
- peeked = { }; -- Already peeked, but not discarded yet, tokens
- i = 1; -- Character offset in src
- line = 1; -- Current line number
- column_offset = 0; -- distance from beginning of file to last '\n'
+ src_name = name; -- Name of the file
+ src = src; -- The source, as a single string
+ peeked = { }; -- Already peeked, but not discarded yet, tokens
+ i = 1; -- Character offset in src
+ line = 1; -- Current line number
+ column_offset = 0; -- distance from beginning of file to last '\n'
+ attached_comments = { },-- comments accumulator
+ lineinfo_last = { 1, 1, 1, name }
}
setmetatable (stream, self)
-- skip initial sharp-bang for unix scripts
- if src and src :match "^#!" then stream.i = src :find "\n" + 1 end
+ -- FIXME: redundant with mlp.chunk()
+ if src and src :match "^#" then stream.i = src :find "\n" + 1 end
return stream
else
assert(false, ":newstream() takes a source string or a stream, not a "..