long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
number_mantissa = {
"^%d+%.?%d*()",
- "^%d*%d%.%d+()" },
+ "^%d*%.%d+()" },
number_exponant = "^[eE][%+%-]?%d+()",
number_hex = "^0[xX]%x+()",
word = "^([%a_][%w_]*)()"
-- Turn the digits of an escape sequence into the corresponding
-- character, e.g. [unesc_digits("123") == string.char(123)].
local function unesc_digits (x)
+ if x:sub(1,1)=="\\" then return x end -- Hack to parse correctly "\\123"
local k, j, i = x:reverse():byte(1, 3)
local z = _G.string.byte "0"
+ -- Decode up to 3 decimal digits into a byte value; reject codes > 255.
+ local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
+ if code > 255 then
+ error ("Illegal escape sequence '\\"..x.."' in string: ASCII codes must be in [0..255]")
+ end
- return _G.string.char ((k or z) + 10*(j or z) + 100*(i or z) - 111*z)
+ return _G.string.char (code) -- reuse the validated value instead of recomputing the expression
end
return s
:gsub ("\\(%D)",unesc_letter)
- :gsub ("\\([0-9]+)", unesc_digits)
+ :gsub ("\\(\\?[0-9][0-9]?[0-9]?)", unesc_digits)
end
lexer.extractors = {
-- __tostring = function(a)
-- return string.format ("`%s{'%s'}",a.tag, a[1])
-- end
- }
+}
+
+lexer.lineinfo_metatable = { } -- shared metatable for the lineinfo position records (fli/lli) built in build_token
+--[[
+-- The presence of this function prevents serialization by Pluto,
+-- I can't figure out why :(
+function lexer.lineinfo_metatable:__tostring()
+ local txt = string.format("%s:%i(%i,%i)", self[4], self[3], self[1], self[2])
+ if self.comments then
+ acc = { }
+ for comment in ivalues(self.comments) do
+ local content, loc1, loc2, kind = unpack(comment)
+ table.insert (acc, string.format ("%s@%i..%i:%q", kind, loc1, loc2, content))
+ end
+ txt = txt.."["..table.concat(acc,"; ").."]"
+ end
+ return txt
+end
+--]]
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- Put line info, comments and metatable around the tag and content
-- provided by extractors, thus returning a complete lexer token.
+ -- first_line: line # at the beginning of token
+ -- first_column_offset: char # of the last '\n' before beginning of token
+ -- i: scans from beginning of prefix spaces/comments to end of token.
local function build_token (tag, content)
assert (tag and content)
- local i, first_line, first_column_offset =
- previous_i, self.line, self.column_offset
+ local i, first_line, first_column_offset, previous_line_length =
+ previous_i, self.line, self.column_offset, nil -- previous_line_length is filled in by the '\n' scan below
+
-- update self.line and first_line. i := indexes of '\n' chars
while true do
i = self.src :find ("\n", i+1, true)
- if not i then break end
- if loc and i <= loc then
+ if not i or i>self.i then break end -- no more '\n' until end of token
+ previous_line_length = i - self.column_offset -- length of the line ending at this '\n'
+ if loc and i <= loc then -- '\n' before beginning of token
first_column_offset = i
first_line = first_line+1
end
- if i <= self.i then
- self.line = self.line+1
- self.column_offset = i
- else break end
+ self.line = self.line+1
+ self.column_offset = i -- offset of the last '\n' seen so far
end
- local a = { --char = loc, line = self.line,
- tag = tag,
- lineinfo = {
- name = self.src_name,
- first = { first_line, loc - first_column_offset, loc },
- last = { self.line, self.i - self.column_offset, self.i } },
- content }
+
+ -- lineinfo entries: [1]=line, [2]=column, [3]=char, [4]=filename
+ local fli = { first_line, loc-first_column_offset, loc, self.src_name } -- position of the token's first char
+ local lli = { self.line, self.i-self.column_offset-1, self.i-1, self.src_name } -- NOTE(review): self.i-1 is presumably the token's last char (inclusive); confirm
+ --Pluto barfs when the metatable is set:(
+ setmetatable(fli, lexer.lineinfo_metatable)
+ setmetatable(lli, lexer.lineinfo_metatable)
+ local a = { tag = tag, lineinfo = { first=fli, last=lli }, content }
+ if lli[2]==-1 then lli[1], lli[2] = lli[1]-1, previous_line_length-1 end -- token ended right after a '\n': move last position back onto the previous line
if #self.attached_comments > 0 then
a.lineinfo.comments = self.attached_comments
- self.attached_comments = nil
+ fli.comments = self.attached_comments -- attach preceding comments to the token's first position...
+ if self.lineinfo_last then
+ self.lineinfo_last.comments = self.attached_comments -- ...and to the previous token's last position
+ end
end
self.attached_comments = { }
return setmetatable (a, self.token_metatable)
end
self.lastline = a.lineinfo.last[1]
end
+ self.lineinfo_last = a.lineinfo.last
return a or eof_token
end
self.line, self.i = li[1], li[3]
self.column_offset = self.i - li[2]
self.peeked = { }
- self.attached_comments = p1.lineinfo.comments or { }
+ self.attached_comments = p1.lineinfo.first.comments or { }
end
end
return self
end
-function lexer:lineinfo()
- if self.peeked[1] then return self.peeked[1].lineinfo.first
- else return { self.line, self.i-self.column_offset, self.i } end
+-- function lexer:lineinfo()
+-- if self.peeked[1] then return self.peeked[1].lineinfo.first
+-- else return { self.line, self.i-self.column_offset, self.i } end
+-- end
+
+
+----------------------------------------------------------------------
+-- Return the current position in the sources. This position is between
+-- two tokens, and can be within a space / comment area, and therefore
+-- have a non-null width. :lineinfo_left() returns the beginning of the
+-- separation area, :lineinfo_right() returns the end of that area.
+--
+-- ____ last consumed token ____ first unconsumed token
+-- / /
+-- XXXXX <spaces and comments> YYYYY
+-- \____ \____
+-- :lineinfo_left() :lineinfo_right()
+----------------------------------------------------------------------
+function lexer:lineinfo_right() -- lineinfo of the first unconsumed token (right edge of the space/comment area); forces a one-token peek
+ return self:peek(1).lineinfo.first
+end
+
+function lexer:lineinfo_left() -- lineinfo of the last consumed token (left edge of the space/comment area)
+ return self.lineinfo_last -- initialized to { 1, 1, 1, name }; updated each time a token is built
+end
----------------------------------------------------------------------
i = 1; -- Character offset in src
line = 1; -- Current line number
column_offset = 0; -- distance from beginning of file to last '\n'
- attached_comments = { } -- comments accumulator
+ attached_comments = { },-- comments accumulator
+ lineinfo_last = { 1, 1, 1, name }
}
setmetatable (stream, self)