Ex 模式 |
|
Lua 模式 [1] 是正则表达式的有限形式。与正则表达式不同,模式不支持对除单个字符集以外的任何内容进行分组和量化 [2] 操作。对于完整的正则表达式(或 Perl 风格的正则表达式)支持,您可以编译正则表达式模块,例如 lrexlib [3]。为了获得更大的灵活性,对于解析和词法分析应用程序,甚至对于更简单的情况,可以使用解析表达式语法 (PEG) 通过用 C 实现的 LuaPeg 库 [4]。
然而,有时 Lua 模式几乎足够了,因为我们可能希望在不编译任何额外的 C 代码的情况下,稍微扩展模式的功能。这就是这个模块的用武之地。这个模块在 Lua 模式之上构建扩展模式匹配。它采用以类似于 LPeg 的形式编写的模式表达式,并将其转换为包含 string.match
调用的 Lua 代码字符串,然后通过 loadstring
将其编译为 Lua 函数。这类似于您可能自己编写的代码,尽管这个模块使用代码生成对其进行了自动化。假设您预编译了重复使用的任何匹配对象,它应该具有类似的效率。
这是一个简单的例子
local M = require "xpattern" local P = M.P local m = ( (P'(b+)' + P'(c+)') * P'[A-Z][a-z]'^0 * P'(.)()' ):compile() local a,b,c,d = m('mmcccZzYybbZzYyddd') -- match c not b first assert(a == nil and b == 'ccc' and c == 'b' and d == 11)
它在内部生成此 Lua 函数
local match = ... -- string.match return function(s,pos) for pos1=(pos or 1),#s do local pos2 local c1,c2,c3,c4 local pos3 = pos1 c1,pos3 = match(s, "^(b+)()", pos1) if not pos3 then c2,pos3 = match(s, "^(c+)()", pos1) end pos2 = pos3 if pos2 then do local pos3=pos2 pos2 = pos2 while 1 do pos3 = match(s, "^[A-Z][a-z]()", pos3) if pos3 then pos2=pos3 else break end end end end if pos2 then c3,c4,pos2 = match(s, "^(.)()()", pos2) end if pos2 then return c1,c2,c3,c4 end end end
有关更多示例,请参见下面的测试套件。
警告:此代码应被视为实验性的。它尚未完全开发或测试。如果您想在生产中使用它,请修复代码。
-- xpattern.lua -- Preliminary regular expression-like support in Lua -- Uses Lua patterns as the core building block. -- -- Implemented in pure Lua with code generation technique. -- It translates an expression into a snippet of Lua code -- having a series of `string.match` calls, which is then -- compiled (via `loadstring`). -- -- Like lpeg, does not support backtracking. -- -- WARNING: This is experimental code. The design and implementation -- has not been thoroughly tested. -- -- Version v20091021. -- (c) 2008-2009 David Manura. Licensed under the same terms as Lua (MIT license). -- Please post patches. local M = {} local string = string local format = string.format local match = string.match local assert = assert local error = error local ipairs = ipairs local loadstring = loadstring local setmetatable = setmetatable local type = type local print = print -- Adds whitespace to string `s`. -- Whitespace string `ws` (default to '' if omitted) is prepended to each line -- of `s`. Also ensures `s` is is terminated by a newline. local function add_whitespace(s, ws) ws = ws or '' s = s:gsub('[^\r\n]+', ws .. '%1') if s:match('[^\r\n]$') then s = s .. '\n' end return s end -- Counts the number `count` of captures '()' in Lua pattern string `pat`. local function count_captures(pat) local count = 0 local pos = 1 while pos <= #pat do local pos2 = pat:match('^[^%(%%%[]+()', pos) if pos2 then pos = pos2 elseif pat:match('^%(', pos) then count = count + 1 pos = pos + 1 elseif pat:match('^%%b..', pos) then pos = pos + 3 elseif pat:match('^%%.', pos) then pos = pos + 2 else local pos2 = pat:match('^%[[^%]%%]*()', pos) if pos2 then pos = pos2 while 1 do local pos2 = pat:match('^%%.[^%]%%]*()', pos) if pos2 then pos = pos2 elseif pat:match('^%]', pos) then pos = pos + 1 break else error('syntax', 2) end end else error('syntax', 2) end end end return count end M._count_captures = count_captures -- Appends '()' to Lua pattern string `pat`. local function pat_append_pos(pat) local prefix = pat:match'^(.*)%$$' pat = prefix and prefix .. '()$' or pat .. '()' return pat end -- Prepends '()' to Lua pattern string `pat`. local function pat_prepend_pos(pat) local postfix = pat:match'^%^(.*)' pat = postfix and '^()' .. postfix or '()' .. pat return pat end -- Prepends '^' to Lua pattern string `pat`. local function pat_prepend_carrot(pat) local postfix = pat:match'^%^(.*)' pat = postfix and pat or '^' .. pat return pat end -- Return a string listing pattern capture variables with indices `firstidx` -- to `lastidx`. -- Ex: code_vars(1,2) --> 'c1,c2' local function code_vars(firstidx, lastidx) local code = '' for i=firstidx,lastidx do code = code .. (i == firstidx and '' or ',') .. 'c' .. i end return code end -- Metatable for expression objects local epat_mt = {} epat_mt.__index = epat_mt -- Builds an extended pattern object `epat` from Lua string pattern `pat`. local function pattern(pat) local epat = setmetatable({}, epat_mt) epat.call = function(srcidx0, destidx0, totncaptures0) local ncaptures = count_captures(pat) local lvars = code_vars(totncaptures0+1, totncaptures0+ncaptures) .. (ncaptures == 0 and '' or ',') .. 'pos' .. destidx0 local pat = pat_append_pos(pat) pat = pat_prepend_carrot(pat) local str = format('%q', pat) local code = lvars .. ' = match(s, ' .. str .. ', pos' .. srcidx0 .. ')\n' return code, ncaptures end epat.anchored = pat:sub(1,1) == '^' return epat end -- Generates code from pattern `anypat` (either Lua pattern string or extended -- pattern object). -- `anypat` - either Lua pattern string or extended pattern object -- `srcidx0` - index of variable holding position to start matching at -- `destidx0` - index of variable holding position to store subsequent -- match position at. stores nil if no match -- `totncaptures0` - number of captures prior to this match -- `code` - Lua code string (code) and number of -- `ncaptures` - number of captures in pattern. local function gen(anypat, srcidx0, destidx0, totncaptures0) if type(anypat) == 'string' then anypat = pat_prepend_carrot(anypat) anypat = pattern(anypat) end local code, ncaptures = anypat(srcidx0, destidx0, totncaptures0) return code, ncaptures end -- Creates a new extended pattern object `epat` that is the concatenation of -- the given list (of size >= 0) of pattern objects. -- Specify a single string argument to convert a Lua pattern to an extended -- pattern object. local function seq(...) -- epats... -- Ensure args are extended pattern objects. local epats = {...} for i=1,#epats do if type(epats[i]) == 'string' then epats[i] = pattern(epats[i]) end end local epat = setmetatable({}, epat_mt) epat.call = function(srcidx0, destidx0, totncaptures0, ws) if #epats == 0 then return 'pos' .. destidx0 .. ' = pos' .. srcidx0 .. '\n', 0 elseif #epats == 1 then return epats[1](srcidx0, destidx0, totncaptures0, ws) else local ncaptures = 0 local pat_code, pat_ncaptures = gen(epats[1], srcidx0, destidx0, totncaptures0+ncaptures, true) ncaptures = ncaptures + pat_ncaptures local code = add_whitespace(pat_code, '') for i=2,#epats do local pat_code, pat_ncaptures = gen(epats[i], destidx0, destidx0, totncaptures0+ncaptures, true) ncaptures = ncaptures + pat_ncaptures code = code .. 'if pos' .. destidx0 .. ' then\n' .. add_whitespace(pat_code, ' ') .. 'end\n' end return code, ncaptures end end if epats[1] and epats[1].anchored then epat.anchored = true end return epat end M.P = seq -- Creates new extended pattern object `epat` that is the alternation of the -- given list of pattern objects `epats...`. local function alt(...) -- epats... -- Ensure args are extended pattern objects. local epats = {...} for i=1,#epats do if type(epats[i]) == 'string' then epats[i] = pattern(epats[i]) end end local epat = setmetatable({}, epat_mt) epat.call = function(srcidx0, destidx0, totncaptures0) if #epats == 0 then return 'pos' .. destidx0 .. ' = pos' .. srcidx0 .. '\n', 0 elseif #epats == 1 then return epats[1](srcidx0, destidx0, totncaptures0) else local ncaptures = 0 local pat_code, pat_ncaptures = gen(epats[1], srcidx0, destidx0+1, totncaptures0+ncaptures, true) ncaptures = ncaptures + pat_ncaptures local code = 'local pos' .. destidx0+1 .. ' = pos' .. srcidx0 .. '\n' .. add_whitespace(pat_code, '') for i=2,#epats do local pat_code, pat_ncaptures = gen(epats[i], srcidx0, destidx0+1, totncaptures0+ncaptures, true) ncaptures = ncaptures + pat_ncaptures code = code .. 'if not pos' .. destidx0+1 .. ' then\n' .. add_whitespace(pat_code, ' ') .. 'end\n' end code = code .. 'pos' .. destidx0 .. ' = pos' .. destidx0+1 .. '\n' return code, ncaptures end end return epat end M.alt = alt -- Creates new extended pattern object `epat` that is zero or more repetitions -- of the given pattern object `pat` (if one evaluates to false) or one or more -- (if one evaluates to true). local function star(pat, one) local epat = setmetatable({}, epat_mt) epat.call = function(srcidx0, destidx0, totncaptures0) local ncaptures = 0 local destidx = destidx0 + 1 local code = 'do\n' .. ' local pos' .. destidx .. '=pos' .. srcidx0 .. '\n' local pat_code, pat_ncaptures = gen(pat, destidx, destidx, totncaptures0+ncaptures, true) ncaptures = ncaptures + pat_ncaptures code = code .. (one and (' pos' .. destidx0 .. ' = nil\n') or (' pos' .. destidx0 .. ' = pos' .. srcidx0 .. '\n')) .. ' while 1 do\n' .. add_whitespace(pat_code, ' ') .. ' if pos' .. destidx .. ' then\n' .. ' pos' .. destidx0 .. '=pos' .. destidx .. '\n' .. ' else break end\n' .. ' end\n' .. 'end\n' return code, ncaptures end return epat end M.star = star -- Creates new extended pattern object `epat` that is zero or one of the -- given pattern object `epat0`. local function zero_or_one(epat0) local epat = setmetatable({}, epat_mt) epat.call = function(srcidx0, destidx0, totncaptures0) local ncaptures = 0 local destidx = destidx0 + 1 local code = 'do\n' .. ' local pos' .. destidx .. '=pos' .. srcidx0 .. '\n' local epat0_code, epat0_ncaptures = gen(epat0, destidx, destidx, totncaptures0+ncaptures, true) ncaptures = ncaptures + epat0_ncaptures code = code .. add_whitespace(epat0_code) .. ' if pos' .. destidx .. ' then\n' .. ' pos' .. destidx0 .. '=pos' .. destidx .. '\n' .. ' else\n' .. ' pos' .. destidx0 .. '=pos' .. srcidx0 .. '\n' .. ' end\n' .. 'end\n' return code, ncaptures end return epat end M.zero_or_one = zero_or_one -- Gets Lua core code string `code` corresponding to pattern object `epat` local function basic_code_of(epat) local pat_code, ncaptures = epat(1, 2, 0, true) local lvars = code_vars(1, ncaptures) if epat.anchored then local code = 'local pos1=pos or 1\n' .. 'local pos2\n' .. (lvars ~= '' and ' local ' .. lvars .. '\n' or '') .. add_whitespace(pat_code) .. '\n' .. 'if pos2 then return ' .. (lvars ~= '' and lvars or 's:sub(pos1,pos2-1)') .. ' end\n' return code else local code = 'for pos1=(pos or 1),#s do\n' .. ' local pos2\n' if lvars == '' then code = code .. add_whitespace(pat_code, ' ') .. ' if pos2 then return s:sub(pos1,pos2-1) end\n' else code = code .. ' local ' .. lvars .. '\n' .. add_whitespace(pat_code, ' ') .. ' if pos2 then return ' .. lvars .. ' end\n' end code = code .. 'end\n' return code end end M.basic_code_of = basic_code_of -- Gets Lua complete code string `code` corresponding to pattern object `epat`. local function code_of(epat) local code = 'local match = ...\n' .. 'return function(s,pos)\n' .. add_whitespace(basic_code_of(epat), ' ') .. 'end\n' return code end M.code_of = code_of -- Compiles pattern object `epat` to Lua function `f`. local function compile(epat) local code = code_of(epat) if M.debug then print('DEBUG:\n' .. code) end local f = assert(loadstring(code))(match) return f end M.compile = compile -- operator for pattern matching function epat_mt.__call(epat, ...) return epat.call(...) end -- operator for pattern alternation function epat_mt.__add(a_epat, b_epat) return alt(a_epat, b_epat) end -- operator for pattern concatenation function epat_mt.__mul(a_epat, b_epat) return seq(a_epat, b_epat) end -- operator for pattern repetition function epat_mt.__pow(epat, n) if n == 0 then return star(epat) elseif n == 1 then return star(epat, true) elseif n == -1 then return zero_or_one(epat) else error 'FIX - unimplemented' end end -- IMPROVE design? epat_mt.compile = compile epat_mt.basic_code_of = basic_code_of epat_mt.code_of = code_of return M
-- xpattern_test.lua - test suite for xpattern.lua -- utility function: convert list of values to string. local function str(...) local n = select('#', ...) local t = {...} for i=1,n do t[i] = tostring(t[i]) end return table.concat(t, ',') end local M = require "xpattern" local P = M.P M.debug = true -- internal: _count_captures assert(M._count_captures'' == 0) assert(M._count_captures'a' == 0) assert(not pcall(function() M._count_captures'%' end)) assert(M._count_captures'()' == 1) assert(M._count_captures'%(%)' == 0) -- %( assert(M._count_captures'[()]' == 0) -- () inside [] assert(M._count_captures'[%(%)]' == 0) -- %( inside [] assert(M._count_captures'[%]()]' == 0) -- %] inside [] assert(M._count_captures'[]()]' == 1) assert(M._count_captures'%b()' == 0) -- () on %b.. assert(M._count_captures'(()().())' == 4) -- nested -- more complex example assert(M._count_captures'.(.%))[(]%(()' == 2) -- simple matching assert(str(P'':compile()('')) == '') assert(str(P'':compile()('a')) == '') assert(str(P'a':compile()('')) == '') assert(str(P'a':compile()('a')) == 'a') assert(str(P'a':compile()('ba')) == 'a') assert(str(P'a+':compile()('baa')) == 'aa') -- simple anchors assert(str(P'^a+':compile()('aa')) == 'aa') assert(str(P'^a+':compile()('baab')) == '') -- $ fail assert(str(P'a+$':compile()('baa')) == 'aa') assert(str(P'a+$':compile()('baab')) == '') -- $ fail -- simple captures assert(str(P'(a+)(b+)':compile()('baab')) == 'aa,b') assert(str(P'^(a+)(b+)':compile()('baab')) == '') -- simple sequences local m = P():compile() assert(str( m('') ) == '') assert(str( m('a') ) == '') local m = P(''):compile() assert(str( m('') ) == '') assert(str( m('a') ) == '') local m = P('a', 'b', 'c'):compile() assert(str( m('.a.') ) == '') assert(str( m('.abc.') ) == 'abc') local m = (P'a' * P'b' * P'c'):compile() -- identical assert(str( m('.a.') ) == '') assert(str( m('.abc.') ) == 'abc') local m = P(P'a', 'b', P'c'):compile() -- identical assert(str( m('.a.') ) == '') assert(str( m('.abc.') ) == 'abc') local m = P(P'a+', 'b+', P'c+'):compile() assert(str( m('.abaabcc.') ) == 'aabcc') -- simple alternation local m = (P'aa+' + P'bb+'):compile() assert(str( m('abbaa') ) == 'bb') local m = (P'aa+' + P'bb+' + P'cc+'):compile() assert(str( m('abccaa') ) == 'cc') -- simple combinations local m = ((P'(a+)' + P'b(b*)') * P'(c+)()'):compile() assert(str( m("aacccdd")) == 'aa,nil,ccc,6') assert(str( m("bbcccdd")) == 'nil,b,ccc,6') assert(str( m("bbdd")) == '') print('?'..str( m("aabbcc"))) assert(str( m("aabbcc")) == 'nil,b,cc,7') -- alternative -- simple replication (*) local m = ( P'a'^0 ):compile() assert(str(m'') == '') assert(str(m'a') == 'a') assert(str(m'aab') == 'aa') -- replication (*) local m = ( (P'a+' + P'b+')^0 ):compile() assert(str(m'zabaabbc') == '') assert(str(m'abaabb') == 'abaabb') local m = ( (P'a+' * P'b+' + P'c+' * P'd+')^0 ):compile() assert(str(m'aabbccddaa') == 'aabbccdd') local m = ( P'aa'^0 * P'bb' * P'aa'^0 ):compile() assert(str(m'aaccaaaabbaa') == 'aaaabbaa') -- simple replication (+) local m = ( P'a'^1 ):compile() assert(str(m'') == '') assert(str(m'a') == 'a') assert(str(m'aab') == 'aa') -- replacation (+) local m = ( P'b' * P'a'^1 ):compile() print(m'b') assert(str(m'b') == '') assert(str(m'ba') == 'ba') assert(str(m'baab') == 'baa') -- simple replication (?) local m = ( P'a'^-1 ):compile() assert(str(m'') == '') assert(str(m'a') == 'a') assert(str(m'aab') == 'a') -- replication (?) local m = ( P'c' * (P'a+' + P'b+')^-1 ):compile() assert(str(m'caabb') == 'caa') -- Some of these examples from Mastering Regular Expressions (MRE), -- 2nd Ed. Jeffrey .Friedl. -- MRE p.19 local m = ( P'^' * (P'From' + P'Subject' + P'Date') * P':%s*(.*)' ):compile() assert(str(m('Subject: test')) == 'test') -- MRE p.13 local m = ( (P'Geo' + P'Je') * P'ff' * (P're' + P'er') * P'y' ):compile() assert(str(m'Jeffrey') == 'Jeffrey') assert(str(m'Jeffery') == 'Jeffery') assert(str(m'Geoffrey') == 'Geoffrey') assert(str(m'Geoffery') == 'Geoffery') assert(str(m'Jefery') == '') assert(str(m'Geofferi') == '') assert(str(m'GeoffrezGeoffery') == 'Geoffery') -- skips assert(str(m'JefferzGeoffery') == 'Geoffery') -- skips assert(str(m'GeoffJeffery') == 'Jeffery') -- skips -- MRE p.24 local m = ( P'%$[0-9]+' * P'%.[0-9][0-9]'^-1 ):compile() assert(str(m'$20.00') == '$20.00') assert(str(m'$20') == '$20') assert(str(m'$20.00.00') == '$20.00') -- example print 'example' local M = require "xpattern" local P = M.P local m = ( (P'(b+)' + P'(c+)') * P'[A-Z][a-z]'^0 * P'(.)()' ):compile() local a,b,c,d = m('mmcccZzYybbZzYyddd') -- match c not b first assert(a == nil and b == 'ccc' and c == 'b' and d == 11) -- example local m = P('foo', P'bar'+P'baz', 'qux'):compile() assert(str(m'afoobazfoobarquxbar', 'foobarqux')) local m = P('^foo', P'bar'+P'baz', 'qux'):compile() -- anchored assert(str(m'afoobazfoobarquxbar', '')) assert(str(m'foobarquxbar', '')) -- https://lua-users.lua.ac.cn/lists/lua-l/2009-10/msg00752.html local m = ( P'^' * ( ( P'ceil'+P'abs' +P'floor'+P'mod' +P'exp'+P'log'+P'pow'+ P'sqrt'+P'acos'+P'asin' +P'atan'+P'cos'+P'sin'+P'tan'+ P'deg' +P'rad' +P'random' ) * P'%(' + P'[0-9%(%)%-%+%*%/%.%,]' + P'pi' )^1 * P'$' ):compile() assert(m'cos(1+pi)' == 'cos(1+pi)') assert(m'cos(1+p)' == nil) -- 'p' assert(m'cos(12.3/2)+mod(2,3)' == 'cos(12.3/2)+mod(2,3)') assert(m'cos(12.3/2)+mod(2,3) ' == nil) -- ' ' assert(m' cos(12.3/2)+mod(2,3)' == nil) -- ' ' assert(m'cos(12.3/2)+mod+2' == nil) -- no '(' print 'DONE'
更改