字符串查询

lua-users home
wiki

stringquery 是一个字符串模式匹配与转换库,其设计部分受到 jQuery 的启发。

描述

主要设计特点是

与 jQuery 类似:首先针对待处理的对象(这里是字符串)构建一个查询对象,其中保存一组选区(selection);然后通过一系列链式方法调用,把这些选区作为一个整体进行细化和修改;最后对这些选区执行某个操作(例如替换或返回)。示例请参阅下面的测试套件。

状态

该库的设计尚处于初步阶段,最初仅作为实验。欢迎改进。

作者

DavidManura

test_stringquery.lua

-- test_stringquery.lua
-- test of dmlib.stringquery.

local SQ = require "dmlib.stringquery"
local sq = SQ.sq

-- Raises an error unless a == b.  The message shows both values in
-- brackets and is reported at the caller's position (error level 2).
local function asserteq(a,b)
  if a == b then
    return
  end
  local message = table.concat({'[', tostring(a), '] ~= [', tostring(b), ']'})
  error(message, 2)
end

-- Replace every word with an underscore.
assert(
  sq("this is a test"):match("%w+"):replace('_')
  == '_ _ _ _'
)

-- Linkify bare URLs that occur outside of HTML tags, keeping only those
-- containing 'user'.  The URLs in the text use the https scheme, so the
-- pattern accepts both http and https (a plain 'http://' pattern selects
-- nothing here and the assertion fails).
assert(
  sq('<p>this is a <a href="/">test</a> https://lua-users.lua.ac.cn https://lua.ac.cn </p>')
  :match("<[^>]*>")
  :invert()
  :match('https?://[^ ]+')
  :filter('user')
  :replace(function(s) return '<a href="' .. s .. '">' .. s .. '</a>' end)
  == '<p>this is a <a href="/">test</a> <a href="https://lua-users.lua.ac.cn">' ..
     'https://lua-users.lua.ac.cn</a> https://lua.ac.cn </p>'
)

-- Upper-case only the "<word> book" selections that start with
-- "green " or "red"; the "blue book" selection is left untouched.
assert(
  sq("the red book, the green book, and the blue book")
  :match("%w+ book")
  :filter(SQ.any("^green ", "^red"))
  :replace(string.upper)
  == 'the RED BOOK, the GREEN BOOK, and the blue book'
)

-- Solution to the problem from the FrontierPattern wiki page:
-- select the words that consist solely of upper-case letters.
-- Only the first value returned by get_unpacked() takes part in the
-- comparison.
assert(
  sq("the QUICK BROwn fox")
  :match("%w+")
  :filter("^[A-Z]*$")
  :get_unpacked()
  == 'QUICK'
)

-- Examples quoted in the doc comments of dmlib/stringquery.lua.
-- Nested match: upper-case runs inside each word.
asserteq(
  table.concat(sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get(), ','),
  'A,BETA,G,A' )
-- filter: keep only the four-letter words.
asserteq(
  table.concat(sq'this is a test':match'%w+':filter'^....$':get(), ','),
  'this,test' )
-- invert: everything except the alphabetic run.
asserteq(
  table.concat(sq'123 abc 234':match'%a+':invert():get(), ','),
  '123 , 234' )
-- get_unpacked: selections returned as multiple values.
asserteq(
  table.concat({sq'this is a test':match'%w+':get_unpacked()}, ','),
  'this,is,a,test' )
-- A fresh query selects the whole string.
asserteq(
  table.concat(sq'hello':get(), ','),
  'hello' )
-- Predicate combinators.
asserteq(
  SQ.any('%a%d', '%d%a')(' a1 '), true )
asserteq(
  SQ.all('%a%d', '%d%a')(' a1 2b '), true )

print 'DONE'

dmlib/stringquery.lua

-- dmlib/stringquery.lua (dmlib.stringquery)
--
-- String matching/replacing library inspired partly by jquery
--
-- Warning: preliminary design.
--
-- (c) 2009 David Manura, Licensed under the same terms as Lua (MIT license).

-- Module table; the public functions are attached to it below and it is
-- returned at the end of the file.
local M = {}


-- Overwrites the array part of dst with the array part of src.
-- Non-array fields of dst are left untouched.  Returns dst.
local function tioverride(dst, src)
  -- Clear dst[1], dst[2], ... up to the first nil.
  local n = 1
  while dst[n] ~= nil do
    dst[n] = nil
    n = n + 1
  end
  -- Copy src[1], src[2], ... up to the first nil.
  local i = 1
  local value = src[i]
  while value ~= nil do
    dst[i] = value
    i = i + 1
    value = src[i]
  end
  return dst
end

-- Returns an array of substrings of s, partitioned by an array of
-- ranges (1-based, inclusive start and end indices).
-- Always returns an odd number of substrings: even-indexed substrings
-- lie inside the ranges, odd-indexed ones are the text between them.
-- Ranges are read by direct indexing rather than the global `unpack`,
-- which does not exist on Lua 5.2 and later.
-- Example:
--   partition("abcdefg", {{1,2},{4,5}})
--   --> {'','ab', 'c','de', 'fg'}
local function partition(s, ranges)
  local result = {}
  local i = 1  -- first index not yet emitted
  for _, range in ipairs(ranges) do
    local ia, ib = range[1], range[2]
    result[#result + 1] = s:sub(i, ia - 1)  -- gap before the range
    result[#result + 1] = s:sub(ia, ib)     -- the range itself
    i = ib + 1
  end
  result[#result + 1] = s:sub(i)  -- trailing text after the last range
  return result
end


-- Helper: normalizes a predicate argument.
-- A string is treated as a Lua pattern and wrapped in a function that
-- returns the result of matching its argument against that pattern.
-- Any other value is returned unchanged.
local function getarg(o)
  if type(o) ~= 'string' then
    return o
  end
  local pattern = o
  return function(s) return s:match(pattern) end
end


-- Metatable for query objects.  It is its own __index, so the methods
-- defined on it below are reachable from any table that uses it as
-- metatable.
local mt = {}
mt.__index = mt


-- Defines new selections based on matches of
-- pattern inside current selections.
-- A pattern that can match the empty string (e.g. 'x*') yields empty
-- selections; the scan then advances by one character so that it
-- always terminates.
-- Ranges are read by direct indexing rather than the global `unpack`,
-- which does not exist on Lua 5.2 and later.
-- Returns self (for chaining).
-- Example:
--   sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get()
--   --> {'A', 'BETA', 'G', 'A'}
function mt:match(pat)
  local results = {}
  for _, range in ipairs(self) do
    local ia0, ib0 = range[1], range[2]
    local stmp = self.s:sub(ia0, ib0)
    local offset = ia0 - 1      -- converts stmp indices to self.s indices
    local last = #stmp + 1      -- last position where a match can start
    local init = 1
    while init <= last do
      local ia, ib = stmp:find(pat, init)
      if not ia then break end
      results[#results + 1] = {ia + offset, ib + offset}
      if ib >= ia then
        init = ib + 1
      else
        -- Empty match: step past it, otherwise find() would return
        -- the same position forever.
        init = ia + 1
      end
    end
  end
  tioverride(self, results)
  return self
end


-- Keeps only the current selections that match object o.
-- o can be a function (s -> b) that returns a Boolean b telling
-- whether string s matches.
-- Alternately o can be a string pattern.
-- Ranges are read by direct indexing rather than the global `unpack`,
-- which does not exist on Lua 5.2 and later.
-- Returns self (for chaining).
-- Example:
--   sq'this is a test':match'%w+':filter'^....$':get()
--   --> {'this', 'test'}
function mt:filter(o)
  local f = getarg(o)

  local result = {}
  for _, range in ipairs(self) do
    local ia, ib = range[1], range[2]
    if f(self.s:sub(ia, ib)) then
      result[#result + 1] = {ia, ib}
    end
  end
  tioverride(self, result)
  return self
end


-- Defines new selections that form the inverse (complement)
-- of the current selections: the stretches of the string that lie
-- between, before and after them.
-- Inverting a selection that covers the whole string gives an empty
-- selection set.
-- Returns self (for chaining).
-- Example:
--   sq'123 abc 234':match'%a+':invert():get()
--   --> {'123 ', ' 234'}
function mt:invert()
  local result = {}
  local i = 1  -- first index not covered by a selection seen so far
  for _, range in ipairs(self) do
    local ia, ib = range[1], range[2]
    if ia > i then
      result[#result + 1] = {i, ia - 1}
    end
    i = ib + 1
  end
  -- `<=` rather than `<`: a trailing gap that is exactly one character
  -- long (i == #self.s) must be included as well.
  if i <= #self.s then
    result[#result + 1] = {i, #self.s}
  end
  tioverride(self, result)
  return self
end


-- Builds and returns a new string in which every selection has been
-- replaced according to o.
-- If o is a string, each selection is replaced with that string.
-- Otherwise o is called as a function (s1 -> s2) on each selected
-- substring s1; a nil or false result is treated as the empty string.
function mt:replace(o)
  local transform = o
  if type(o) == 'string' then
    transform = function() return o end
  end

  -- Even-indexed pieces are the selections; odd-indexed ones are the
  -- text in between and are kept as they are.
  local pieces = partition(self.s, self)
  local idx = 2
  while idx <= #pieces do
    pieces[idx] = transform(pieces[idx]) or ''
    idx = idx + 2
  end

  return table.concat(pieces, '')
end


-- Returns the selected substrings as an array, in order.
-- Example:
--   sq'this is a test':match'%w+':get()
--   --> {'this', 'is', 'a', 'test'}
function mt:get()
  local pieces = partition(self.s, self)
  local selected = {}
  -- The selections sit at the even indices of the partition.
  local idx = 2
  while idx <= #pieces do
    selected[#selected + 1] = pieces[idx]
    idx = idx + 2
  end
  return selected
end


-- Returns all string selections as multiple return values.
-- Uses table.unpack where it exists (Lua 5.2+) and falls back to the
-- global unpack (Lua 5.1).
-- Example:
--   sq'this is a test':match'%w+':get_unpacked()
--   --> 'this', 'is', 'a', 'test'
function mt:get_unpacked()
  return (table.unpack or unpack)(self:get())
end


-- Debugging aid: prints a dump of the query object and returns self
-- so the call can sit in the middle of a chain.
-- Requires penlight 0.6.3 (pl.pretty), which is loaded only when this
-- method is called.
function mt:print_dump()
  local pretty = require("pl.pretty")
  print(pretty.write(self))
  return self
end


-- Creates a query object for string s.
-- The initial selection is the single range covering the whole string.
-- Example:
--   sq'hello':get() --> {'hello'}
local function sq(s)
  local query = {s = s}
  query[1] = {1, #s}
  return setmetatable(query, mt)
end
M.sq = sq


-- Builds a predicate that succeeds when *any* of the given
-- predicates succeeds.
-- Each argument is either a predicate function or a string pattern.
-- Useful for sq(s):filter.
-- Example:
--   any('%a%d', '%d%a')(' a1 ') --> true
local function any(...)
  local predicates = {}
  for _, candidate in ipairs({...}) do
    predicates[#predicates + 1] = getarg(candidate)
  end
  return function(s)
    for i = 1, #predicates do
      if predicates[i](s) then
        return true
      end
    end
    return false
  end
end
M.any = any


-- Builds a predicate that succeeds only when *all* of the given
-- predicates succeed.
-- Each argument is either a predicate function or a string pattern.
-- Useful for sq(s):filter.
-- Example:
--   all('%a%d', '%d%a')(' a1 2b ') --> true
local function all(...)
  local predicates = {}
  for _, candidate in ipairs({...}) do
    predicates[#predicates + 1] = getarg(candidate)
  end
  return function(s)
    for i = 1, #predicates do
      if not predicates[i](s) then
        return false
      end
    end
    return true
  end
end
M.all = all


-- Export the module table (sq, any, all).
return M

另请参阅


最近更改 · 偏好设置
编辑 · 历史记录
最后编辑于 2009 年 10 月 31 日下午 7:46 GMT (差异)