读取定义的块

lua-users home
wiki

使用此函数,您可以从文件或 io.stdin 中按块读取数据(每次读到给定的分隔符为止)。它是对早期版本的完全重写:现在速度更快,无需拼接字符串,占用内存更少,而且更灵活。其设计目的是处理 a) 体积达许多兆字节的大文件;b) 混合格式的输入,例如 MIME 多部分消息,它混合了以 \r\n 结尾的文本行和二进制数据。请注意一处简单的、非 Lua 标准的扩展:我使用数值变量 lua.maxread 来设置块大小,函数中集中进行的 io.read 调用(按字节块读取)都使用这个值。请将该变量替换为您首选的块大小,例如用 2^13 表示 8KB。

-- a simple example without using all the specials:
-- read a file line by line
local Handle=io.open('File','r')
local ReadUntil=io.readuntil(Handle)
local Ok,Line
repeat
   -- coroutine.resume() returns its own status flag first; the data
   -- delivered by the coroutine follows as the second value
   Ok,Line=coroutine.resume(ReadUntil,'\n',true)
   if Ok and Line then
      -- Line holds the text in front of the next '\n'; use it here
   end
   -- stop at the end of the input (Line == nil) and also after an
   -- error (Ok == false; Line is the error message in that case)
until not Ok or Line==nil

-- another example: fetch the data located between two search strings
local Handle = io.open('File', 'r')
local ReadUntil = io.readuntil(Handle)
local Ok, Chunk, Found
repeat
   -- skip forward to the first search string; with <collect> = false
   -- the skipped data is handed back block by block and not kept
   Ok, Chunk, Found = coroutine.resume(ReadUntil,
                   'search this string in a huge file',false)
   if Ok and Found then
      -- gather everything up to the second search string in one piece
      Ok, Chunk, Found = coroutine.resume(ReadUntil,
                      'search another string in the same file',true)
      if Ok and Found then break end
   end
   if not Ok then
      -- the coroutine failed; Chunk is the error message. Without this
      -- check the loop would resume the dead coroutine forever.
      error(Chunk)
   end
until Chunk == nil

-- Now if Chunk ~= nil, then Chunk is the stuff between
-- 'search this string in a huge file' and 'search another
-- string in the same file'. Yes, it's possible to do the same
-- very simply, but the advantage here is that the large file
-- isn't loaded at once into memory.

代码

function io.readuntil(Filehandle, Delimiter, Collect, Limit)

-- Creates a coroutine that reads <filehandle> block by block and hands
-- back, on every coroutine.resume(), the data located in front of the
-- next occurrence of <delimiter>.
--
-- Filehandle (userdata); only its :read(bytes) method is used
-- Delimiter (string, optional); must not be empty; max. length is the
--   block size; optional because coroutine.resume() also accepts
--   <delimiter>
-- Collect (boolean, optional)
--   = true;  read until <delimiter> is found or end of file or <limit>
--            is reached and return the string at once
--   = false; return data also before <delimiter> is found or end of
--            file or <limit> is reached
--   if it is never given the code behaves like <collect> = false
-- Limit (number, optional); number of bytes to read from
--   <filehandle>; default is unlimited
--
-- The arguments after <filehandle> may be shifted: a boolean in place
-- of <delimiter> is taken as <collect>, a number in place of
-- <delimiter> or <collect> is taken as <limit>.
--
-- Block size: lua.maxread if a global table 'lua' with that field
-- exists (non-standard), otherwise 8192 bytes.

-- <status>, <string>, <found> =
--    coroutine.resume(Function, Delimiter, Collect)

-- Function (thread); returned from io.readuntil()
-- Delimiter (string, optional); see io.readuntil(); nil keeps the
--   delimiter used before
-- Collect (boolean, optional); see io.readuntil(); nil keeps the
--   setting used before, true and false both replace it

-- return (boolean); = true; no error
--                   = false; an error occurred and the second
--                            value returned is the error message
--        (string or nil) = nil; end of file
--        (boolean) = true; delimiter found
--                  = false; delimiter not found

-- note: if the coroutine returns true,<string>,false then
--          if <collect> = false it does not have to be the end of file
--                       = true  the end of file (or <limit>) is reached
--                               and the next coroutine.resume returns
--                               true,nil(,nil)

   -- sort out shifted arguments
   if type(Delimiter) == 'boolean' then
      Collect,Delimiter = Delimiter,Collect
   end
   if type(Delimiter) == 'number' then
      Limit,Delimiter = Delimiter,nil
   end
   if type(Collect) == 'number' then
      Limit,Collect = Collect,nil
   end

   return coroutine.create(function(NewDelimiter,NewCollect)

      -- Merge the arguments of the latest coroutine.resume() into the
      -- current settings. Only nil keeps the old value: an explicit
      -- false for <collect> has to win over a former true, which a
      -- plain 'NewCollect or Collect' cannot express.
      local Next = function(NewDelimiter,NewCollect)
                      if type(NewDelimiter) == 'boolean' then
                          NewCollect,NewDelimiter = NewDelimiter,nil
                      end
                      if NewDelimiter == nil then
                         NewDelimiter = Delimiter
                      end
                      if NewCollect == nil then
                         NewCollect = Collect
                      end
                      return NewDelimiter,NewCollect
                   end

      Delimiter,Collect = Next(NewDelimiter,NewCollect)

      -- lua.maxread is a non-standard extension; fall back to 8KB so
      -- that the function also runs on a stock interpreter
      local Chunksize = type(lua) == 'table' and lua.maxread or 8192

      -- Chunk holds two blocks; First/Second are the indices of the
      -- block being searched and of the block read ahead
      local Chunk,First,Second = {},1,2
      -- SearchFrom: where the next search in Chunk[First] starts
      -- GetFrom: first byte of Chunk[First] not yet handed back
      local SearchFrom,GetFrom = 1,1
      local FoundFrom,FoundTo
      -- number of bytes read so far, needed to honour <limit>
      local Length = 0

      -- Read the next block without exceeding <limit>; returns a
      -- false value when no further data is available or allowed.
      local ReadChunk = function()
         local Data
         if Limit and Length+Chunksize > Limit then
            Data = Limit-Length > 0 and Filehandle:read(Limit-Length)
         else
            Data = Filehandle:read(Chunksize)
         end
         if Data then
            Length = Length + string.len(Data)
         end
         return Data
      end

      Chunk[First] = ReadChunk()

      if Chunk[First] then
         while true do
            -- checked on every pass because each resume may bring a
            -- new delimiter; an empty one would match forever
            if Delimiter == nil or Delimiter == '' then
               error('io.readuntil: delimiter missing or empty')
            end
            if string.len(Delimiter)>Chunksize then
               error('io.readuntil: delimiter to long')
            end

            FoundFrom,FoundTo = string.find(
                      Chunk[First],Delimiter,SearchFrom,true)
            if FoundFrom then
               -- delimiter found in first chunk
               Delimiter,Collect = Next(coroutine.yield(
                    string.sub(Chunk[First],GetFrom,FoundFrom-1),true))
               SearchFrom,GetFrom = FoundTo+1,FoundTo+1
            else
               Chunk[Second] = ReadChunk()

               if Chunk[Second] then
                  -- concatenate end of first chunk with start of
                  -- second chunk so that a possibly split delimiter
                  -- must be found; the tail never starts in front of
                  -- SearchFrom, so bytes of an already consumed
                  -- delimiter cannot be matched a second time
                  local TailFrom = math.max(
                     string.len(Chunk[First])-string.len(Delimiter)+2,
                     SearchFrom)
                  local Tail = string.sub(Chunk[First],TailFrom)
                  FoundFrom,FoundTo = string.find(
                     Tail .. string.sub(Chunk[Second],1,
                                        string.len(Delimiter)-1),
                     Delimiter,1,true)
                  if FoundFrom then
                     -- delimiter is split between first and second
                     -- chunk; Continue is the first byte of the
                     -- second chunk behind the delimiter
                     local Continue = FoundTo-string.len(Tail)+1
                     Delimiter,Collect = Next(coroutine.yield(
                       string.sub(Chunk[First],GetFrom,
                                  TailFrom+FoundFrom-2), true
                     ))
                     First,Second = Second,First
                     SearchFrom,GetFrom = Continue,Continue
                  else
                     -- delimiter isn't split between first and second chunk
                     if Collect then
                        -- keep everything; only the new part still
                        -- has to be searched
                        SearchFrom = string.len(Chunk[First])+1
                        Chunk[First] = Chunk[First]..Chunk[Second]
                     else
                        -- hand back what is left of the first chunk
                        -- and continue with the second one
                        if string.len(Chunk[First]) >= GetFrom then
                           Delimiter,Collect = Next(coroutine.yield(
                              string.sub(Chunk[First],GetFrom),false))
                        end
                        First,Second = Second,First
                        SearchFrom,GetFrom = 1,1
                     end
                  end
               else
                  -- no delimiter found and no further input
                  break
               end
            end
         end

         if string.len(Chunk[First]) >= GetFrom then
            -- return rest of first chunk
            coroutine.yield(string.sub(Chunk[First],GetFrom),false)
         end
      end
   end)
   -- return (thread); a coroutine
end

-- MarkusHuber


最近更改 · 偏好设置
编辑 · 历史记录
最后编辑于 2007 年 5 月 28 日下午 11:01 GMT (差异)