Lua Sorting

lua-users home
wiki

table 库提供了一个原地排序函数,基于快速排序算法 [wikipedia]。不过,我们也可以不用花费太多代价,用纯 Lua 来实现 sort,如下所示。

这里使用的算法是希尔排序(以其发明者 Donald Shell 命名)[wikipedia],其步长序列来自 Robert Sedgewick(参见 [A036569][整数序列在线百科全书] 中关于 Sedgewick 论文的引用)。我选择希尔排序而不是快速排序,是因为它的速度通常差不多,而且代码要简单得多。步长序列应该足以处理至少一千万个元素。

另请参阅 LazySort,了解 Lua 中排序的另一种观点。

为了提高效率,排序器有三个版本;顶层函数会根据情况选择其中一个。有专门针对 < 运算符和 > 运算符的版本,以及一个通用的实现,该实现可以接受任何比较函数,就像 table.sort 一样。请注意,与 table.sort 一样,比较函数在参数相等时应返回 false,尽管在希尔排序的情况下,这不像那么关键。

样本计时结果如下。

-- shellsort.lua
-- Written by Rici Lake. The author disclaims all copyright and offers no warranty.
--
-- This module returns a single function (not a table) whose interface is upwards-
-- compatible with the interface to table.sort:
--
-- array = shellsort(array, before, n)
-- array is an array of comparable elements to be sorted in place
-- before is a function of two arguments which returns true if its first argument
--    should be before the second argument in the second result. It must define
--    a total order on the elements of array.
--      Alternatively, before can be one of the strings "<" or ">", in which case
--    the comparison will be done with the indicated operator.
--    If before is omitted, the default value is "<"
-- n is the number of elements in the array. If it is omitted, #array will be used.
-- For convenience, shellsort returns its first argument.

do
  local incs = { 1391376,
                 463792, 198768, 86961, 33936,
                 13776, 4592, 1968, 861, 336, 
                 112, 48, 21, 7, 3, 1 }

  local function ssup(t, n)
    for _, h in ipairs(incs) do
      for i = h + 1, n do
        local v = t[i]
        for j = i - h, 1, -h do
          local testval = t[j]
          if not (v < testval) then break end
          t[i] = testval; i = j
        end
        t[i] = v
      end 
    end
    return t
  end

  local function ssdown(t, n)
    for _, h in ipairs(incs) do
      for i = h + 1, n do
        local v = t[i]
        for j = i - h, 1, -h do
          local testval = t[j]
          if not (v > testval) then break end
          t[i] = testval; i = j
        end
        t[i] = v
      end 
    end
    return t
  end

  local function ssgeneral(t, n, before)
    for _, h in ipairs(incs) do
      for i = h + 1, n do
        local v = t[i]
        for j = i - h, 1, -h do
          local testval = t[j]
          if not before(v, testval) then break end
          t[i] = testval; i = j
        end
        t[i] = v
      end 
    end
    return t
  end

  function shellsort(t, before, n)
    n = n or #t
    if not before or before == "<" then return ssup(t, n)
    elseif before == ">" then return ssdown(t, n)
    else return ssgeneral(t, n, before)
    end
  end
  return shellsort
end

一个简单的测试(并非很好的基准测试)平台

local function gt(a, b) return a > b end
local function lt(a, b) return a < b end

local function builtinsort(t, before)
  if not before or before == "<" then table.sort(t)
  elseif before == ">" then table.sort(t, gt)
  else table.sort(t, before)
  end
  return t
end

local algo, sort = "Shell sort", shellsort
if not tonumber(arg[1]) then
  if arg[1] == "builtin" then
    algo, sort = "table.sort", builtinsort
  elseif arg[1] == "shell" then
    algo, sort = "Shell sort", require"shellsort"
  else error"Only shell and builtin are implemented"
  end
  table.remove(arg, 1)
end

local a = {}
local range = 100
for i = 1, tonumber(arg[1]) or 10000 do a[i] = math.random(1, range) end
local before = arg[2] and
        ( arg[2]:match"^[<>]$"
          or arg[2] and assert(loadstring("return function(a, b) return "..arg[2].." end"))()
        )
      or "<"

local now = os.clock()
sort(a, before)
local took = os.clock() - now
io.write(("%-12s with %i values: %7.3f seconds, comparison: %s"):format(algo, #a, took, arg[2] or "<"))

-- Validate
before = ({["<"] = lt, [">"] = gt})[before] or before
for i = 1, #a-1 do
  if before(a[i+1], a[i]) then print(("\t****Failed at %i\n"):format(i)); return end
end
print"\tOrder checked"

结果表明,尽管 shellsort 是纯 Lua 实现,但它与 table.sort 相比具有竞争力;在 table.sort 具有优化实现(小于比较)的情况下,shellsort 慢约 75%,但在其具有优化实现(大于比较)的情况下,它更快,并且在比较函数占主导地位的排序情况下,速度大致相同。

# (luafast is compiled with aligned doubles):

rlake@freeb:~/lualib$ for count in 1e5 2e5 5e5 1e6; do
>  for comp in "<" ">" "(a-50)^2<(b-50)^2"; do echo
>    for algo in shell builtin; do
>      luafast testsort.lua $algo $count $comp
>    done           
>  done     
>done

Shell sort   with 100000 values:   0.352 seconds, comparison: < Order checked
table.sort   with 100000 values:   0.195 seconds, comparison: < Order checked

Shell sort   with 100000 values:   0.352 seconds, comparison: > Order checked
table.sort   with 100000 values:   0.430 seconds, comparison: > Order checked

Shell sort   with 100000 values:   0.914 seconds, comparison: (a-50)^2<(b-50)^2 Order checked
table.sort   with 100000 values:   0.805 seconds, comparison: (a-50)^2<(b-50)^2 Order checked

Shell sort   with 200000 values:   0.734 seconds, comparison: < Order checked
table.sort   with 200000 values:   0.414 seconds, comparison: < Order checked

Shell sort   with 200000 values:   0.758 seconds, comparison: > Order checked
table.sort   with 200000 values:   0.906 seconds, comparison: > Order checked

Shell sort   with 200000 values:   1.891 seconds, comparison: (a-50)^2<(b-50)^2 Order checked
table.sort   with 200000 values:   1.758 seconds, comparison: (a-50)^2<(b-50)^2 Order checked

Shell sort   with 500000 values:   1.961 seconds, comparison: < Order checked
table.sort   with 500000 values:   1.062 seconds, comparison: < Order checked

Shell sort   with 500000 values:   1.938 seconds, comparison: > Order checked
table.sort   with 500000 values:   2.352 seconds, comparison: > Order checked

Shell sort   with 500000 values:   5.031 seconds, comparison: (a-50)^2<(b-50)^2 Order checked
table.sort   with 500000 values:   5.000 seconds, comparison: (a-50)^2<(b-50)^2 Order checked

Shell sort   with 1000000 values:   4.320 seconds, comparison: <        Order checked
table.sort   with 1000000 values:   2.391 seconds, comparison: <        Order checked

Shell sort   with 1000000 values:   4.500 seconds, comparison: >        Order checked
table.sort   with 1000000 values:   6.062 seconds, comparison: >        Order checked

Shell sort   with 1000000 values:  12.305 seconds, comparison: (a-50)^2<(b-50)^2        Order checked
table.sort   with 1000000 values:  12.359 seconds, comparison: (a-50)^2<(b-50)^2        Order checked

评论

这里是一个元编程实现:--DavidManura
do
  local incs = { 1391376,
                 463792, 198768, 86961, 33936,
                 13776, 4592, 1968, 861, 336, 
                 112, 48, 21, 7, 3, 1 }

  local function make_sorter(compare)
    local src = [[
      local incs = ...
        return function(t, n, before)
        for _, h in ipairs(incs) do
          for i = h + 1, n do
            local a = t[i]
            for j = i - h, 1, -h do
              local b = t[j]
              if not (]] .. compare .. [[) then break end
              t[i] = b; i = j
            end
            t[i] = a
          end 
        end
        return t
      end
    ]]
    return assert(loadstring(src))(incs)
  end

  local sorters = {}
  local aliases = {["<"] = "a<b", [">"] = "a>b"}

  function shellsort(t, before, n)
    before = aliases[before] or before or "a<b"
    n = n or #t
    local beforesrc = type(before) == "function" and "before(a,b)" or before
    local sorter = sorters[beforesrc]
    if not sorter then
      sorter = make_sorter(beforesrc)
      sorters[beforesrc] = sorter
    end
    return sorter(t, n, before)
  end

  return shellsort
end

RecentChanges · preferences
编辑 · 历史
最后编辑于 2012 年 4 月 18 日 晚上 9:26 GMT (差异)