Lua Sorting |
|
table 库提供了一个原地排序函数,基于快速排序算法 [wikipedia]。不过,我们也可以不用花费太多代价,用纯 Lua 来实现 sort,如下所示。这里使用的算法是希尔排序(以其发明者 Donald Shell 命名)[wikipedia],其步长序列来自 Robert Sedgewick(参见 [A036569] 在 [整数序列在线百科全书] 中关于 Sedgewick 论文的引用)。我选择希尔排序而不是快速排序,是因为它的速度通常差不多,而且代码要简单得多。步长序列应该足以处理至少一千万个元素。
另请参阅 LazySort,了解 Lua 中排序的另一种观点。
为了提高效率,排序器有三个版本;顶层函数会根据情况选择其中一个。有专门针对 < 运算符和 > 运算符的版本,以及一个通用的实现,该实现可以接受任何比较函数,就像 table.sort 一样。请注意,与 table.sort 一样,比较函数在参数相等时应返回 false,尽管在希尔排序的情况下,这不像那么关键。
样本计时结果如下。
-- shellsort.lua -- Written by Rici Lake. The author disclaims all copyright and offers no warranty. -- -- This module returns a single function (not a table) whose interface is upwards- -- compatible with the interface to table.sort: -- -- array = shellsort(array, before, n) -- array is an array of comparable elements to be sorted in place -- before is a function of two arguments which returns true if its first argument -- should be before the second argument in the second result. It must define -- a total order on the elements of array. -- Alternatively, before can be one of the strings "<" or ">", in which case -- the comparison will be done with the indicated operator. -- If before is omitted, the default value is "<" -- n is the number of elements in the array. If it is omitted, #array will be used. -- For convenience, shellsort returns its first argument. do local incs = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 } local function ssup(t, n) for _, h in ipairs(incs) do for i = h + 1, n do local v = t[i] for j = i - h, 1, -h do local testval = t[j] if not (v < testval) then break end t[i] = testval; i = j end t[i] = v end end return t end local function ssdown(t, n) for _, h in ipairs(incs) do for i = h + 1, n do local v = t[i] for j = i - h, 1, -h do local testval = t[j] if not (v > testval) then break end t[i] = testval; i = j end t[i] = v end end return t end local function ssgeneral(t, n, before) for _, h in ipairs(incs) do for i = h + 1, n do local v = t[i] for j = i - h, 1, -h do local testval = t[j] if not before(v, testval) then break end t[i] = testval; i = j end t[i] = v end end return t end function shellsort(t, before, n) n = n or #t if not before or before == "<" then return ssup(t, n) elseif before == ">" then return ssdown(t, n) else return ssgeneral(t, n, before) end end return shellsort end
一个简单的测试(并非很好的基准测试)平台
local function gt(a, b) return a > b end local function lt(a, b) return a < b end local function builtinsort(t, before) if not before or before == "<" then table.sort(t) elseif before == ">" then table.sort(t, gt) else table.sort(t, before) end return t end local algo, sort = "Shell sort", shellsort if not tonumber(arg[1]) then if arg[1] == "builtin" then algo, sort = "table.sort", builtinsort elseif arg[1] == "shell" then algo, sort = "Shell sort", require"shellsort" else error"Only shell and builtin are implemented" end table.remove(arg, 1) end local a = {} local range = 100 for i = 1, tonumber(arg[1]) or 10000 do a[i] = math.random(1, range) end local before = arg[2] and ( arg[2]:match"^[<>]$" or arg[2] and assert(loadstring("return function(a, b) return "..arg[2].." end"))() ) or "<" local now = os.clock() sort(a, before) local took = os.clock() - now io.write(("%-12s with %i values: %7.3f seconds, comparison: %s"):format(algo, #a, took, arg[2] or "<")) -- Validate before = ({["<"] = lt, [">"] = gt})[before] or before for i = 1, #a-1 do if before(a[i+1], a[i]) then print(("\t****Failed at %i\n"):format(i)); return end end print"\tOrder checked"
结果表明,尽管 shellsort 是纯 Lua 实现,但它与 table.sort 相比具有竞争力;在 table.sort 具有优化实现(小于比较)的情况下,shellsort 慢约 75%,但在其具有优化实现(大于比较)的情况下,它更快,并且在比较函数占主导地位的排序情况下,速度大致相同。
# (luafast is compiled with aligned doubles): rlake@freeb:~/lualib$ for count in 1e5 2e5 5e5 1e6; do > for comp in "<" ">" "(a-50)^2<(b-50)^2"; do echo > for algo in shell builtin; do > luafast testsort.lua $algo $count $comp > done > done >done Shell sort with 100000 values: 0.352 seconds, comparison: < Order checked table.sort with 100000 values: 0.195 seconds, comparison: < Order checked Shell sort with 100000 values: 0.352 seconds, comparison: > Order checked table.sort with 100000 values: 0.430 seconds, comparison: > Order checked Shell sort with 100000 values: 0.914 seconds, comparison: (a-50)^2<(b-50)^2 Order checked table.sort with 100000 values: 0.805 seconds, comparison: (a-50)^2<(b-50)^2 Order checked Shell sort with 200000 values: 0.734 seconds, comparison: < Order checked table.sort with 200000 values: 0.414 seconds, comparison: < Order checked Shell sort with 200000 values: 0.758 seconds, comparison: > Order checked table.sort with 200000 values: 0.906 seconds, comparison: > Order checked Shell sort with 200000 values: 1.891 seconds, comparison: (a-50)^2<(b-50)^2 Order checked table.sort with 200000 values: 1.758 seconds, comparison: (a-50)^2<(b-50)^2 Order checked Shell sort with 500000 values: 1.961 seconds, comparison: < Order checked table.sort with 500000 values: 1.062 seconds, comparison: < Order checked Shell sort with 500000 values: 1.938 seconds, comparison: > Order checked table.sort with 500000 values: 2.352 seconds, comparison: > Order checked Shell sort with 500000 values: 5.031 seconds, comparison: (a-50)^2<(b-50)^2 Order checked table.sort with 500000 values: 5.000 seconds, comparison: (a-50)^2<(b-50)^2 Order checked Shell sort with 1000000 values: 4.320 seconds, comparison: < Order checked table.sort with 1000000 values: 2.391 seconds, comparison: < Order checked Shell sort with 1000000 values: 4.500 seconds, comparison: > Order checked table.sort with 1000000 values: 6.062 seconds, comparison: > Order checked Shell sort with 1000000 values: 12.305 seconds, comparison: (a-50)^2<(b-50)^2 Order checked table.sort with 1000000 values: 12.359 seconds, comparison: (a-50)^2<(b-50)^2 Order checked
do local incs = { 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1 } local function make_sorter(compare) local src = [[ local incs = ... return function(t, n, before) for _, h in ipairs(incs) do for i = h + 1, n do local a = t[i] for j = i - h, 1, -h do local b = t[j] if not (]] .. compare .. [[) then break end t[i] = b; i = j end t[i] = a end end return t end ]] return assert(loadstring(src))(incs) end local sorters = {} local aliases = {["<"] = "a<b", [">"] = "a>b"} function shellsort(t, before, n) before = aliases[before] or before or "a<b" n = n or #t local beforesrc = type(before) == "function" and "before(a,b)" or before local sorter = sorters[beforesrc] if not sorter then sorter = make_sorter(beforesrc) sorters[beforesrc] = sorter end return sorter(t, n, before) end return shellsort end