高精度计时器
此计时器由我的基准测试工具包 BenchmarkModule 使用。
HiResTimer.lua
---------------------------
-- HiResTimer: high resolution timer built on the Win32 performance counter.
--
-- This module provides a single function, HiResTimer.clock(), which
-- returns the number of seconds elapsed since the module was loaded.
--
-- Depends on the "alien" FFI module and on kernel32.dll.
---------------------------

--
-- take the alien module
--
local alien = require "alien"

--
-- get the kernel dll
--
local kernel32 = alien.load("kernel32.dll")

--
-- get dll functions
-- (each takes a pointer to a 64-bit output value and returns an int that
-- is zero when the call failed)
--
local QueryPerformanceCounter = kernel32.QueryPerformanceCounter
QueryPerformanceCounter:types{ ret = "int", abi = "stdcall", "pointer" }

local QueryPerformanceFrequency = kernel32.QueryPerformanceFrequency
QueryPerformanceFrequency:types{ ret = "int", abi = "stdcall", "pointer" }

-- 2^32 as a plain number; used instead of math.ldexp, which is not
-- available in newer Lua versions.
local TWO_POW_32 = 2 ^ 32

--------------------------------------------
--- utility : convert a long to an unsigned long value
-- (because alien does not support longlong nor ulong)
-- @param long (number) signed value read from an alien 'long' array
-- @return (number) the same bit pattern interpreted as unsigned
-- NOTE(review): assumes alien's 'long' is 32 bits wide, as on Windows.
--------------------------------------------
local function to_unsigned(long)
  if long < 0 then
    return long + TWO_POW_32
  end
  return long
end

--------------------------------------------
--- Call one of the QueryPerformance* functions and return its 64-bit
-- result as a Lua number.
-- The value is read back as two 'long' words: word 1 is the low half,
-- word 2 is the high half.
-- @param query (function) alien function taking a pointer to the output
-- @param name (string) function name, used only in the error message
-- @return (number)
--------------------------------------------
local function read_large_integer(query, name)
  local words = alien.array("long", 2)
  if query(words.buffer) == 0 then
    -- A zero return means the buffer was not filled in; any value
    -- computed from it would be meaningless.
    error(name .. " failed")
  end
  return to_unsigned(words[1]) + to_unsigned(words[2]) * TWO_POW_32
end

--------------------------------------------
--- Query the performance frequency.
-- @return (number) counter ticks per second
--------------------------------------------
local function qpf()
  return read_large_integer(QueryPerformanceFrequency,
                            "QueryPerformanceFrequency")
end

--------------------------------------------
--- Query the performance counter.
-- @return (number) current counter value in ticks
--------------------------------------------
local function qpc()
  return read_large_integer(QueryPerformanceCounter,
                            "QueryPerformanceCounter")
end

--------------------------------------------
-- get the startup values
--------------------------------------------
local f0 = qpf()
local c0 = qpc()

local HiResTimer = {}

--------------------------------------------
--- Return a hires clock
-- @return (number) elapsed seconds since load of the module
--------------------------------------------
function HiResTimer.clock()
  return (qpc() - c0) / f0
end

-- Backward compatibility: the previous implementation used module(),
-- which also published the module as the global "HiResTimer".
_G.HiResTimer = HiResTimer

return HiResTimer
-- Usage example: time one million multiplications with HiResTimer.clock().
local timer = require("HiResTimer")

local started = timer.clock()
for n = 1, 1000 * 1000 do
  local _ = n * n -- result is discarded; the loop is only a workload
end
local finished = timer.clock()

print("Took", finished - started, "seconds")
我一直使用 LuaSocket 的 gettime 函数作为通用的高精度计时器。它在 Windows、Linux 和 BSD 上似乎运行良好。此外,LuaSocket 通常已经安装好了。 -- ScottVokes
对我来说,socket.gettime() 的精度不够高。请查看此计时器。
-- Compare HiResTimer.clock() with socket.gettime(): for a growing number
-- of loop iterations, time the same workload once with each clock and
-- print both durations in milliseconds on one line.
local hires = require("HiResTimer")
local socket = require("socket")

-- Formatted write to standard output (no newline is appended).
local function printf(...)
  io.write(string.format(...))
end

for loops = 0, 100000, 1000 do
  printf("%6d loops ", loops)

  -- Measure the workload with the high resolution timer.
  local hires_start = hires.clock()
  for _ = 1, loops do
    local _ = math.sin(123)
  end
  local hires_stop = hires.clock()
  printf("hrt=%9.3f ms", (hires_stop - hires_start) * 1000)

  -- Measure the same workload with LuaSocket's clock.
  local socket_start = socket.gettime()
  for _ = 1, loops do
    local _ = math.sin(123)
  end
  local socket_stop = socket.gettime()
  printf(" socket=%9.3f ms\n", (socket_stop - socket_start) * 1000)

  --if socket_stop>socket_start then break end
end
0 loops hrt= 0.035 ms socket= 0.000 ms 1000 loops hrt= 0.327 ms socket= 0.000 ms 2000 loops hrt= 0.461 ms socket= 0.000 ms 3000 loops hrt= 0.685 ms socket= 0.000 ms 4000 loops hrt= 0.905 ms socket= 0.000 ms 5000 loops hrt= 1.123 ms socket= 0.000 ms 6000 loops hrt= 1.391 ms socket= 0.000 ms 7000 loops hrt= 1.565 ms socket= 0.000 ms 8000 loops hrt= 1.801 ms socket= 15.625 ms 9000 loops hrt= 2.032 ms socket= 0.000 ms 10000 loops hrt= 2.244 ms socket= 0.000 ms ::: 90000 loops hrt= 22.061 ms socket= 15.625 ms 91000 loops hrt= 20.248 ms socket= 31.250 ms 92000 loops hrt= 22.713 ms socket= 15.625 ms 93000 loops hrt= 22.575 ms socket= 15.625 ms 94000 loops hrt= 21.200 ms socket= 15.625 ms 95000 loops hrt= 22.974 ms socket= 15.625 ms 96000 loops hrt= 21.370 ms socket= 31.250 ms 97000 loops hrt= 23.359 ms socket= 31.250 ms 98000 loops hrt= 21.953 ms socket= 15.625 ms 99000 loops hrt= 22.304 ms socket= 15.625 ms 100000 loops hrt= 22.887 ms socket= 15.625 ms
(至少在 Windows 上)socket.gettime 的分辨率约为 64 次滴答/秒(即每次滴答 15.625 ms),而 HiResTimer 使用的性能计数器超过 1000000 次滴答/秒(确切值因机器而异)。
为了完整起见,我在 Linux 机器上检查了 socket.gettime
-- Time a growing workload with socket.gettime() only and print each
-- duration in milliseconds.
local socket = require("socket")

-- Formatted write to standard output (no newline is appended).
local function printf(...)
  io.write(string.format(...))
end

for loops = 0, 100000, 1000 do
  printf("%6d loops ", loops)

  local started = socket.gettime()
  for _ = 1, loops do
    local _ = math.sin(123)
  end
  local finished = socket.gettime()

  printf(" socket=%9.3f ms\n", (finished - started) * 1000)
end
0 loops socket= 0.002 ms 1000 loops socket= 0.509 ms 2000 loops socket= 1.017 ms 3000 loops socket= 1.485 ms 4000 loops socket= 1.172 ms 5000 loops socket= 1.477 ms 6000 loops socket= 1.776 ms 7000 loops socket= 2.078 ms 8000 loops socket= 2.381 ms 9000 loops socket= 2.850 ms 10000 loops socket= 3.000 ms :::: 90000 loops socket= 25.086 ms 91000 loops socket= 25.474 ms 92000 loops socket= 25.696 ms 93000 loops socket= 25.991 ms 94000 loops socket= 26.368 ms 95000 loops socket= 26.634 ms 96000 loops socket= 26.922 ms 97000 loops socket= 27.096 ms 98000 loops socket= 27.463 ms 99000 loops socket= 27.776 ms 100000 loops socket= 27.973 ms
这看起来相当不错,比 Windows 上的结果好得多。
上面的 socket.gettime() 代码在 Mac OS X 上运行良好(在 10.6.5 上测试)
结果
0 loops socket= 0.001 ms 1000 loops socket= 0.156 ms 2000 loops socket= 0.264 ms 3000 loops socket= 0.399 ms 4000 loops socket= 0.529 ms 5000 loops socket= 0.667 ms ... 96000 loops socket= 12.948 ms 97000 loops socket= 12.678 ms 98000 loops socket= 12.718 ms 99000 loops socket= 12.963 ms 100000 loops socket= 12.949 ms
-- DougCurrie