Lua 项目列表到 XML

lua-users home
wiki

这个示例程序从 Lua 网站读取使用 Lua 编写的项目列表,并将其输出为格式良好的 XML 文档。运行该程序需要系统中已安装 GNU wget(它可用于主流的 Unix 和 Windows 系统,并且大多数 Linux 发行版默认已安装)。

警告:该程序直接读取并解析 HTML。列表页面的格式今后一旦发生变化,解析器**必然**会失效。

#!/usr/bin/env lua

-- Local file the downloaded project list is stored in.
local fname = "uses.html"

-- Fetch the page with wget. The exit status must be checked explicitly:
-- "wget -O" creates the output file even when the download fails, so the
-- io.open check below would not notice a failed transfer.
-- os.execute returns 0 on success in Lua 5.0/5.1 and true in Lua 5.2+.
local status = os.execute("wget -q -O " .. fname .. " https://lua.ac.cn/uses.html")
if status ~= 0 and status ~= true then
  io.stderr:write("Error downloading '" .. fname .. "' with wget.\n")
  os.exit(1)
end

local fp = io.open(fname, "r")
if fp == nil then
  -- Diagnostics go to stderr; stdout is reserved for the XML document.
  io.stderr:write("Error opening file '" .. fname .. "'.\n")
  os.exit(1)
end

-- Slurp the whole page into memory; the parsing below works on one string.
local s = fp:read("*a")
fp:close()

-- Normalize the raw HTML held in `s` so the later pattern matching can rely
-- on a single canonical spelling of every tag. The rules are applied strictly
-- in the order listed; each entry is { pattern, replacement }.
do
  local rewrites = {
    -- Collapse line breaks and the optional spaces around tag delimiters.
    { "\n", " " },
    { " *< *", " <" },
    { " *> *", "> " },
    { "> *<", "><" },

    -- Lowercase every tag name (opening and closing).
    { "(<[^ >]+)", string.lower },

    -- Drop images and whole script elements.
    { "<img[^>]*>", "" },
    { "<script[^>]*>.-</script>", "" },

    -- Canonicalize anchors: lowercase the part up to HREF=, then discard
    -- whatever precedes the href attribute so links start with '<a href='.
    { "(<a[^>]*HREF *=)", string.lower },
    { "<a[^>]*href *= *", "<a href=" },
  }

  for _, rule in ipairs(rewrites) do
    -- Single-target assignment keeps only the rewritten string, not the count.
    s = string.gsub(s, rule[1], rule[2])
  end
end


--- Escape the characters that are markup-significant in XML character data.
-- @param text string to escape
-- @return the escaped string
local function xml_escape(text)
  -- '&' must be handled first so the entities added below are not re-escaped.
  text = string.gsub(text, "&", "&amp;")
  text = string.gsub(text, "<", "&lt;")
  text = string.gsub(text, ">", "&gt;")
  return text
end

-- string.gfind was renamed to string.gmatch in Lua 5.1 and no longer exists
-- in later versions; use whichever one the running interpreter provides.
local gmatch = string.gmatch or string.gfind

print("<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>")
print("<luauses>")

for tmp in gmatch(s, "<h3>.-<hr>") do

  -- Current data format (without spaces and line-breaks):
  --     <h3>
  --       <a NAME="1" HREF="APPURL">APPNAM</a>
  --       <br><small><em>USER</em></small>
  --     </h3>
  --       DESCR [can have html here]
  --       <p> Contact: <a HREF="EMAIL">CONTACT</a>
  --     <hr>

  -- Every field is local and starts out empty, so a piece that is missing
  -- from one entry can neither leak over from the previous entry nor reach
  -- the concatenations below as nil.
  local appnam, appurl = "", ""
  local user, desc = "", ""
  local name, email = "", ""

  -- Application name and URL come from the anchor inside the heading.
  local _, _, app = string.find(tmp, "<h3>(.-)</h3>")
  if app then
    app = string.gsub(app, "</?em>", "")
    app = string.gsub(app, "<br>", "")
    local _, _, url, title = string.find(app, "<a href=\"([^\"> ]*)\"[^>]*>([^<]*)<")
    if url == nil then
      -- Anchor without a usable href: keep the title only.
      _, _, title = string.find(app, "<a[^>]*>([^<]*)</a>")
    end
    appurl = url or ""
    appnam = title or ""
  end

  -- User: contents of <small>, with any nested tags stripped.
  local _, _, small = string.find(tmp, "<small>(.-)</small>")
  if small then
    user = string.gsub(small, "</?.->", "")
  end

  -- Description: everything between the heading and the rule, minus the
  -- trailing "Contact:" paragraph, which is split into name and e-mail.
  local _, _, body = string.find(tmp, "</h3>(.-)<hr>")
  if body then
    local _, _, cont = string.find(body, "<p> *Contact: *(.*)")
    if cont then
      body = string.gsub(body, "<p> *Contact:(.*)", "")
      cont = string.gsub(cont, "<p> *Contact: *", "")
      local _, _, addr, who = string.find(cont, "<a href=\"([^ \"]+)\"[^>]*>([^<]+)<")
      if who then
        name = who
        email = string.gsub(addr, "mailto:/?/?", "")
      else
        -- No link found: use the raw contact text as the name.
        name = cont
      end
    end
    desc = body
  end

  -- All values are escaped on output so that stray '&', '<' or '>' in the
  -- scraped HTML cannot produce a malformed XML document.
  print(" <use>")
  print("  <app>" .. xml_escape(appnam) .. "</app>")
  print("  <url>" .. xml_escape(appurl) .. "</url>")
  print("  <user>" .. xml_escape(user) .. "</user>")
  print("  <desc>" .. xml_escape(desc) .. "</desc>")
  print("  <contact>" .. xml_escape(name) .. "</contact>")
  print("  <email>" .. xml_escape(email) .. "</email>")
  print(" </use>")
end

print("</luauses>")

-- AlexandreErwinIttner


最近更改 · 偏好设置
编辑 · 历史记录
最后编辑于 2007 年 5 月 28 日下午 10:29 GMT (差异)