在 SciTE 中使用 Unicode

lua-users home
wiki

以下是在缓冲区中写入和读取 Unicode 字符的示例，特别是处理 UTF-8 编码的字符。如果您在第一个代码片段中看不到 Unicode 字符，请尝试使用 UTF-8 字符集查看本页面。


写入一些 Unicode 字符

-- -*- coding: utf-8 -*-
-- write some UTF-8 chars <[email protected]> 20061017 public domain
-- (see Markus Kuhn's UTF-8 and Unicode FAQ or RFC3629 for more info)
--- Open a new buffer and append sample non-ASCII text as UTF-8.
-- Each sample is appended twice: first as a literal (which is only correct
-- when this source file itself is saved as UTF-8) and then as decimal byte
-- escapes, which do not depend on the encoding of the source file.
-- Takes no arguments and returns nothing.
function UnicodeWriteSomething()
  -- áéïöü  C3 A1 C3 A9 C3 AF C3 B6 C3 BC
  -- 中文   E4 B8 AD E6 96 87
  -- open a new buffer and set encoding as UTF-8
  scite.Open("")
  editor.CodePage = SC_CP_UTF8
  -- string is in UTF-8
  editor:AppendText("áéïöü\n")
  editor:AppendText("中文\n")
  -- string is encoded as escaped sequences (same bytes as the literals above)
  editor:AppendText("\195\161\195\169\195\175\195\182\195\188\n")
  editor:AppendText("\228\184\173\230\150\135\n")
end


读取和写入 Unicode 值

以下函数有助于在缓冲区中读取和写入 UTF-8 字符。它允许最多 6 字节的字符序列来支持 UCS-4 范围。

-- -*- coding: utf-8 -*-
-- return value of UTF-8 character <[email protected]> 20061017 public domain
-- (see Markus Kuhn's UTF-8 and Unicode FAQ or RFC3629 for more info)
--- Decode the UTF-8 sequence that starts at a byte position in the editor.
-- Accepts lead bytes for sequences of 1 to 6 bytes. Overlong encodings are
-- not rejected.
-- @param pos  position of the lead byte, as indexed by editor.CharAt
-- @return the decoded character value
-- @return the position of the last byte of the sequence
-- @return the number of bytes in the sequence
-- Raises an error when the lead byte or a continuation byte is invalid.
function FromUTF8(pos)
  -- math.mod (Lua 5.0) was renamed math.fmod in Lua 5.1 and is absent from
  -- later versions; use whichever this interpreter provides.
  local mod = math.fmod or math.mod
  local function charat(p)
    -- negative values are folded into the 0..255 byte range
    local v = editor.CharAt[p]; if v < 0 then v = v + 256 end; return v
  end
  local v, c, n = 0, charat(pos), 1
  -- the lead byte determines the sequence length and carries the top bits
  if c < 128 then v = c
  elseif c < 192 then
    error("Byte values between 0x80 to 0xBF cannot start a multibyte sequence")
  elseif c < 224 then v = mod(c, 32); n = 2
  elseif c < 240 then v = mod(c, 16); n = 3
  elseif c < 248 then v = mod(c,  8); n = 4
  elseif c < 252 then v = mod(c,  4); n = 5
  elseif c < 254 then v = mod(c,  2); n = 6
  else
    error("Byte values between 0xFE and 0xFF cannot start a multibyte sequence")
  end
  -- each continuation byte contributes its low 6 bits
  for i = 2, n do
    pos = pos + 1; c = charat(pos)
    if c < 128 or c > 191 then
      error("Following bytes must have values between 0x80 and 0xBF")
    end
    v = v * 64 + mod(c, 64)
  end
  return v, pos, n
end

-- return UTF-8 sequence string <[email protected]> 20061017 public domain
-- (see Markus Kuhn's UTF-8 and Unicode FAQ or RFC3629 for more info)
--- Encode a character value as a UTF-8 byte string.
-- @param v  character value to encode; values above U+FFFF and the UTF-16
--           surrogate range U+D800..U+DFFF are rejected
-- @return the UTF-8 sequence as a string
-- @return the number of bytes in that sequence
-- Raises an error for rejected values.
function ToUTF8(v)
  local floor = math.floor
  -- math.mod (Lua 5.0) was renamed math.fmod in Lua 5.1 and is absent from
  -- later versions; use whichever this interpreter provides.
  local mod = math.fmod or math.mod
  local n, s, b = 1, "", 0
  -- delete this if your version of SciTE goes beyond UCS-2
  if v > 65535 then error("SciTE does not support codes above U+FFFF") end
  if v >= 55296 and v <= 57343 then
    error("failed to convert UTF-16 surrogate pairs to UTF-8")
  end
  -- n is the sequence length, b the marker bits of the lead byte; the
  -- branches for n >= 4 are only reachable once the limit above is removed
  if    v >= 67108864 then n = 6; b = 252
  elseif v >= 2097152 then n = 5; b = 248
  elseif v >=   65536 then n = 4; b = 240
  elseif v >=    2048 then n = 3; b = 224
  elseif v >=     128 then n = 2; b = 192
  end
  -- emit continuation bytes from the low 6 bits upward, prepending each
  for i = 2, n do
    local c = mod(v, 64); v = floor(v / 64)
    s = string.char(c + 128)..s
  end
  -- whatever remains of v goes into the lead byte
  s = string.char(v + b)..s
  return s, n
end

-- Demo for FromUTF8(): decodes the character at the caret position and
-- prints its code value. Only the first value returned by FromUTF8() is
-- used; the end position and byte count are dropped.
function Demo_FromUTF8()
  local code = FromUTF8(editor.CurrentPos)
  print("Character code: "..code)
end

-- Demo for ToUTF8(): appends two characters to the current buffer, built
-- from the code values U+4E2D and U+6587.
function Demo_ToUTF8()
  -- No "0x" prefix: with an explicit base, tonumber() on Lua 5.2+ rejects
  -- the prefix and returns nil. The extra parentheses keep only the string
  -- result of ToUTF8() so the byte count is not passed on to AppendText.
  editor:AppendText((ToUTF8(tonumber("4E2D", 16))))
  editor:AppendText((ToUTF8(tonumber("6587", 16))))
end


显示从 U+0000 到 U+FFFF 的字符代码

以下演示函数显示 Unicode 字符表。它需要上面的 ToUTF8() 函数。

-- -*- coding: utf-8 -*-
-- write out a UTF-16 table <[email protected]> 20061017 public domain
--- Write a table of the characters U+0000..U+FFFF into a new buffer.
-- Each row shows the starting code in decimal and hexadecimal followed by
-- 32 characters. Codes below 32 and the surrogate range U+D800..U+DFFF are
-- shown as "?". Relies on the global ToUTF8() function.
-- Takes no arguments and returns nothing.
function UTF16Table()
  scite.Open("")
  editor.CodePage = SC_CP_UTF8
  editor:AppendText("-*- coding: utf-8 -*-\n")
  editor:AppendText("   Dec ( Hex ) : 0123456789ABCDEF0123456789ABCDEF\n")
  editor:AppendText("-------------------------------------------------\n")
  for p = 0, 65535, 32 do
    -- local, so the row buffer does not leak into the global table;
    -- %04X zero-pads so the hex column reads 0x0020 rather than "0x  20"
    local ln = string.format("%6d (0x%04X): ", p, p)
    for q = p, p+31 do
      if q < 32 or (q >= 55296 and q <= 57343) then ln = ln.."?"
      else ln = ln..ToUTF8(q)
      end
    end
    ln = ln.."\n"
    editor:AppendText(ln)
  end
end

--KeinHongMan


最近更改 · 偏好设置
编辑 · 历史记录
上次编辑于 2006 年 10 月 17 日下午 6:26 GMT (差异)