--
-- $Id: uhc2utf8.lua,v 1.4 2011/07/08 15:23:52 nomos Exp $
--

-- Declare the `uhc2utf8` module. Everything defined as a global below this
-- line ends up in the module table.
-- NOTE(review): module() is deprecated in Lua 5.2 and later -- confirm that the
-- targeted LuaTeX still provides it.
module('uhc2utf8', package.seeall)

-- Announce the module metadata to luatexbase.
do
    local module_info = {
        name        = "uhc2utf8",
        version     = 0.0,
        date        = "2011/07/08",
        author      = "Dohyun Kim",
        description = "convert input encoding between UHC (CP949) and UTF-8",
        license     = "public domain",
    }
    luatexbase.provides_module(module_info)
end

-- UHC -> Unicode mapping table, indexed by (UHC code - 0x8141).
-- A table of the same name already reachable from this environment is
-- reused; otherwise the table is loaded from ksc5601.lua through kpathsea.
local t_uhc2ucs = t_uhc2ucs
if not t_uhc2ucs then
    local mapfile = kpse.find_file('ksc5601.lua')
    -- dofile(nil) would execute a chunk read from standard input, so a
    -- missing mapping file must be reported instead of being passed on.
    if not mapfile then
        error("uhc2utf8: cannot find mapping table 'ksc5601.lua'")
    end
    t_uhc2ucs = dofile(mapfile)
end
local gsub = string.gsub
local byte = string.byte
local len = string.len
local format = string.format
local ugsub = unicode.utf8.gsub
local ubyte = unicode.utf8.byte
local uchar = unicode.utf8.char
local floor = math.floor
local isfile = lfs.isfile
local kpse_find_file = kpse.find_file

--- Tell whether `s` consists solely of ASCII and well-shaped UTF-8 sequences.
-- The string is scanned left to right, so every lead byte is checked against
-- the bytes that really follow it. (Stripping ASCII first and matching the
-- multi-byte forms afterwards is not equivalent: UHC trail bytes may be ASCII
-- letters, and removing them glues unrelated high bytes into something that
-- looks like UTF-8.)
-- Only the byte shape is verified: a lead byte 0xC2..0xF4 followed by the
-- matching number of 0x80..0xBF bytes. Overlong forms and surrogates are not
-- rejected.
-- @param s  string to inspect
-- @return true when no byte contradicts UTF-8, false otherwise
local function is_utf8 (s)
    -- jump straight to the next non-ASCII byte; ASCII runs need no checking
    local pos = string.find(s, "[\128-\255]")
    while pos do
        local lead = byte(s, pos)
        local trailing
        if lead >= 194 and lead <= 223 then
            trailing = 1
        elseif lead >= 224 and lead <= 239 then
            trailing = 2
        elseif lead >= 240 and lead <= 244 then
            trailing = 3
        else
            -- stray continuation byte or a byte that never starts a sequence
            return false
        end
        for i = pos + 1, pos + trailing do
            local c = byte(s, i)
            -- byte() yields nil past the end: truncated sequence
            if not c or c < 128 or c > 191 then return false end
        end
        pos = string.find(s, "[\128-\255]", pos + trailing + 1)
    end
    return true
end

--- Input-buffer filter converting a UHC (CP949) encoded line to UTF-8.
-- A line that already is UTF-8 (pure ASCII included) is returned untouched.
-- Otherwise every lead/trail byte pair with an entry in t_uhc2ucs is
-- replaced by the UTF-8 form of the mapped code point; pairs without an
-- entry are left as they are.
-- @param buffer  the input line, or nil
-- @return the (possibly converted) line; nothing when buffer is nil/false
local uhc_to_utf8 = function(buffer)
    if not buffer then return end
    if is_utf8(buffer) then return buffer end
    -- now convert to utf8
    buffer = gsub(buffer, "([\129-\253])([\65-\254])",
    function(a, b)
        a, b = byte(a), byte(b)
        -- the table is indexed relative to the first UHC code, 0x8141
        local utf = t_uhc2ucs[a * 256 + b - 0x8141]
        -- returning nothing makes gsub keep the original two bytes
        if utf then return uchar(utf) end
    end)
    return buffer
end

-- true while the input-buffer filter is registered
local loaded = false

--- Register uhc_to_utf8 on the `process_input_buffer` callback.
-- Does nothing when the filter is already registered.
function startconvert ()
    if not loaded then
        -- the trailing 1 is presumably a priority understood by
        -- luatexbase.add_to_callback -- TODO confirm
        luatexbase.add_to_callback(
            'process_input_buffer',
            uhc_to_utf8,
            'luatexko-uhc2utf8',
            1
        )
        loaded = true
    end
end

--- Remove the UHC-to-UTF-8 filter from the `process_input_buffer` callback.
-- Does nothing when the filter is not currently registered.
function stopconvert ()
    if loaded then
        luatexbase.remove_from_callback(
            'process_input_buffer',
            'luatexko-uhc2utf8'
        )
        loaded = false
    end
end

--
-- Korean Windows stores file names in CP949 (UHC), so a reverse table
-- (Unicode code point -> UHC code) is built from the forward mapping.
--
local t_ucs2uhc = t_ucs2uhc or {}
for offset, ucs in pairs(t_uhc2ucs) do
    -- forward-table keys are offsets from 0x8141; store the full UHC code
    t_ucs2uhc[ucs] = offset + 0x8141
end

--- Re-encode the characters of a UTF-8 name as UHC (CP949) byte pairs.
-- Characters matched by the class below (00A1..FFE6) are looked up in
-- t_ucs2uhc; those without an entry are kept unchanged.
-- @param name  the name to convert, or nil
-- @return the converted name; nothing when name is nil/false
local function utf8_to_uhc (name)
    if name then
        local function encode (ch)
            local code = t_ucs2uhc[ubyte(ch)]
            if code then
                -- emit the high byte followed by the low byte of the code
                return format("%c%c", floor(code/256), code%256)
            end
            return ch
        end
        -- keep only the converted string, not the substitution count
        local converted = ugsub(name, "[\161-\239\191\166]", encode)
        return converted
    end
end

--- Look a file up with kpathsea, retrying with the UHC-encoded name.
-- The name is first searched as given. Only when that fails, and converting
-- the name to UHC actually yields a different string, is a second search
-- made with the converted name.
-- @param file  file name to look for
-- @param ...   extra arguments forwarded unchanged to kpse.find_file
-- @return the path found, or nil
local function uhc_find_file (file, ...)
    local found = kpse_find_file(file, ...)
    if found then return found end
    local uhc_name = utf8_to_uhc(file)
    -- An unchanged name (e.g. pure ASCII) has just been searched for;
    -- repeating the identical lookup would only double the cost of a miss.
    if not uhc_name or uhc_name == file then return nil end
    -- parentheses keep the result to a single value, as before
    return (kpse_find_file(uhc_name, ...))
end

-- true while the UHC-aware file finders are installed
local uhc_names_loaded = false

--- Install uhc_find_file as the file finder.
-- Registers it on the `find_read_file` and `find_image_file` callbacks and
-- replaces kpse.find_file with it. Does nothing when already installed.
function start_uhc_filename ()
    if not uhc_names_loaded then
        -- this callback is handed (id, name); only the name is looked up
        local function read_file_finder (id, name)
            return uhc_find_file(name)
        end
        luatexbase.add_to_callback(
            'find_read_file',
            read_file_finder,
            'luatexko-touhc-findreadfile'
        )
        luatexbase.add_to_callback(
            'find_image_file',
            uhc_find_file,
            'luatexko-touhc-findimagefile'
        )
        kpse.find_file = uhc_find_file
        uhc_names_loaded = true
    end
end

--- Uninstall the UHC-aware file finders.
-- Removes both callback registrations and puts the kpse.find_file captured
-- when this file was loaded back in place. Does nothing when the finders
-- are not installed.
function stop_uhc_filename ()
    if uhc_names_loaded then
        luatexbase.remove_from_callback(
            'find_read_file',
            'luatexko-touhc-findreadfile'
        )
        luatexbase.remove_from_callback(
            'find_image_file',
            'luatexko-touhc-findimagefile'
        )
        kpse.find_file = kpse_find_file
        uhc_names_loaded = false
    end
end
