--
-- $Id: hangulnormalize.lua,v 1.4 2011/07/20 05:11:25 nomos Exp $
--

-- Declare this chunk as the Lua 5.1-style module 'hangulnormalize'.
-- package.seeall keeps the ordinary globals (luatexbase, kpse, unicode, ...)
-- readable from inside the module; the non-local functions defined further
-- down become fields of the module table.
module('hangulnormalize', package.seeall)

-- Register the module's identification data with luatexbase.
luatexbase.provides_module({
    name	= "hangulnormalize",
    version	= 0.0,
    date	= "2011/06/30",
    author	= "Dohyun Kim",
    description	= "compose/decompose between jamo and syllable.",
    license	= "public domain",
})

-- Character-class patterns for unicode.utf8.gsub, written as decimal
-- escapes of the UTF-8 encoded range endpoints.
-- cho:   U+1100..U+1112 (leading consonants)
local cho   = "[\225\132\128-\225\132\146]"
-- jung:  U+1161..U+1175 (vowels)
local jung  = "[\225\133\161-\225\133\181]"
-- jong:  U+11A8..U+11C2 (trailing consonants that syllable2jamo can compose)
local jong  = "[\225\134\168-\225\135\130]"
-- ojong: U+11C3..U+11FF and U+D7CB..U+D7FB (trailing consonants outside the
-- composable range; a cho+jung pair followed by one of these is left as jamo)
local ojong = "[\225\135\131-\225\135\191\237\159\139-\237\159\187]"
-- compathanja: U+F900..U+FA0B (CJK compatibility ideographs)
local compathanja = "[\239\164\128-\239\168\139]"
-- Value returned by compathanjatohanja.lua, located through kpse. It is
-- indexed by code point in hanjanormalize and yields a replacement code point.
local chanjatohanja = dofile(kpse.find_file('compathanjatohanja.lua'))

-- Local shortcuts to the UTF-8 aware string functions.
local gsub = unicode.utf8.gsub
local byte = unicode.utf8.byte
local char = unicode.utf8.char

-- Build one precomposed Hangul syllable from conjoining jamo.
-- NOTE: despite its name, this maps jamo -> syllable (it is the composing
-- direction and is used as the gsub replacement in hangulcompose).
--   l: leading consonant, one character in U+1100..
--   v: vowel, one character in U+1161..
--   t: optional trailing consonant, one character in U+11A8..; nil when the
--      syllable has no final consonant
-- Returns the syllable as a UTF-8 string:
--   U+AC00 + (lindex * 21 + vindex) * 28 + tindex
local syllable2jamo = function(l,v,t)
    local lindex = byte(l) - 0x1100
    local vindex = byte(v) - 0x1161
    -- 0x11a7 is one below the first trailing consonant, so tindex 0 is
    -- reserved for "no trailing consonant".
    local tindex = 0
    if t then
        tindex = byte(t) - 0x11a7
    end
    return char(0xac00 + (lindex * 21 + vindex) * 28 + tindex)
end

-- Decompose every precomposed Hangul syllable (U+AC00..U+D7A3) found in
-- `buffer` into conjoining jamo: leading consonant + vowel, plus a trailing
-- consonant when the syllable has one.
--   buffer: UTF-8 string (one input line when used as a
--           process_input_buffer callback)
-- Returns the rewritten string only (the gsub match count is dropped).
local hanguldecompose = function(buffer)
    buffer = gsub(buffer, "[가-힣]", function(s)
        local sindex = byte(s) - 0xac00
        -- `/` is floating-point division, so the quotients must be floored
        -- explicitly: char() needs whole code points, and on Lua 5.3+ a
        -- non-integral float has no integer representation at all.
        local l = math.floor(sindex / (21 * 28)) + 0x1100
        local v = math.floor((sindex % (21 * 28)) / 28) + 0x1161
        -- 0x11a7 itself means "no trailing consonant".
        local t = sindex % 28 + 0x11a7
        if t > 0x11a7 then
            return char(l)..char(v)..char(t)
        end
        return char(l)..char(v)
    end)
    return buffer
end

-- Map a single character through the chanjatohanja table.
--   c: one UTF-8 character (hangulcompose passes a match of `compathanja`)
-- Returns the replacement character as a UTF-8 string when the table has an
-- entry for c's code point; otherwise returns the falsy lookup result, which
-- makes gsub keep the original character.
local function hanjanormalize(c)
    local mapped = chanjatohanja[byte(c)]
    if not mapped then
        return mapped
    end
    return char(mapped)
end

-- Normalize `buffer` towards precomposed form.
-- The passes run in this order, and the order matters:
--   1. decompose all precomposed syllables so the input is uniformly jamo;
--   2. compose cho+jung+jong triples;
--   3. put a \1 guard between cho and jung wherever the pair is followed by
--      an `ojong` character, so that pair is not composed in pass 4;
--   4. compose the remaining cho+jung pairs;
--   5. strip the \1 guards (any \1 already present in the input is removed
--      as well);
--   6. replace compatibility ideographs through hanjanormalize.
-- Returns the rewritten string only.
local hangulcompose = function(buffer)
    local lvt_pattern     = "("..cho..")("..jung..")("..jong..")"
    local guarded_pattern = "("..cho..")("..jung..ojong..")"
    local lv_pattern      = "("..cho..")("..jung..")"
    local hanja_pattern   = "("..compathanja..")"

    local text = hanguldecompose(buffer)
    text = gsub(text, lvt_pattern, syllable2jamo)
    text = gsub(text, guarded_pattern, "%1\1%2")
    text = gsub(text, lv_pattern, syllable2jamo)
    text = gsub(text, "\1", "")
    text = gsub(text, hanja_pattern, hanjanormalize)
    return text
end

-- Callback point and registration id shared by all three entry points.
local callback_name = 'process_input_buffer'
local callback_id   = 'luatexko-normalize'

-- True while one of this module's normalizers is registered on the callback.
local loaded = false

-- Unregister the currently installed normalizer, if there is one.
local function remove_normalizer()
    if loaded then
        luatexbase.remove_from_callback(callback_name, callback_id)
        loaded = false
    end
end

-- Install `handler` as the input-buffer normalizer, replacing any normalizer
-- this module registered earlier. The flag is set only after registration
-- returns, so a failed add_to_callback does not leave `loaded` claiming a
-- registration that does not exist.
local function install_normalizer(handler)
    remove_normalizer()
    luatexbase.add_to_callback(callback_name, handler, callback_id)
    loaded = true
end

-- Normalize input lines with hangulcompose.
function compose()
    install_normalizer(hangulcompose)
end

-- Normalize input lines with hanguldecompose.
function decompose()
    install_normalizer(hanguldecompose)
end

-- Stop normalizing input lines; does nothing when no normalizer is installed.
function unload()
    remove_normalizer()
end

