词条 | Module:Ancient Greek |
-- Ancient Greek transliteration module (Scribunto).
-- This section sets up the library aliases, the combining-diacritic
-- constants, the byte-level patterns and the per-character classification
-- table (`info`) that the rest of the module relies on.
local p = {}

local ustring = mw.ustring
local U = ustring.char
local find = ustring.find
local gsub = ustring.gsub
local decompose = ustring.toNFD
local lower = ustring.lower
local upper = ustring.upper
local str_gmatch = string.gmatch

-- Combining diacritics, built from their code points.
local macron = U(0x304)
local breve = U(0x306)
local rough = U(0x314)
local smooth = U(0x313)
local diaeresis = U(0x308)
local acute = U(0x301)
local grave = U(0x300)
local circumflex = U(0x342)
local Latin_circumflex = U(0x302)
local subscript = U(0x345)

-- Pattern: a macron, an optional diaeresis, then a Latin circumflex.
local macron_circumflex = macron .. diaeresis .. '?' .. Latin_circumflex

local is_velar = {
	['κ'] = true,
	['γ'] = true,
	['χ'] = true,
	['ξ'] = true,
}

-- Byte-oriented patterns for use with the plain `string` library.
-- The decimal escapes (\194 etc.) must be single-backslash so that they denote
-- raw bytes; a doubled backslash would put literal "\" and digit characters
-- into the character class instead.
-- One UTF-8 encoded character: a lead byte followed by continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
-- Two-byte sequences with lead byte 206 or 207,
-- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ
local basic_Greek = "[\206-\207][\128-\191]"

-- Maps a single character to its classification table.
local info = {}

-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel = { vowel = true, diacritic_seat = true }
local iota = { vowel = true, diacritic_seat = true, offglide = true }
local upsilon = { vowel = true, diacritic_seat = true, offglide = true }
-- Technically rho is only a seat for rough or smooth breathing.
local rho = { consonant = true, diacritic_seat = true }
local consonant = { consonant = true }
local diacritic = { diacritic = true }
-- Needed for equality comparisons.
local breathing = { diacritic = true }

-- Registers classification table `t` for every character in `characters`.
-- `characters` is either a UTF-8 string (split into individual characters)
-- or an array of single-character strings.
local function add_info(characters, t)
	if type(characters) == "string" then
		for character in str_gmatch(characters, UTF8_char) do
			info[character] = t
		end
	else
		for _, character in ipairs(characters) do
			info[character] = t
		end
	end
end

add_info({ macron, breve, diaeresis, acute, grave, circumflex, subscript }, diacritic)
add_info({ rough, smooth }, breathing)
add_info("ΑΕΗΟΩαεηοω", vowel)
add_info("Ιι", iota)
add_info("Υυ", upsilon)
add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant)
-- Registered after the consonants so that rho gets its own table.
add_info("Ρρ", rho)

-- Shared empty classification for characters that have no entry in `info`.
local not_recognized = {}
-- Characters without an explicit entry resolve to the shared empty
-- `not_recognized` table, so field tests such as `info[c].vowel` yield nil
-- instead of raising an "attempt to index nil" error.
setmetatable(info, { __index = function() return not_recognized end })

-- Wraps `str` in curly double quotes, for use in error messages.
local function quote(str)
	return "“" .. str .. "”"
end

-- Per-character transliteration table, applied to lowercased tokens.
-- Characters that have no entry are left unchanged by gsub.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",

	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	-- Breve and the breathings are dropped here (empty replacement); the rough
	-- breathing is re-added as "h" by p.transliterate.
	[breve] = "",
	[smooth] = "",
	[rough] = "",
	[circumflex] = Latin_circumflex,
	[subscript] = 'i',
}

--[=[
	Breaks text into meaningful "tokens", which are individual letters or
	diphthongs together with their diacritics. The text is NFD-decomposed
	first, so diacritics are handled as separate combining characters.

	(The original note says this is used by grc-accent and grc-pronunciation;
	in this module the function is local and is called by p.transliterate.)

	Returns an array of token strings.
	Raises an error when a diacritic follows a character that cannot carry it.
--]=]
local function tokenize(text)
	local tokens, prev_info = {}, {}
	local token_i = 1
	local prev
	for character in str_gmatch(decompose(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and (not (curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(
						tokens[token_i],
						"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)"
					)
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				if curr_info ~= breathing then
					-- Raise the message instead of returning it: the caller
					-- iterates over the result and expects a table.
					error(string.format("The character %s cannot have the accent %s on it.", prev, "◌" .. character))
				end
			else
				-- `prev` is nil when the text begins with a combining diacritic.
				error("The character " .. quote(prev or "") .. " cannot have a diacritic on it.")
			end
		elseif curr_info == rho then
			-- A rho stays in the current token only when that token starts
			-- with a rho and the previous character was a breathing mark.
			if prev and not (prev_info == breathing
					and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

-- Transliterates Ancient Greek `text` into Latin script.
-- `text` must be a string. Returns the transliteration as a string.
-- Errors raised by tokenize (misplaced diacritics) propagate to the caller.
function p.transliterate(text)
	text = decompose(text)

	--[[
	if text == '῾' then
		return 'h'
	end
	--]]

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	--]]
	text = gsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but
	-- semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Now read the tokens. ipairs guarantees that they are visited in order,
	-- which matters because the output is concatenated sequentially.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Substitute each character in the token for its transliteration.
		local translit = gsub(lower(token), '.', tt)

		if token == 'γ' and is_velar[tokens[i + 1]] then
			-- γ before a velar is transliterated as n.
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ is transliterated as rh.
			translit = 'rh'
		elseif find(token, '^[αΑ].*' .. subscript .. '$') then
			-- Add macron to ᾳ.
			translit = gsub(translit, '([aA])', '%1' .. macron)
		end

		if token:find(rough) then
			if find(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if find(translit, macron_circumflex) then
			translit = translit:gsub(macron, "")
		end

		-- Capitalize first character of transliteration.
		if token ~= lower(token) then
			translit = gsub(translit, "^.", upper)
		end

		table.insert(output, translit)
	end

	return table.concat(output)
end

-- Template entry point: transliterates the first positional argument, taken
-- from the #invoke frame or, failing that, from the parent (template) frame.
function p.translit(frame)
	local args = frame:getParent().args
	local text = frame.args[1] or args[1]
	local transliteration = p.transliterate(text)
	-- NOTE(review): the empty strings around the result look like the remains
	-- of wrapper markup that may have been lost — confirm against upstream.
	return '' .. transliteration .. ''
end

-- Template entry point that returns the bare transliteration of the first
-- positional argument (from the #invoke frame or the parent frame).
function p.bare_translit(frame)
	return p.transliterate(frame.args[1] or frame:getParent().args[1])
end

return p
随便看 |
|
开放百科全书收录14589846条英语、德语、日语等多语种百科知识,基本涵盖了大多数领域的百科知识,是一部内容自由、开放的电子版国际百科全书。