“Module:Ancient Greek”的意思、由来-开放百科全书

词条

Module:Ancient Greek

释义

-- Export table; it is returned at the end of the module.
local p = {}

-- Local aliases for the library functions used throughout the module.
local ustring = mw.ustring
local U, find, gsub = ustring.char, ustring.find, ustring.gsub
local decompose = ustring.toNFD
local lower, upper = ustring.lower, ustring.upper
local str_gmatch = string.gmatch

-- Combining diacritics, each built from its code point.
local macron, breve = U(0x304), U(0x306)
local rough, smooth = U(0x314), U(0x313)
local diaeresis = U(0x308)
local acute, grave = U(0x301), U(0x300)
-- `circumflex` is the mark found in the Greek input; `Latin_circumflex` is
-- the mark written in the transliteration output.
local circumflex, Latin_circumflex = U(0x342), U(0x302)
local subscript = U(0x345)

-- Pattern: a macron, an optional diaeresis, then a Latin circumflex.
local macron_circumflex = macron .. diaeresis .. '?' .. Latin_circumflex

-- Letters before which a gamma is transliterated as "n".
local is_velar = {
	['κ'] = true,
	['γ'] = true,
	['χ'] = true,
	['ξ'] = true,
}

-- Byte-oriented Lua pattern matching one UTF-8 encoded character: a lead byte
-- (NUL, 1-127 or 194-244) followed by any number of continuation bytes
-- (128-191). The escapes must be single-backslash decimal escapes; with a
-- doubled backslash the class would contain literal backslashes and digits
-- and no multi-byte character would ever match.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

-- Byte-oriented Lua pattern matching a two-byte sequence whose lead byte is
-- 206 or 207.
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ

-- Maps a single character to the category table describing it.
local info = {}

-- The category tables are shared among different characters so that they
-- can be compared for equality when needed, and to use less space.

local vowel = {
	vowel = true,
	diacritic_seat = true,
}

-- iota and upsilon carry the same fields but are deliberately separate
-- tables, so that identity comparison can tell them apart.
local iota = {
	vowel = true,
	diacritic_seat = true,
	offglide = true,
}

local upsilon = {
	vowel = true,
	diacritic_seat = true,
	offglide = true,
}

-- Technically rho is only a seat for rough or smooth breathing.
local rho = {
	consonant = true,
	diacritic_seat = true,
}

local consonant = {
	consonant = true,
}

local diacritic = {
	diacritic = true,
}

-- A separate table from `diacritic`: needed for equality comparisons.
local breathing = {
	diacritic = true,
}

--- Associate a category table with each of the given characters in `info`.
-- @param characters Either a string, which is split into characters with
--   the `UTF8_char` pattern, or an array of single-character strings.
-- @param t The category table stored for every one of those characters.
local function add_info(characters, t)
	if type(characters) == "string" then
		for character in string.gmatch(characters, UTF8_char) do
			info[character] = t
		end
	else
		for _, character in ipairs(characters) do
			info[character] = t
		end
	end
end

-- Register the category of every character the module knows about.
-- Entries are applied in order, so the later "Ρρ" entry replaces the
-- consonant category that the long consonant string assigns to rho.
do
	local registrations = {
		{ { macron, breve, diaeresis, acute, grave, circumflex, subscript }, diacritic },
		{ { rough, smooth }, breathing },
		{ "ΑΕΗΟΩαεηοω", vowel },
		{ "Ιι", iota },
		{ "Υυ", upsilon },
		{ "ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant },
		{ "Ρρ", rho },
	}
	for _, registration in ipairs(registrations) do
		add_info(registration[1], registration[2])
	end
end

-- Looking up any unregistered character yields this shared empty table, so
-- callers can index the result's fields without a nil check.
local not_recognized = {}

local function unrecognized_character()
	return not_recognized
end

setmetatable(info, { __index = unrecognized_character })

--- Wrap a string in curly double quotation marks.
-- @param str The text to quote.
-- @return The text preceded by “ and followed by ”.
local function quote(str)
	local opening, closing = "“", "”"
	return opening .. str .. closing
end

-- Transliteration table: maps one (decomposed, lowercase) Greek character
-- to its Latin replacement. Characters with no entry are left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",

	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	-- Breve and the breathings are deleted here; the "h" for a rough
	-- breathing is added separately by the transliteration routine.
	[breve] = '',
	[smooth] = '',
	[rough] = '',
	[circumflex] = Latin_circumflex,
	[subscript] = 'i',
}

--[=[
	This breaks a word into meaningful "tokens", which are
	individual letters or diphthongs with their diacritics.
	Used by grc-accent and grc-pronunciation.

	The text is decomposed before being scanned, so every diacritic is seen
	as a separate character following its letter.

	Returns an array of token strings. Raises an error when a diacritic
	follows a character that cannot carry it.
--]=]
local function tokenize(text)
	local tokens = {}
	-- prev_info starts as an empty table so its fields can be read before
	-- any character has been seen.
	local prev_info = {}
	local token_i = 1
	local prev

	for character in str_gmatch(decompose(text), UTF8_char) do
		local curr_info = info[character]

		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and (not (curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(
						tokens[token_i],
						"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)"
					)
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				-- Rho may only carry a breathing mark. Raise instead of
				-- returning the message: the caller iterates the result as
				-- a table, so a returned string would only fail later with
				-- an unrelated error.
				if curr_info ~= breathing then
					error(string.format("The character %s cannot have the accent %s on it.", prev, "◌" .. character))
				end
			else
				-- prev is nil when the diacritic is the first character.
				error("The character " .. quote(prev or "") .. " cannot have a diacritic on it.")
			end
		elseif curr_info == rho then
			-- A rho stays in the current token only when that token is a
			-- rho immediately followed by a breathing mark.
			if prev and not (prev_info == breathing and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			-- Any other character starts a token of its own.
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end

		prev = character
		prev_info = curr_info
	end

	return tokens
end

--- Transliterate Greek text into Latin characters.
-- The text is decomposed, punctuation is normalized, the result is split
-- into tokens, and each token is converted with the `tt` table plus a few
-- context-dependent adjustments.
-- @param text The text to transliterate.
-- @return The transliteration as a single string.
function p.transliterate(text)
	text = decompose(text)

	--[[
	if text == '῾' then
		return 'h'
	end
	--]]

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	--]]
	text = gsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Now read the tokens. ipairs is used because the output order must
	-- follow the token order, which pairs does not guarantee.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Substitute each character in the token for its transliteration.
		local translit = gsub(lower(token), '.', tt)

		if token == 'γ' and is_velar[tokens[i + 1]] then
			-- γ before a velar should be n
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be rh
			translit = 'rh'
		elseif find(token, '^[αΑ].*' .. subscript .. '$') then
			-- add macron to ᾳ
			translit = gsub(translit, '([aA])', '%1' .. macron)
		end

		-- Rough breathing: "h" follows a rho but precedes a vowel.
		if token:find(rough) then
			if find(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if find(translit, macron_circumflex) then
			translit = translit:gsub(macron, '')
		end

		-- Capitalize first character of transliteration.
		if token ~= lower(token) then
			translit = gsub(translit, "^.", upper)
		end

		table.insert(output, translit)
	end

	return table.concat(output)
end

--- Template entry point: transliterate the first argument.
-- The argument is taken from the invoking frame, falling back to the
-- parent frame's first argument.
-- @param frame The frame object supplied by the caller.
-- @return The transliteration, concatenated between two empty strings.
function p.translit(frame)
	-- The parent arguments are fetched up front, before the frame's own
	-- argument is inspected.
	local parent_args = frame:getParent().args
	local source_text = frame.args[1]
	if not source_text then
		source_text = parent_args[1]
	end

	-- NOTE(review): the two empty strings look like wrapper markup that may
	-- have been lost from this copy of the module; confirm against the
	-- upstream source.
	return '' .. p.transliterate(source_text) .. ''
end

--- Template entry point: transliterate the first argument and return the
-- result of p.transliterate unchanged.
-- The parent frame is consulted only when the invoking frame has no first
-- argument.
-- @param frame The frame object supplied by the caller.
function p.bare_translit(frame)
	local source_text = frame.args[1]
	if not source_text then
		source_text = frame:getParent().args[1]
	end
	return p.transliterate(source_text)
end

return p

随便看

开放百科全书收录14589846条英语、德语、日语等多语种百科知识，基本涵盖了大多数领域的百科知识，是一部内容自由、开放的电子版国际百科全书。