请输入您要查询的百科知识:

 

词条 Module:User:Awesomemeeos/farsi
释义

-- Module table; the functions attached to it are what the module returns.
local export = {}

-- Local aliases for the Unicode-aware string functions of the Scribunto
-- `mw.ustring` library.
local U = mw.ustring.char

-- NOTE(review): `match` is not referenced by the code visible in this module.
local match = mw.ustring.match

local gsub = mw.ustring.gsub

-- Arabic-script combining marks, built from their code points so that they
-- can be concatenated into patterns without being typed as bare combining
-- characters.

local fatHatan = U(0x64B) -- U+064B ARABIC FATHATAN (tanvin)

local fathe = U(0x64E) -- U+064E ARABIC FATHA; also zabar

local zamme = U(0x64F) -- U+064F ARABIC DAMMA; also pish

local kasre = U(0x650) -- U+0650 ARABIC KASRA; also zir

local tashdid = U(0x651) -- U+0651 ARABIC SHADDA; also called shadda

local jazm = U(0x652) -- U+0652 ARABIC SUKUN; also sokun

local supAlef = U(0x670) -- U+0670 ARABIC LETTER SUPERSCRIPT ALEF

-- Character-by-character transliteration table.
-- Keys are single Arabic-script characters (or the diacritic constants
-- declared above); values are their Latin replacements. A character with no
-- entry here is left unchanged by the final lookup pass in `export.result`.
-- Every entry sits on its own line because right-to-left characters packed
-- onto one line display confusingly in most editors.
local mapping = {
	-- consonants and long vowels
	["ا"] = "â",
	["آ"] = "â",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "u",
	["ه"] = "h",
	["ی"] = "i",

	-- ayn and the hamza forms all become an apostrophe
	["ع"] = "’",
	["ء"] = "’",
	["ئ"] = "’",
	["ؤ"] = "’",
	["أ"] = "’",

	-- he with yeh above (in ezâfe)
	["ۀ"] = "-ye",

	-- diacritics
	[fathe] = "a",
	[kasre] = "e",
	[zamme] = "o",
	[jazm] = "", -- marks the absence of a vowel
	[fatHatan] = "n",

	-- ZWNJ (zero-width non-joiner) is shown as a hyphen
	[U(0x200C)] = "-",

	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "llâh",

	-- kashida (elongation stroke) has no sound
	["ـ"] = "",

	-- Persian numerals
	["۰"] = "0",
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",

	-- Arabic variants of the numerals
	["٠"] = "0",
	["١"] = "1",
	["٢"] = "2",
	["٣"] = "3",
	["٤"] = "4",
	["٥"] = "5",
	["٦"] = "6",
	["٧"] = "7",
	["٨"] = "8",
	["٩"] = "9",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- opening quotation mark
	["»"] = "”", -- closing quotation mark
	["٪"] = "%", -- percent sign
	["؉"] = "‰", -- per mille sign
	["٫"] = ".", -- decimal separator
	["٬"] = ",", -- thousands separator
}

--- Transliterate vocalised Persian text into Latin script.
--
-- The text is rewritten by an ordered series of substitutions (the order
-- matters: later rules rely on the output of earlier ones), then every
-- remaining character is looked up in `mapping`.
--
-- @param text  Either the Persian text itself, or a table with an `args`
--              field (presumably a Scribunto frame - confirm against the
--              callers), in which case `args[1]` is used as the text.
-- @return      The transliterated string.
-- Raises an error when no text is supplied.
function export.result(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	if text == nil then
		-- Previously this surfaced as "attempt to concatenate a nil value".
		error("no text to transliterate was supplied", 2)
	end

	local short_vowels = fathe .. kasre .. zamme

	-- Pad with one space on each side so that the word-initial and
	-- word-final rules below can anchor on " " at the edges of the input too.
	text = " " .. text .. " "

	-- A short vowel written before the tashdid is moved after it, so the
	-- tashdid directly follows the character it doubles.
	text = gsub(text, "([" .. short_vowels .. "])" .. tashdid, tashdid .. "%1")
	-- Tashdid doubles the character it follows.
	text = gsub(text, "(.)" .. tashdid, "%1%1")

	-- Alef carrying fatHatan: the alef becomes fathe, giving "an" after mapping.
	text = gsub(text, "ا" .. fatHatan, fathe .. fatHatan)
	-- Alef maksura with a superscript alef is treated as a plain alef.
	text = gsub(text, "ى" .. supAlef, "ا")

	-- Word-final he after an explicit fathe or kasre is dropped.
	text = gsub(text, "([" .. fathe .. kasre .. "])ه ", "%1 ")
	-- Word-final he after an unvocalised character is read as a final "e".
	-- Alef and alef-madda are excluded: after them the he is a real consonant
	-- (e.g. راه is "râh"), so it is left for `mapping` to turn into "h".
	text = gsub(text, "([^" .. short_vowels .. "اآ])ه ", "%1" .. kasre .. " ")

	-- A kasre before ye, or a zamme before vâv, is dropped so that only the
	-- long-vowel letter remains.
	text = gsub(text, kasre .. "ی", "ی")
	text = gsub(text, zamme .. "و", "و")

	-- Word-initial alef followed by vâv / ye / a short vowel: the alef itself
	-- is not transliterated, only the vowel is kept.
	text = gsub(text, " او", " u")
	text = gsub(text, " ای", " i")
	text = gsub(text, " ا([" .. short_vowels .. "])", " %1")

	-- Vâv and ye directly before a vowel letter or short vowel are consonants.
	text = gsub(text, "و([اوی" .. short_vowels .. "])", "v%1")
	text = gsub(text, "ی([اوی" .. short_vowels .. "])", "y%1")

	-- Disabled rule kept from the original:
	-- text = gsub(text, fathe .. "و", fathe .. "u")

	-- fathe + ye is rewritten to kasre + ye; together with the next rule
	-- this yields "ey".
	text = gsub(text, fathe .. "ی", kasre .. "ی")
	-- Ye after a vowel letter, kasre or zamme is the consonant "y".
	text = gsub(text, "([اوی" .. kasre .. zamme .. "])ی", "%1y")

	-- Map every remaining character; characters without an entry are kept.
	text = gsub(text, ".", mapping)

	-- A word-final "u" directly after a Latin vowel is the consonant "v".
	-- (Original author's note: better algorithms are needed to distinguish
	-- vâv and ye between a consonant and a vowel.)
	text = gsub(text, "([aâeiu])u ", "%1v ")

	-- Strip the two padding spaces added at the top. Every rule above that
	-- consumes a boundary space writes it back, and `mapping` has no entry
	-- for the space, so the first and last bytes are still those spaces.
	return text:sub(2, -2)
end

return export

随便看

 

开放百科全书收录14589846条英语、德语、日语等多语种百科知识,基本涵盖了大多数领域的百科知识,是一部内容自由、开放的电子版国际百科全书。

 

Copyright © 2023 OENC.NET All Rights Reserved
京ICP备2021023879号 更新时间:2024/11/10 17:10:47