“Module:Sandbox/Erutuon”的意思、由来-开放百科全书

词条

Module:Sandbox/Erutuon

释义

local p = {}

local Unicode_data = require "Module:Unicode data/sandbox"

local fun = require "Module:fun"

local m_table = require "Module:TableTools"

--- Raise an error whose message is built with string.format.
-- May be called as errorf(level, fmt, ...) or as errorf(fmt, ...).
-- @param level  Either a number (the stack level, as for error(), counted
--               from the caller of errorf) or the format string itself.
-- @param ...    The format string (when level is a number) followed by the
--               values to be formatted.
local function errorf(level, ...)
	if type(level) == "number" then
		-- One level is added to skip over errorf itself.
		return error(string.format(...), level + 1)
	else -- level is actually the format string.
		return error(string.format(level, ...), 2)
	end
end

--- List the language codes used in {{lang|...}} and {{lang-...}} templates
-- in the wikitext of a page.
-- @param frame  Frame object; frame.args[1] is the page name
--               (defaults to "English language").
-- @return A comma-separated string of the codes that were found, or nothing
--         if the title could not be created or the page has no content.
function p.search_for_language_codes(frame)
	local page_name = frame.args[1] or "English language"

	local success, title_object = pcall(mw.title.new, page_name)
	if not (success and title_object) then
		-- mw.log is the documented logging function; format the message first.
		mw.log(string.format("Could not make title object for '%s'.", page_name))
		return
	end

	local content = title_object:getContent()
	if not content then
		-- getContent() yields nil when there is no page text to read.
		mw.log(string.format("Could not get content of '%s'.", page_name))
		return
	end

	-- Used as a set so that each code is listed once.
	local language_codes = {}

	for lang_template in content:gmatch "{{lang[^}]+" do
		local template_name = lang_template:match("{{([^|}]+)")
		local language_code

		if template_name == "lang" then
			-- {{lang|code|...}}: the code is the first parameter.
			language_code = lang_template:match "{{lang|([^|}]+)"
		elseif template_name:find "^lang-" then
			-- {{lang-code|...}}: the code is part of the template name.
			language_code = lang_template:match "{{lang-([^|}]+)"
		end

		if language_code then
			language_codes[language_code] = true
		end
	end

	return table.concat(m_table.keysToList(language_codes), ", ")
end

-- Metatable for the objects that hold the parsed subtags of a language tag.
-- Fields set on an object: "input" (array of tag segments, removed once
-- parsing is finished), the subtags named in "subtag_order", and, when
-- parsing failed, "error" and "invalid".
-- The "print_funcs" field read by pretty_print is not part of this
-- constructor; it has to be assigned to __index separately.
local parsed_subtags_mt = {
	__index = {
		-- Record an error on the object and finish it.
		-- "error_key" is a key of "error_messages".
		-- "index" is the ordinal of the subtag in which the error was found.
		throw = function (self, error_key, index)
			self.error = self.error_messages[error_key]
			-- The invalid part is everything from the offending segment on.
			self.invalid = table.concat(self.input, "-", index)
			return self:remove_unnecessary_fields()
		end,

		-- Drop the internal "input" field, regularize capitalization, run
		-- the validation checks, and return the object.
		remove_unnecessary_fields = function (self)
			-- Only useful internally.
			self.input = nil
			self:pretty_print()
			p.validate_lang_tag(self)
			return self
		end,

		-- Regularize capitalization of language subtags:
		-- ZH-LATN -> zh-Latn, FR-ca -> fr-CA
		pretty_print = function (self)
			for key, func in pairs(self.print_funcs) do
				if self[key] then
					self[key] = func(self[key])
				end
			end
			return self
		end,

		-- Re-create the original tag from the parsed subtags.
		get_tag = function (self)
			if self.tag then return self.tag end

			local tag = {}
			for _, subtag_name in ipairs(self.subtag_order) do
				local value = self[subtag_name]
				if value ~= nil then
					-- The "x" singleton introduces private-use subtags, so it
					-- is only emitted when such a subtag is actually present.
					if subtag_name == "private_use" then
						table.insert(tag, "x")
					end
					if type(value) == "table" then
						for _, subtag in ipairs(value) do
							table.insert(tag, subtag)
						end
					else
						table.insert(tag, value)
					end
				end
			end

			tag = table.concat(tag, "-")
			self.tag = tag -- Cache the result.
			return tag
		end,

		-- Order in which the subtags appear in a tag.
		subtag_order = {
			"language", "script", "region", "variant", "private_use"
		},

		-- Messages stored in the "error" field by throw.
		error_messages = {
			invalid_characters = "invalid characters",
			no_language = "no language subtag",
			invalid_subtag = "invalid subtag",
			invalid_private_use = "length of private-use subtag out of range",
			empty_private_use = "empty private-use subtag",
		},
	},
}

-- Join two pieces of a word so that the first piece is uppercase and the
-- second piece is lowercase.
local function initial_caps_helper(initial, rest)
	local head = string.upper(initial)
	local tail = string.lower(rest)
	return head .. tail
end

-- Lowercase a string, or each item of a table via fun.map.
-- @param str  A string, or a table of strings.
-- @return The lowercased string, or the result of mapping string.lower
--         over the table.
local function lower_or_map_lower(str)
	if type(str) == "table" then
		return fun.map(string.lower, str)
	else
		return string.lower(str)
	end
end

-- Capitalization function for each subtag type, looked up by pretty_print.
do
	local print_funcs = {}

	print_funcs.language = string.lower

	-- Four-letter script codes get an initial capital: LATN -> Latn.
	-- Anything that is not exactly four letters is returned unchanged.
	function print_funcs.script(script_code)
		local capitalized = string.gsub(script_code, "^(%a)(%a%a%a)$", initial_caps_helper)
		return capitalized
	end

	print_funcs.region = string.upper
	print_funcs.variant = lower_or_map_lower
	print_funcs.private_use = lower_or_map_lower

	parsed_subtags_mt.__index.print_funcs = print_funcs
end

-- Make parsed_subtags_mt callable: parsed_subtags_mt(input) returns a new
-- table with an "input" field and parsed_subtags_mt as its metatable.
do
	local constructor_mt = {}

	function constructor_mt.__call(mt, input)
		local instance = { input = input }
		return setmetatable(instance, mt)
	end

	setmetatable(parsed_subtags_mt, constructor_mt)
end

-- An array of patterns for each subtag, and a "type" field for the name
-- of the subtag.
-- The patterns are checked in order, and any of the subtags can be skipped.
-- So, for example, the "language" subtag must precede the "script"
-- subtag, but a tag may contain a "language" subtag, no "script" subtag
-- and then a "region" subtag.
-- If the full list of subtags has been iterated over, the remaining subtags
-- must match the pattern for a private-use subtag, or the tag is invalid.

-- One entry per subtag type, in the order in which the subtags may appear.
-- The array part of an entry holds Lua patterns (matched against a whole
-- segment), "type" names the subtag, and "repeatable" marks a subtag type
-- that may occur more than once.
local subtag_info = { -- can be put in data module
	{ "%a%a%a?", "1%a+", type = "language" }, -- ll or lll; special case
	-- include extlang?
	{ "%a%a%a%a", type = "script" }, -- Ssss
	{ "%a%a", "%d%d%d", type = "region" }, -- rr, DDD
	{
		"%d%d%d%d", -- 4 digits
		"%w%w%w%w%w%w?%w?%w?", -- 5-8 alnum characters
		type = "variant",
		repeatable = true, -- There can be multiple variants.
	},
}

-- A previous draft, in Lang/sandbox:
-- https://en.wikipedia.org/w/index.php?oldid=812819217
-- Based on https://www.w3.org/International/articles/language-tags/.
-- Parse a language tag.
-- Returns nil if tag is not a string or empty.
-- Else returns a table with a map of subtag type to subtag for all subtags that
-- were parsed.
-- If there was an error, returns an "error" field with a description of the
-- error, and an "invalid" field with the suffix of the tag starting at the
-- index where the error occurred.
-- Does not recognize "extension" tags, such as those introduced by "u", as they
-- are not needed on Wikipedia. Does not recognize "grandfathered" tags.
-- Does not recognize extended language subtags, such as "zh-yue".
-- https://www.rfc-editor.org/rfc/rfc6067.txt, https://tools.ietf.org/html/bcp47
-- Only checks that the syntax is correct, not that the values are valid. For
-- instance, will accept non-existent language codes, like "zz".

--- Split a language tag on hyphens and sort the segments into subtags.
-- @param tag  The language tag.
-- @return nil if tag is not a non-empty string; otherwise a table (with
--         parsed_subtags_mt as metatable) holding the subtags that were
--         found and, on failure, the "error" and "invalid" fields.
function p.parse_IETF(tag)
	if type(tag) ~= "string" or tag == "" then
		return nil
	end

	-- This may contain the special fields "invalid", "error".
	-- "error" indicates why the
	-- tag is invalid (if applicable).
	-- All other fields are subtags, and they appear in the tag in the following
	-- order:
	-- "language", "script", "region", "variant", "private_use", "invalid"
	-- All these subtags can be strings or nil, while "variant" can also be an
	-- array of strings if more than one variant subtag was found.
	-- "invalid" is the portion of the tag after the last valid subtag (minus a
	-- hyphen).
	local segments = mw.text.split(tag, "-")
	local parsed_subtags = parsed_subtags_mt(segments)

	-- Language tags probably only contain ASCII alphabetic and numerical
	-- characters and hyphen-minus.
	if not tag:find "^[A-Za-z0-9-]+$" then
		return parsed_subtags:throw(
			"invalid_characters",
			fun.indexOf(
				function (segment)
					return segment:find "[^A-Za-z0-9-]"
				end,
				segments))
	end

	local subtag_i = 1 -- Index of current item in subtag_info.
	local segment_i = 1 -- Index of current segment.

	while segments[segment_i] and subtag_info[subtag_i] do
		local segment = segments[segment_i]
		local subtag_type

		while not subtag_type and subtag_info[subtag_i] do
			-- Check each pattern for the subtag type at "subtag_i" in "subtag_info".
			local cur_subtag = subtag_info[subtag_i]
			for _, pattern in ipairs(cur_subtag) do
				if segment:find("^" .. pattern .. "$") then
					subtag_type = cur_subtag.type
					-- There can be multiple "variant" subtags (and "extension"
					-- subtags, if those are added).
					if not cur_subtag.repeatable then
						subtag_i = subtag_i + 1
					end
					break
				end
			end

			if not subtag_type then -- No match; try next subtag.
				subtag_i = subtag_i + 1
			end
		end

		-- If language subtag has not been found, or the current segment has not
		-- been matched as a subtag, break the loop and check for
		-- a private-use subtag.
		if segment_i == 1 and subtag_type ~= "language" or not subtag_type then
			break
		end

		if parsed_subtags[subtag_type] then -- Create an array.
			if type(parsed_subtags[subtag_type]) == "string" then
				parsed_subtags[subtag_type] = { parsed_subtags[subtag_type] }
			end -- else table
			table.insert(parsed_subtags[subtag_type], segment)
		else
			parsed_subtags[subtag_type] = segment
		end

		segment_i = segment_i + 1
	end

	if segments[segment_i] then -- More segments to scan?
		-- Not all potential subtags were matched. Check for private-use subtags.
		-- https://tools.ietf.org/html/bcp47#section-2.2.7
		-- Private-use subtags consist of one or more sequences of 1 to 8
		-- alphanumeric characters preceded by "x-".
		-- Alphanumericity has already been checked.

		-- A tag must start with either a language subtag or a private-use subtag.
		-- If next segment is not "x", introducing a private-use subtag, there
		-- is no private-use subtag.
		if segments[segment_i]:lower() ~= "x" then
			if not parsed_subtags.language then
				return parsed_subtags:throw("no_language", 1)
			else
				return parsed_subtags:throw("invalid_subtag", segment_i)
			end
		elseif not segments[segment_i + 1] then
			return parsed_subtags:throw("empty_private_use", segment_i)
		end

		-- Check length of all segments after "x".
		for i = segment_i + 1, #segments do
			local length = #segments[i]
			if not (1 <= length and length <= 8) then
				return parsed_subtags:throw("invalid_private_use", segment_i)
			end
		end

		-- segment_i is the position of "x", so segment_i + 1 is the first
		-- private-use subtag and segment_i + 2 would be the second one.
		-- Counting from segment_i also works when the tag starts with "x",
		-- i.e. when no segment was matched before it.
		if not segments[segment_i + 2] then -- There is only one private-use subtag.
			parsed_subtags.private_use = segments[segment_i + 1]
		else
			parsed_subtags.private_use = {}
			for i = segment_i + 1, #segments do
				table.insert(parsed_subtags.private_use, segments[i])
			end
		end
	end

	return parsed_subtags:remove_unnecessary_fields()
end

local lang_name_table = mw.loadData "Module:Language/name/data"

local synonym_table = mw.loadData "Module:Lang/ISO 639 synonyms"

local lang_data = mw.loadData "Module:Lang/data"

--- Check the subtags of a parsed language tag against the data tables and
-- write a message to the debug log (mw.log) for each problem found.
-- @param parsed_subtags  An object as created by p.parse_IETF; its get_tag
--                        method is used to name the tag in the messages.
-- Returns nothing.
function p.validate_lang_tag(parsed_subtags)
	-- Already checked that the tag starts with a language subtag or a private-use subtag.
	-- Script code is initially capitalized, region code is uppercase,
	-- everything else is lowercase.

	-- Check existence of language tag.
	if parsed_subtags.language and
			not (lang_data.override[parsed_subtags.language]
			or lang_name_table.lang[parsed_subtags.language]) then
		mw.log("Invalid language code", parsed_subtags.language, "in", parsed_subtags:get_tag())
	end

	-- Check existence of script tag.
	if parsed_subtags.script then
		local lower_script = parsed_subtags.script:lower()
		if not lang_name_table.script[lower_script] then
			mw.log("Invalid script code", parsed_subtags.script, "in", parsed_subtags:get_tag())
		end

		-- Check that script tag is not marked as superfluous (because
		-- it is considered the default one for the language).
		if lang_name_table.suppressed[lower_script]
				and parsed_subtags.language
				and m_table.inArray(
					lang_name_table.suppressed[lower_script],
					parsed_subtags.language:lower()) then
			mw.log(parsed_subtags.script, "is suppressed with",
				parsed_subtags.language, "in", parsed_subtags:get_tag())
		end
	end

	-- Check existence of region code.
	if parsed_subtags.region and not lang_name_table.region[parsed_subtags.region:lower()] then
		mw.log("Invalid region code", parsed_subtags.region, "in", parsed_subtags:get_tag())
	end

	-- Check that variant code is valid, and that it can validly be used with the
	-- given combination of language, script, region, and variant.
	-- Check for duplicate variant subtags?
	if parsed_subtags.variant then
		local lower_tag = parsed_subtags:get_tag():lower()
		-- A single variant is stored as a string; wrap it so that both
		-- cases can be iterated over.
		local variants = type(parsed_subtags.variant) == "table"
			and parsed_subtags.variant or { parsed_subtags.variant }

		for _, variant in ipairs(variants) do
			if not lang_name_table.variant[variant] then
				mw.log("Invalid variant code", variant, "in", parsed_subtags:get_tag())
			else
				-- The part of the tag that precedes this variant.
				local prefix = lower_tag:match("^(.-)%-" .. variant)
				-- Check that at least one of the prefixes is found at the
				-- beginning of lower_tag.
				if not fun.some(function (allowed_prefix)
							return lower_tag:find(allowed_prefix, 1, true) == 1
						end,
						lang_name_table.variant[variant].prefixes) then
					mw.log("Variant tag", variant, "does not belong with prefix",
						prefix, "in", parsed_subtags:get_tag())
				end
			end
		end
	end

	-- Check that the private-use subtag is actually used by Wikipedia.
	-- get_tag() is called rather than reading the "tag" field, because that
	-- field is only filled in once get_tag() has run.
	if parsed_subtags.private_use and not lang_data.override[parsed_subtags:get_tag()] then
		mw.log("Invalid private-use subtag in", parsed_subtags:get_tag())
	end
end

-- A small string buffer: items are stored at integer keys and the count is
-- kept in the "n" field.
local output_mt = {}
output_mt.__index = output_mt

-- Concatenate the stored items; takes the same arguments as table.concat.
output_mt.join = table.concat

-- Append one item to the buffer.
function output_mt.insert(self, str)
	local next_index = self.n + 1
	self.n = next_index
	self[next_index] = str
end

-- also in Unicode data/documentation functions
-- Append the result of string.format(...) to the buffer.
function output_mt.insert_format(self, ...)
	local formatted = string.format(...)
	self:insert(formatted)
end

-- Create an empty buffer.
local function Output()
	local buffer = { n = 0 }
	return setmetatable(buffer, output_mt)
end

--- List, for every variant in "Module:Language/data/iana variants" that has
-- at least one prefix, the tags formed by joining each prefix and the
-- variant with a hyphen.
-- @param frame  Frame object (not used).
-- @return One line per variant, each a comma-separated list of tags.
function p.show(frame)
	local output = Output()

	for variant, data in m_table.sortedPairs(mw.loadData "Module:Language/data/iana variants") do
		if data.prefixes[1] then
			local tags = Output()
			for _, prefix in ipairs(data.prefixes) do
				tags:insert(prefix .. "-" .. variant)
			end
			output:insert(tags:join(", "))
		end
	end

	-- The separator is a single line break, written as an escape sequence
	-- because a quoted Lua string cannot span lines.
	return output:join("\n")
end

--- Append a readable list of the key-value pairs found in the COinS
-- metadata (the title attribute of the span with class "Z3988") of a
-- rendered reference.
-- @param frame  Frame object; frame.args[1] is the text of the reference.
-- @return The reference followed by "key: value" pairs sorted by key;
--         the reference unchanged if no COinS title is found; "" if no
--         reference was given.
function p.show_COinS(frame)
	local ref = frame.args[1]
	if not ref then
		return ""
	end

	-- NOTE(review): the tag name at the start of this pattern is assumed to
	-- be "span" (the pattern began with a stray "]*") -- confirm.
	local tag = ref:match('<span[^>]*class="Z3988"[^>]*>')
	local data = tag and tag:match('title="(.-)"')
	if not data then
		return ref
	end

	local vals = {}
	for item in mw.text.gsplit(data, "&") do
		local key, value = item:match("(.-)=(.*)")
		-- Items without "=" have no key and are skipped.
		if key then
			vals[key] = mw.uri.decode(value)
		end
	end

	return ref .. "\\" .. table.concat(
		fun.mapIter(
			function (value, key)
				return ("%s: %s"):format(key, value)
			end,
			m_table.sortedPairs(vals)),
		", ")
end

return p

随便看

开放百科全书收录14589846条英语、德语、日语等多语种百科知识，基本涵盖了大多数领域的百科知识，是一部内容自由、开放的电子版国际百科全书。