Lompat ke isi

Modul:Text

Wikipedia Minangkabau - Lubuak aka tapian ilimu

Dokumentasi untuk modul ini dapat dibuat di Modul:Text/doc

local Text = { serial = "2024-06-05",
               suite  = "Text",
               item   = 29387871 }
--[=[
Text utilities
]=]
local Failsafe    = Text
local GlobalMod   = Text
local Patterns    = { }
local RangesLatin = false
local SeekQuote   = false



local foreignModule = function ( access, advanced, append, alt, alert )
    -- Fetch global module
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever, probably table;
    --     if loading failed and alert is not set, the second pcall()
    --     result (the error value) is returned as it is
    -- 2019-10-29
    local storage = access
    local finer = function ()
        -- Append the subpage part to the current page name, if any
        if append then
            storage = string.format( "%s/%s", storage, append )
        end
    end
    local fun, lucky, r, suited
    if advanced then
        fun = require
    else
        fun = mw.loadData
    end
    GlobalMod.globalModules = GlobalMod.globalModules or { }
    suited = GlobalMod.globalModules[ access ]
    if not suited then
        -- Nothing cached yet: try the local "Module:" page first
        finer()
        lucky, r = pcall( fun, "Module:" .. storage )
    end
    if not lucky then
        if not suited and
           type( alt ) == "number" and
           alt > 0 then
            -- Resolve the page title via the wikidata sitelink and
            -- remember the outcome (true marks a failed lookup)
            suited = string.format( "Q%d", alt )
            suited = mw.wikibase.getSitelink( suited )
            GlobalMod.globalModules[ access ] = suited or true
        end
        if type( suited ) == "string" then
            storage = suited
            finer()
            lucky, r = pcall( fun, storage )
        end
        if not lucky and alert then
            error( "Missing or invalid page: " .. storage, 0 )
        end
    end
    return r
end -- foreignModule()



local function factoryQuote()
    -- Create quote definitions
    -- Fills Text.quoteLang and Text.quoteType once, from the "quoting"
    -- subpage if that yields a table, and guarantees an "en" entry
    if not Text.quoteLang then
        local quoting = foreignModule( "Text",
                                       false,
                                       "quoting",
                                       Text.item )
        if type( quoting ) == "table" then
            Text.quoteLang = quoting.langs
            Text.quoteType = quoting.types
        end
        if type( Text.quoteLang ) ~= "table" then
            Text.quoteLang = { }
        end
        if type( Text.quoteType ) ~= "table" then
            Text.quoteType = { }
        end
        if type( Text.quoteLang.en ) ~= "string" then
            Text.quoteLang.en = "ld"
        end
        if type( Text.quoteType[ Text.quoteLang.en ] ) ~= "table" then
            -- Fallback: ldquo/rdquo for level 1, lsquo/rsquo for level 2
            Text.quoteType[ Text.quoteLang.en ] = { { 8220, 8221 },
                                                    { 8216, 8217 } }
        end
    end
end -- factoryQuote()



local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code
    --     advance  -- number, with level 1 or 2
    -- Returns: string; apply unchanged if no quote type is available
    local r = apply
    local quotes, suite
    factoryQuote()
    if alien then
        suite = mw.text.trim( alien )
        if suite == "" then
            suite = false
        else
            local s = Text.quoteLang[ suite ]
            if s then
                suite = s
            else
                -- Retry with the part before the first hyphen
                local slang = suite:match( "^(%l+)-" )
                if slang then
                    suite = Text.quoteLang[ slang ]
                end
                -- NOTE(review): an unknown code without hyphen is kept
                --               as it is and ends up in the mw.log()
                --               branch below, while an unknown
                --               hyphenated code falls back to "en";
                --               confirm whether that is intended
            end
        end
    end
    if not suite then
        suite = Text.quoteLang.en
    end
    quotes = Text.quoteType[ suite ]
    if quotes then
        local space
        if quotes[ 3 ] then
            -- A third entry requests non-breaking spaces inside the quotes
            space = "&#160;"
        else
            space = ""
        end
        quotes = quotes[ advance ]
        if quotes then
            r = mw.ustring.format( "%s%s%s%s%s",
                                   mw.ustring.char( quotes[ 1 ] ),
                                   space,
                                   apply,
                                   space,
                                   mw.ustring.char( quotes[ 2 ] ) )
        end
    else
        mw.log( "fiatQuote() " .. suite )
    end
    return r
end -- fiatQuote()



local function fetchItems( args, adapt )
    -- Collect trimmed non-empty values of all numeric keys
    -- Parameter:
    --     args   -- table with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings, in ascending key order
    local keys = { }
    local collect = { }
    local v
    for k in pairs( args ) do
        if type( k ) == "number" then
            table.insert( keys, k )
        end
    end -- for k
    -- pairs() gives no order guarantee; sort to get a stable result
    table.sort( keys )
    for i = 1, #keys do
        v = mw.text.trim( args[ keys[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            table.insert( collect, v )
        end
    end -- for i
    return collect
end -- fetchItems()



Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string
    local r
    if type( apply ) == "table" then
        local bad = { }
        local codes = { }
        local s
        for k, v in pairs( apply ) do
            -- s stays truthy for every rejected item
            s = type( v )
            if s == "number" then
                if v < 32 and v ~= 9 and v ~= 10 then
                    -- Below 32 only tab (9) and line feed (10) pass
                    v = tostring( v )
                else
                    v = math.floor( v )
                    s = false
                end
            elseif s ~= "string" then
                v = tostring( v )
            end
            if s then
                table.insert( bad, v )
            else
                table.insert( codes, v )
            end
        end -- for k, v
        if #bad == 0 then
            if #codes > 0 then
                r = mw.ustring.char( unpack( codes ) )
                if again then
                    if type( again ) == "number" then
                        local n = math.floor( again )
                        if n > 1 then
                            r = r:rep( n )
                        elseif n < 1 then
                            r = ""
                        end
                    else
                        s = "bad repetitions: " .. tostring( again )
                    end
                end
            end
        else
            s = "bad codepoints: " .. table.concat( bad, " " )
        end
        if s and not accept then
            r = tostring( mw.html.create( "span" )
                                 :addClass( "error" )
                                 :wikitext( s ) )
        end
    end
    return r or ""
end -- Text.char()



Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; items in ascending order of their numeric keys
    return table.concat( fetchItems( args, adapt ),  apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected
    local r
    if not Patterns.CJK then
        -- Character class [U+3400-U+9FFF U+20000-U+2B81F]
        Patterns.CJK = mw.ustring.char( 91,
                                        0x3400, 45, 0x9FFF,
                                        0x20000, 45, 0x2B81F,
                                        93 )
    end
    if mw.ustring.find( analyse, Patterns.CJK ) then
        r = true
    else
        r = false
    end
    return r
end -- Text.containsCJK()



Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local i = adjust:find( "<!--", 1, true )
    local r = adjust
    local j
    while i do
        j = r:find( "-->",  i + 3,  true )
        if j then
            -- Drop the entire comment, including its leading "<"
            r = r:sub( 1,  i - 1 )  ..  r:sub( j + 3 )
        else
            -- Unterminated comment: cut off everything from "<!--"
            r = r:sub( 1,  i - 1 )
        end
        i = r:find( "<!--", i, true )
    end -- "<!--"
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.+)'''", "%1" )
         :gsub( "''(.+)''", "%1" )
         :gsub( "&nbsp;", " " )
    return mw.text.unstrip( r )
end -- Text.getPlain()



Text.isLatinRange = function ( adjust )
    -- Are characters expected to be latin or symbols within latin texts?
    -- Precondition:
    --     adjust  -- string, or nil for initialization
    -- Returns: true, if valid for latin only;
    --          nil, if adjust is nil (initialization only)
    local r
    if not RangesLatin then
        RangesLatin = { { 0x07, 0x02AF },
                        { 0x1D6B, 0x1D9A },
                        { 0x1E00, 0x1EFF },
                        { 0x2002, 0x203A },
                        { 0x2190, 0x23BD } }
    end
    if not Patterns.Latin then
        -- Anchored character class built from all ranges
        local range
        Patterns.Latin = "^["
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            Patterns.Latin = Patterns.Latin ..
                             mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
        end -- for i
        Patterns.Latin = Patterns.Latin .. "]*$"
    end
    if adjust then
        if mw.ustring.match( adjust, Patterns.Latin ) then
            r = true
        else
            r = false
        end
    end
    return r
end -- Text.isLatinRange()



Text.isQuote = function ( ask )
    -- Is this character any quotation mark?
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is quotation mark
    local r
    if not SeekQuote then
        SeekQuote = mw.ustring.char(   34,   -- "
                                       39,   -- '
                                      171,   -- laquo
                                      187,   -- raquo
                                     8216,   -- lsquo
                                     8217,   -- rsquo
                                     8218,   -- sbquo
                                     8220,   -- ldquo
                                     8221,   -- rdquo
                                     8222,   -- bdquo
                                     8249,   -- lsaquo
                                     8250,   -- rsaquo
                                   0x300C,   -- CJK
                                   0x300D,   -- CJK
                                   0x300E,   -- CJK
                                   0x300F )  -- CJK
    end
    if ask == "" then
        r = false
    elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
        r = true
    else
        r = false
    end
    return r
end -- Text.isQuote()



Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; items in ascending order of their numeric keys
    return mw.text.listToText( fetchItems( args, adapt ) )
end -- Text.listToText()



Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local mode, slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No language given: page language, else content language
        local pageLang = mw.title.getCurrentTitle().pageLanguage
        if pageLang then
            slang = pageLang.code
        else
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    if advance == 2 then
        mode = 2
    else
        mode = 1
    end
    return fiatQuote( mw.text.trim( apply ), slang, mode )
end -- Text.quote()



Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply )
    local s = mw.ustring.sub( r, 1, 1 )
    if s ~= "" and not Text.isQuote( s ) then
        -- Last character; the end index must be omitted, otherwise
        -- the result is empty for any text longer than one character
        s = mw.ustring.sub( r, -1 )
        if not Text.isQuote( s ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local cleanup, decomposed
    if not Patterns.Combined then
        -- Character class of combining mark ranges
        Patterns.Combined = mw.ustring.char( 91,
                                             0x0300, 45, 0x036F,
                                             0x1AB0, 45, 0x1AFF,
                                             0x1DC0, 45, 0x1DFF,
                                             0xFE20, 45, 0xFE2F,
                                             93 )
    end
    -- Decompose, strip the combining marks, compose again
    decomposed = mw.ustring.toNFD( adjust )
    cleanup    = mw.ustring.gsub( decomposed, Patterns.Combined, "" )
    return mw.ustring.toNFC( cleanup )
end -- Text.removeDiacritics()



Text.removeWhitespace = function ( adjust )
    -- Remove all whitespace, or replace with ASCII space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; modified
    local r = mw.text.decode( adjust )
    if r:find( "&", 1, true ) then
        r = r:gsub( "&lrm;", "" )
             :gsub( "&rlm;", "" )
             :gsub( "&zwj;", "" )
             :gsub( "&zwnj;", "" )
             :gsub( "&thinsp;", " " )
             :gsub( "&ensp;", " " )
             :gsub( "&emsp;", " " )
    end
    if not Patterns.Whitespace then
        -- Both patterns need to be one single character class each;
        -- a plain sequence of atoms would only match all those
        -- characters in a row
        Patterns.Whitespace = mw.ustring.char( 91,
                                               0x00AD,
                                               0x200C, 45, 0x200F,
                                               0x2028, 45, 0x202E,
                                               0x205F,
                                               0x2060,
                                               93 )
        Patterns.Space = mw.ustring.char( 91,
                                          0x00A0,
                                          0x1680,
                                          0x2000, 45, 0x200A,
                                          0x202F,
                                          0x205F,
                                          0x3000,
                                          0x303F,
                                          93 )
    end
    -- Removal runs first; U+205F is listed in both classes and
    -- therefore gets removed rather than replaced
    r = mw.ustring.gsub( r, Patterns.Whitespace, "" )
    r = mw.ustring.gsub( r, Patterns.Space, " " )
    return mw.text.trim( r )
end -- Text.removeWhitespace()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    local r = mw.text.trim( analyse )
    local lt = r:find( "<", 1, true )
    if not Patterns.Terminated then
        -- Terminator class, followed by optional closing quotes
        -- or brackets up to the end of the string
        Patterns.Terminated = mw.ustring.char( 91,
                                               0x3002,
                                               0xFF01,
                                               0xFF0E,
                                               0xFF1F )
                              .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    if lt then
        r = r:gsub( "</span>", "" )
    end
    if mw.ustring.find( r, Patterns.Terminated ) then
        r = true
    elseif lt then
        -- Text ending in a <bdi> with explicit direction counts as
        -- terminated if that direction differs from the wiki content
        local s = "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>"
        s = r:match( s )
        if s then
            if mw.language.getContentLanguage():isRTL() then
                r = ( s == "ltr" )
            else
                r = ( s == "rtl" )
            end
        else
            r = false
        end
    else
        r = false
    end
    return r
end -- Text.sentenceTerminated()



Text.tokenWords = function ( adjust )
    -- Split text in words of digits or letters
    -- Precondition:
    --     adjust  -- string
    -- Returns: string, with any run of punctuation or whitespace
    --          replaced by one ASCII space
    local r = mw.uri.decode( adjust, "WIKI" )
    if r:find( "&", 1, true ) then
        r = mw.text.decode( r )
    end
    r = Text.removeWhitespace( r )
    r = mw.ustring.gsub( r, "[%p%s]+", " " )
    return r
end -- Text.tokenWords()



Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    -- The leading space makes the very first word match "%W%l" as well
    local r = " " .. adjust
    local i = 1
    local c, j, m
    if adjust:find( "&" ) then
        -- Shield some named entities as numeric ones,
        -- so their names are not capitalized
        r = r:gsub( "&amp;", "&#38;" )
             :gsub( "&lt;", "&#60;" )
             :gsub( "&gt;", "&#62;" )
             :gsub( "&nbsp;", "&#160;" )
             :gsub( "&thinsp;", "&#8201;" )
             :gsub( "&zwnj;", "&#8204;" )
             :gsub( "&zwj;", "&#8205;" )
             :gsub( "&lrm;", "&#8206;" )
             :gsub( "&rlm;", "&#8207;" )
        m = true
    end
    while i do
        i = mw.ustring.find( r, "%W%l", i )
        if i then
            j = i + 1
            c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, i ),
                               c,
                               mw.ustring.sub( r,  i + 2 ) )
            i = j
        end
    end -- while i
    r = r:sub( 2 )
    if m then
        -- Restore the shielded entities; repair hexadecimal markers
        -- which have been capitalized by the loop above
        r = r:gsub( "&#38;", "&amp;" )
             :gsub( "&#60;", "&lt;" )
             :gsub( "&#62;", "&gt;" )
             :gsub( "&#160;", "&nbsp;" )
             :gsub( "&#8201;", "&thinsp;" )
             :gsub( "&#8204;", "&zwnj;" )
             :gsub( "&#8205;", "&zwj;" )
             :gsub( "&#8206;", "&lrm;" )
             :gsub( "&#8207;", "&rlm;" )
             :gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()



Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    -- Initialization only: makes RangesLatin and Patterns.Latin available
    Text.isLatinRange()
    if mw.ustring.match( adjust, Patterns.Latin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local e = mw.html.create( "span" )
        local j = false    -- start index of open non-latin run, or false
        local k = 1        -- start index of text not yet copied
        local m = false    -- index expected right after a single greek
                           -- letter, or false
        local n = mw.ustring.len( adjust )
        local p            -- format string: result, plain part, <span>
        local flat = function ( a )
            -- isLatin
            local range
            for i = 1, #RangesLatin do
                range = RangesLatin[ i ]
                if a >= range[ 1 ] and a <= range[ 2 ] then
                    return true
                end
            end -- for i
        end -- flat()
        local focus = function ( a )
            -- char is not ambivalent
            local r = ( a > 64 )
            if r then
                r = ( a < 8192 or a > 8212 )
            else
                r = ( a == 38 or a == 60 )    -- '&' '<'
            end
            return r
        end -- focus()
        local form = function ( a )
            -- Result so far, plain part k..j-1, and run j..a in <span>
            return string.format( p,
                                  r,
                                  mw.ustring.sub( adjust,  k,  j - 1 ),
                                  mw.ustring.sub( adjust, j, a ) )
        end -- form()
        e:attr( "dir", "auto" )
         :css( "font-style", "normal" )
         :wikitext( "%s" )
        p = "%s%s" .. tostring( e )
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Close the run; spaces and opening
                            -- parentheses directly before this
                            -- character stay outside of the <span>
                            local nx = i - 1
                            local s = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " " or c == "(" then
                                    nx = nx - 1
                                    s = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    j = i
                    if c >= 880 and c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                m = m + 1
            end
        end -- for i
        if j and ( not m or m < n ) then
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()



Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2024-03-01
    local since = atleast
    local last = ( since == "~" )
    local linked = ( since == "@" )
    local link = ( since == "item" )
    local r
    if last or link or linked or since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number" and item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table" and
                       type( vsn.value ) == "string" and
                       vsn.value ~= "" then
                        if last and vsn.value == Failsafe.serial then
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               == mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        elseif link then
            r = false
        end
    end
    if type( r ) == "nil" then
        -- Plain comparison of the required version with the local serial
        if not since or since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()



Text.test = function ( about )
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; "quote" is the only supported topic
    -- Returns: table with quote definitions, or nil
    local r
    if about == "quote" then
        factoryQuote()
        r = { QuoteLang = Text.quoteLang,
              QuoteType = Text.quoteType }
    end
    return r
end -- Text.test()



-- Export
local p = { }



function p.char( frame )
    -- Template interface for Text.char()
    --     1       -- whitespace separated codepoints; "x" prefix for hex
    --     *       -- number of repetitions
    --     errors  -- "0" to suppress error messages
    -- Parameters are taken from the calling template, if it provides
    -- parameter 1; else from the #invoke itself
    local params = frame:getParent().args
    local story = params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story = params[ 1 ]
    end
    if story then
        local items = mw.text.split( story, "%s+" )
        if #items > 0 then
            local j
            lenient = ( params.errors == "0" )
            codes = { }
            multiple = tonumber( params[ "*" ] )
            for k, v in ipairs( items ) do
                if v:sub( 1, 1 ) == "x" then
                    -- "x41" is evaluated as "0x41"
                    j = tonumber( "0" .. v )
                elseif v == "" then
                    v = false
                else
                    j = tonumber( v )
                end
                if v then
                    -- Unparseable items are forwarded as strings and
                    -- rejected by Text.char()
                    table.insert( codes,  j or v )
                end
            end -- for k, v
        end
    end
    return Text.char( codes, multiple, lenient )
end -- p.char()



function p.concatParams( frame )
    -- Template interface for Text.concatParams()
    --     template   -- "1" to read the items from the calling template
    --     separator  -- separator
    --     format     -- format including "%s"
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.concatParams( args,
                              frame.args.separator,
                              frame.args.format )
end -- p.concatParams()



function p.containsCJK( frame )
    -- Returns "1" if parameter 1 contains CJK, else ""
    return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
end -- p.containsCJK()



function p.getPlain( frame )
    -- Template interface for Text.getPlain()
    return Text.getPlain( frame.args[ 1 ] or "" )
end -- p.getPlain()



function p.isLatinRange( frame )
    -- Returns "1" if parameter 1 is within latin ranges, else ""
    return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or ""
end -- p.isLatinRange()



function p.isQuote( frame )
    -- Returns "1" if parameter 1 is a quotation mark, else ""
    return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or ""
end -- p.isQuote()



function p.listToFormat( frame )
    -- Apply a format to the n-th items of several lists
    --     1, 2, ...  -- lists, items separated by sep
    --     format     -- format; each "%s" takes the item of next list
    --     sep        -- list item separator (default: ";")
    -- Returns: string, with all formatted lines concatenated
    local lists = { }
    local pformat = frame.args[ "format" ] or ""
    local sep = frame.args[ "sep" ] or ";"
    local maxListLen = 0
    local result = { }
    local item, line

    -- Parse parameters: lists
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        end
    end -- for k, v

    -- Split lists
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], sep )
        if #lists[ i ] > maxListLen then
            maxListLen = #lists[ i ]
        end
    end -- for i

    -- Generate result string
    for i = 1, maxListLen do
        line = pformat
        for j = 1, #lists do
            -- Shorter lists contribute an empty string; the function
            -- replacement inserts the item literally, so "%" within
            -- an item is not taken as replacement escape
            item = lists[ j ][ i ] or ""
            line = mw.ustring.gsub( line,
                                    "%%s",
                                    function ()
                                        return item
                                    end,
                                    1 )
        end -- for j
        table.insert( result, line )
    end -- for i

    return table.concat( result )
end -- p.listToFormat()



function p.listToText( frame )
    -- Template interface for Text.listToText()
    --     template  -- "1" to read the items from the calling template
    --     format    -- format including "%s"
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.listToText( args, frame.args.format )
end -- p.listToText()



function p.quote( frame )
    -- Template interface for Text.quote()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quote( frame.args[ 1 ] or "",
                       slang,
                       tonumber( frame.args[ 3 ] ) )
end -- p.quote()



function p.quoteUnquoted( frame )
    -- Template interface for Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quoteUnquoted( frame.args[ 1 ] or "",
                               slang,
                               tonumber( frame.args[ 3 ] ) )
end -- p.quoteUnquoted()



function p.removeDiacritics( frame )
    -- Template interface for Text.removeDiacritics()
    return Text.removeDiacritics( frame.args[ 1 ] or "" )
end -- p.removeDiacritics()



function p.sentenceTerminated( frame )
    -- Returns "1" if parameter 1 is a terminated sentence, else ""
    return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
end -- p.sentenceTerminated()



function p.tokenWords( frame )
    -- Template interface for Text.tokenWords()
    return Text.tokenWords( frame.args[ 1 ] or "" )
end -- p.tokenWords()



function p.ucfirstAll( frame )
    -- Template interface for Text.ucfirstAll()
    return Text.ucfirstAll( frame.args[ 1 ] or "" )
end -- p.ucfirstAll()



function p.unstrip( frame )
    -- Returns parameter 1 after mw.text.unstrip(), trimmed
    return mw.text.trim( mw.text.unstrip( frame.args[ 1 ] or "" ) )
end -- p.unstrip()



function p.uprightNonlatin( frame )
    -- Template interface for Text.uprightNonlatin()
    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end -- p.uprightNonlatin()



function p.zip( frame )
    -- Interleave the items of several lists
    --     1, 2, ...        -- lists
    --     sep              -- default list item separator
    --     sep1, sep2, ...  -- item separator of the particular list
    --     isep             -- output separator between items of one round
    --     osep             -- output separator between rounds
    -- Returns: string
    local lists = { }
    local seps = { }
    local defaultsep = frame.args[ "sep" ] or ""
    local innersep = frame.args[ "isep" ] or ""
    local outersep = frame.args[ "osep" ] or ""
    local maxListLen = 0
    local result = ""

    -- Parse parameters
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        else
            if string.sub( k, 1, 3 ) == "sep" then
                local sepnum = tonumber( string.sub( k, 4 ) )
                if sepnum then
                    seps[ sepnum ] = v
                end
            end
        end
    end -- for k, v

    -- Use the default separator where no explicit one has been given
    for i = 1, math.max( #seps, #lists ) do
        if not seps[ i ] then
            seps[ i ] = defaultsep
        end
    end -- for i

    -- Split lists
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], seps[ i ] )
        if #lists[ i ] > maxListLen then
            maxListLen = #lists[ i ]
        end
    end -- for i

    for i = 1, maxListLen do
        if i ~= 1 then
            result = result .. outersep
        end
        for j = 1, #lists do
            if j ~= 1 then
                result = result .. innersep
            end
            -- Shorter lists contribute an empty string
            result = result .. ( lists[ j ][ i ] or "" )
        end -- for j
    end -- for i
    return result
end -- p.zip()



p.failsafe = function ( frame )
    -- Versioning interface
    --     frame  -- frame table, with required version as parameter 1;
    --               or string, with required version
    -- Returns: string; "" instead of false
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since ) or ""
end -- p.failsafe()



p.Text = function ()
    -- Access to the library table from other Lua modules
    return Text
end -- p.Text

-- Calling the export table itself returns the library table;
-- the metatable is removed on that first call
setmetatable( p,  { __call = function ( func, ... )
                                 setmetatable( p, nil )
                                 return Failsafe
                             end } )

return p