Modul:URLutil: Unterschied zwischen den Versionen
Aus WiiDatabase Wiki
Zur Navigation springen · Zur Suche springen
ICON (Diskussion | Beiträge) (Die Seite wurde neu angelegt: „local URLutil = { suite = "URLutil", serial = "2018-01-24", item = 10859193 } --[=[ Utilities for URL etc. on www. * get…“) |
ICON (Diskussion | Beiträge) K (1 Version importiert) |
||
(Eine dazwischenliegende Version von einem anderen Benutzer wird nicht angezeigt) | |||
Zeile 1: | Zeile 1: | ||
local URLutil = { suite = "URLutil", | local URLutil = { suite = "URLutil", | ||
serial = " | serial = "2022-04-05", | ||
item = 10859193 } | item = 10859193 } | ||
--[=[ | --[=[ | ||
Utilities for URL etc. on www. | Utilities for URL etc. on www. | ||
* decode() | |||
* encode() | |||
* getAuthority() | * getAuthority() | ||
* getFragment() | * getFragment() | ||
Zeile 24: | Zeile 26: | ||
* isDomainInt() | * isDomainInt() | ||
* isHost() | * isHost() | ||
* isHostPathResource() | |||
* isIP() | * isIP() | ||
* isIPlocal() | * isIPlocal() | ||
Zeile 37: | Zeile 40: | ||
* isWebURL() | * isWebURL() | ||
* wikiEscapeURL() | * wikiEscapeURL() | ||
* failsafe() | |||
Only [[dotted decimal]] notation for IPv4 expected. | Only [[dotted decimal]] notation for IPv4 expected. | ||
Does not support dotted hexadecimal, dotted octal, or single-number formats. | Does not support dotted hexadecimal, dotted octal, or single-number formats. | ||
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | ||
]=] | ]=] | ||
local Failsafe = URLutil | |||
Zeile 46: | Zeile 51: | ||
local decodeComponentProtect = { F = "\"#%<>[\]^`{|}", | local decodeComponentProtect = { F = "\"#%<>[\]^`{|}", | ||
P = "\"#%<>[\]^`{|}/?", | P = "\"#%<>[\]^`{|}/?", | ||
Q = "\"#%<>[\]^`{|}&=+;", | Q = "\"#%<>[\]^`{|}&=+;,", | ||
X = "\"#%<>[\]^`{|}&=+;/?" } | X = "\"#%<>[\]^`{|}&=+;,/?" } | ||
Zeile 123: | Zeile 128: | ||
end | end | ||
n = ( ask:byte( j, j ) - 48 ) * 16 + n | n = ( ask:byte( j, j ) - 48 ) * 16 + n | ||
if decodeComponentEscape( averse, n ) then | if n == 39 and | ||
ask:sub( i + 3, i + 5 ) == "%27" then | |||
j = i + 6 | |||
while ( ask:sub( j, j + 2 ) == "%27" ) do | |||
j = j + 3 | |||
end -- while "%27" | |||
elseif decodeComponentEscape( averse, n ) then | |||
if m then | if m then | ||
ask = string.format( "%s%c%s", | ask = string.format( "%s%c%s", | ||
Zeile 189: | Zeile 200: | ||
return r | return r | ||
end -- getHash() | end -- getHash() | ||
URLutil.decode = function ( url, enctype ) | |||
local r, s | |||
if type( enctype ) == "string" then | |||
s = mw.text.trim( enctype ) | |||
if s == "" then | |||
s = false | |||
else | |||
s = s:upper() | |||
end | |||
end | |||
r = mw.text.encode( mw.uri.decode( url, s ) ) | |||
if r:find( "[%[|%]]" ) then | |||
local k | |||
r, k = r:gsub( "%[", "[" ) | |||
:gsub( "|", "|" ) | |||
:gsub( "%]", "]" ) | |||
end | |||
return r | |||
end -- URLutil.decode() | |||
URLutil.encode = function ( url, enctype ) | |||
local k, r, s | |||
if type( enctype ) == "string" then | |||
s = mw.text.trim( enctype ) | |||
if s == "" then | |||
s = false | |||
else | |||
s = s:upper() | |||
end | |||
end | |||
r = mw.uri.encode( url, s ) | |||
k = r:byte( 1, 1 ) | |||
if -- k == 35 or -- # | |||
k == 42 or -- * | |||
k == 58 or -- : | |||
k == 59 then -- ; | |||
r = string.format( "%%%X%s", k, r:sub( 2 ) ) | |||
end | |||
if r:find( "[%[|%]]" ) then | |||
r, k = r:gsub( "%[", "%5B" ) | |||
:gsub( "|", "%7C" ) | |||
:gsub( "%]", "%5D" ) | |||
end | |||
return r | |||
end -- URLutil.encode() | |||
Zeile 601: | Zeile 662: | ||
local r | local r | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
local scan = "^%s*([%w%.%%_-] | local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" | ||
local scope | local scope | ||
s, scope = mw.ustring.match( s, scan ) | s, scope = mw.ustring.match( s, scan ) | ||
Zeile 663: | Zeile 724: | ||
return URLutil.isDomain( s ) or URLutil.isIP( s ) | return URLutil.isDomain( s ) or URLutil.isIP( s ) | ||
end -- URLutil.isHost() | end -- URLutil.isHost() | ||
URLutil.isHostPathResource = function ( s ) | |||
local r = URLutil.isResourceURL( s ) | |||
if not r and s then | |||
r = URLutil.isResourceURL( "//" .. mw.text.trim( s ) ) | |||
end | |||
return r | |||
end -- URLutil.isHostPathResource() | |||
Zeile 842: | Zeile 913: | ||
local s = URLutil.getAuthority( url ) | local s = URLutil.getAuthority( url ) | ||
local pat = "[%[|%]" .. | local pat = "[%[|%]" .. | ||
mw.ustring.char( 8201, 45, 8207, | mw.ustring.char( 34, | ||
8201, 45, 8207, | |||
8234, 45, 8239, | 8234, 45, 8239, | ||
8288 ) | 8288 ) | ||
Zeile 875: | Zeile 947: | ||
URLutil.isWebURL = function ( url ) | URLutil.isWebURL = function ( url ) | ||
if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then | if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then | ||
if not url: | if not url:find( "%S%s+%S" ) and | ||
not url:find( "''", 1, true ) then | |||
return true | return true | ||
end | end | ||
Zeile 896: | Zeile 969: | ||
Failsafe.failsafe = function ( atleast ) | |||
-- Retrieve versioning and check for compliance | -- Retrieve versioning and check for compliance | ||
-- Precondition: | -- Precondition: | ||
-- | -- atleast -- string, with required version | ||
-- | -- or wikidata|item|~|@ or false | ||
-- Postcondition: | -- Postcondition: | ||
-- Returns string with | -- Returns string -- with queried version/item, also if problem | ||
local since = | -- false -- if appropriate | ||
-- 2020-08-17 | |||
local since = atleast | |||
local last = ( since == "~" ) | |||
local linked = ( since == "@" ) | |||
local link = ( since == "item" ) | |||
local r | local r | ||
if since == "wikidata" then | if last or link or linked or since == "wikidata" then | ||
local item = | local item = Failsafe.item | ||
since = false | since = false | ||
if type( item ) == "number" and item > 0 then | if type( item ) == "number" and item > 0 then | ||
local | local suited = string.format( "Q%d", item ) | ||
if link then | |||
r = suited | |||
else | |||
local entity = mw.wikibase.getEntity( suited ) | |||
if type( entity ) == "table" then | |||
local seek = Failsafe.serialProperty or "P348" | |||
local vsn = entity:formatPropertyValues( seek ) | |||
if type( vsn ) == "table" and | |||
type( vsn.value ) == "string" and | |||
vsn.value ~= "" then | |||
if last and vsn.value == Failsafe.serial then | |||
r = false | |||
elseif linked then | |||
if mw.title.getCurrentTitle().prefixedText | |||
== mw.wikibase.getSitelink( suited ) then | |||
r = false | |||
else | |||
r = suited | |||
end | |||
else | |||
r = vsn.value | |||
end | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
end | end | ||
if | if type( r ) == "nil" then | ||
if not since or since <= | if not since or since <= Failsafe.serial then | ||
r = | r = Failsafe.serial | ||
else | else | ||
r = false | r = false | ||
Zeile 929: | Zeile 1.023: | ||
end | end | ||
return r | return r | ||
end -- | end -- Failsafe.failsafe() | ||
local function Template( frame, action, amount ) | |||
-- Run actual code from template transclusion | |||
-- Precondition: | |||
-- frame -- object | |||
-- action -- string, with function name | |||
-- amount -- number, of args if > 1 | |||
-- Postcondition: | |||
-- Return string or not | |||
local n = amount or 1 | |||
local v = { } | |||
local r, s | |||
for i = 1, n do | |||
s = frame.args[ i ] | |||
if s then | |||
s = mw.text.trim( s ) | |||
if s ~= "" then | |||
v[ i ] = s | |||
end | |||
end | |||
end -- for i | |||
if v[ 1 ] then | |||
r = URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] ) | |||
end | |||
return r | |||
end -- Template() | |||
local p = {} | local p = {} | ||
function p.decode( frame ) | |||
return Template( frame, "decode", 2 ) or "" | |||
end | |||
function p.encode( frame ) | |||
return Template( frame, "encode", 2 ) or "" | |||
end | |||
function p.getAuthority( frame ) | function p.getAuthority( frame ) | ||
return | return Template( frame, "getAuthority" ) or "" | ||
end | end | ||
function p.getFragment( frame ) | function p.getFragment( frame ) | ||
local r = | local r = Template( frame, "getFragment", 2 ) | ||
if r then | if r then | ||
r = "#" .. r | r = "#" .. r | ||
Zeile 950: | Zeile 1.076: | ||
end | end | ||
function p.getHost( frame ) | function p.getHost( frame ) | ||
return | return Template( frame, "getHost" ) or "" | ||
end | end | ||
function p.getLocation( frame ) | function p.getLocation( frame ) | ||
return | return Template( frame, "getLocation" ) or "" | ||
end | end | ||
function p.getNormalized( frame ) | function p.getNormalized( frame ) | ||
return | return Template( frame, "getNormalized" ) or "" | ||
end | end | ||
function p.getPath( frame ) | function p.getPath( frame ) | ||
return | return Template( frame, "getPath" ) or "" | ||
end | end | ||
function p.getPort( frame ) | function p.getPort( frame ) | ||
return | return Template( frame, "getPort" ) or "" | ||
end | end | ||
function p.getQuery( frame ) | function p.getQuery( frame ) | ||
local r | local r = Template( frame, "getQuery", 3 ) | ||
local key = frame.args[ 2 ] | if r then | ||
local key = frame.args[ 2 ] | |||
if key then | |||
key = mw.text.trim( key ) | |||
if key == "" then | |||
key = nil | |||
end | |||
end | end | ||
if not key then | if not key then | ||
r = "?" .. r | r = "?" .. r | ||
Zeile 984: | Zeile 1.109: | ||
end | end | ||
function p.getRelativePath( frame ) | function p.getRelativePath( frame ) | ||
return | return Template( frame, "getRelativePath" ) or "" | ||
end | end | ||
function p.getScheme( frame ) | function p.getScheme( frame ) | ||
return | return Template( frame, "getScheme" ) or "" | ||
end | end | ||
function p.getSortkey( frame ) | function p.getSortkey( frame ) | ||
return | return Template( frame, "getSortkey" ) or "" | ||
end | end | ||
function p.getTLD( frame ) | function p.getTLD( frame ) | ||
return | return Template( frame, "getTLD" ) or "" | ||
end | end | ||
function p.getTop2domain( frame ) | function p.getTop2domain( frame ) | ||
return | return Template( frame, "getTop2domain" ) or "" | ||
end | end | ||
function p.getTop3domain( frame ) | function p.getTop3domain( frame ) | ||
return | return Template( frame, "getTop3domain" ) or "" | ||
end | end | ||
function p.isAuthority( frame ) | function p.isAuthority( frame ) | ||
return | return Template( frame, "isAuthority" ) and "1" or "" | ||
end | end | ||
function p.isDomain( frame ) | function p.isDomain( frame ) | ||
return | return Template( frame, "isDomain" ) and "1" or "" | ||
end | end | ||
function p.isDomainExample( frame ) | function p.isDomainExample( frame ) | ||
return | return Template( frame, "isDomainExample" ) and "1" or "" | ||
end | end | ||
function p.isDomainInt( frame ) | function p.isDomainInt( frame ) | ||
return | return Template( frame, "isDomainInt" ) and "1" or "" | ||
end | end | ||
function p.isHost( frame ) | function p.isHost( frame ) | ||
return | return Template( frame, "isHost" ) and "1" or "" | ||
end | |||
function p.isHostPathResource( frame ) | |||
return Template( frame, "isHostPathResource" ) and "1" or "" | |||
end | end | ||
function p.isIP( frame ) | function p.isIP( frame ) | ||
return | return Template( frame, "isIP" ) or "" | ||
end | end | ||
function p.isIPlocal( frame ) | function p.isIPlocal( frame ) | ||
return | return Template( frame, "isIPlocal" ) and "1" or "" | ||
end | end | ||
function p.isIPv4( frame ) | function p.isIPv4( frame ) | ||
return | return Template( frame, "isIPv4" ) and "1" or "" | ||
end | end | ||
function p.isIPv6( frame ) | function p.isIPv6( frame ) | ||
return | return Template( frame, "isIPv6" ) and "1" or "" | ||
end | end | ||
function p.isMailAddress( frame ) | function p.isMailAddress( frame ) | ||
return | return Template( frame, "isMailAddress" ) and "1" or "" | ||
end | end | ||
function p.isMailLink( frame ) | function p.isMailLink( frame ) | ||
return | return Template( frame, "isMailLink" ) and "1" or "" | ||
end | end | ||
function p.isProtocolDialog( frame ) | function p.isProtocolDialog( frame ) | ||
return | return Template( frame, "isProtocolDialog" ) and "1" or "" | ||
end | end | ||
function p.isProtocolWiki( frame ) | function p.isProtocolWiki( frame ) | ||
return | return Template( frame, "isProtocolWiki" ) and "1" or "" | ||
end | end | ||
function p.isResourceURL( frame ) | function p.isResourceURL( frame ) | ||
return | return Template( frame, "isResourceURL" ) and "1" or "" | ||
end | end | ||
function p.isSuspiciousURL( frame ) | function p.isSuspiciousURL( frame ) | ||
return | return Template( frame, "isSuspiciousURL" ) and "1" or "" | ||
end | end | ||
function p.isUnescapedURL( frame ) | function p.isUnescapedURL( frame ) | ||
return | return Template( frame, "isUnescapedURL", 2 ) and "1" or "" | ||
end | end | ||
function p.isWebURL( frame ) | function p.isWebURL( frame ) | ||
return | return Template( frame, "isWebURL" ) and "1" or "" | ||
end | end | ||
function p.wikiEscapeURL( frame ) | function p.wikiEscapeURL( frame ) | ||
return | return Template( frame, "wikiEscapeURL" ) | ||
end | end | ||
p.failsafe = function ( frame ) | p.failsafe = function ( frame ) | ||
Zeile 1.069: | Zeile 1.197: | ||
end | end | ||
end | end | ||
return | return Failsafe.failsafe( since ) or "" | ||
end | end | ||
function p.URLutil() | function p.URLutil() |
Aktuelle Version vom 30. September 2022, 20:11 Uhr
Die Dokumentation für dieses Modul kann unter Modul:URLutil/Doku erstellt werden
-- Module table: receives the URL utility functions defined below and
-- carries the version metadata that failsafe() reads.
local URLutil = { suite = "URLutil",
serial = "2022-04-05",   -- version stamp, compared as a string by failsafe()
item = 10859193 }        -- numeric id; failsafe() formats it as "Q%d"
--[=[
Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.
]=]
-- Alias of the module table; failsafe() reads .serial and .item through it.
local Failsafe = URLutil
-- Characters that have to stay percent-encoded, by URL component key:
--     F -- fragment, P -- path, Q -- query, X -- every component
-- The former spelling used the escape "\]", which Lua 5.1 reads as a plain
-- "]" but Lua 5.2+ rejects as an invalid escape sequence. The strings below
-- are byte-identical to what Lua 5.1 produced.
-- NOTE(review): a literal backslash may have been intended in these sets
--               (it is commonly treated as unsafe in URLs) -- confirm.
local decodeComponentProtect = { F = "\"#%<>[]^`{|}",
                                 P = "\"#%<>[]^`{|}/?",
                                 Q = "\"#%<>[]^`{|}&=+;,",
                                 X = "\"#%<>[]^`{|}&=+;,/?" }
local decodeComponentEscape = function ( averse, adapt )
    -- Tell whether a character has to remain percent-encoded
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect (F, P, Q, X)
    --     adapt   -- number, character code (0...255)
    -- Postcondition:
    --     Returns true, if the character must not appear literally
    -- The former test "adapt == 20" used the decimal value where the
    -- hexadecimal code 0x20 (space) was meant, so "%20" was never protected.
    -- Space, DEL and all control characters (which covers the old value 20)
    -- are now kept encoded.
    if adapt <= 0x20 or adapt == 0x7F then
        return true
    end
    return decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                  1,
                                                  true ) ~= nil
end -- decodeComponentEscape()
local decodeComponentML = function ( ask )
    -- Resolve numeric character references within a URL
    -- Precondition:
    --     ask  -- string, URL text
    -- Postcondition:
    --     Returns string; every &#xHH; or &#NNN; with at least two digits is
    --         * rewritten as decimal &#NNN; if the code is 128 or above
    --         * percent-encoded, if decodeComponentEscape( "X", code )
    --         * replaced by the plain character otherwise
    local i = 1
    local j, n, s
    while i do
        -- "[xX]?" makes the x optional: the decimal branch below was
        -- unreachable while the pattern insisted on a hexadecimal marker.
        i = ask:find( "&#[xX]?%x%x+;", i )
        if i then
            j = ask:find( ";", i + 3, true )
            s = ask:sub( i + 2, j - 1 ):upper()
            if s:byte( 1, 1 ) == 88 then        -- "X": hexadecimal
                n = tonumber( s:sub( 2 ), 16 )
            elseif s:match( "^%d+$" ) then      -- decimal
                n = tonumber( s )
            else
                -- hex digits without the x marker: not a valid reference
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                j = j + 1
                if i == 1 then
                    ask = s .. ask:sub( j )
                else
                    ask = string.format( "%s%s%s",
                                         ask:sub( 1, i - 1 ),
                                         s,
                                         ask:sub( j ) )
                end
            end
            -- continue behind the "&" or the first replaced character
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()
local decodeComponentPercent = function ( ask, averse )
-- Decode percent-encoded characters %20...%7F where that is permitted
-- Precondition:
--     ask     -- string, one URL component
--     averse  -- string, key passed on to decodeComponentEscape()
-- Postcondition:
--     Returns string; sequences that must stay encoded are kept,
--     with a lowercase second hex digit changed to uppercase
local i = 1
local j, k, m, n
while ( i ) do
i = ask:find( "%%[2-7]%x", i )
if i then
-- j: position of first hex digit, k: position of second hex digit
j = i + 1
k = j + 1
n = ask:byte( k, k )
-- k now points behind the escape sequence
k = k + 1
-- m: second hex digit is a lowercase letter
m = ( n > 96 )
if m then
n = n - 32
-- m keeps the uppercase character code for re-insertion below
m = n
end
-- convert second hex digit ("A"-"F" or "0"-"9") into its value
if n > 57 then
n = n - 55
else
n = n - 48
end
-- first digit is always "2"-"7" by the search pattern
n = ( ask:byte( j, j ) - 48 ) * 16 + n
if n == 39 and
ask:sub( i + 3, i + 5 ) == "%27" then
-- run of two or more %27 (apostrophes): leave encoded, skip the run
j = i + 6
while ( ask:sub( j, j + 2 ) == "%27" ) do
j = j + 3
end -- while "%27"
elseif decodeComponentEscape( averse, n ) then
-- protected character: keep the escape, only uppercase the digit
if m then
ask = string.format( "%s%c%s",
ask:sub( 1, j ),
m,
ask:sub( k ) )
end
elseif i == 1 then
-- decode at the very beginning of the string
ask = string.format( "%c%s", n, ask:sub( k ) )
else
-- decode in the middle of the string
ask = string.format( "%s%c%s",
ask:sub( 1, i - 1 ),
n,
ask:sub( k ) )
end
-- resume the search behind the "%" or decoded character, or behind the %27 run
i = j
end
end -- while i
return ask
end -- decodeComponentPercent()
local getTopDomain = function ( url, mode )
    -- Retrieve the last two or three labels of the host
    -- Precondition:
    --     url   -- string, passed on to URLutil.getHost()
    --     mode  -- number, 3 selects three labels, anything else two
    -- Postcondition:
    --     Returns string with the domain, or false
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    local label = "[%w%%%-]+%."
    local tail  = "%a[%w%-]*%a)$"
    local scan
    if mode == 3 then
        scan = "%.(" .. label .. label .. tail
    else
        scan = "%.(" .. label .. tail
    end
    -- a leading dot is prepended so that the first label is anchored too
    local found = mw.ustring.match( "." .. host, scan )
    return found or false
end -- getTopDomain()
local getHash = function ( url )
    -- Find the "#" that starts the fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, position of the first "#" that is not part of a
    --             numeric character reference (&#NNN; or &#xHH;), or nil
    -- The former implementation lost a real "#" whenever a character
    -- reference followed later in the string (e.g. "/#top&#91;" gave nil),
    -- because it kept skipping ahead to the next "&#" unconditionally.
    local i = url:find( "#", 1, true )
    while i do
        local entity = false
        if i > 1 and url:byte( i - 1 ) == 38 then    -- preceded by "&"
            entity = url:match( "^%d+;", i + 1 ) or
                     url:match( "^[xX]%x+;", i + 1 )
        end
        if not entity then
            return i
        end
        i = url:find( "#", i + 1, true )
    end -- while i
    return nil
end -- getHash()
URLutil.decode = function ( url, enctype )
    -- Decode a percent-encoded string and make it safe for wikitext
    -- Precondition:
    --     url      -- string, encoded text
    --     enctype  -- string, encoding type for mw.uri.decode(), or not
    -- Postcondition:
    --     Returns string, HTML-escaped by mw.text.encode(), with
    --             [ | ] replaced by numeric character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    -- The replacement texts had degenerated into the plain characters
    -- themselves, so the substitution did nothing; entities restored.
    r = r:gsub( "%[", "&#91;" )
         :gsub( "|", "&#124;" )
         :gsub( "%]", "&#93;" )
    return r
end -- URLutil.decode()
URLutil.encode = function ( url, enctype )
    -- Percent-encode a string, safe for use at the start of a wikitext line
    -- Precondition:
    --     url      -- string, plain text
    --     enctype  -- string, encoding type for mw.uri.encode(), or not
    -- Postcondition:
    --     Returns string; a leading * : ; and any [ | ] are percent-encoded
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    if -- k == 35  or    -- #
       k == 42  or    -- *
       k == 58  or    -- :
       k == 59 then   -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- "%" must be doubled in a gsub replacement string; the former
        -- "%5B" was read as capture index 5 and raised an error.
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()
URLutil.getAuthority = function ( url )
    -- Retrieve host and optional port of a URL
    -- Precondition:
    --     url  -- string, or anything else
    -- Postcondition:
    --     Returns string "host" or "host:port" (host in lowercase)
    --             nil, if no valid authority was found
    --             false, if url is not a string
    if type( url ) ~= "string" then
        return false
    end
    local text = mw.text.decode( url )
    -- drop a fragment; search starts at position 6
    local cut = text:find( "#", 6, true )
    if cut then
        text = text:sub( 1, cut - 1 )
    end
    -- the trailing slash guarantees a terminator for the pattern
    local host, colon, port =
        mw.ustring.match( text .. "/",
                          "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/" )
    if not URLutil.isHost( host ) then
        return nil
    end
    host = mw.ustring.lower( host )
    if colon == ":" then
        if port:find( "^[1-9]" ) then
            return host .. ":" .. port
        end
        return nil
    end
    if #port == 0 then
        return host
    end
    return nil
end -- URLutil.getAuthority()
URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment of a URL, without the leading "#"
    -- Precondition:
    --     url     -- string, or anything else
    --     decode  -- string "%" or "WIKI" to request decoding, or not
    -- Postcondition:
    --     Returns string with the fragment
    --             false, if there is no "#"
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local at = getHash( url )
    if not at then
        return false
    end
    local fragment = mw.text.trim( url:sub( at ) ):sub( 2 )
    if type( decode ) == "string" then
        local mode = mw.text.trim( decode )
        if mode == "WIKI" then
            -- anchor encoding: ".XX" stands for "%XX", "_" for space
            fragment = fragment:gsub( "%.(%x%x)", "%%%1" )
            fragment = fragment:gsub( "_", " " )
        end
        if mode == "WIKI" or mode == "%" then
            fragment = mw.uri.decode( fragment, "PATH" )
        end
    end
    return fragment
end -- URLutil.getFragment()
URLutil.getHost = function ( url )
    -- Retrieve the host of a URL, without port
    -- Postcondition:
    --     Returns string with the host, or the falsy result of
    --     URLutil.getAuthority(), or nil if the pattern does not match
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local host = mw.ustring.match( authority, "^([%w%.%%_%-]+):?[%d]*$" )
    return host
end -- URLutil.getHost()
URLutil.getLocation = function ( url )
    -- Retrieve the URL without its fragment
    -- Postcondition:
    --     Returns string, trimmed, cut before the fragment "#"
    --             false, if empty or starting with the fragment
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local text = mw.text.trim( url )
    if text == "" then
        return false
    end
    local at = getHash( text )
    if at == 1 then
        return false
    end
    if at then
        text = text:sub( 1, at - 1 )
    end
    return text
end -- URLutil.getLocation()
URLutil.getNormalized = function ( url )
-- Bring a URL into a normalized form
-- Precondition:
--     url  -- string, or anything else
-- Postcondition:
--     Returns string: character references resolved, percent escapes
--             decoded per component where permitted, scheme and authority
--             in lowercase, space [ | ] < > percent-encoded
--             false, if url is not a string or is empty
local r
if type( url ) == "string" then
r = mw.text.trim( url )
if r == "" then
r = false
else
r = decodeComponentML( r )
end
else
r = false
end
if r then
local k = r:find( "//", 1, true )
if k then
-- j: first slash behind the authority, if any
local j = r:find( "/", k + 2, true )
-- fragment, path and query components, split off below
local sF, sP, sQ
if r:find( "%%[2-7]%x" ) then
-- decodable percent escapes present: split and decode per component
local i = getHash( r )
if i then
sF = r:sub( i + 1 )
r = r:sub( 1, i - 1 )
if sF == "" then
sF = false
else
sF = decodeComponentPercent( sF, "F" )
end
end
i = r:find( "?", 1, true )
if i then
-- sQ keeps its leading "?"
sQ = r:sub( i )
r = r:sub( 1, i - 1 )
sQ = decodeComponentPercent( sQ, "Q" )
end
if j then
if #r > j then
-- sP excludes the leading slash, which is re-added below
sP = r:sub( j + 1 )
sP = decodeComponentPercent( sP, "P" )
end
r = r:sub( 1, j - 1 )
end
elseif j then
-- nothing to decode: only drop a trailing empty fragment marker
local n = #r
if r:byte( n, n ) == 35 then -- '#'
n = n - 1
r = r:sub( 1, n )
end
if n > j then
sP = r:sub( j + 1 )
end
r = r:sub( 1, j - 1 )
end
-- r now holds scheme and authority only
r = mw.ustring.lower( r ) .. "/"
if sP then
r = r .. sP
end
if sQ then
r = r .. sQ
end
if sF then
r = string.format( "%s#%s", r, sF )
end
end
-- finally escape characters that are not permitted literally
r = r:gsub( " ", "%%20" )
:gsub( "%[", "%%5B" )
:gsub( "|", "%%7C" )
:gsub( "%]", "%%5D" )
:gsub( "%<", "%%3C" )
:gsub( "%>", "%%3E" )
end
return r
end -- URLutil.getNormalized()
URLutil.getPath = function ( url )
    -- Retrieve the path of a URL, without query and fragment
    -- Postcondition:
    --     Returns string, or the falsy result of URLutil.getRelativePath()
    local path = URLutil.getRelativePath( url )
    if path then
        -- cut at first "?", then at first "#" of what remains
        path = path:match( "^([^%?]*)%?" ) or path
        path = path:match( "^([^#]*)#" ) or path
    end
    return path
end -- URLutil.getPath()
URLutil.getPort = function ( url )
    -- Retrieve the port number of a URL
    -- Postcondition:
    --     Returns number, if the authority ends with a port
    --             false, if there is an authority without port
    --             the falsy result of URLutil.getAuthority() otherwise
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    if not digits then
        return false
    end
    local port = tonumber( digits )
    return port
end -- URLutil.getPort()
URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query string of a URL, or the value of one parameter
    -- Precondition:
    --     url        -- string, or anything else
    --     key        -- string, name of a single parameter, or not
    --     separator  -- string, one of & ; , / (default: &), or not
    -- Postcondition:
    --     Returns string with the entire query (without "?") or the value
    --             false, if there is no query or the key is not present
    --             nil, if url is not a string
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r and type( key ) == "string" then
            -- The trimmed key used to be computed but ignored. It is now
            -- used, with punctuation escaped so that keys such as "a.b"
            -- or "x[]" are matched literally rather than as a pattern.
            local single = mw.text.trim( key ):gsub( "%p", "%%%0" )
            local sep = "&"
            if type( separator ) == "string" then
                local s = mw.text.trim( separator )
                if s:match( "^[&;,/]$" ) then
                    sep = s
                end
            end
            -- surround with separators so first and last pair match as well
            local scan = string.format( "%s%s=([^%s]*)%s",
                                        sep, single, sep, sep )
            r = string.format( "%s%s%s", sep, r, sep ):match( scan )
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()
URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters of a URL as table
    -- Precondition:
    --     url        -- string, or anything else
    --     separator  -- string, one of & ; , / (default: &), or not
    -- Postcondition:
    --     Returns table, mapping name to value; false for items without "="
    --             the falsy result of URLutil.getQuery() otherwise
    local query = URLutil.getQuery( url )
    if not query then
        return query
    end
    local sep = "&"
    if type( separator ) == "string" then
        local given = mw.text.trim( separator )
        if given:match( "^[&;,/]$" ) then
            sep = given
        end
    end
    local r = { }
    for _, item in ipairs( mw.text.split( query, sep, true ) ) do
        if item:find( "=", 2, true ) then
            local name, value = item:match( "^([^=]+)=(.*)$" )
            if name then
                r[ name ] = value
            end
        else
            r[ item ] = false
        end
    end -- for item
    return r
end -- URLutil.getQueryTable()
URLutil.getRelativePath = function ( url )
    -- Retrieve everything behind the authority: path, query and fragment
    -- Postcondition:
    --     Returns string, starting with "/"
    --             false, if nothing suitable was found
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local rest = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if rest then
        -- skip the authority
        rest = rest:match( "[^/]+(/.*)$" )
    else
        local extra
        extra, rest = url:match( "^%s*(/?)(/.*)$" )
        if extra == "/" then
            -- protocol-relative: skip "/" and the authority
            rest = rest:match( "/[^/]+(/.*)$" )
        end
    end
    if rest then
        return mw.text.trim( rest )
    end
    if URLutil.isResourceURL( url ) then
        return "/"
    end
    return false
end -- URLutil.getRelativePath()
URLutil.getScheme = function ( url )
    -- Retrieve the scheme of a URL
    -- Postcondition:
    --     Returns string "scheme://" in lowercase (three letters at least)
    --             or "//" for a protocol-relative URL
    --             false, if no valid scheme was found
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if colon == ":" then
        if #prot > 2 then
            return prot:lower() .. "://"
        end
    elseif #prot == 0 then
        return "//"
    end
    return false
end -- URLutil.getScheme()
URLutil.getSortkey = function ( url )
    -- Build a sort key with the domain labels in reversed order
    -- Precondition:
    --     url  -- string, or anything else
    -- Postcondition:
    --     Returns string "///tld.domain.host port scheme: path"
    --             the unchanged url, if it is not a string or has no "//"
    --             or no usable scheme
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            -- string.find() never yields 0, so the former test "i == 0"
            -- could not detect protocol-relative URLs such as "//host/".
            if url:find( "^%s*//" ) then
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, port, suffix
                scheme = scheme:lower()
                i = s:find( "/", 1, true )
                if i and i > 1 then
                    suffix = s:sub( i + 1 )    -- mw.uri.encode()
                    s      = s:sub( 1, i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- Capture the digits only: the colon used to be captured
                -- too and made the "%d" below fail on every URL with port.
                site, port = s:match( "^(.+):(%d+)$" )
                if port then
                    port = tonumber( port )
                else
                    site = s
                    port = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], port, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()
URLutil.getTLD = function ( url )
    -- Retrieve the top-level domain of a URL
    -- Postcondition:
    --     Returns string with the TLD, or false if the host has none
    --             the falsy result of URLutil.getHost() otherwise
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    local tld = mw.ustring.match( host, "%w+%.(%a[%w%-]*%a)$" )
    return tld or false
end -- URLutil.getTLD()
URLutil.getTop2domain = function ( url )
    -- Retrieve the last two domain labels of the host, or false
    local domain = getTopDomain( url, 2 )
    return domain
end -- URLutil.getTop2domain()
URLutil.getTop3domain = function ( url )
    -- Retrieve the last three domain labels of the host, or false
    local domain = getTopDomain( url, 3 )
    return domain
end -- URLutil.getTop3domain()
URLutil.isAuthority = function ( s )
    -- Is this a host, optionally followed by ":" and a port number?
    -- Precondition:
    --     s  -- string, or anything else
    -- Postcondition:
    --     Returns the result of URLutil.isHost() for the host part,
    --             false, if the text does not fit or the port is invalid
    --             nil, if s is not a string
    if type( s ) ~= "string" then
        return nil
    end
    local host, colon, port =
        mw.ustring.match( s, "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$" )
    if not host then
        return false
    end
    -- The port verdict used to be overwritten by the isHost() result,
    -- so "example.org:0" or "example.org:" passed; reject them here.
    if colon == ":" then
        if not port:match( "^[1-9][0-9]*$" ) then
            return false
        end
    elseif port ~= "" then
        return false
    end
    return URLutil.isHost( host )
end -- URLutil.isAuthority()
URLutil.isDomain = function ( s )
    -- Is this a syntactically valid domain name?
    -- Precondition:
    --     s  -- string, or anything else
    -- Postcondition:
    --     Returns true, if shaped like name.tld
    --             false, if matching but containing ".."
    --             nil, if not a string or not matching at all
    if type( s ) ~= "string" then
        return nil
    end
    local name = mw.ustring.match( s,
                          "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" )
    if type( name ) ~= "string" then
        return nil
    end
    if not mw.ustring.find( name, "^%w" ) then
        return nil
    end
    if mw.ustring.find( name, "..", 1, true ) then
        return false
    end
    return true
end -- URLutil.isDomain()
URLutil.isDomainExample = function ( url )
    -- Is the host within example.com, example.net, example.org
    -- or example.edu?  (the original comment cites RFC 2606)
    -- Postcondition:
    --     Returns true or false
    local top = getTopDomain( url, 2 )
    if not top then
        return top
    end
    local reserved = { com = true, edu = true, net = true, org = true }
    local tld = top:lower():match( "^example%.([a-z][a-z][a-z])$" )
    if tld and reserved[ tld ] then
        return true
    end
    return false
end -- URLutil.isDomainExample()
URLutil.isDomainInt = function ( url )
    -- Internationalized Domain Name (Punycode)
    -- Postcondition:
    --     Returns true, if the host contains characters outside "!" to "~"
    --                   or a label starting with "xn--"
    --             false, if not
    --             the falsy result of URLutil.getHost() otherwise
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    if not host:match( "^[!-~]+$" ) then
        return true
    end
    -- the prepended dot lets the first label be checked like the others
    local dotted = "." .. host
    return ( dotted:find( ".xn--", 1, true ) ~= nil )
end -- URLutil.isDomainInt()
URLutil.isHost = function ( s )
    -- Is this a domain name or an IP address?
    -- Returns the truthy result of isDomain(), else the result of isIP()
    local r = URLutil.isDomain( s )
    if not r then
        r = URLutil.isIP( s )
    end
    return r
end -- URLutil.isHost()
URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, with or without a leading scheme or "//"?
    -- Postcondition:
    --     Returns the result of URLutil.isResourceURL(), tried on s itself
    --     and then on s with "//" prepended
    local direct = URLutil.isResourceURL( s )
    if direct or not s then
        return direct
    end
    return URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
end -- URLutil.isHostPathResource()
URLutil.isIP = function ( s )
    -- Is this an IP address?
    -- Postcondition:
    --     Returns 4 for IPv4, 6 for IPv6, false otherwise
    if URLutil.isIPv4( s ) then
        return 4
    end
    if URLutil.isIPv6( s ) then
        return 6
    end
    return false
end -- URLutil.isIP()
URLutil.isIPlocal = function ( s )
    -- Is this an IPv4 address that is not routed publicly?
    -- The original comment cites RFC 1918, RFC 1122, and RFC 5735 for
    -- 0.0.0.0; ranges checked: 0.*, 10.*, 127.*, 169.254.*,
    -- 172.16.* to 172.31.*, 192.168.*
    -- Precondition:
    --     s  -- string with IPv4 address, or anything else
    -- Postcondition:
    --     Returns true or false
    -- Changes against the former code:
    --     * the 192.168 branch tested an undefined global "beg" and
    --       was never entered
    --     * the 169.254 branch was present but empty
    --     * a non-string argument raised an error
    local r = false
    if type( s ) == "string" then
        local first, second = s:match( "^ *([01][0-9]*)%.([0-9]+)%." )
        if first then
            first  = tonumber( first )
            second = tonumber( second )
            if first == 0 then
                -- isIPv4() rejects a leading zero, hence the own pattern
                r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
            elseif first == 10  or  first == 127  or
                   ( first == 169  and  second == 254 )  or
                   ( first == 172  and  second >= 16  and  second <= 31 )  or
                   ( first == 192  and  second == 168 ) then
                r = URLutil.isIPv4( s )
            end
        end
    end
    return r and true or false
end -- URLutil.isIPlocal()
URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- string, or anything else
    -- Postcondition:
    --     Returns true, if four octets below 256, first without leading 0
    --             false otherwise
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        return false
    end
    for k = 1, 4 do
        if tonumber( octets[ k ] ) >= 256 then
            return false
        end
    end -- for k
    return true
end -- URLutil.isIPv4()
URLutil.isIPv6 = function ( s )
    -- Is this an IPv6 address (colon-separated hexadecimal groups)?
    -- Precondition:
    --     s  -- string, or anything else
    -- Postcondition:
    --     Returns true or false
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trimming has to precede the character check; it used to follow it,
    -- where it had no effect, so padded input was always rejected
    -- (unlike isIPv4() and isDomain(), which tolerate padding).
    s = mw.text.trim( s )
    if s:len() == 0
       or s:find( "[^:%x]" )    -- only colon and hex digits are legal chars
       or s:find( "^:[^:]" )    -- can begin or end with :: but not with single :
       or s:find( "[^:]:$" )
       or s:find( ":::" ) then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" )                    -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" )    -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()
URLutil.isMailAddress = function ( s )
    -- Is this shaped like an e-mail address, local@domain?
    -- Postcondition:
    --     Returns the result of URLutil.isDomain() for the domain part,
    --             false, if s is not a string
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s,
                                "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()
URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with an e-mail address?
    -- Postcondition:
    --     Returns the result of URLutil.isMailAddress() for the address,
    --             false, if s is not a string or is not a mailto: link
    if type( s ) ~= "string" then
        return false
    end
    local prefix, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
    if type( prefix ) ~= "string" then
        return false
    end
    if prefix:lower() ~= "mailto" then
        return false
    end
    return URLutil.isMailAddress( addr )
end -- URLutil.isMailLink()
local function isProtocolAccepted( prot, supplied )
    -- Check a protocol specification against a list of schemes
    -- Precondition:
    --     prot      -- string like "http", "http:", "http://" or "//"
    --     supplied  -- string, scheme names separated and enclosed by spaces
    -- Postcondition:
    --     Returns true, if prot is "//" or its scheme is listed
    --             false otherwise
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes =
        mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        -- a single slash is never acceptable
        return false
    end
    if scheme == "" then
        return ( colon ~= ":" and slashes == "//" )
    end
    if colon == ":" or slashes == "" then
        local hit = supplied:match( " " .. scheme:lower() .. " " )
        return ( type( hit ) == "string" )
    end
    return false
end -- isProtocolAccepted()
URLutil.isProtocolDialog = function ( prot )
    -- Is this one of the schemes mailto, irc, ircs, ssh, telnet?
    local schemes = " mailto irc ircs ssh telnet "
    return isProtocolAccepted( prot, schemes )
end -- URLutil.isProtocolDialog()
URLutil.isProtocolWiki = function ( prot )
    -- Is this "//" or one of the schemes listed below?
    local schemes = " ftp ftps git http https nntp sftp svn worldwind "
    return isProtocolAccepted( prot, schemes )
end -- URLutil.isProtocolWiki()
URLutil.isResourceURL = function ( url )
    -- Is this a URL of a resource: //, http, https, ftp or sftp?
    -- Postcondition:
    --     Returns true, if scheme and authority are valid, there is no
    --                   inner whitespace, and any fragment is preceded by
    --                   a slash behind the authority
    --             false otherwise
    local scheme = URLutil.getScheme( url )
    if not scheme then
        return false
    end
    local known = " // http:// https:// ftp:// sftp:// "
    if not known:find( string.format( " %s ", scheme ) ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    if url:match( "%S%s+%S" ) then
        return false
    end
    if url:match( "^[^#]+#.*$" ) then
        -- with fragment: a slash is required behind the authority
        return ( url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) ~= nil )
    end
    return true
end -- URLutil.isResourceURL()
URLutil.isSuspiciousURL = function ( url )
    -- Does this URL look broken or manipulated?
    -- Postcondition:
    --     Returns true, if it is not a resource URL, or contains "@" in
    --                   the authority, '', [ | ] ", one of the code points
    --                   8201-8207, 8234-8239, 8288, or a trailing . or ,
    --             false otherwise
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 34,
                                     8201, 45, 8207,
                                     8234, 45, 8239,
                                     8288 )
                    .. "]"
        -- The set contains multi-byte characters and ranges between
        -- them, so it has to be evaluated by the Unicode-aware
        -- mw.ustring.find(). The byte-wise string.find() split them into
        -- single bytes and flagged most non-ASCII text as suspicious.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()
URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL that still contains [ | ] ?
    -- Precondition:
    --     url       -- string
    --     trailing  -- if a string, the result is always false
    -- Postcondition:
    --     Returns true or false
    if type( trailing ) == "string" then
        return false
    end
    if not URLutil.isWebURL( url ) then
        return false
    end
    return ( url:match( "[%[|%]]" ) ~= nil )
end -- URLutil.isUnescapedURL()
URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme and authority, without inner whitespace
    -- and without '' ?
    -- Postcondition:
    --     Returns true or false
    if not URLutil.getScheme( url ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    if url:find( "%S%s+%S" ) then
        return false
    end
    if url:find( "''", 1, true ) then
        return false
    end
    return true
end -- URLutil.isWebURL()
URLutil.wikiEscapeURL = function ( url )
    -- Escape characters of a URL that conflict with wiki link syntax
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, with [ | ] replaced by numeric character references
    -- The replacement texts had degenerated into the plain characters
    -- themselves, which made this function a no-op; entities restored.
    local r = url:gsub( "%[", "&#91;" )
                 :gsub( "|", "&#124;" )
                 :gsub( "%]", "&#93;" )
    return r
end -- URLutil.wikiEscapeURL()
Failsafe.failsafe = function ( atleast )
-- Retrieve versioning and check for compliance
-- Precondition:
-- atleast -- string, with required version
-- or wikidata|item|~|@ or false
-- Postcondition:
-- Returns string -- with queried version/item, also if problem
-- false -- if appropriate
-- 2020-08-17
local since = atleast
-- the keywords select what is to be reported:
--     "~"     last:   false, if the registered version equals the local serial
--     "@"     linked: false, if the current page is the registered sitelink
--     "item"  link:   the item identifier, without any entity lookup
local last = ( since == "~" )
local linked = ( since == "@" )
local link = ( since == "item" )
local r
if last or link or linked or since == "wikidata" then
local item = Failsafe.item
-- a keyword is no version requirement
since = false
if type( item ) == "number" and item > 0 then
local suited = string.format( "Q%d", item )
if link then
r = suited
else
local entity = mw.wikibase.getEntity( suited )
if type( entity ) == "table" then
-- property holding the version; P348 unless serialProperty is set
local seek = Failsafe.serialProperty or "P348"
local vsn = entity:formatPropertyValues( seek )
if type( vsn ) == "table" and
type( vsn.value ) == "string" and
vsn.value ~= "" then
if last and vsn.value == Failsafe.serial then
r = false
elseif linked then
if mw.title.getCurrentTitle().prefixedText
== mw.wikibase.getSitelink( suited ) then
r = false
else
r = suited
end
else
r = vsn.value
end
end
end
end
end
end
-- r is still nil if no keyword was given or the lookup gave nothing;
-- an explicit false from above must be preserved
if type( r ) == "nil" then
-- version stamps are compared as strings
if not since or since <= Failsafe.serial then
r = Failsafe.serial
else
r = false
end
end
return r
end -- Failsafe.failsafe()
local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local v = { }
    for i = 1, amount or 1 do
        local s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                -- empty parameters are treated like missing ones
                v[ i ] = s
            end
        end
    end -- for i
    if not v[ 1 ] then
        -- nothing to do without the first parameter
        return nil
    end
    local r = URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] )
    return r
end -- Template()
-- Table of template entry points; the p.* functions below are added to it.
local p = {}
p.decode = function ( frame )
    -- Template export of URLutil.decode(), fed with parameters 1 and 2;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "decode", 2 )
    if got then
        return got
    end
    return ""
end -- p.decode()
p.encode = function ( frame )
    -- Template export of URLutil.encode(), fed with parameters 1 and 2;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "encode", 2 )
    if got then
        return got
    end
    return ""
end -- p.encode()
p.getAuthority = function ( frame )
    -- Template export of URLutil.getAuthority(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getAuthority" )
    if got then
        return got
    end
    return ""
end -- p.getAuthority()
p.getFragment = function ( frame )
    -- Template export of URLutil.getFragment(), fed with
    -- parameters 1 and 2.
    -- A result is returned with "#" put in front;
    -- a nil or false outcome is rendered as empty string.
    local fragment = Template( frame, "getFragment", 2 )
    if not fragment then
        return ""
    end
    return "#" .. fragment
end -- p.getFragment()
p.getHost = function ( frame )
    -- Template export of URLutil.getHost(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getHost" )
    if got then
        return got
    end
    return ""
end -- p.getHost()
p.getLocation = function ( frame )
    -- Template export of URLutil.getLocation(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getLocation" )
    if got then
        return got
    end
    return ""
end -- p.getLocation()
p.getNormalized = function ( frame )
    -- Template export of URLutil.getNormalized(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getNormalized" )
    if got then
        return got
    end
    return ""
end -- p.getNormalized()
p.getPath = function ( frame )
    -- Template export of URLutil.getPath(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getPath" )
    if got then
        return got
    end
    return ""
end -- p.getPath()
p.getPort = function ( frame )
    -- Template export of URLutil.getPort(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getPort" )
    if got then
        return got
    end
    return ""
end -- p.getPort()
p.getQuery = function ( frame )
    -- Template export of URLutil.getQuery(), fed with
    -- parameters 1, 2 and 3.
    -- If parameter 2 is present and not blank, the result is returned
    -- unchanged; otherwise "?" is put in front of the result.
    -- A nil or false outcome is rendered as empty string.
    local query = Template( frame, "getQuery", 3 )
    if not query then
        return ""
    end
    local key = frame.args[ 2 ]
    if key  and  mw.text.trim( key ) ~= "" then
        return query
    end
    return "?" .. query
end -- p.getQuery()
p.getRelativePath = function ( frame )
    -- Template export of URLutil.getRelativePath(), fed with
    -- parameter 1; a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getRelativePath" )
    if got then
        return got
    end
    return ""
end -- p.getRelativePath()
p.getScheme = function ( frame )
    -- Template export of URLutil.getScheme(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getScheme" )
    if got then
        return got
    end
    return ""
end -- p.getScheme()
p.getSortkey = function ( frame )
    -- Template export of URLutil.getSortkey(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getSortkey" )
    if got then
        return got
    end
    return ""
end -- p.getSortkey()
p.getTLD = function ( frame )
    -- Template export of URLutil.getTLD(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getTLD" )
    if got then
        return got
    end
    return ""
end -- p.getTLD()
p.getTop2domain = function ( frame )
    -- Template export of URLutil.getTop2domain(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getTop2domain" )
    if got then
        return got
    end
    return ""
end -- p.getTop2domain()
p.getTop3domain = function ( frame )
    -- Template export of URLutil.getTop3domain(), fed with parameter 1;
    -- a nil or false outcome is rendered as empty string.
    local got = Template( frame, "getTop3domain" )
    if got then
        return got
    end
    return ""
end -- p.getTop3domain()
p.isAuthority = function ( frame )
    -- Template export of URLutil.isAuthority(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isAuthority" ) then
        return "1"
    end
    return ""
end -- p.isAuthority()
p.isDomain = function ( frame )
    -- Template export of URLutil.isDomain(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isDomain" ) then
        return "1"
    end
    return ""
end -- p.isDomain()
p.isDomainExample = function ( frame )
    -- Template export of URLutil.isDomainExample(), fed with
    -- parameter 1; yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isDomainExample" ) then
        return "1"
    end
    return ""
end -- p.isDomainExample()
p.isDomainInt = function ( frame )
    -- Template export of URLutil.isDomainInt(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isDomainInt" ) then
        return "1"
    end
    return ""
end -- p.isDomainInt()
p.isHost = function ( frame )
    -- Template export of URLutil.isHost(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isHost" ) then
        return "1"
    end
    return ""
end -- p.isHost()
p.isHostPathResource = function ( frame )
    -- Template export of URLutil.isHostPathResource(), fed with
    -- parameter 1; yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isHostPathResource" ) then
        return "1"
    end
    return ""
end -- p.isHostPathResource()
p.isIP = function ( frame )
    -- Template export of URLutil.isIP(), fed with parameter 1.
    -- Unlike the other is...() exports the library result itself is
    -- handed back; a nil or false outcome is rendered as empty string.
    local got = Template( frame, "isIP" )
    if got then
        return got
    end
    return ""
end -- p.isIP()
p.isIPlocal = function ( frame )
    -- Template export of URLutil.isIPlocal(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isIPlocal" ) then
        return "1"
    end
    return ""
end -- p.isIPlocal()
p.isIPv4 = function ( frame )
    -- Template export of URLutil.isIPv4(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isIPv4" ) then
        return "1"
    end
    return ""
end -- p.isIPv4()
p.isIPv6 = function ( frame )
    -- Template export of URLutil.isIPv6(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isIPv6" ) then
        return "1"
    end
    return ""
end -- p.isIPv6()
p.isMailAddress = function ( frame )
    -- Template export of URLutil.isMailAddress(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isMailAddress" ) then
        return "1"
    end
    return ""
end -- p.isMailAddress()
p.isMailLink = function ( frame )
    -- Template export of URLutil.isMailLink(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isMailLink" ) then
        return "1"
    end
    return ""
end -- p.isMailLink()
p.isProtocolDialog = function ( frame )
    -- Template export of URLutil.isProtocolDialog(), fed with
    -- parameter 1; yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isProtocolDialog" ) then
        return "1"
    end
    return ""
end -- p.isProtocolDialog()
p.isProtocolWiki = function ( frame )
    -- Template export of URLutil.isProtocolWiki(), fed with
    -- parameter 1; yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isProtocolWiki" ) then
        return "1"
    end
    return ""
end -- p.isProtocolWiki()
p.isResourceURL = function ( frame )
    -- Template export of URLutil.isResourceURL(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isResourceURL" ) then
        return "1"
    end
    return ""
end -- p.isResourceURL()
p.isSuspiciousURL = function ( frame )
    -- Template export of URLutil.isSuspiciousURL(), fed with
    -- parameter 1; yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isSuspiciousURL" ) then
        return "1"
    end
    return ""
end -- p.isSuspiciousURL()
p.isUnescapedURL = function ( frame )
    -- Template export of URLutil.isUnescapedURL(), fed with
    -- parameters 1 and 2; yields "1" on a true-ish outcome,
    -- else empty string.
    if Template( frame, "isUnescapedURL", 2 ) then
        return "1"
    end
    return ""
end -- p.isUnescapedURL()
p.isWebURL = function ( frame )
    -- Template export of URLutil.isWebURL(), fed with parameter 1;
    -- yields "1" on a true-ish outcome, else empty string.
    if Template( frame, "isWebURL" ) then
        return "1"
    end
    return ""
end -- p.isWebURL()
function p.wikiEscapeURL( frame )
    -- Template export of URLutil.wikiEscapeURL(), fed with parameter 1.
    -- Falls back to empty string when there is no result (parameter 1
    -- missing or blank), so that this export always yields a string
    -- like the other template exports of this module do.
    return Template( frame, "wikiEscapeURL" )  or  ""
end -- p.wikiEscapeURL()
p.failsafe = function ( frame )
    -- Versioning interface
    -- Precondition:
    --     frame  -- table, whose args[ 1 ] holds the request, or
    --               string, with the request itself;
    --               any other type means: no request
    -- Postcondition:
    --     Returns string; the answer of Failsafe.failsafe(),
    --     with false rendered as empty string
    local kind = type( frame )
    local request
    if kind == "string" then
        request = frame
    elseif kind == "table" then
        request = frame.args[ 1 ]
    end
    -- Trim only if something has been provided; blank means false.
    local since = request  and  mw.text.trim( request )
    if since == "" then
        since = false
    end
    return Failsafe.failsafe( since )  or  ""
end -- p.failsafe()
p.URLutil = function ()
    -- Library access: hands out the URLutil table itself,
    -- so callers may use its functions directly.
    return URLutil
end -- p.URLutil()
-- Module result: the table of exported functions.
return p