-- Modul:URLutil
--
-- Aus Android Wiki
-- Zur Navigation springen Zur Suche springen
--
-- Die Dokumentation für dieses Modul kann unter Modul:URLutil/Doku erstellt werden

--[=[ URLutil 2014-09-20
Utilities for URL etc. on www.
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getTLD()
* getTop2domain()
* getTop3domain()
* getURIScheme()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolMW()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.
]=]



-- Library table: every URLutil.* function below is attached to this table,
-- which is handed out to other modules by p.URLutil() at the end of the file.
local URLutil = {}



-- Extract the scheme of a URI.
-- Parameter:
--     uri  -- string; anything else yields false
-- Returns:
--     lower-cased scheme without colon, if at least two letters are
--         followed by ":"
--     "//" for a protocol-relative reference (no letters before the slashes)
--     false otherwise
function URLutil.getURIScheme( uri )
    if type( uri ) ~= "string" then
        return false
    end
    -- Every part of the pattern is optional, so the captures are always strings.
    local scheme, sep, lead = uri:match( "^%s*([a-zA-Z]*)(:?)(/?/?)" )
    if sep == ":"  and  scheme:len() >= 2 then
        return scheme:lower()
    end
    if lead == "//"  and  scheme == "" then
        return "//"
    end
    return false
end -- getURIScheme()



-- Retrieve the trailing two or three labels of the host of a URL.
-- Parameters:
--     url   -- passed on to URLutil.getHost()
--     mode  -- 3 to ask for three labels; any other value gives two
-- Returns the matching domain string, or false if there is no host or the
-- host has no such tail.
local function getTopDomain( url, mode )
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    -- The opening parenthesis of the capture is supplied at the call below.
    local scan = "[%w%%]+%.%a[%w-]*%a)$"
    if mode == 3 then
        scan = "[%w%%]+%." .. scan
    end
    -- A dot is prepended so that the first label is preceded by one, too.
    return mw.ustring.match( "." .. host,  "%.(" .. scan )  or  false
end -- getTopDomain()



-- Retrieve the authority part (host, optionally ":port") of a URL.
-- Parameter:
--     url  -- string
-- Returns:
--     false  -- url is not a string
--     nil    -- no acceptable host found, or a colon that is not followed
--               by a port beginning with 1...9
--     string -- lower-cased host, followed by ":" and port if one is given
function URLutil.getAuthority( url )
    if type( url ) ~= "string" then
        return false
    end
    local text = mw.text.decode( url )
    -- Drop a fragment; the search for "#" begins at the 6th byte.
    local hash = text:find( "#", 6, true )
    if hash then
        text = text:sub( 1,  hash - 1 )
    end
    -- A slash is appended since the pattern requires one after the authority.
    local host, colon, port =
        mw.ustring.match( text .. "/",
                          "^%s*%w*:?//([%w%.%%-]+)(:?)([%d]*)/" )
    local r
    if URLutil.isHost( host ) then
        host = mw.ustring.lower( host )
        if colon == ":" then
            if port:find( "^[1-9]" ) then
                r = host .. ":" .. port
            end
        elseif #port == 0 then
            r = host
        end
    end
    return r
end -- URLutil.getAuthority()



-- Retrieve the fragment of a URL: everything after the first "#".
-- Parameters:
--     url     -- string
--     decode  -- optional string, trimmed before use:
--                "%"    percent-decode the fragment (mw.uri.decode, "PATH")
--                "WIKI" turn ".XX" hex escapes into "%XX" and "_" into
--                       space first, then percent-decode
--                anything else leaves the fragment untouched
-- Returns:
--     nil    -- url is not a string
--     false  -- no "#" present
--     string -- fragment without the leading "#"
function URLutil.getFragment( url, decode )
    if type( url ) ~= "string" then
        return nil
    end
    local text = mw.text.decode( url )
    local hash = text:find( "#", 1, true )
    if not hash then
        return false
    end
    local fragment = mw.text.trim( text:sub( hash ) ):sub( 2 )
    if type( decode ) == "string" then
        local encoding = mw.text.trim( decode )
        if encoding == "WIKI" then
            -- Assigning to one variable drops the gsub replacement count.
            fragment = fragment:gsub( "%.(%x%x)", "%%%1" )
            fragment = fragment:gsub( "_", " " )
        end
        if encoding == "%"  or  encoding == "WIKI" then
            fragment = mw.uri.decode( fragment, "PATH" )
        end
    end
    return fragment
end -- URLutil.getFragment()



-- Retrieve the host of a URL, i.e. the authority without any ":port".
-- Returns the host string; if URLutil.getAuthority() yields false or nil,
-- that value is returned unchanged.
function URLutil.getHost( url )
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local host = mw.ustring.match( authority, "^([%w%.%%-]+):?[%d]*$" )
    return host
end -- URLutil.getHost()



-- Retrieve a URL without its fragment.
-- Parameter:
--     url  -- string
-- Returns:
--     nil    -- url is not a string
--     false  -- url is empty after trimming, or starts with "#"
--     string -- trimmed, entity-decoded URL up to (not including) "#"
function URLutil.getLocation( url )
    if type( url ) ~= "string" then
        return nil
    end
    local location = mw.text.trim( url )
    if location == "" then
        return false
    end
    location = mw.text.decode( location )
    local hash = location:find( "#", 1, true )
    if hash == 1 then
        return false
    elseif hash then
        location = location:sub( 1,  hash - 1 )
    end
    return location
end -- URLutil.getLocation()



-- Retrieve the path of a URL, without query and fragment.
-- Returns the string from URLutil.getRelativePath() cut before the first
-- "?" and then before the first "#"; a false or nil result of
-- URLutil.getRelativePath() is returned unchanged.
function URLutil.getPath( url )
    local path = URLutil.getRelativePath( url )
    if path then
        -- A successful match may be the empty string, which is still truthy.
        path = path:match( "^([^%?]*)%?" )  or  path
        path = path:match( "^([^#]*)#" )  or  path
    end
    return path
end -- URLutil.getPath()



-- Retrieve the port number of a URL.
-- Returns:
--     number -- port given after the host
--     false  -- the authority carries no port
--     the false or nil result of URLutil.getAuthority(), unchanged
function URLutil.getPort( url )
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    if digits then
        return tonumber( digits )
    end
    return false
end -- URLutil.getPort()



-- Retrieve the query of a URL, or the value of one query parameter.
-- Parameters:
--     url        -- string
--     key        -- optional string; name of a single parameter.
--                   Surrounding whitespace is ignored and the name is
--                   matched literally.
--     separator  -- optional string; one of & ; , /  (default "&").
--                   Only evaluated if key is given.
-- Returns:
--     string -- entire query without the leading "?" (no key given), or
--               the value of the first parameter named key
--     false  -- no query, or no parameter of that name
--     the nil result of URLutil.getLocation(), unchanged
URLutil.getQuery = function ( url, key, separator )
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r then
            if type( key ) == "string" then
                local single = mw.text.trim( key )
                local sep = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- The name becomes part of a Lua pattern: escape every
                -- punctuation character so that it stands for itself.
                -- Assigning to one variable drops the replacement count.
                single = single:gsub( "%p", "%%%0" )
                -- Enclose the query in separators so that the first and
                -- the last parameter are delimited like all the others.
                s = string.format( "%s%s%s", sep, r, sep )
                scan = string.format( "%s%s=([^%s]*)%s",
                                      sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()



-- Split the query of a URL into a table of parameters.
-- Parameters:
--     url        -- string
--     separator  -- optional string; one of & ; , /  (default "&")
-- Returns:
--     table  -- maps each name to its value string; a component without
--               "=" after its first character maps to false
--     the false or nil result of URLutil.getQuery(), unchanged
function URLutil.getQueryTable( url, separator )
    local query = URLutil.getQuery( url )
    if not query then
        return query
    end
    local sep = "&"
    if type( separator ) == "string" then
        local choice = mw.text.trim( separator )
        if choice:match( "^[&;,/]$" ) then
            sep = choice
        end
    end
    local parts  = mw.text.split( query, sep, true )
    local result = { }
    for i = 1, #parts do
        local item = parts[ i ]
        if item:find( "=", 2, true ) then
            local name, value = item:match( "^([^=]+)=(.*)$" )
            -- No match if the component begins with "="; it is skipped.
            if name then
                result[ name ] = value
            end
        else
            result[ item ] = false
        end
    end -- for i
    return result
end -- URLutil.getQueryTable()



-- Retrieve everything of a URL that follows the authority.
-- Parameter:
--     url  -- string; with scheme ("x://host/..."), protocol-relative
--             ("//host/...") or already host-relative ("/...")
-- Returns:
--     nil    -- url is not a string
--     string -- trimmed remainder, beginning with "/"
--     "/"    -- nothing follows the authority of a resource URL
--     false  -- otherwise
function URLutil.getRelativePath( url )
    if type( url ) ~= "string" then
        return nil
    end
    local path
    local rest = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if rest then
        -- Skip the authority: begin at the first slash that follows it.
        path = rest:match( "[^/]+(/.*)$" )
    else
        local lead
        lead, path = url:match( "^%s*(/?)(/.*)$" )
        if lead == "/" then
            -- Protocol-relative: path still starts with "/host".
            path = path:match( "/[^/]+(/.*)$" )
        end
    end
    if path then
        path = mw.text.trim( path )
        return path
    end
    if URLutil.isResourceURL( url ) then
        return "/"
    end
    return false
end -- URLutil.getRelativePath()



-- Retrieve the scheme of a URL, including the slashes.
-- Parameter:
--     url  -- string
-- Returns:
--     nil    -- url is not a string
--     string -- lower-cased scheme followed by "://", if the scheme has
--               more than two letters; or "//" if protocol-relative
--     false  -- otherwise
function URLutil.getScheme( url )
    if type( url ) ~= "string" then
        return nil
    end
    local scheme, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        -- Pattern did not match at all.
        return false
    end
    if colon == ":" then
        if scheme:len() > 2 then
            return scheme:lower() .. "://"
        end
        return false
    end
    if scheme == "" then
        return "//"
    end
    return false
end -- URLutil.getScheme()



-- Retrieve the top-level domain of the host of a URL.
-- Returns:
--     string -- last label of the host
--     false  -- the host does not end in such a label
--     the false or nil result of URLutil.getHost(), unchanged
function URLutil.getTLD( url )
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    return mw.ustring.match( host, "[%w]+%.(%a[%w-]*%a)$" )  or  false
end -- URLutil.getTLD()



-- Retrieve the last two labels of the host of a URL; false if not available.
function URLutil.getTop2domain( url )
    local depth = 2
    return getTopDomain( url, depth )
end -- URLutil.getTop2domain()



-- Retrieve the last three labels of the host of a URL; false if not available.
function URLutil.getTop3domain( url )
    local depth = 3
    return getTopDomain( url, depth )
end -- URLutil.getTop3domain()



-- Check whether a string is a valid authority: host, optionally ":port".
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns:
--     nil    -- s is not a string
--     false  -- s does not have the shape host[:port], or a colon is
--               not followed by a port number starting with 1...9
--     otherwise the result of URLutil.isHost() for the host part
URLutil.isAuthority = function ( s )
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if not host then
            r = false
        elseif colon == ":"  and
               not port:match( "^[1-9][0-9]*$" ) then
            -- A rejected port is final; the host check must not be
            -- allowed to override it.
            r = false
        else
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()



-- Check whether a string looks like a domain name.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns:
--     true   -- name part and top-level label are well-formed
--     false  -- the name part contains two adjacent dots
--     nil    -- s is not a string, does not match the domain pattern,
--               or the name part does not begin with an alphanumeric
function URLutil.isDomain( s )
    if type( s ) ~= "string" then
        return nil
    end
    -- Only the first capture (everything before the last label) is needed.
    local name = mw.ustring.match( s,
                                   "^%s*([%w%.%%-]+%w)%.(%a[%w-]*%a)%s*$" )
    if type( name ) == "string"  and  mw.ustring.find( name, "^%w" ) then
        return not mw.ustring.find( name, "..", 1, true )
    end
    return nil
end -- URLutil.isDomain()



-- Check whether the host of a URL belongs to a reserved example domain:
-- example.com example.net example.org example.edu (cf. RFC 2606).
-- Returns true or false; the false result of getTopDomain() is passed
-- through if there is no second-level domain.
function URLutil.isDomainExample( url )
    local domain = getTopDomain( url, 2 )
    if not domain then
        return domain
    end
    local reserved = { com = true,
                       edu = true,
                       net = true,
                       org = true }
    local tld = domain:lower():match( "^example%.([a-z][a-z][a-z])$" )
    return ( tld ~= nil  and  reserved[ tld ] == true )
end -- URLutil.isDomainExample()



-- Check for an Internationalized Domain Name.
-- Returns:
--     true   -- the host contains a character outside "!" ... "~", or
--               one of its labels starts with "xn--" (Punycode)
--     false  -- host consists of "!" ... "~" only, without "xn--" label
--     the false or nil result of URLutil.getHost(), unchanged
function URLutil.isDomainInt( url )
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    if not host:match( "^[!-~]+$" ) then
        return true
    end
    -- A dot is prepended so that the first label is preceded by one, too.
    return ( ( "." .. host ):find( ".xn--", 1, true ) ~= nil )
end -- URLutil.isDomainInt()



-- Check whether a string is a host: either a domain or an IP address.
-- Returns the truthy result of URLutil.isDomain(), else whatever
-- URLutil.isIP() returns.
function URLutil.isHost( s )
    local r = URLutil.isDomain( s )
    if not r then
        r = URLutil.isIP( s )
    end
    return r
end -- URLutil.isHost()



-- Tell the IP version of an address.
-- Returns 4 for IPv4, 6 for IPv6, and a false value otherwise.
function URLutil.isIP( s )
    if URLutil.isIPv4( s ) then
        return 4
    end
    return URLutil.isIPv6( s )  and  6
end -- URLutil.isIP()



-- Check whether an IPv4 address is a local one.
-- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
-- Parameter:
--     s  -- string, dotted decimal; leading and trailing spaces permitted
-- Returns true for 0.*.*.*, 10.*.*.*, 127.*.*.*, 172.16-31.*.* and
-- 192.168.*.*; false for anything else, including non-string input.
URLutil.isIPlocal = function ( s )
    local r = false
    if type( s ) ~= "string" then
        -- Nothing to inspect; do not raise an error on s:match().
        return r
    end
    -- All ranges of interest have a first octet beginning with 0 or 1.
    local num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10  or  num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 169 then
            -- 169.254.*.*
            -- NOTE(review): this branch is deliberately left as found;
            -- link-local addresses are not reported as local. Confirm
            -- whether that is intended.
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16  and  num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    -- Reduce a matched string to a plain boolean.
    if r then
        r = true
    end
    return r
end -- URLutil.isIPlocal()



-- Check whether a string is an IPv4 address in dotted decimal notation.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s consists of four numbers below 256, the first one
-- not starting with 0; false otherwise (also if s is not a string).
function URLutil.isIPv4( s )
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets < 4 then
        -- Pattern did not match.
        return false
    end
    for i = 1, 4 do
        if tonumber( octets[ i ] ) >= 256 then
            return false
        end
    end -- for i
    return true
end -- URLutil.isIPv4()



-- Check whether a string is an IPv6 address (plain, not bracketed).
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s consists of eight groups of 1...4 hex digits
-- separated by colons, or fewer groups with exactly one "::";
-- false otherwise (also if s is not a string).
URLutil.isIPv6 = function ( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim before validating; the character check below would otherwise
    -- reject any padded input and the trimming could never take effect.
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



-- Check whether a string is an e-mail address: local part, "@", domain.
-- Returns false if s is not a string; otherwise the result of
-- URLutil.isDomain() for the part after "@" (nil if s has no such shape).
function URLutil.isMailAddress( s )
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s,
                                     "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()



-- Check whether a string is a "mailto:" link with an e-mail address.
-- Returns false if s is not a string or does not have that shape;
-- otherwise the result of URLutil.isMailAddress() for the address.
function URLutil.isMailLink( s )
    if type( s ) ~= "string" then
        return false
    end
    local scheme, address =
        mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
    if type( scheme ) ~= "string"  or  scheme:lower() ~= "mailto" then
        return false
    end
    return URLutil.isMailAddress( address )
end -- URLutil.isMailLink()



-- Check a protocol specification against a list of permitted schemes.
-- Parameters:
--     prot      -- string; "scheme", "scheme:", "scheme://" or "//",
--                  surrounding whitespace tolerated
--     supplied  -- string; permitted schemes in lower case, each one
--                  preceded and followed by a space
-- Returns true if prot is "//" or names a permitted scheme; else false.
local function isProtocolAccepted( prot, supplied )
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes =
        mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if not scheme  or  slashes == "/" then
        -- Not a protocol specification, or a single slash.
        return false
    end
    if scheme == "" then
        -- Protocol-relative: plain "//" only, no colon.
        return ( colon ~= ":"  and  slashes == "//" )
    end
    if colon == ":"  or  slashes == "" then
        -- scheme consists of letters only, so a plain search is adequate.
        local seek = " " .. scheme:lower() .. " "
        return ( supplied:find( seek, 1, true ) ~= nil )
    end
    return false
end -- isProtocolAccepted()



-- Check whether a protocol is contained in the list below; see
-- isProtocolAccepted() for the permitted notations.
function URLutil.isProtocolMW( prot )
    local accepted = " http https ftp ftps ssh sftp irc ircs xmpp sip sips gopher telnet nntp worldwind mailto tel sms news svn git mms bitcoin magnet urn geo "
    return isProtocolAccepted( prot, accepted )
end -- URLutil.isProtocolMW()



-- Check whether a protocol is one of mailto, irc, ircs, ssh, telnet;
-- see isProtocolAccepted() for the permitted notations.
function URLutil.isProtocolDialog( prot )
    local accepted = " mailto irc ircs ssh telnet "
    return isProtocolAccepted( prot, accepted )
end -- URLutil.isProtocolDialog()



-- Check whether a protocol is one of ftp, ftps, git, http, https, nntp,
-- sftp, svn, worldwind; see isProtocolAccepted() for permitted notations.
function URLutil.isProtocolWiki( prot )
    local accepted = " ftp ftps git http https nntp sftp svn worldwind "
    return isProtocolAccepted( prot, accepted )
end -- URLutil.isProtocolWiki()



-- Check whether a URL addresses a resource: scheme http, https, ftp,
-- sftp or protocol-relative, a valid authority, and no whitespace
-- between non-whitespace characters.
-- Returns true or false.
function URLutil.isResourceURL( url )
    local scheme = URLutil.getScheme( url )
    if not scheme then
        return false
    end
    local known = " // http:// https:// ftp:// sftp:// "
    if not known:find( " " .. scheme .. " " ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    return ( url:match( "%S%s+%S" ) == nil )
end -- URLutil.isResourceURL()



-- Check whether a URL should be inspected by a human.
-- Returns true if url is no resource URL at all, or if it contains
--     * "@" in the authority
--     * two adjacent apostrophes
--     * a square bracket or pipe symbol
--     * a character U+2009...U+200F, U+202A...U+202F or U+2060
--     * a dot or comma at the very end
-- and false otherwise.
URLutil.isSuspiciousURL = function ( url )
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        -- Character class with two code point ranges; 45 is the hyphen.
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        -- The class holds multi-byte characters and must be evaluated
        -- per code point. A byte-oriented string.find() would read the
        -- ranges as byte ranges and flag nearly every non-ASCII URL.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



-- Check whether a web URL contains a square bracket or pipe symbol that
-- has not been escaped.
-- Parameters:
--     url       -- string
--     trailing  -- if this is a string, the result is always false
-- Returns true or false.
function URLutil.isUnescapedURL( url, trailing )
    if type( trailing ) == "string" then
        return false
    end
    if not URLutil.isWebURL( url ) then
        return false
    end
    return ( url:find( "[%[|%]]" ) ~= nil )
end -- URLutil.isUnescapedURL()



-- Check whether a string is a URL with scheme and authority, and without
-- whitespace between non-whitespace characters.
-- Returns true or false.
function URLutil.isWebURL( url )
    local addressed = URLutil.getScheme( url )
                      and  URLutil.getAuthority( url )
    if addressed then
        return ( url:find( "%S%s+%S" ) == nil )
    end
    return false
end -- URLutil.isWebURL()



-- Replace square brackets and pipe symbols in a URL by numeric HTML
-- entities, so that they do not interfere with wiki syntax.
-- Parameter:
--     url  -- string
-- Returns the string, escaped if necessary.
function URLutil.wikiEscapeURL( url )
    if url:find( "[%[|%]]" ) then
        -- No entity contains any of the three characters, so a single
        -- pass yields the same as three successive replacements.
        local entities = { [ "[" ] = "&#91;",
                           [ "|" ] = "&#124;",
                           [ "]" ] = "&#93;" }
        url = url:gsub( "[%[|%]]", entities )
    end
    return url
end -- URLutil.wikiEscapeURL()



-- Provide template access and expose URLutil table to require

local p = {}

-- Export URLutil[ name ] for templates: called with the first unnamed
-- parameter; a false or nil result is turned into the empty string.
local function exportValue( name )
    p[ name ] = function ( frame )
        return URLutil[ name ]( frame.args[ 1 ] )  or  ""
    end
end -- exportValue()

-- Export URLutil[ name ] for templates: called with the first unnamed
-- parameter; yields "1" if the result is true-like, else the empty string.
local function exportFlag( name )
    p[ name ] = function ( frame )
        return URLutil[ name ]( frame.args[ 1 ] )  and  "1"  or  ""
    end
end -- exportFlag()

local values = { "getURIScheme",
                 "getAuthority",
                 "getHost",
                 "getLocation",
                 "getPath",
                 "getPort",
                 "getRelativePath",
                 "getScheme",
                 "getTLD",
                 "getTop2domain",
                 "getTop3domain",
                 "isIP" }
for i = 1, #values do
    exportValue( values[ i ] )
end -- for i

local flags = { "isAuthority",
                "isDomain",
                "isDomainExample",
                "isDomainInt",
                "isHost",
                "isIPlocal",
                "isIPv4",
                "isIPv6",
                "isMailAddress",
                "isMailLink",
                "isProtocolMW",
                "isProtocolDialog",
                "isProtocolWiki",
                "isResourceURL",
                "isSuspiciousURL",
                "isWebURL" }
for i = 1, #flags do
    exportFlag( flags[ i ] )
end -- for i

-- 1: URL;  2: decoding mode.  The fragment is returned with leading "#".
function p.getFragment( frame )
    local fragment = URLutil.getFragment( frame.args[ 1 ], frame.args[ 2 ] )
    if fragment then
        return "#" .. fragment
    end
    return ""
end

-- 1: URL;  2: parameter name;  3: separator.
-- Without parameter name the entire query is returned with leading "?".
function p.getQuery( frame )
    local key = frame.args[ 2 ]
    if key then
        key = mw.text.trim( key )
        if key == "" then
            key = nil
        end
    end
    local query = URLutil.getQuery( frame.args[ 1 ], key, frame.args[ 3 ] )
    if not query then
        return ""
    end
    if key then
        return query
    end
    return "?" .. query
end

-- 1: URL;  2: passed on as second argument (trailing)
function p.isUnescapedURL( frame )
    local found = URLutil.isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] )
    return found  and  "1"  or  ""
end

-- 1: URL;  the result of URLutil.wikiEscapeURL() is returned as it is.
function p.wikiEscapeURL( frame )
    return URLutil.wikiEscapeURL( frame.args[ 1 ] )
end

-- Hand out the library table to other modules.
function p.URLutil()
    return URLutil
end

return p