/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "context.h"
#include "url_scheme.h"
#include "url_sanitise.h"
#include "quote.h"


// Stores the text sz in slot e of p and hands res straight back, so a caller
// can record a value and return a status in one statement.
//   e   - slot to overwrite; asserted to be no greater than last_component
//   p   - component vector that receives the text
//   sz  - C string to store
//   res - value returned unchanged
bool set_component (const e_component e, vc_t& p, const char* sz, const bool res)
{   assert (e <= last_component);
    const ::std::string text (sz);
    p [e] = text;
    return res; }

// Records sz as the diagnosis (slot es_diagnosis) of p and returns res.
bool is_error (vstr_t& p, const char* sz, const bool res)
{   const bool outcome = set_component (es_diagnosis, p, sz, res);
    return outcome; }

// Records sz as the diagnosis (slot es_diagnosis) of p; unlike is_error there
// is no status to report, so the result of set_component is discarded.
void is_warning (vstr_t& p, const char* sz)
{   static_cast < void > (set_component (es_diagnosis, p, sz, false)); }


// Reports whether two parsed URLs refer to the same resource.
// Authority, server, path and fragment must match exactly. The files must
// either match, or one must be empty while the other equals context.index ()
// (so "dir/" and "dir/<index>" are treated as the same thing).
// Scheme, query, user, password and port are not compared.
bool equivalent_rfc3986 (const vc_t& lhs, const vc_t& rhs)
{   if (lhs [es_authority] != rhs [es_authority]) return false;
    if (lhs [es_server] != rhs [es_server]) return false;
    // differing paths can never be equivalent; without this return the path
    // test merely guarded the file test below
    if (lhs [es_path] != rhs [es_path]) return false;
    if (lhs [es_file] != rhs [es_file])
    {   if (lhs [es_file].empty ()) { if (rhs [es_file] != context.index ()) return false; }
        else if (rhs [es_file].empty ()) { if (lhs [es_file] != context.index ()) return false; }
        else return false; }
    return (lhs [es_fragment] == rhs [es_fragment] ); }

// Splits the URL text s into its RFC 3986 parts and stores them in component.
//   prot      - protocol classification supplied by the caller; pr_other only
//               causes an "unrecognised protocol" warning to be recorded
//   s         - the URL text to analyse
//   component - receives the parts (es_scheme, es_authority, es_path, es_file,
//               es_query, es_fragment, es_user, es_password, es_port, es_server);
//               must hold more than last_component entries
// Returns true when the URL is accepted. On rejection a message is written to
// component [es_diagnosis] and the result of is_error is returned; the parts
// are only stored on success.
// NOTE(review): the two-argument is_error calls rely on a default result
// declared outside this file - presumably false; confirm in the header.
// NOTE(review): separate, remove_head, remove_tail and trim_the_lot_off are
// defined elsewhere; this code assumes they drop the separator they split on.
bool parse_rfc3986 (const e_protocol prot, const ::std::string& s, vc_t& component)
{   // RFC 3986
    //       URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    assert (component.size () > last_component);
    ::std::string url (s);
    ::std::string scheme, hier_part, authority, path, file, user, insecure_password, ipv6, ipv4, domain, host, port;

    if (url.empty ()) return is_error (component, "empty URL", false);

    if (! separate (url, scheme, hier_part, COLON))
    {   scheme = HTTP_PROTOCOL; hier_part = url; }  // no scheme present: fall back to HTTP_PROTOCOL
    else
    {   if (scheme.empty ()) return is_error (component, "protocol cannot be empty");
        if (scheme.find_first_not_of (ALPHABET DENERY "+-.") != ::std::string::npos)
           return is_error (component, "illegal character in protocol");
        if (scheme.substr (0, 1).find_first_not_of (ALPHABET) != ::std::string::npos)
            return is_error (component, "first character of protocol must be a letter");
        if (prot == pr_other)
        {   ::std::string err ("unrecognised protocol ");
            err += quote (scheme);
            is_warning (component, err.c_str ()); } }

    if (hier_part.empty ()) return is_error (component, "a URL needs more than just a protocol");

    // the fragment must come off before the query: a '?' inside a fragment is not a query separator
    ::std::string fragments = remove_tail (hier_part, HASH);
    ::std::string queries = remove_tail (hier_part, QUESTION);

    // a leading "//" introduces an authority; otherwise everything left is path
    if ((hier_part.length () > 1) && (hier_part [0] == SLASH) && (hier_part [1] == SLASH))
    {   authority = trim_the_lot_off (hier_part.substr (2));
        path = remove_tail (authority, SLASH); }
    else path = hier_part;

    bool authority_empty = authority.empty ();
    if (! authority_empty)
    {   if (remove_head (authority, user, AT))
        {   if (remove_tail (user, insecure_password, COLON))
            {   if (insecure_password.empty ()) is_warning (component, "empty password");
                else is_warning (component, "passwords in URLs are dangerously insecure; use an alternative authentication mechanism"); }
            if (user.empty ()) return is_error (component, "username missing before '@'"); }
        if (authority [0] == SQOPEN)
        {   // bracketed IP literal: [address] optionally followed by :port
            authority = authority.substr (1);
            if (! separate (authority, host, port, SQCLOSE)) return is_error (component, "malformed ip6 address (no ']' found)");
            if (! port.empty ())
            {   if (port [0] != COLON) return is_error (component, "unexpected characters follow ip6 address");
                port = port.substr (1); }
            if (host.empty ()) return is_error (component, "empty ipv6 address");
            // IPvFuture literals begin with 'v' (RFC 3986 3.2.2); anything else must consist of hex digits, ':' and '.'
            if ((host [0] != 'v') && (host [0] != 'V') && (host.find_first_not_of (HEX ":.") != host.npos))
                return is_error (component, "invalid ipv6 address");
            ipv6 = host; }
        else if (separate (authority, host, port, COLON))
        {   if (! port.empty ())
            {   if (port.find_first_not_of (DENERY) != port.npos)
                    return is_error (component, "port must be an unsigned integer");
                const int tst = lexical < int > :: cast (port, 65536);
                if ((tst < 0) || (tst > 65535)) return is_error (component, "port out of range"); } }

        if (host.empty ()) return is_error (component, "host cannot be empty");
        if (ipv6.empty ())  // a bracketed literal was validated above and must not be re-checked as ipv4 or a domain
        {   if ((host [0] >= '0') && (host [0] <= '9'))
            {   // dotted decimal, so decimal digits (not octal ones) are the legal characters
                if (host.find_first_not_of (DENERY ".") != host.npos) return is_error (component, "illegal character in ipv4 address");
                ::std::string rest (host);  // consumed octet by octet; host itself is kept for the result
                vstr_t octal;
                octal.resize (4);
                for (int i = 0; i < 4; ++i)
                {   const ::std::string::size_type dot = rest.find (DOT);
                    // the first three octets must be followed by a dot, the fourth must not be
                    if ((dot == rest.npos) != (i == 3)) return is_error (component, "bad ipv4 address");
                    octal [i] = rest.substr (0, dot);
                    if (lexical < int > :: cast (octal [i], 256) > 255) // yeah, yeah, I know, but it's just a value check
                        return is_error (component, "all four ipv4 octals must be present and each not exceed 255");
                    if (dot != rest.npos) rest = rest.substr (dot + 1); }  // step past the dot
                if (octal [3].empty ()) return is_error (component, "incomplete ipv4 address");
                ipv4 = host; }
            else
            {   if (host.find_first_not_of (ALPHABET DDD) != host.npos)
                    return is_error (component, "illegal character in domain");
                domain = host; } } }

    if (! path.empty ())
    {   ::std::string pp (path);
        path.clear ();
        bool slashed = false;
        for (auto ch : pp)  // replaces repeated slashes with singletons
        {   if (ch != SLASH) slashed = false;
            else if (slashed) continue;
            else slashed = true;
            path += ch; }
        // anything after the last slash is the file; a path ending in a slash has no file
        ::std::string::size_type len = path.length ();
        if (len > 0)
            if (path [len - 1] != SLASH)
            {   ::std::string::size_type pos = path.find_last_of (SLASH);
                if (pos == ::std::string::npos)
                {   file = path;
                    path.clear (); }
                else
                {   file = path.substr (pos + 1);
                    path = path.substr (0, pos); } } }

    component [es_scheme] = scheme;
    component [es_query] = queries;
    component [es_fragment] = fragments;
    component [es_authority] = authority;
    component [es_path] = path;
    component [es_file] = file;
    component [es_user] = user;
    component [es_password] = insecure_password;
    component [es_port] = port;

    // es_server is only written when a host was recognised
    if (! domain.empty ()) component [es_server] = domain;
    else if (! ipv4.empty ()) component [es_server] = ipv4;
    else if (! ipv6.empty ()) component [es_server] = ipv6;

    return true; }

::std::string get_rfc3986 (const vc_t& component, bool defaulted)
{   ::std::string res;
    if (! defaulted)
    {   res += component [es_scheme];
        if (! component [es_path].empty () || ! component [es_file].empty ()) res += CSS;
        else res += COLON; }
    if (! component [es_user].empty ())
    {   res += component [es_user];
        if (! component [es_password].empty ())
        {   res += COLON;
            res += component [es_password]; }
        res += AT; }
    if (! component [es_authority].empty ())
    {   res += component [es_authority];
        if (! component [es_path].empty ())
            if (component [es_path][0] != SLASH)
                res += SLASH; }
    res += component [es_path];
    ::std::string::size_type len = component [es_path].length () - 1;
    if (component [es_path][len] != SLASH)
        res += SLASH;
    res += component [es_file];
    if (! component [es_query].empty ())
    {   res += QUESTION;
        res += component [es_query]; }
    if (! component [es_fragment].empty ())
    {   res += HASH;
        res += component [es_fragment]; }
    return res; }

::std::string absolute_rfc3986 (const vc_t& component, bool can_use_index, bool defaulted)
{   ::std::string res;
    if (! defaulted)  // perhaps rewrite with boost path??
    {   res += component [es_scheme];
        if (! component [es_path].empty () || ! component [es_file].empty ()) res += CSS;
        else res += COLON; }
    if (! component [es_user].empty ())
    {   res += component [es_user];
        if (! component [es_password].empty ())
        {   res += COLON;
            res += component [es_password]; }
        res += AT; }
    if (! component [es_authority].empty ())
        res += component [es_authority];
    else if (context.site ().empty ()) // FFS!
        res += DEFAULT_DOMAIN;
    else res += context.site ().at (0);
    if (! component [es_path].empty ())
    {   if (component [es_path][0] != SLASH)
            res += SLASH;
        res += component [es_path];
        ::std::string::size_type len = component [es_path].length () - 1;
        if (component [es_path][len] != SLASH)
            res += SLASH;
        if (! component [es_file].empty ())
            res += component [es_file];
        else if (can_use_index)
        {   len = component [es_path].length () - 1;
            if (component [es_path][len] != SLASH)
                res += SLASH;
            res += context.index (); } }
    else if (! component [es_file].empty ())
        res += component [es_file];
    else if (can_use_index)
        res += context.index ();
    if (! component [es_query].empty ())
    {   res += QUESTION;
        res += component [es_query]; }
    if (! component [es_fragment].empty ())
    {   res += HASH;
        res += component [es_fragment]; }
    return res; }
