/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence,  or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "common.h"
#include "context.h"
#include "external.h"
#include "page.h"
#include "directory.h"
#include <memory>

using namespace boost::filesystem;

// Directory that is_present () consults for links whose first path element is "/".
// Starts out null; nothing in the code shown here assigns it.
directory* directory::root_ = nullptr;
// One checker shared by all directory objects; verify_external () hands external URLs to it.
external directory::external_;

// Follow the link elements [i, last) through the directory tree, starting in this directory.
// Returns true as soon as an element names an entry with no directory object (which is how
// add_to_content () records regular files), whatever elements may follow it.
// Returns false when the elements run out, an element is empty, ".." is met in a directory
// with no parent, or a name is absent from this directory's content.
// i is advanced past each element that is consumed.
bool directory::is_present (::boost::filesystem::path::iterator& i, const ::boost::filesystem::path::iterator& last) const
{   if (i == last || i -> empty ()) return false;
    const ::std::string name (i -> string ());
    if (name == ".")
    {   ++i;    // "." keeps the search in this directory
        return is_present (i, last); }
    if (name == "..")
    {   if (mummy_ == nullptr) return false;    // nowhere to climb to
        ++i;
        return mummy_ -> is_present (i, last); }
    const auto found = content_.find (name);
    if (found == content_.end ()) return false;
    if (found -> second == nullptr) return true;    // entry without a directory object
    ++i;
    return found -> second -> is_present (i, last); }

// Report whether link resolves to an entry in the scanned tree.
// An empty link is never present. A link whose first element is "/" is resolved from root_
// (which must be non-null in that case); any other link is resolved from this directory.
bool directory::is_present (const ::boost::filesystem::path& link) const
{   auto first = link.begin ();
    const auto last = link.end ();
    if (link.empty () || first == last) return false;
    if (first -> string () != "/") return is_present (first, last);
    ++first;    // step over the leading "/"
    return root_ -> is_present (first, last); }

bool directory::scan ()
{   assert (! offsite_);
    if (invalid ()) return false;
    for (directory_entry& x : directory_iterator (path_))
        if (! add_to_content (x)) return false;
    return true; }

// Record one directory entry in content_, keyed by its filename.
// A regular file is stored with a null pointer; a directory is stored with a newly created
// directory object whose parent is this one. Returns whether the insertion took place;
// anything that is neither a regular file nor a directory yields false.
bool directory::add_to_content (::boost::filesystem::directory_entry& i)
{   const ::boost::filesystem::path& where = i.path ();
    if (is_regular_file (where))
        return content_.insert (value_t (where.filename ().string (), nullptr)).second;
    if (! is_directory (where)) return false;
    return content_.insert (value_t (where.filename ().string (), self_ptr (new directory (where.string (), this)))).second; }

// An offsite directory is never invalid; any other directory is invalid unless path_
// exists and is a directory.
bool directory::invalid () const
{   if (offsite_) return false;
    return ! exists (path_) || ! is_directory (path_); }

// Make virt available as a child of this directory, backed by a new directory object
// constructed from path. If virt is already present nothing is added and true is returned;
// otherwise the result says whether the insertion into content_ took place.
// Asserts that the directory is not offsite.
bool directory::add_virtual (const ::std::string& virt, const ::std::string& path)
{   assert (! offsite_);
    if (! is_present (virt))
        return content_.insert (directory::value_t (virt, self_ptr (new directory (path, this)))).second;
    return true; }

// Examine every entry of this directory, descending into subdirectories.
// Entries holding a directory object are examined recursively. Entries without one are
// examined only when is_webpage () accepts their name against context.extensions (): the
// file is read, its encoding is taken from its byte order mark, and a page object is built,
// examined and reported on. Everything produced for one file is collected in a string
// stream and written to std::cout in a single operation.
// Asserts that the directory is not offsite.
void directory::examine ()
{   assert (! offsite_);
    // bind by reference: iterating by value would copy the name string and the
    // directory pointer of every entry on every pass
    for (auto& entry : content_)
        if (entry.second != nullptr)
            entry.second -> examine ();
        else if (is_webpage (entry.first, context.extensions ()))
        {   ::std::ostringstream ss;
            path name (path_);
            name /= entry.first;
            context.filename (name.string ());
            if (context.tell (e_severe)) ss << "*** " << context.filename () << "\n";
            try
            {   ::std::string content (read_text_file (context.filename ()));
                if (! content.empty ())
                {   myencoding_t encoding = bom_to_encoding (get_byte_order (content));
                    if (encoding == MyENCODING_LAST_ENTRY) ss << "Unsupported byte order (ASCII, ANSI, UTF-8 or UTF-16, please)\n";
                    else
                    {   page web (entry.first, content, encoding);
                        if (! web.invalid ())
                        {   web.examine (*this, 0);
                            web.mf_write (ss, name);
                            ss << web.report (); } } } }
            catch (...)
            {  // any failure while reading or examining this file is reported (if errors are
               // being told) and the remaining entries are still examined
               if (context.tell (e_error)) ss << "Cannot parse " << context.filename () << "\n"; }
            ::std::cout << ss.str (); } }

// Check one URL found in a page. Returns true when the URL is acceptable, false when it is
// not; explanatory text (without a trailing newline) may be appended to ss.
//  - An empty URL, or one starting with '#', is accepted without further checks.
//  - A URL with a scheme other than http/https, or an http/https URL whose host is not one
//    of context.site (), is passed to verify_external () when context.external () is set,
//    and accepted otherwise.
//  - Everything else is treated as a path on this site: a trailing '/' has context.index ()
//    appended, the path is appended to context.base (), and the result is looked up with
//    is_present (). Offsite directories accept every such path.
bool directory::unguarded_verify_url (const ::std::string& url, ::std::ostringstream& ss) const
{   if (url.empty ()) return true; // e.g. self
    if (url.at (0) == '#') return true; // e.g. id on self; should add code to check ids.
    ::std::string u (get_page_url (url));
    if (u.empty ())
    {   if (context.tell (e_warning)) ss << "duff URL " << url;
        return false; }
    size_t colon = u.find (':');
    // A scheme cannot contain '/' (RFC 3986), so a colon that comes after a slash is part of
    // a path segment, e.g. "wiki/Category:Foo": treat such a URL as having no scheme.
    if (colon != ::std::string::npos && u.find ('/') < colon) colon = ::std::string::npos;
    if (colon != ::std::string::npos)
    {   if (u.length () < colon + 3)
        {   // too short to hold "//" after the colon, so it cannot be an http(s) URL
            if (context.external ()) return verify_external (url, ss);
            return true; }
        ::std::string lhs = u.substr (0, colon);
        // NOTE(review): assumes compare_no_case returns true on a match - confirm
        if (! compare_no_case (lhs, HTTP_PROTOCOL) && ! compare_no_case (lhs, HTTPS_PROTOCOL))
        {   if (context.external ()) return verify_external (url, ss);
            return true; }
        if (u.substr (colon + 1, 2) != "//")
        {   if (context.tell (e_warning)) ss << "malformed URL " << url;
            return false; }
        // the host runs from just after "//" to the next '/', or to the end of the URL
        size_t eop = u.find ('/', colon + 3);
        if (eop == ::std::string::npos) eop = u.length ();
        if (! is_one_of (u.substr (colon + 3, eop - colon - 3), context.site ()))
        {   if (context.external ()) return verify_external (url, ss);
            return true; }
        // the URL names this site: keep only its path, defaulting to "/"
        if (eop == u.length ()) u = "/";
        else u = u.substr (eop); }
    if (u.at (u.length () - 1) == '/')
        u += context.index ();
    if (offsite_) return true;
    path p (context.base ());
    p /= u;
    if (is_present (p)) return true;
    if (context.tell (e_error) || context.links ()) ss << quote (url) << " not found";
    return false; }

// Guarded entry point for URL checking. Returns true straight away when context.links ()
// is off or when context.checking_urls () says a check is already under way. Otherwise the
// checking_urls flag is raised around unguarded_verify_url () and lowered again afterwards,
// whether that call returns or throws; any exception is passed on to the caller.
bool directory::verify_url (const ::std::string& url, ::std::ostringstream& ss) const
{   if (! context.links () || context.checking_urls ()) return true;
    bool verdict = false;
    try
    {   context.checking_urls (true);
        verdict = unguarded_verify_url (url, ss);
        context.checking_urls (false); }
    catch (...)
    {   // lower the flag before letting the exception continue
        context.checking_urls (false);
        throw; }
    return verdict; }

// Check a URL through the shared external_ checker and describe any failure in ss.
// Returns true when external checking is off, when the checker accepts the URL, when
// context.repeated () and context.once () are both set, or when the status code reported
// by context.code () is below 300. Otherwise returns false, after appending a message
// chosen by status code, provided errors are being told and the message's own level is
// being told too.
bool directory::verify_external (const ::std::string& url, ::std::ostringstream& ss) const
{   if (! context.external ()) return true;
    if (external_.verify (url, ss)) return true;
    if (context.repeated () && context.once ()) return true;
    if (! context.tell (e_error)) return false;
    const auto code = context.code ();
    if (code < 300) return true;
    // append "<url><what><code>)" when messages of the given level are wanted
    const auto say = [&] (decltype (e_error) level, const char* what)
    {   if (context.tell (level)) ss << url << what << code << ")"; };
    switch (code)
    {   case 301 :
        case 308 : say (e_info, " has moved ("); break;
        case 400 : say (e_warning, " is a malformed or bad link ("); break;
        case 401 : say (e_warning, " is unauthorised ("); break;
        case 403 : say (e_warning, " is forbidden ("); break;
        case 404 :
        case 410 : say (e_error, " is a broken link ("); break;
        case 407 : say (e_info, " is blocked by a proxy ("); break;
        case 408 : say (e_comment, " has timed out ("); break;
        case 451 : say (e_warning, " is censored ("); break;
        default : say (e_warning, " is unavailable ("); break; }
    return false; }

// Split an assignment of the form "virtual=path" at its first '=' and register it with d.
// Returns false when there is no '=', or when nothing precedes or nothing follows it;
// otherwise returns what d.add_virtual () returns.
bool add_virtual (directory& d, const ::std::string& assignment)
{   const size_t eq = assignment.find ('=');
    if (eq == ::std::string::npos) return false;
    if (eq == 0) return false;                          // nothing before '='
    if (eq + 1 >= assignment.length ()) return false;   // nothing after '='
    return d.add_virtual (assignment.substr (0, eq), assignment.substr (eq + 1)); }

bool is_webpage (const ::std::string& name, const vstr_t& extensions)
{   ::std::string ext (::boost::filesystem::path (name).extension ().string ());
    if (ext.empty ()) return false;
    return is_one_of (ext.substr (1), extensions); }
