/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "irt.h"
#include "version.h"
#include "context.h"
#include "common.h"
#include "external.h"
#include "page.h"
#include "directory.h"
#include "headers.h"
#include <boost/format.hpp>

// name of the section holding the replies in the persisted JSON (combined with PROG and SEP to build paths)
#define REPLY "reply"

// V names the version field read from the file header; LENGTH names the stored count of replies
#define V "version"
#define LENGTH "length"

// field names of one persisted reply, in the order reply::read and reply::write handle them:
// file_, id_, server_, target_, content_, when_
#define PAGE "page"
#define ID "id"
#define SERVER "server"
#define TARGET "target"
#define CONTENT "content"
#define WHEN "when"

bool reply::set_server (const ::std::string& link)
{   if (link.empty ()) return false;
    assert (! target_.empty ());
    if (link.find (COLON) != link.npos)
        server_ = link;
    else
    {   size_t  eod = target_.npos,
                css = target_.find (CSS);
        if (css != target_.npos)
        {   eod = target_.find (SLASH, css+3);
            if (eod == target_.npos) eod = target_.length (); }
        if (link [0] == SLASH)
        {   if (eod != target_.npos)
                server_ = target_.substr (0, eod);
            else
            {   vstr_t site (context.site ());
                if (site.size () == 0) return false;
                server_ = HTTP;
                server_ += site [0]; }
            server_ += link; }
        else
        {   size_t last = target_.length () - 1;
            bool slashed = target_ [last] == SLASH;
            if (eod != target_.npos)
                server_ = target_;
            else
            {   vstr_t site (context.site ());
                if (site.size () == 0) return false;
                server_ = HTTP;
                server_ += site [0];
                if (target_ [0] != SLASH)
                    server_ += SLASH;
                server_ += target_; }
            if (! slashed) server_ += SLASH;
            server_ += link; } }
    return true; }

bool reply::find_server (const int depth)
{   if (target_.empty ()) return false;
    bool ok = true;
    bool vrai = context.test_header ().empty ();
    ::boost::filesystem::path http_temp (context.test_header ());
    if (vrai)
    {   http_temp = get_tmp_filename ();
        ok = fetch_http (target_, http_temp, diagnosis_); }
    if (ok)
    {   ::std::string http (read_text_file (http_temp.string ()));
        headers h (http, diagnosis_);
        if (h.abusive_site () && context.tell (e_warning))
        {   diagnosis_ << "WARNING: " << target_ << " uses privacy piracy techniques\n"; }
        ::std::string link (h.link (WEBMENTION));
        if (set_server (link))
        {   if (context.tell (e_comment))
                diagnosis_ << "found " WEBMENTION " server " << server_ << " in headers for " << target_ << "\n";
            if (vrai) if (::boost::filesystem::exists (http_temp)) ::boost::filesystem::remove (http_temp);
            return true; } }
    if (vrai) if (::boost::filesystem::exists (http_temp)) ::boost::filesystem::remove (http_temp);
    ::boost::filesystem::path html_temp (get_tmp_filename ());
    ::std::string html;
    if (fetch (target_, html_temp, diagnosis_)) html = read_text_file (html_temp.string ());
    if (::boost::filesystem::exists (html_temp)) ::boost::filesystem::remove (html_temp);
    if (html.empty ()) return false;
    page p (target_, html);
    directory d (target_, true);
    p.examine (d, depth);
    ::std::string mention = p.find_webmention (depth);
    if (! set_server (mention)) return false;
    if (context.tell (e_comment))
        diagnosis_ << "found " WEBMENTION " server " << server_ << " for " << target_ << "\n";
    return true; }

void reply::mark ()
{   time_t ridiculous_1;
    time (&ridiculous_1);
    struct tm *ridiculous_2 = gmtime (&ridiculous_1);
    assert (ridiculous_2 != nullptr);
    ::boost::format ridiculous_3 ("%04d.%02d.%02d %02d:%02d:%02d");
    ridiculous_3 % (ridiculous_2 -> tm_year + 1900) % ridiculous_2 -> tm_mon % ridiculous_2 -> tm_mday % ridiculous_2 -> tm_hour % ridiculous_2 -> tm_min % ridiculous_2 -> tm_sec;
    when_ = ridiculous_3.str (); }

// Normalise text: every run of characters at or below ' ' (spaces, tabs, newlines, control codes)
// becomes one single space; everything else is copied unchanged.
// s: the text to normalise.
// Returns the normalised copy; leading or trailing runs are reduced to a single space, not removed.
::std::string reply::clean (const ::std::string& s)
{   ::std::string res;
    res.reserve (s.size ()); // the result is never longer than the input
    bool whitespace = false;
    for (const char ch : s)
    {   // compare as unsigned char: where plain char is signed, bytes of 0x80 and above (such as
        // UTF-8 sequences) are negative and would otherwise be mistaken for whitespace
        if (static_cast < unsigned char > (ch) > ' ') { res += ch; whitespace = false; }
        else if (! whitespace) { res += ' '; whitespace = true; } }
    return res; }

// Two replies are equal when page, id, target and content all agree;
// server_ and when_ take no part in the comparison.
bool reply::operator == (const reply& rhs) const
{   return close_but_no_banana (rhs) && (content_ == rhs.content_); }

// True when rhs has the same page, id and target as this reply; the content is not compared.
bool reply::close_but_no_banana (const reply& rhs) const
{   if (! (file_ == rhs.file_)) return false;
    if (! (id_ == rhs.id_)) return false;
    return (target_ == rhs.target_); }

// Load every field of this reply from the given container of a property tree.
// tree: the property tree to read from.
// container: the path of the node holding this reply's fields.
void reply::read (::boost::property_tree::ptree& tree, const ::std::string& container)
{   auto field = [&] (const char* name)
    {   return read_field < ::std::string > (tree, container, name); };
    file_ = field (PAGE);
    id_ = field (ID);
    server_ = field (SERVER);
    target_ = field (TARGET);
    content_ = field (CONTENT);
    when_ = field (WHEN); }

// Store every field of this reply in the given container of a property tree.
// tree: the property tree to write to.
// container: the path of the node that is to hold this reply's fields.
void reply::write (::boost::property_tree::ptree& tree, const ::std::string& container)
{   auto put = [&] (const char* name, const ::std::string& value)
    {   write_field < ::std::string > (tree, container, name, value); };
    put (PAGE, file_);
    put (ID, id_);
    put (SERVER, server_);
    put (TARGET, target_);
    put (CONTENT, content_);
    put (WHEN, when_); }

void reply::mark_unchanged ()
 {  if (context.tell (e_info)) report ("Static");
    activity_ = act_static; }

void reply::mark_update ()
{   if (context.nochange () || context.tell (e_info)) report ("Update");
    activity_ = act_update; mark (); }

void reply::mark_insert ()
{   if (context.nochange () || context.tell (e_info)) report ("Insert");
    activity_ = act_insert; mark (); }

void reply::mark_delete ()
{   if (context.nochange () || context.tell (e_info)) report ("Delete");
    activity_ = act_delete; mark (); }

// Describe this reply, one field per line, under a "*** verb" heading.
// verb: label for the heading; must not be null.
// Returns the description, or an empty string unless context.tell (e_all) holds.
::std::string reply::report (const char* verb) const
{   if (! context.tell (e_all)) return ::std::string ();
    ::std::ostringstream text;
    text << "*** " << verb << '\n';
    text << file_ << '\n';
    text << id_ << '\n';
    text << server_ << '\n';
    text << target_ << '\n';
    text << content_ << '\n';
    text << '\n'; // blank line after each entry
    return text.str (); }

// Describe this reply on one line: "n:" followed by the comma separated fields.
// n: the number to show in front of the fields.
// Returns the line, or an empty string unless context.tell (e_all) holds.
::std::string reply::report (const size_t n) const
{   if (! context.tell (e_all)) return ::std::string ();
    ::std::ostringstream line;
    line << n << ":" << file_;
    line << ',' << id_;
    line << ',' << server_;
    line << ',' << target_;
    line << ',' << content_;
    line << '\n';
    return line.str (); }

// Carry out whatever the reply has been marked for.
// depth: passed through to find_server.
// Returns true when the reply is static (nothing to send) or the mention was sent;
// false when no server could be found or mention () failed.
bool reply::enact (const int depth)
{   assert (activity_ != act_unknown);
    if (activity_ != act_static)
    {   if (! find_server (depth)) return false;
        return mention (file_, target_, server_, diagnosis_); }
    return true; }

// Add a reply, built from the given page, id, target and content, to the end of the collection.
void replies::append (const ::std::string& file, const ::std::string& id, const ::std::string& target, const ::std::string& content)
{   reply_.emplace_back (file, id, target, content); }

// Load previously persisted replies from a JSON file.
// filename: the file to load.
// Returns true when the file does not exist (nothing persisted yet) or was loaded completely;
// false when it is empty, its version does not start with "0.0", or a stored reply is invalid.
// NOTE(review): read_json reports malformed input by throwing; that is not caught here.
bool replies::read (const ::std::string filename)
{   if (! ::boost::filesystem::exists (filename)) return true; // no data is mega insert

    ::boost::property_tree::ptree json;
    ::boost::property_tree::read_json (filename, json);
    if (json.empty ()) return false;

    const ::std::string version = read_field < ::std::string > (json, PROG, V);
    if (version.substr (0, 3) != "0.0") return false;

    const size_t expected = json.get < size_t > (PROG SEP REPLY SEP LENGTH, 0);
    if (expected == 0) return true;

    reply_.reserve (expected);
    size_t n = 0;
    while (n < expected)
    {   // each reply lives in its own numbered container
        ::std::string where = PROG SEP REPLY SEP;
        where += ::boost::lexical_cast < ::std::string > (n);
        reply_.push_back (reply (json, where));
        if (reply_.back ().invalid ()) return false;
        ++n; }
    return true; }

// Save all replies as JSON in the file named by context.persisted ().
// Returns the result of replace_file.
bool replies::write ()
{   ::boost::property_tree::ptree json;
    ::boost::filesystem::path filename (context.persisted ());
    // NOTE(review): the text returned by report () is not used here
    if (context.tell (e_detail)) report ("write");
    write_header (json, REPLY);
    write_field < size_t > (json, REPLY SEP LENGTH, reply_.size ());
    // each reply goes into its own numbered container
    size_t index = 0;
    for (auto& r : reply_)
    {   ::std::string where = REPLY SEP;
        where += ::boost::lexical_cast < ::std::string > (index);
        r.write (json, where);
        ++index; }
    return replace_file (json, filename); }

// Index of the first stored reply equal to r (reply::operator ==), or no_reply when there is none.
size_t replies::find (const reply& r)
{   size_t index = 0;
    for (const auto& candidate : reply_)
    {   if (candidate == r) return index;
        ++index; }
    return no_reply; }

// Index of the first stored reply that matches r in everything close_but_no_banana compares
// (page, id and target), or no_reply when there is none.
size_t replies::probably_match (const reply& r)
{   size_t index = 0;
    for (const auto& candidate : reply_)
    {   if (candidate.close_but_no_banana (r)) return index;
        ++index; }
    return no_reply; }

// Compare these (persisted) replies with the ones found in the pages (context.get_replies ())
// and mark each record as unchanged, updated, inserted or deleted.
// ss: receives the diagnostics, including each reply's own diagnosis.
// Returns true when anything was updated, inserted or deleted.
bool replies::update_records (::std::ostringstream& ss) // not efficient for any real quantities
{   bool res = false;
    size_t mmax = reply_.size ();
    size_t imax = context.get_replies ().reply_.size ();
    if (context.tell (e_debug)) ss << mmax << " previous replies found in " << context.persisted () << ", " << imax << " found in pages\n";
    // report () only builds the text and returns it; the results used to be thrown away,
    // so they are now streamed to ss
    ss << context.get_replies ().report (WEBMENTION);
    if (context.tell (e_detail)) ss << report ("persisted");

    // start with everything unknown; whatever is still unknown afterwards has gone from the pages
    for (size_t z = 0; z < mmax; ++z)
        reply_ [z].mark_unknown ();

    for (size_t z = 0; z < imax; ++z)
    {   size_t here = find (context.get_replies ().reply_ [z]);
        if (here != no_reply)
        {   // exact match: nothing to do, unless an earlier match already settled this record
            if (reply_ [here].is_unknown ())
                reply_ [here].mark_unchanged (); }
        else
        {   // same page, id and target but different content counts as an update
            here = probably_match (context.get_replies ().reply_ [z]);
            if (here != no_reply)
                // NOTE(review): the persisted record keeps its old content here; confirm whether
                // it should be refreshed from the page's reply
                reply_ [here].mark_update ();
            else
            {   reply_.push_back (context.get_replies ().reply_ [z]);
                reply_.back ().mark_insert (); }
            res = true; } }

    // inserts may have grown the collection
    mmax = reply_.size ();
    for (size_t z = 0; z < mmax; ++z)
    {   if (reply_ [z].is_unknown ())
        {   res = true;
            reply_ [z].mark_delete (); }
        ss << reply_ [z].diagnosis (); }
    if (context.tell (e_detail)) ss << report ("post update");
    return res; }

 bool replies::enact (const int depth) // not efficient for any real quantities
{   bool res = false;
    if (context.tell (e_detail)) report ("enact");
    size_t mmax = reply_.size ();
    for (size_t z = 0; z < mmax; ++z)
        if (! reply_ [z].enact (depth))
        {  if (reply_ [z].is_deleted ())
                reply_ [z].mark_unchanged (); }
        else res = true;
    if (res)
    {   vreply_t reply;
        for (size_t x = 0; x < mmax; ++x)
            if (! reply_ [x].is_deleted ())
                reply.push_back (reply_ [x]);
        reply.shrink_to_fit ();
        reply_.swap (reply); }
    return res; }

// Bring the persisted replies in line with those found in the pages, send the resulting
// webmentions and save the persisted set again.
// ss: receives diagnostics from update_records.
// depth: passed through to enact.
// Returns false when the persisted file could not be read, otherwise the result of write ();
// true when there is nothing to do (notification off, no persisted file configured,
// nothing changed, or context.nochange () is set).
bool replies::process (::std::ostringstream& ss, const int depth)
{   if (! context.notify () || context.persisted ().empty ()) return true;

    replies persisted;
    if (! context.clear ())
        if (! persisted.read (context.persisted ())) return false;

    const bool changed = persisted.update_records (ss);
    if (! changed) return true;
    if (context.nochange ()) return true;

    persisted.enact (depth); // whatever the webmentions, still update the file
    return persisted.write (); }

// Describe every stored reply, one numbered line each, under an optional heading.
// comment: heading line, or nullptr for none.
// Returns the description, or an empty string unless context.tell (e_info) holds.
::std::string replies::report (const char* comment) const
{   if (! context.tell (e_info)) return ::std::string ();
    ::std::ostringstream text;
    if (comment != nullptr) text << comment << "\n";    // tell?
    size_t index = 0;
    for (const auto& r : reply_)
    {   text << r.report (index);
        ++index; }
    return text.str (); }
