/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "reldef.h"
#include "vocab.h"

// microformats: http://microformats.org/wiki/Main_Page
// Class-name strings for the h-* vocabulary. They are macros (not constants)
// because vocab::parse splices them into its diagnostic string literals.
#define H_ADR "h-adr"
#define H_BREADCRUMB "h-breadcrumb"
// H_BREADCRUMBS has no entry in vocab_symbol_table; vocab::parse only uses it
// to suggest H_BREADCRUMB instead.
#define H_BREADCRUMBS "h-breadcrumbs"
#define H_CARD "h-card"
#define H_CITE "h-cite"
#define H_ENTRY "h-entry"
#define H_EVENT "h-event"
#define H_FEED "h-feed"
#define H_GEO "h-geo"
#define H_ITEM "h-item"
#define H_LISTING "h-listing"
#define H_PRODUCT "h-product"
#define H_RECIPE "h-recipe"
#define H_RESUME "h-resume"
#define H_REVIEW "h-review"
#define H_AGGREGATE "h-review-aggregate"

// for those who habitually spell correctly (unlike me)
// These spellings are not in vocab_symbol_table either: vocab::parse compares
// the input against them solely to produce a hint when warnings are enabled.
#define H_CV "h-cv"
#define H_ADDR "h-addr"
#define H_CALENDAR "h-calendar"


// Table pairing each recognised vocabulary string with its e_vocabulary
// enumerator. vocab::init passes the whole array, with its element count, to
// symbol::init. The H_* strings are defined earlier in this file; the R_*
// macros are not defined here (presumably they come from reldef.h -- confirm).
// H_BREADCRUMBS, H_CV, H_ADDR and H_CALENDAR have no entry: vocab::parse
// treats them as unknown and only offers a hint.
struct symbol_entry < e_vocabulary > vocab_symbol_table [] =
{   { H_ADR, h_adr },
    { H_BREADCRUMB, h_breadcrumb },
    { H_CARD, h_card },
    { H_CITE, h_cite },
    { H_ENTRY, h_entry },
    { H_EVENT, h_event },
    { H_FEED, h_feed },
    { H_GEO, h_geo },
    { H_ITEM, h_item },
    { H_LISTING, h_listing },
    { H_PRODUCT, h_product },
    { H_RECIPE, h_recipe },
    { H_RESUME, h_resume },
    { H_REVIEW, h_review },
    { H_AGGREGATE, h_aggregate },

    // everything from here on is an R_* name mapped to a rel_* enumerator
    { R_ACQUAINTANCE, rel_acquaintance },
    { R_ALTERNATIVE, rel_alternative },
    { R_APPENDIX, rel_appendix },
    { R_AUTHOR, rel_author },
    { R_BOOKMARK, rel_bookmark },
    { R_BIBLIOGRAPHY, rel_bibliography },
    { R_CANONICAL, rel_canonical },
    { R_CHAPTER, rel_chapter },
    { R_CHILD, rel_child },
    { R_CITE, rel_cite },
    { R_CODE_LICENCE, rel_code_licence },
    { R_COLLEAGUE, rel_colleague },
    { R_CONTACT, rel_contact },
    { R_CONTENT_LICENCE, rel_content_licence },
    { R_CONTENTS, rel_contents },
    { R_COPYRIGHT, rel_copyright },
    { R_CORESIDENT, rel_coresident },
    { R_COWORKER, rel_coworker },
    { R_CRUSH, rel_crush },
    { R_DATE, rel_date },
    { R_DIRECTORY, rel_directory },
    { R_DISCUSSION, rel_discussion },
    { R_DNSPREFETCH, rel_dnsprefetch },
    { R_EXTERNAL, rel_external },
    { R_ENCLOSURE, rel_enclosure },
    { R_FRIEND, rel_friend },
    { R_GLOSSARY, rel_glossary },
    { R_GROUP, rel_group },
    { R_HELP, rel_help },
    { R_HOME, rel_home },
    { R_ICON, rel_icon },
    { R_IN_REPLY_TO, rel_in_reply_to },
    { R_INDEX, rel_index },
    { R_ISSUES, rel_issues },
    { R_ITSRULES, rel_its_rules },
    { R_KIN, rel_kin },
    { R_LICENCE, rel_licence },
    { R_MAP, rel_map },
    { R_ME, rel_me },
    { R_MEMBER, rel_member },
    { R_MET, rel_met },
    { R_MUSE, rel_muse },
    { R_MICROPUB, rel_micropub },
    { R_NEIGHBOUR, rel_neighbour },
    { R_NEXT, rel_next },
    { R_NOFOLLOW, rel_nofollow },
    { R_NOOPENER, rel_noopener },
    { R_NOREFERRER, rel_noreferrer },
    { R_PARENT, rel_parent },
    { R_PAYMENT, rel_payment },
    { R_PINGBACK, rel_pingback },
    { R_PRECONNECT, rel_preconnect },
    { R_PREFETCH, rel_prefetch },
    { R_PRELOAD, rel_preload },
    { R_PRERENDER, rel_prerender },
    { R_PREV, rel_prev },
    // R_PREVIOUS shares rel_prev with R_PREV above
    { R_PREVIOUS, rel_prev },
    { R_PRONUNCIATION, rel_pronunciation },
    { R_SECTION, rel_section },
    { R_SEARCH, rel_search },
    { R_SIBLING, rel_sibling },
    { R_SIDEBAR, rel_sidebar },
    { R_SPOUSE, rel_spouse },
    { R_START, rel_start },
    { R_STYLESHEET, rel_stylesheet },
    { R_SUPERCEDES, rel_supercedes },
    { R_SUBSECTION, rel_subsection },
    { R_SWEETHEART, rel_sweetheart },
    { R_TAG, rel_tag },
    { R_TOC, rel_toc },
    { R_TOP, rel_top },
    { R_TRANSFORMATION, rel_transformation },
    { R_VCS_GIT, rel_vcs_git },
    { R_VCS_SVN, rel_vcs_svn },
    { R_WEBMENTION, rel_webmention },

   { R_BANNER, rel_banner },
   { R_BEGIN, rel_begin },
   { R_BIBLIOENTRY, rel_biblioentry },
   { R_CITATION, rel_citation },
   { R_DEFINITION, rel_definition },
   { R_DISCLAIMER, rel_disclaimer },
   { R_EDITOR, rel_editor },
   { R_END, rel_end },
   { R_NAVIGATE, rel_navigate },
   { R_ORIGIN, rel_origin },
   { R_POINTER, rel_pointer },
   { R_TRADEMARK, rel_trademark },
   { R_TRANSLATION, rel_translation },
   { R_URC, rel_urc },

   { R_ABOUT, rel_about },
   { R_ARCHIVES, rel_archives },
   { R_BLOCKED_BY, rel_blocked_by },
   { R_CITE_AS, rel_cite_as },
   { R_COLLECTION, rel_collection },
   { R_CONVERTED_FROM, rel_convertedfrom },
   { R_CREATE_FORM, rel_create_form },
   { R_CURRENT, rel_current },
   { R_DESCRIBED_BY, rel_describedby },
   { R_DESCRIBES, rel_describes },
   { R_DUPLICATE, rel_duplicate },
   { R_EDIT, rel_edit },
   { R_EDIT_FORM, rel_edit_form },
   { R_EDIT_MEDIA, rel_edit_media },
   { R_FIRST, rel_first },
   { R_HOSTS, rel_hosts },
   { R_HUB, rel_hub },
   { R_INTERVAL_AFTER, rel_intervalafter },
   { R_INTERVAL_BEFORE, rel_intervalbefore },
   { R_INTERVAL_CONTAINS, rel_intervalcontains },
   { R_INTERVAL_DISJOINT, rel_intervaldisjoint },
   { R_INTERVAL_DURING, rel_intervalduring },
   { R_INTERVAL_EQUALS, rel_intervalequals },
   { R_INTERVAL_FINISHED_BY, rel_intervalfinishedby },
   { R_INTERVAL_FINISHES, rel_intervalfinishes },
   { R_INTERVAL_IN, rel_intervalin },
   { R_INTERVAL_MEETS, rel_intervalmeets },
   { R_INTERVAL_MET_BY, rel_intervalmetby },
   { R_INTERVAL_OVERLAPPED_BY, rel_intervaloverlappedby },
   { R_INTERVAL_OVERLAPS, rel_intervaloverlaps },
   { R_INTERVAL_STARTED_BY, rel_intervalstartedby },
   { R_INTERVAL_STARTS, rel_intervalstarts },
   { R_ITEM, rel_item },
   { R_LAST, rel_last },
   { R_LATEST_VERSION, rel_latest_version },
   { R_LRDD, rel_lrdd },
   { R_MEMENTO, rel_memento },
   { R_MONITOR, rel_monitor },
   { R_MONITOR_GROUP, rel_monitor_group },
   { R_NEXT_ARCHIVE, rel_next_archive },
   { R_ORIGINAL, rel_original },
   { R_PREDECESSOR_VERSION, rel_predecessor_version },
   { R_PREV_ARCHIVE, rel_prev_archive },
   { R_PRIVACY_POLICY, rel_privacy_policy },
   { R_PROFILE, rel_profile },
   { R_RELATED, rel_related },
   { R_RESTCONF, rel_restconf },
   { R_REPLIES, rel_replies },
   { R_SELF, rel_self },
   { R_SERVICE, rel_service },
   { R_SERVICE_DESC, rel_service_desc },
   { R_SERVICE_DOC, rel_service_doc },
   { R_SERVICE_META, rel_service_meta },
   { R_STATUS, rel_status },
   { R_SUCCESSOR_VERSION, rel_successor_version },
   { R_SUNSET, rel_sunset },
   { R_TERMS_OF_SERVICE, rel_terms_of_service },
   { R_TIMEGATE, rel_timegate },
   { R_TIMEMAP, rel_timemap },
   { R_TYPE, rel_type },
   { R_UP, rel_up },
   { R_VERSION_HISTORY, rel_version_history },
   { R_VIA, rel_via },
   { R_WORKING_COPY, rel_working_copy },
   { R_WORKING_COPY_OF, rel_working_copy_of },

   { R_AMPHTML, rel_amphtml },
   { R_APPLE_TOUCH_ICON, rel_apple_touch_icon },
   { R_APPLE_TOUCH_ICON_PRECOMPOSED, rel_apple_touch_icon_precomposed },
   { R_APPLE_TOUCH_STARTUP_IMAGE, rel_apple_touch_startup_image },
   { R_ARCHIVED, rel_archived },
   { R_ATTACHMENT, rel_attachment },
   { R_AUTHORISATION_ENDPOINT, rel_authorisation_endpoint },
   { R_CATEGORY, rel_category },
   { R_CODE_REPOSITORY, rel_code_repository },
   { R_COMPONENT, rel_component },
   { R_CHROME_WEBSTORE_ITEM, rel_chrome_webstore_item },
   { R_CONTENT_REPOSITORY, rel_content_repository },
   { R_DCTERMS_CONFORMS_TO, rel_dcterms_conformsto },
   { R_DCTERMS_CONTRIBUTOR, rel_dcterms_contributor },
   { R_DCTERMS_CREATOR, rel_dcterms_creator },
   { R_DCTERMS_DESCRIPTION, rel_dcterms_description },
   { R_DCTERMS_HAS_FORMAT, rel_dcterms_hasformat },
   { R_DCTERMS_HAS_PART, rel_dcterms_haspart },
   { R_DCTERMS_HAS_VERSION, rel_dcterms_hasversion },
   { R_DCTERMS_IS_FORMAT_OF, rel_dcterms_isformatof },
   { R_DCTERMS_IS_PART_OF, rel_dcterms_ispartof },
   { R_DCTERMS_IS_REFERENCED_BY, rel_dcterms_isreferencedby },
   { R_DCTERMS_IS_REPLACED_BY, rel_dcterms_isreplacedby },
   { R_DCTERMS_IS_REQUIRED_BY, rel_dcterms_isrequiredby },
   { R_DCTERMS_IS_VERSION_OF, rel_dcterms_isversionof },
   { R_DCTERMS_LICENCE, rel_dcterms_licence },
   { R_DCTERMS_MEDIATOR, rel_dcterms_mediator },
   { R_DCTERMS_PUBLISHER, rel_dcterms_publisher },
   { R_DCTERMS_REFERENCES, rel_dcterms_references },
   { R_DCTERMS_RELATION, rel_dcterms_relation },
   { R_DCTERMS_REPLACES, rel_dcterms_replaces },
   { R_DCTERMS_REQUIRES, rel_dcterms_requires },
   { R_DCTERMS_RIGHTS_HOLDER, rel_dcterms_rightsholder },
   { R_DCTERMS_SOURCE, rel_dcterms_source },
   { R_DCTERMS_SUBJECT, rel_dcterms_subject },
   { R_DISCLOSURE, rel_disclosure },
   { R_EDIT_URI, rel_edituri },
   { R_ENTRY_CONTENT, rel_entry_content },
   { R_GBFS, rel_gbfs },
   { R_GTFS_STATIC, rel_gtfs_static },
   { R_GTFS_REALTIME, rel_gtfs_realtime },
   { R_IMAGE_SRC, rel_image_src },
   { R_IMPORT, rel_import },
   { R_JSLICENCE, rel_jslicence },
   { R_LIGHTBOX, rel_lightbox },
   { R_LIGHTVIDEO, rel_lightvideo },
   { R_MANIFEST, rel_manifest },
   { R_MASKICON, rel_maskicon },
   { R_META, rel_meta },
   { R_OPENID_DELEGATE, rel_openid_delegate },
   { R_OPENID_SERVER, rel_openid_server },
   { R_OPENID2_LOCAL_ID, rel_openid2_local_id },
   { R_OPENID2_PROVIDER, rel_openid2_provider },
   { R_P3PV1, rel_p3pv1 },
   { R_PGPKEY, rel_pgpkey },
   { R_PUBLISHER, rel_publisher },
   { R_RADIOEPG, rel_radioepg },
   { R_RENDITION, rel_rendition },
   { R_ROOT, rel_root },
   { R_REPLY_TO, rel_reply_to },
   { R_SCHEMA_DCTERMS, rel_schema_dcterms },
   { R_SHORTLINK, rel_shortlink },
   { R_SITEMAP, rel_sitemap },
   { R_STYLESHEET_LESS, rel_stylesheetless },
   { R_SUBRESOURCE, rel_subresource },
   { R_SWORD, rel_sword },
   { R_SYNDICATION, rel_syndication },
   { R_TIMESHEET, rel_timesheet },
   { R_TOKEN_ENDPOINT, rel_token_endpoint },
   { R_WIDGET, rel_widget },
   { R_WLWMANIFEST, rel_wlwmanifest },
   { R_YANDEX_TABLEAU_WIDGET, rel_yandex_tableau_widget },
   { R_CMIS, rel_docs_oasis_open_org_ns_cmis_link_200908_acl },

   { R_ACCESSIBILITY, rel_accessibility },
   { R_LONGDESC, rel_longdesc },
   { R_M_PAGESCROLL2ID, rel_m_pagescroll2id },
   { R_SOURCE, rel_source },
   { R_VCALENDAR_PARENT, rel_vcalendar_parent },
   { R_VCALENDAR_CHILD, rel_vcalendar_child },
   { R_VCALENDAR_SIBLING, rel_vcalendar_sibling },
   { R_WORDPRESS_API, rel_w_api_org },

   { R_COMMENT, rel_comment },
   { R_CONTRIBUTION, rel_contribution },
   { R_ENDORSED, rel_endorsed },
   { R_FAN, rel_fan },
   { R_FEED, rel_feed },
   { R_FOOTNOTE, rel_footnote },
   { R_KINETIC_STYLESHEET, rel_kinetic_stylesheet },
   { R_MADE, rel_made },
   { R_MICROSUMMARY, rel_microsummary },
   { R_PERMALINK, rel_permalink },
   { R_POPOVER, rel_popover },
   { R_PRIVACY, rel_privacy },
   { R_PUBLICKEY, rel_publickey },
   { R_REFERRAL, rel_referral },
   { R_RESPOND_PROXY, rel_respond_proxy },
   { R_RESPOND_REDIRECT, rel_respond_redirect },
   { R_RESOURCE, rel_resource },
   { R_SPONSOR, rel_sponsor },
   { R_TOOLTIP, rel_tooltip },
   { R_TRACKBACK, rel_trackback },
   { R_UNENDORSED, rel_unendorsed },
   { R_USER, rel_user },

   { R_SCHEMA_DC, rel_schema_dc },

   { R_LOGO, rel_logo },
   { R_PAVATAR, rel_pavatar }
};

// Passes vocab_symbol_table and its element count to symbol::init.
void vocab::init ()
{   // number of entries = size of the whole array / size of one element
    const auto entry_count = sizeof (vocab_symbol_table) / sizeof (vocab_symbol_table [0]);
    symbol::init (vocab_symbol_table, entry_count); }

// Looks up a vocabulary term and returns its e_vocabulary value.
// x        : the term as written; matching is done on a lower-cased copy.
// diagnose : receives a hint only when the term is unknown, context.tell
//            (e_warning) is true, and the term is one of the spellings listed
//            below; otherwise it is left untouched.
// Returns the value supplied by symbol::find when that succeeds, else v_unknown.
e_vocabulary vocab::parse (const ::std::string& x, ::std::string& diagnose)
{   ::std::string lowered (::boost::algorithm::to_lower_copy (x));
    e_vocabulary found = v_unknown;
    if (symbol::find (lowered, found)) return found;
    if (context.tell (e_warning)) // integrate into diagnostics
    {   // spelling -> hint, checked in order; the first match wins
        struct spelling_hint { const char* term; const char* text; };
        static const spelling_hint spelling_hints [] =
        {   { H_CV, "it's " H_RESUME " (without accents), not " H_CV },
            { H_ADDR, "address abbreviated correctly" },
            { H_BREADCRUMBS, "it's not " H_BREADCRUMBS ", it's " H_BREADCRUMB " embedded" },
            { H_CALENDAR, "it's " H_EVENT ", not " H_CALENDAR },
            { ALTERNATIVE, ALTERNATIVE " spelt correctly" },
            { CODE_LICENCE, "licence in " CODE_LICENCE " spelt correctly" },
            { CONTENT_LICENCE, "licence in " CONTENT_LICENCE " spelt correctly" },
            { CC_LICENCE, "licence in " CC_LICENCE " spelt correctly" },
            { COWORKER, COWORKER " spelt correctly" },
            { LICENCE, LICENCE " spelt correctly" },
            { SUPERSEDES, SUPERSEDES " spelt correctly (just don't use it)" },
            { DCTERMS_LICENCE, "licence in " DCTERMS_LICENCE " spelt correctly" },
            { NEIGHBOUR, NEIGHBOUR " spelt correctly" } };
        for (const spelling_hint& candidate : spelling_hints)
            if (lowered == candidate.term)
            {   diagnose = candidate.text;
                break; } }
    // the term was not in the symbol table, whether or not a hint was given
    return v_unknown; }

bool verify_rel (const ::std::string& s, ::std::string& diagnosis)
{   if (s.empty ()) return false;
    vstr_t rels (split_by_space (s));
    for (auto rel : rels)
    {   e_vocabulary v = vocab::parse (rel, diagnosis);
        if (v == v_unknown) return false; }
    return true; }
