#!/usr/bin/env python

import argparse
import codecs
import dateparser
import glob
import logging
import multiprocessing
import os
import pytz
import re
import shutil
import sys
import time
from datetime import UTC, datetime
from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader
from markupsafe import Markup
from pathlib import Path
from slugify import slugify

import markdown
import orjson
import pypandoc
import sass

from btbytes.gitstuff import get_commits, get_file_history
from btbytes.jfilters import mydateformat

# pytz UTC tzinfo object; mdict() attaches it to parsed dates.
utc = pytz.UTC

# Site origin that render_to_html() prefixes to each page URI.
DOMAIN = "https://www.btbytes.com"
# Resolved path of this script; copy_the_rest() treats it as a rebuild dependency.
BUILD = os.path.realpath(__file__)
# Directory containing this script; SRC and DEST are derived from it.
BASE = os.path.dirname(os.path.realpath(__file__))
# Directory the generated site is written to.
DEST = os.path.join(BASE, "output")
# Directory holding the source content; also the Jinja2 loader search path.
SRC = os.path.join(BASE, "content")
# Layout template directory name, joined onto template names passed to the Jinja2 env.
TDIR = "_layouts"
# SQLite database file name (a relative path, so it resolves against the working directory).
DB  = "articles.db"

# Sub-directories of SRC searched by get_all_markdowns() / get_all_htmls().
# Each entry is joined into a glob pattern evaluated with recursive=True, so
# "" means SRC itself and an entry ending in "/**" also covers nested folders.
whitelisted_dirs = [
    "",
    "basscss",
    "book",
    "certifications",
    "code",
    "copywork",
    "docs",
    "hireme",
    "kannada",
    "learning",
    "links",
    "log",
    "log/**",
    "misc",
    "note",
    "posts",
    "projects",
    "readingnotes",
    "stopwatch",
    "til",
    "weekly",
    "farming",
    "pycon2010",
    "2025/**",
]


# Shared Jinja2 environment: templates are looked up relative to SRC, and the
# project's date-formatting filter is available to them as "mydateformat".
loader = FileSystemLoader(searchpath=SRC)
env = Environment(loader=loader)
env.filters["mydateformat"] = mydateformat


# File handler: only WARNING and above are written to sitebuild.log.
file_handler = logging.FileHandler("sitebuild.log")
file_handler.setLevel(logging.WARNING)

# Console handler: INFO and above are echoed to stdout.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)

# The file format carries a timestamp; the console format omits it.
file_formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
console_formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(file_formatter)
console_handler.setFormatter(console_formatter)

# Configure the root logger, which the rest of this module uses as `logger`.
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # root threshold: logger.debug() calls are dropped, INFO and above reach the handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Attach a no-op handler to the "pypandoc" logger.
# NOTE(review): this does not disable propagation to the root handlers — confirm that is intended.
logging.getLogger("pypandoc").addHandler(logging.NullHandler())


def sort_by_tag_count(value):
    """Return the items of ``value`` as a list ordered by ascending ``len()``.

    Each item must support ``len()``. Items of equal length keep their
    original relative order because ``sorted`` is stable.
    """
    return sorted(value, key=len)


# Make sort_by_tag_count available to templates under the same name.
env.filters["sort_by_tag_count"] = sort_by_tag_count


import sqlite3
import json


def create_tables(conn):
    """Create the articles / keywords / article_keywords tables if missing.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already-initialised database is harmless. The work is committed before
    returning.

    Args:
        conn: An open ``sqlite3`` connection.
    """
    schema = (
        # One row per rendered page; ``src`` (the file path) is unique.
        """
    CREATE TABLE IF NOT EXISTS articles (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT,
        src TEXT UNIQUE,
        url TEXT,
        created TEXT,
        updated TEXT,
        kind TEXT
    );
    """,
        # Distinct keyword strings.
        """
    CREATE TABLE IF NOT EXISTS keywords (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        keyword TEXT UNIQUE
    );
    """,
        # Many-to-many link between articles and keywords.
        """
    CREATE TABLE IF NOT EXISTS article_keywords (
        article_id INTEGER,
        keyword_id INTEGER,
        PRIMARY KEY (article_id, keyword_id),
        FOREIGN KEY (article_id) REFERENCES articles(id),
        FOREIGN KEY (keyword_id) REFERENCES keywords(id)
    );
    """,
    )
    cur = conn.cursor()
    for ddl in schema:
        cur.execute(ddl)
    conn.commit()


def insert_data(conn, data):
    """Insert article metadata dicts into the SQLite tables.

    Args:
        conn: An open ``sqlite3`` connection whose tables already exist.
        data: Iterable of dicts. ``"path"`` and ``"url"`` are required; the
            optional keys read are ``"title"``, ``"date"``, ``"last-modified"``,
            ``"kind"`` and ``"keywords"`` (a list of strings).

    Raises:
        KeyError: If an article lacks ``"path"`` or ``"url"``.

    Articles whose ``path`` is already stored are left untouched, but their
    keywords are still linked to the existing row. All work is committed once
    at the end.
    """
    cursor = conn.cursor()
    count = 0
    for article in data:
        src = article["path"]
        url = article["url"]
        print(f"insert_data/src: {src}")
        cursor.execute(
            """
        INSERT OR IGNORE INTO articles (title, src, url, created, updated,  kind)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
            (
                article.get("title", ""),
                src,
                url,
                article.get("date", ""),
                article.get("last-modified", ""),
                article.get("kind", ""),
            ),
        )
        # rowcount is 0 when the UNIQUE(src) constraint made SQLite ignore the row.
        if cursor.rowcount > 0:
            count += 1

        # Always look the id up by src: lastrowid is not updated by an ignored
        # INSERT, so it may still hold the rowid of an earlier, unrelated insert.
        cursor.execute("SELECT id FROM articles WHERE src = ?", (src,))
        article_id = cursor.fetchone()[0]

        for keyword in article.get("keywords") or []:
            cursor.execute(
                "INSERT OR IGNORE INTO keywords (keyword) VALUES (?)", (keyword,)
            )
            cursor.execute("SELECT id FROM keywords WHERE keyword = ?", (keyword,))
            keyword_id = cursor.fetchone()[0]

            cursor.execute(
                "INSERT OR IGNORE INTO article_keywords (article_id, keyword_id) VALUES (?, ?)",
                (article_id, keyword_id),
            )

    conn.commit()
    print(f"Inserted {count} articles")


def load_json_to_db():
    """Load ``DEST/allmeta.json`` into the SQLite database named by ``DB``.

    Tables are created when missing. The connection is closed even if reading
    or inserting fails.
    """
    conn = sqlite3.connect(DB)
    try:
        create_tables(conn)
        print("created tables")
        # allmeta.json is produced by orjson, which always emits UTF-8.
        with open(os.path.join(DEST, "allmeta.json"), "r", encoding="utf-8") as f:
            data = json.load(f)
        insert_data(conn, data)
        print("inserted json to sqlite")
    finally:
        conn.close()


def insert_allmeta_to_db(allmeta):
    """Rebuild the SQLite database from scratch out of ``allmeta``.

    Any existing ``DB`` file is deleted first, then the tables are created and
    populated. The connection is closed even if an insert fails.

    Args:
        allmeta: Iterable of article metadata dicts, as accepted by
            ``insert_data``.
    """
    Path(DB).unlink(missing_ok=True)
    print("in insert_allmeta_to_db....")
    conn = sqlite3.connect(DB)
    try:
        create_tables(conn)
        print("created tables")
        insert_data(conn, allmeta)
        print("inserted json to sqlite")
    finally:
        conn.close()

def mdict(f, s):
    """Collect the metadata of one rendered HTML page into a flat dict.

    Args:
        f: Path of the HTML file. Stored under ``"path"`` and mapped to
            ``"url"`` with ``urlfor``.
        s: Parsed document offering ``find`` / ``find_all`` (a BeautifulSoup
            object elsewhere in this file).

    Returns:
        dict with ``"path"``, ``"url"``, optional ``"title"`` and ``"h1"``, and
        one entry per ``<meta>`` tag keyed by its ``http-equiv`` or ``name``
        attribute (a meta tag may therefore override the earlier keys).
        ``"keywords"`` becomes a list of non-empty, stripped strings;
        ``"date"`` is normalised to ``%Y-%m-%dT%H:%M:%S`` when it can be
        parsed and is left as found otherwise.
    """
    d = {"path": f, "url": urlfor(f)}

    ts = s.find("title")
    if ts:
        d["title"] = ts.text.strip()

    h1s = s.find_all("h1")
    if len(h1s) > 0:
        d["h1"] = h1s[0].text

    for tag in s.find_all("meta"):
        key = tag.attrs.get("http-equiv") or tag.attrs.get("name")
        # Skip tags with neither attribute (e.g. charset) and the viewport hint.
        if key is None or key == "viewport":
            continue
        d[key] = tag.attrs.get("content")

    if "keywords" in d:
        # A keywords tag without a content attribute yields None; treat it as empty.
        raw_keywords = d["keywords"] or ""
        d["keywords"] = [
            kw for kw in (part.strip() for part in raw_keywords.split(",")) if kw
        ]

    if "date" in d:
        parsed = None
        problem = "no parseable date"
        if d["date"]:
            try:
                parsed = dateparser.parse(d["date"])
            except Exception as e:  # best effort: a bad date must not abort the build
                problem = e
        if parsed is None:
            logger.warning(
                "unable to process date in: %s - %s - %s", f, d.keys(), problem
            )
        else:
            # Only the wall-clock fields are formatted; no timezone conversion happens.
            d["date"] = parsed.replace(tzinfo=utc).strftime("%Y-%m-%dT%H:%M:%S")
    return d


def build(target, sources):
    """Tell whether ``target`` has to be (re)built from ``sources``.

    Args:
        target: Path of the generated file.
        sources: Iterable of paths the target is derived from.

    Returns:
        True when ``target`` does not exist or is older than at least one
        source; False otherwise (including when ``sources`` is empty).
    """
    if os.path.exists(target):
        target_mtime = os.path.getmtime(target)
        # Collect every source mtime first, so a missing source raises.
        source_mtimes = [os.path.getmtime(src) for src in sources]
        return any(target_mtime < mtime for mtime in source_mtimes)
    return True


def render_pandoc(content, md):
    """Convert markdown text to an HTML fragment with pandoc.

    Args:
        content: Markdown source text.
        md: Object whose ``Meta`` mapping supplies the per-document switches
            ``number_sections`` and ``toca``.

    Returns:
        The HTML produced by ``pypandoc.convert_text``.

    Raises:
        Exception: Whatever pypandoc raises is logged with the document
            metadata and re-raised. The previous ``sys.exit(0)`` reported a
            failed build as success, and a ``SystemExit`` raised inside a
            ``multiprocessing.Pool`` worker is not delivered to the parent,
            which can leave the pool waiting indefinitely.
    """
    # The bibliography and citation style are fixed (paths are relative, so
    # presumably resolved against the working directory — verify).
    # NOTE(review): ``bib`` / ``csl`` metadata keys were read by the original
    # code but never used; per-document overrides are not implemented.
    pdoc_args = [
        # "--standalone",
        "--citeproc",  # render citations
        "--bibliography",
        "content/bib/nbrefs.bib",
        "--csl",
        "content/csl/jcsl.csl",
    ]
    if md.Meta.get("number_sections", False):
        pdoc_args += ["--number-sections"]
    # NOTE(review): the key is spelled "toca" here; confirm whether "toc" was meant.
    if md.Meta.get("toca", False):
        pdoc_args += ["--toc"]
    try:
        return pypandoc.convert_text(
            content,
            "html",
            format="md",
            filters=["pandoc/filters.lua"],
            extra_args=pdoc_args,
        )
    except Exception:
        logger.exception("render_pandoc_exc: %s", md.Meta)
        raise


def render_md(html, md):
    """Wrap an HTML fragment in the layout template chosen by its metadata.

    An explicit ``layout`` entry in ``md.Meta`` wins; otherwise the template is
    picked from ``kind``, falling back to ``notebook.htm``. As a side effect
    the shared environment's ``markdown`` filter is rebound to ``md``.

    Args:
        html: Rendered body, passed to the template as ``body``.
        md: Markdown object providing ``Meta`` and ``convert``.

    Returns:
        The rendered page as a string.
    """
    env.filters["markdown"] = lambda text: Markup(md.convert(text))

    # (kind, template file) pairs, checked in order with ``==``.
    templates_by_kind = (
        ("log", "notebook.htm"),
        ("post", "post.html"),
        ("til", "til.html"),
        ("link", "link.html"),
        ("note", "semantic.html"),
        ("readingnote", "readingnote.htm"),
    )

    layout = md.Meta.get("layout")
    if layout:
        template_name = layout
    else:
        kind = md.Meta.get("kind")
        template_name = next(
            (name for known, name in templates_by_kind if kind == known),
            "notebook.htm",
        )

    page = env.get_template(os.path.join(TDIR, template_name))
    return page.render(body=html, meta=md.Meta, now=datetime.now(UTC))


def render_metadata(md, uri):
    """Record ``uri`` in ``md.Meta`` and return that same mapping.

    The mapping is modified in place; no copy is made.
    """
    meta = md.Meta
    meta["uri"] = uri
    return meta


def make_directory(outfile):
    """Ensure the directory that will contain ``outfile`` exists.

    Args:
        outfile: Path of a file about to be written. A bare filename (no
            directory component) refers to the current directory, so nothing
            needs creating; previously this case raised because
            ``os.makedirs("")`` is an error.
    """
    outdir = os.path.dirname(outfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)


def minify_content(content):
    """Parse ``content`` with BeautifulSoup's lxml parser and serialise it back.

    Despite the name, no explicit minification step is applied here; the text
    returned is whatever ``str()`` of the parsed tree produces.
    """
    # The original author noted html.parser as the alternative parser.
    return str(BeautifulSoup(content, "lxml"))


def render_to_html(infile, outfile, html, md):
    """Render one page into its layout and write it to ``outfile`` as UTF-8.

    Pages whose metadata has a truthy ``draft`` entry are skipped. The page's
    public URL is stored in ``md.Meta["url"]`` before rendering so templates
    can use it.

    Args:
        infile: Source path (unused; kept for the existing call signature).
        outfile: Destination path below ``DEST``.
        html: Rendered body fragment.
        md: Markdown object carrying ``Meta``.
    """
    if md.Meta.get("draft"):
        return
    # Derive the URI from the path relative to DEST. Searching the string for
    # the first "output" breaks as soon as that word appears earlier in the path.
    relative = os.path.relpath(outfile, DEST).replace(os.sep, "/")
    uri = f"/{relative}"
    url = f"{DOMAIN}{uri}"
    md.Meta["url"] = url
    content = minify_content(render_md(html, md))
    make_directory(outfile)
    # Built-in open() replaces codecs.open(), which newer Python versions deprecate.
    with open(outfile, "w", encoding="utf-8") as f:
        logger.debug(f"generating: {url}")
        f.write(content)

    # # render the metadata to a json file
    # jsonout = outfile[:-5] + '.metadata.json'
    # with open(jsonout, "wb") as f:
    #     f.write(
    #     orjson.dumps(
    #         render_metadata(md, uri),
    #         option=orjson.OPT_NAIVE_UTC
    #         | orjson.OPT_INDENT_2
    #         | orjson.OPT_APPEND_NEWLINE,
    #     ))


def convert_scss_to_css(srcfile, outfile):
    """Compile the SCSS file ``srcfile`` to compressed CSS at ``outfile``.

    The output directory is created when missing. The output file is opened
    before compilation starts, matching the original ordering.
    """
    make_directory(outfile)
    with open(outfile, "w") as css_file:
        compiled = sass.compile(filename=srcfile, output_style="compressed")
        css_file.write(compiled)


def link_to_blocks(text):
    """Turn block markers such as ``^abcdef`` into anchor spans.

    A marker is a caret followed by exactly six hexadecimal digits. Each one is
    replaced with ``<span id="^abcdef" class="block">#</span>``; note that the
    caret is kept as part of the ``id`` value.
    """
    marker = re.compile(r"(\^[0-9a-fA-F]{6})")

    def to_span(match):
        return f'<span id="{match.group(1)}" class="block">#</span>'

    return marker.sub(to_span, text)


def replace_link_to_blocks(text):
    """Rewrite ``[[...]]`` wikilinks as markdown links, keeping block fragments.

    ``[[web design]]`` becomes ``[web design](/web-design.html)`` and
    ``[[webdesign#^cb2493]]`` becomes ``[webdesign](/webdesign.html#^cb2493)``:
    spaces in the page name turn into dashes in the URL, and the fragment is
    copied verbatim (caret included). Only the text between the first and the
    second ``#`` is used as the fragment.
    """

    def to_markdown(match):
        page, *fragments = match.group(1).split("#")
        href = f"/{page.replace(' ', '-')}.html"
        if fragments:
            href = f"{href}#{fragments[0]}"
        return f"[{page}]({href})"

    return re.sub(r"\[\[(.*?)\]\]", to_markdown, text)


def replace_links(text):
    """Rewrite ``[[Text]]`` wikilinks as pandoc links carrying a CSS class.

    ``[[My Note]]`` becomes ``[My Note](/My-Note.html){.wikilink}``: the label
    is kept as written and spaces become dashes in the target.
    """

    def to_wikilink(match):
        label = match.group(1)
        target = label.replace(" ", "-")
        return f"[{label}](/{target}.html){{.wikilink}}"

    return re.sub(r"\[\[(.*?)\]\]", to_wikilink, text)


def preprocess(mdcontent):
    """Apply the pre-pandoc text rewrites to raw markdown and return the result.

    Only wikilink rewriting is active; the block-anchor pass
    (``link_to_blocks``) is currently switched off.
    """
    # mdcontent = link_to_blocks(mdcontent)
    return replace_links(mdcontent)


def extract_metadata_md(mdfile):
    """Read a markdown file, collect its metadata and render it with pandoc.

    Python-Markdown is used only to extract the YAML front matter (its HTML
    output is discarded); the body is rendered by ``render_pandoc``.

    Args:
        mdfile: Path to the markdown source.

    Returns:
        Tuple ``(html, md)``: the pandoc-rendered HTML and the Markdown object
        whose ``Meta`` has been completed with ``generator``, ``created`` and
        ``modified`` (and a filename-derived ``title`` when the file has no
        front matter).
    """
    fpath = mdfile.replace(BASE + "/", "")

    # Read as UTF-8 explicitly (pages are written as UTF-8) and release the
    # file handle before the slow pandoc and git calls.
    with open(mdfile, "r", encoding="utf-8") as f:
        content = f.read()

    md = markdown.Markdown(extensions=["full_yaml_metadata"])
    md.convert(content)  # this is required to trigger the metadata extraction
    if not md.Meta:
        logger.warning(
            f"{'*' * 10 }MetaError. File=({mdfile}) -> md.Meta=({md.Meta})"
        )
        md.Meta = dict()
        md.Meta["title"] = fpath.split("/")[-1].replace(".md", "")
    md.Meta["generator"] = "pandoc"

    html = render_pandoc(preprocess(content), md)

    # Preference order: front matter, then git history, then filesystem times.
    created, modified = get_file_history(BASE, fpath)
    md.Meta["created"] = (
        md.Meta.get("date", None)
        or created
        or datetime.fromtimestamp(os.path.getctime(mdfile))
    )
    md.Meta["modified"] = (
        md.Meta.get("updated", None)
        or modified
        or datetime.fromtimestamp(os.path.getmtime(mdfile))
    )
    return html, md


def get_target_path(mdfile, a=".md", b=".html"):
    """Map a source path below ``SRC`` to its output path below ``DEST``.

    The file's stem (the text before the first dot) is slugified with case
    preserved, and a trailing extension ``a`` is replaced by ``b``.

    Only the filename is rewritten. The earlier whole-path ``str.replace``
    calls could also alter directory names that happened to contain the stem
    or the extension, and misbehaved when the stem was empty.

    Args:
        mdfile: Source file path.
        a: Extension to replace (only when the filename ends with it).
        b: Replacement extension; may be empty.

    Returns:
        The output path as a string.
    """
    directory, basename = os.path.split(mdfile)
    stem = basename.split(".")[0]
    tail = basename[len(stem):]
    if a and tail.endswith(a):
        tail = tail[: len(tail) - len(a)] + b
    slug = slugify(stem, lowercase=False)
    return os.path.join(directory.replace(SRC, DEST), slug + tail)


def urlfor(fpath):
    """Return ``fpath`` with every occurrence of ``DEST`` removed.

    For a file below the output directory this leaves the site-relative path.
    """
    site_path = fpath.replace(DEST, "")
    return site_path


def extract_html_metadata(infile):
    """Parse the HTML file ``infile`` and return the BeautifulSoup document.

    The file is parsed with ``html.parser`` while it is open.
    """
    with open(infile) as handle:
        document = BeautifulSoup(handle, features="html.parser")
    return document


def get_git_messages(gitlog, lead):
    """Select the commits whose title contains ``lead`` and tidy them up.

    Args:
        gitlog: Iterable of commit dicts with at least ``"title"`` and
            ``"date"`` keys.
        lead: Marker text to look for in the title (it is removed from the
            returned title).

    Returns:
        New list of commit dicts (the input dicts are not modified) with the
        marker stripped from ``title`` and ``date`` normalised to
        ``%Y-%m-%dT%H:%M:%S``; a date that cannot be parsed is kept as given.

    The former per-date grouping loop was removed: it used an undefined name
    (``combined``) and so raised ``NameError`` as soon as one commit matched,
    and its result was never returned.
    """
    commits = []
    for entry in gitlog:
        if lead not in entry["title"]:
            continue
        parsed = dateparser.parse(entry["date"])
        commits.append(
            {
                **entry,
                "title": entry["title"].replace(lead, ""),
                "date": (
                    parsed.strftime("%Y-%m-%dT%H:%M:%S") if parsed else entry["date"]
                ),
            }
        )
    return commits


def get_all_markdowns():
    """Return the ``*.md`` files found in the whitelisted directories.

    Paths appear in discovery order and only once: overlapping whitelist
    entries such as ``"log"`` and ``"log/**"`` match the same files, which
    would otherwise be listed (and rendered) twice.
    """
    found = dict.fromkeys(
        path
        for d in whitelisted_dirs
        for path in glob.glob(os.path.join(SRC, d, "*.md"), recursive=True)
    )
    return list(found)

def get_all_htmls():
    """Return the ``*.html`` files found in the whitelisted directories.

    Paths appear in discovery order and only once, even when overlapping
    whitelist entries (e.g. ``"log"`` and ``"log/**"``) match the same file.
    """
    found = dict.fromkeys(
        path
        for d in whitelisted_dirs
        for path in glob.glob(os.path.join(SRC, d, "*.html"), recursive=True)
    )
    return list(found)

def process_html_files(html_files):
    """Print the path of every hand-written HTML file.

    This is a placeholder: nothing is extracted or stored yet.

    TODO: pull url (og:url), title, created (meta "date"), modified
    (meta "last-modified") and the canonical link out of each file and store
    them in a document store.
    """
    for path in html_files:
        print("Processing HTML file:", path)


def process_markdown_file(filename):
    """Render one markdown file to HTML unless its output is already current.

    Args:
        filename: Path of the markdown source.
    """
    destination = get_target_path(filename)
    if not build(destination, [filename]):
        return  # output is at least as new as the source
    # TODO: store the html and md into a document store.
    rendered, document = extract_metadata_md(filename)
    render_to_html(filename, destination, rendered, document)


def process_markdown_files(markdowns):
    """Render all markdown files using a pool of four worker processes.

    Args:
        markdowns: Sequence of markdown source paths.

    The pool is used as a context manager so its workers are torn down even
    when a file fails to render; ``map`` returns only after every file has
    been processed.
    """
    logger.info("processing %s markdown files" % (len(markdowns)))
    with multiprocessing.Pool(processes=4) as pool:
        pool.map(process_markdown_file, markdowns)


def process_less_files():
    """Compile every ``SRC/css/*.scss`` stylesheet to a ``.css`` file in the output tree.

    The function name mentions "less", but the files handled are SCSS.
    """
    logger.info("processing less files")
    pattern = os.path.join(SRC, "css", "*.scss")
    for stylesheet in glob.glob(pattern):
        convert_scss_to_css(stylesheet, get_target_path(stylesheet, ".scss", ".css"))


def remove_filepaths_by_prefix(allfiles, exclusion_prefixes):
    """Drop every path that starts with one of the given prefixes.

    Args:
        allfiles: A list of file paths.
        exclusion_prefixes: A list of prefixes to exclude.

    Returns:
        A new list holding the paths that match none of the prefixes, in
        their original order.
    """
    blocked = tuple(exclusion_prefixes)
    return [path for path in allfiles if not path.startswith(blocked)]


# Path prefixes that copy_the_rest() hands to remove_filepaths_by_prefix()
# to decide which files are left out of the copy step.
exclusion_prefixes = [
    ".obsidian",
    "_layouts",
    "_obstemplates",
    "_scripts",
    "_templater",
]


def copy_the_rest(markdowns):
    """Copy every non-markdown file from ``SRC`` to the same place under ``DEST``.

    Args:
        markdowns: Paths already handled by the markdown pipeline; these are
            not copied.

    Files whose path *relative to SRC* starts with one of
    ``exclusion_prefixes`` are skipped. The prefixes were previously compared
    with absolute paths, so nothing was ever excluded. A file is copied only
    when its target is missing or older than the source or this build script.
    """
    logger.info("copy the rest")
    allfiles = glob.glob(SRC + "/**", recursive=True)
    # Key each path by its SRC-relative form so the prefixes can match.
    by_relative = {os.path.relpath(path, SRC): path for path in allfiles}
    kept = remove_filepaths_by_prefix(list(by_relative), exclusion_prefixes)
    rest = {by_relative[rel] for rel in kept} - set(markdowns)

    for r in rest:
        if os.path.isdir(r):
            continue
        target = r.replace(SRC, DEST)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if build(target, [BUILD, r]):
            shutil.copyfile(r, target)


def extract_article_body(soup):
    """Return the result of ``soup.find("article")``.

    With a parsed HTML document this is the first ``<article>`` element, or
    whatever ``find`` returns when there is none.
    """
    return soup.find("article")


def _sitemeta_write_json(name, payload):
    """Write ``payload`` to ``DEST/name`` as indented JSON with a trailing newline."""
    with open(os.path.join(DEST, name), "wb") as out:
        out.write(
            orjson.dumps(
                payload,
                option=orjson.OPT_NAIVE_UTC
                | orjson.OPT_INDENT_2
                | orjson.OPT_APPEND_NEWLINE,
            )
        )


def _sitemeta_date_key(meta):
    """Sort key on ``date``; a missing date sorts as the empty string."""
    return meta.get("date") or ""


def _sitemeta_changed_key(meta):
    """Sort key on ``last-modified``, falling back to ``date``, then ``""``."""
    return meta.get("last-modified") or meta.get("date") or ""


def generate_sitemeta():
    """Scan the rendered site and build the data used by the dynamic pages.

    Every ``*.html`` file under ``DEST`` (except paths containing
    ``tweetstorms``, ``whyd`` or ``/files/``) is parsed for metadata. The
    metadata is loaded into the SQLite database and written out as
    ``tags.json``, ``allmeta.json``, ``urls.json``, ``sitemeta.json`` and
    ``sorted_posts.json`` in ``DEST``.

    Returns:
        dict with the sorted listings (``sorted_index``, ``sorted_posts``,
        ``sorted_notebooks``, ``sorted_journals``, ``sorted_tils``,
        ``sorted_any``), the tag index (``sitetags``), commit lists
        (``changes``, ``news``) and the build time (``now``).

    Sorting uses keys that never yield ``None``, so a page without a date no
    longer aborts the build with a ``TypeError``; such pages sort last in the
    newest-first listings.
    """

    def stamp():
        # Wall-clock time plus epoch seconds, appended to the timing log lines.
        return "%s %s" % (time.strftime("%H:%M:%S"), time.time())

    logger.debug("sitemeta/start   " + stamp())
    skipped_markers = ("tweetstorms", "whyd", "/files/")
    af = [
        a
        for a in glob.glob(DEST + "/**/*.html", recursive=True)
        if not any(marker in a for marker in skipped_markers)
    ]
    logger.debug("sitemeta/extrct0 " + stamp())
    parsed = [(a, extract_html_metadata(a)) for a in af]
    logger.info("sitemeta/extrct1 " + stamp())
    allmeta = [mdict(fa, s) for fa, s in parsed]
    insert_allmeta_to_db(allmeta)
    logger.info("sitemeta/allmeta " + stamp())

    posts = [
        d
        for d in allmeta
        if d.get("kind") == "post"
        and d.get("date")
        and (
            "show_in_archive" in (d.get("keywords") or [])
            or "weekly0" not in (d.get("keywords") or [])
        )
    ]
    sorted_posts = sorted(posts, key=_sitemeta_date_key, reverse=True)
    logger.info(f"sorted_posts: {len(sorted_posts)}")
    # Only the ten newest posts carry their full article body.
    for sp in sorted_posts[:10]:
        body = extract_article_body(extract_html_metadata(sp["path"]))
        sp["body"] = str(body)

    journals = [d for d in allmeta if (d.get("kind") == "journal" and d.get("date"))]
    links = [d for d in allmeta if (d.get("kind") == "link" and d.get("date"))]
    notebooks = [d for d in allmeta if d.get("kind") == "notebook"]
    tils = [d for d in allmeta if d.get("kind") == "til"]

    sorted_journals = sorted(journals, key=_sitemeta_date_key, reverse=True)
    sorted_notebooks = sorted(notebooks, key=_sitemeta_changed_key, reverse=True)
    sorted_tils = sorted(tils, key=_sitemeta_date_key, reverse=True)
    sorted_index = sorted(
        journals + posts + links + tils, key=_sitemeta_date_key, reverse=True
    )
    logger.debug("generate_sitemeta/sorted  " + stamp())

    anything = [
        d
        for d in allmeta
        if d.get("kind") in ["post", "link", "log", "note", "notebook", "til"]
        and d.get("date")
    ]
    sorted_any = sorted(anything, key=_sitemeta_changed_key, reverse=True)

    # keyword -> list of pages carrying it
    tags = {}
    for d in allmeta:
        for t in d.get("keywords") or []:
            tags.setdefault(t, []).append(d)
    logger.info("generate_sitemeta/tags    " + stamp())

    _sitemeta_write_json("tags.json", tags)
    _sitemeta_write_json("allmeta.json", allmeta)
    _sitemeta_write_json("urls.json", [d.get("url") for d in allmeta if d.get("url")])

    sitemeta = {
        "sorted_index": sorted_index,
        "sorted_posts": sorted_posts,
        "sorted_notebooks": sorted_notebooks,
        "sorted_journals": sorted_journals,
        "sorted_tils": sorted_tils,
        # Timezone-aware, as in render_md(); a naive local time would be
        # serialised by OPT_NAIVE_UTC as if it were UTC.
        "now": datetime.now(UTC),
        "changes": get_commits(BASE, PREFIX="CHANGE"),
        "news": get_commits(BASE, PREFIX="NEW"),
        "sitetags": tags,
        "sorted_any": sorted_any,
    }
    _sitemeta_write_json("sitemeta.json", sitemeta)
    # Written once, in the indented form (the file used to be written twice).
    _sitemeta_write_json("sorted_posts.json", sorted_posts)
    logger.info("generate_sitemeta/gendict " + stamp())
    return sitemeta


def render_feed_page(sitemeta):
    """Render ``SRC/feed.jinja`` with the site metadata and write ``feed.xml``.

    Args:
        sitemeta: Mapping whose entries are passed to the template as
            keyword arguments.

    The output is written as UTF-8 explicitly, like the regular pages,
    instead of relying on the locale's default encoding.
    """
    logger.info("rendering feed page")
    page = SRC + "/feed.jinja"
    # Template names are relative to SRC, the loader's search path.
    template = env.get_template(page.replace(SRC, ""))
    content = minify_content(template.render(**sitemeta))
    t = get_target_path(page, a=".jinja", b=".xml")

    with open(t, "w", encoding="utf-8") as f:
        logger.debug(f"FEED page: {DOMAIN}/{t[t.find('output')+7:]}")
        f.write(content)


def find_jinja2_files(directory):
    """Return the paths of all ``*.jinja2`` files below ``directory``.

    The tree is walked with ``os.walk``; each result is the walked folder
    joined with the file name.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(".jinja2")
    ]


def render_site_pages(sitemeta):
    """Render every top-level ``SRC/*.jinja2`` template into the output tree.

    Args:
        sitemeta: Mapping whose entries are passed to each template as
            keyword arguments.

    ``posts.jinja2`` is written with an ``.rss`` extension and
    ``sitemap.xml.jinja2`` has its ``.jinja2`` suffix dropped; neither is
    passed through ``minify_content``. All other templates become ``.html``
    and are passed through it. Files are written as UTF-8 explicitly rather
    than in the locale's default encoding.
    """
    # Only the top level of SRC is searched; the pattern has no "**".
    dpages = glob.glob(SRC + "/*.jinja2")
    logger.info(f"Rendering {len(dpages)} site pages")
    md = markdown.Markdown()
    env.filters["markdown"] = lambda text: Markup(md.convert(text))
    for page in dpages:
        # Template names are relative to SRC, the loader's search path.
        template = env.get_template(page.replace(SRC, ""))
        content = template.render(**sitemeta)

        if "posts.jinja2" in page:
            t = get_target_path(page, a=".jinja2", b=".rss")
        elif "sitemap.xml.jinja2" in page:
            t = get_target_path(page, a=".jinja2", b="")
        else:
            t = get_target_path(page, a=".jinja2")
            content = minify_content(content)
        with open(t, "w", encoding="utf-8") as f:
            logger.debug(f"dynamic page: {DOMAIN}/{t[t.find('output')+7:]}")
            f.write(content)

def render_year_pages():
    """Print each distinct year in which an article in the database was created.

    Page rendering per year is not implemented yet; only the years are
    listed. The database connection is now closed when the function finishes.
    """
    sql = "select distinct(strftime('%Y', created)) from articles where strftime('%Y', created)<> '' order by strftime('%Y', created);"
    conn = sqlite3.connect(DB)
    try:
        years = conn.execute(sql).fetchall()
    finally:
        conn.close()
    for year in years:
        print("Articles in Year:", year[0])
        # template = env.get_template("year.html")
        # content = template.render(year=year, **allmeta)
        # with open(f"{DEST}/{year}.html", "w") as f:
        #     f.write(content)

def main():
    """Command-line entry point for the site build.

    Flags:
        --load: import ``allmeta.json`` into SQLite and exit.
        --full: after the regular build, regenerate site metadata and the
            dynamic pages.
        --rebuild: accepted but currently unused.
    """
    cli = argparse.ArgumentParser(description="Process the site")
    for flag in ("--rebuild", "--full", "--load"):
        cli.add_argument(flag, action="store_true")
    args = cli.parse_args()

    if args.load:
        load_json_to_db()
        sys.exit(0)

    started = time.time()
    markdowns = get_all_markdowns()
    process_markdown_files(markdowns)
    process_html_files(get_all_htmls())
    process_less_files()

    copy_started = time.time()
    copy_the_rest(markdowns)
    copy_finished = time.time()
    logger.info(f"time taken to copy: {copy_finished - copy_started}")
    logger.info(f"time taken from start: {copy_finished - started}")

    if args.full:
        logger.info("doing a full build...")
        meta_started = time.time()
        sitemeta = generate_sitemeta()
        logger.info("time taken to sitemeta: %.2f" % (time.time() - meta_started,))
        logger.info("rendering site pages.")
        render_year_pages()
        render_site_pages(sitemeta)

    logger.info(f"finish. total time taken: {time.time() - started}")


# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()