patate.dev/python_script/generate_article.py

import os
import shutil
import datetime
import re
import argparse
import html
import sys

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DOMAIN_URL = "https://patate.dev"

PATHS = {
    "images_src": SCRIPT_DIR,
    "images_dest": os.path.abspath(os.path.join(SCRIPT_DIR, "../images")),
    "pages": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages")),
    "rss": os.path.abspath(os.path.join(SCRIPT_DIR, "../rss")),
    "sitemap": os.path.abspath(os.path.join(SCRIPT_DIR, "../sitemap.xml")),
    "blog_index": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/blog.html")),
    "header": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/header.html")),
    "footer": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/footer.html"))
}

class BlogGenerator:
    def __init__(self, filepath):
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        self.content_lines = []
        self.metadata = {
            "foldername": "",
            "title": "",
            "description": "",
            "date_str": "",
            "date_obj": None,
            "filename_html": ""
        }

    def run(self):
        print(f"Processing {self.filename}...")

        if not os.path.exists(PATHS["header"]):
            print(f"CRITICAL ERROR: header.html not found at {PATHS['header']}")
            return

        self.parse_file()
        missing_fields = []
        if not self.metadata["foldername"]: missing_fields.append("[foldername]")
        if not self.metadata["title"]: missing_fields.append("[title]")
        if not self.metadata["description"]: missing_fields.append("[description]")
        if not self.metadata["date_str"]: missing_fields.append("[date]")

        if missing_fields:
            print("Error: The following mandatory fields are missing from your text file:")
            for field in missing_fields:
                print(f"  - {field}")
            print("Aborting generation.")
            return

        self.write_html_output()
        self.handle_images()
        self.update_blog_index()
        self.update_rss()
        self.update_sitemap()
        print("Done!")

    def parse_date(self, date_str):
        clean_date = date_str.strip()
        try:
            return datetime.datetime.strptime(clean_date, "%b %d, %Y")
        except ValueError:
            print(f"Warning: Could not parse date '{clean_date}'. Defaulting to NOW.")
            return datetime.datetime.now()

    def highlight_code(self, code):
        """Simple regex-based syntax highlighter for C/C++/Python/Rust."""
        code = html.escape(code)

        code = re.sub(r'(&quot;.*?&quot;)', r'<span class="str">\1</span>', code)
        code = re.sub(r"('.*?')", r'<span class="str">\1</span>', code)
        code = re.sub(r'(//.*)', r'<span class="com">\1</span>', code)
        code = re.sub(r'(#.*)', r'<span class="com">\1</span>', code)

        keywords = [
            "int", "void", "char", "float", "double", "struct", "class",
            "if", "else", "while", "for", "return", "switch", "case", "break",
            "def", "import", "from", "fn", "let", "mut", "pub", "impl", "use",
            "const", "static", "unsigned", "long", "true", "false", "NULL", "nullptr"
        ]

        for kw in keywords:
            pattern = r'\b(' + kw + r')\b(?![^<]*>)'
            code = re.sub(pattern, r'<span class="kwd">\1</span>', code)

        code = re.sub(r'\b(0x[0-9a-fA-F]+|\d+)\b(?![^<]*>)', r'<span class="num">\1</span>', code)
        return code

    def process_inline_tags(self, text):
        text = re.sub(r'\[data\](.*?)\[data\]',
                      r'<code class="highlighter-rouge">\1</code>', text)

        def replace_link(match):
            url = match.group(1)
            label = match.group(2)
            return f'<a href="{url}" target="_blank">{label}</a>'
        text = re.sub(r'\[link\]\[(.*?)\](.*?)\[link\]', replace_link, text)

        return text

    def get_template_content(self, path):
        if os.path.exists(path):
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        else:
            print(f"Warning: Template file not found at {path}")
            return f""

    def parse_file(self):
        with open(self.filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()

        is_list = False
        in_code_block = False
        code_buffer = []
        html_buffer = []

        header_content = self.get_template_content(PATHS["header"])
        html_buffer.append(header_content)

        html_buffer.append('<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">')

        for line in lines:
            if line.strip().startswith("[code]"):
                if in_code_block:
                    raw_code = "".join(code_buffer).strip()
                    highlighted_code = self.highlight_code(raw_code)
                    html_buffer.append(f'<pre><code>{highlighted_code}</code></pre>')
                    code_buffer = []
                    in_code_block = False
                else:
                    in_code_block = True
                continue

            if in_code_block:
                code_buffer.append(line)
                continue

            line = line.strip()
            if not line: continue

            if line.startswith("[foldername]"):
                val = line.replace("[foldername]", "").strip()
                self.metadata["foldername"] = val
                self.metadata["filename_html"] = val + ".html"

            elif line.startswith("[title]"):
                val = line.replace("[title]", "").strip()
                self.metadata["title"] = val
                html_buffer.append(f'<h1 itemprop="name headline">{val}</h1>')

            elif line.startswith("[description]"):
                val = line.replace("[description]", "").strip()
                self.metadata["description"] = val

            elif line.startswith("[date]"):
                val = line.replace("[date]", "").strip()
                self.metadata["date_str"] = val
                self.metadata["date_obj"] = self.parse_date(val)
                html_buffer.append(f'<time class="mono"> {val}</time>')
                html_buffer.append('<main itemprop="articleBody" style="position: relative">')

            elif line.startswith("[section]"):
                val = line.replace("[section]", "").strip()
                anchor = val.replace(" ", "-")
                html_buffer.append(f'<h2 id="{anchor}"><a href="#{anchor}">{val}</a></h2>')

            elif line.startswith("[image]"):
                val = line.replace("[image]", "").strip()
                img_path = f'../images/{self.metadata["foldername"]}/{val}'
                html_buffer.append(f'<p><img class="center_image" src="{img_path}" alt="" /></p>')

            elif line.startswith("[list]"):
                parts = line.split("[list]")
                items = [p for p in parts if p.strip()]

                if not is_list:
                    html_buffer.append("<ul>")
                    is_list = True

                for item in items:
                    parsed_item = self.process_inline_tags(item)
                    html_buffer.append(f"<li>{parsed_item}</li>")

            elif line.startswith("[endlist]"):
                if is_list:
                    html_buffer.append("</ul>")
                    is_list = False

            else:
                if is_list and not line.startswith("[list]"):
                    html_buffer.append("</ul>")
                    is_list = False

                processed_line = self.process_inline_tags(line)
                html_buffer.append(f"<p>{processed_line}</p>")

        html_buffer.append('</main></article>')

        footer_content = self.get_template_content(PATHS["footer"])
        html_buffer.append(footer_content)

        if "</body>" not in footer_content:
            html_buffer.append('</body>')
        if "</html>" not in footer_content:
            html_buffer.append('</html>')

        self.content_lines = html_buffer

    def write_html_output(self):
        output_path = os.path.join(PATHS["pages"], self.metadata["filename_html"])
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(self.content_lines))
        print(f"Generated page: {output_path}")

    def handle_images(self):
        target_dir = os.path.join(PATHS["images_dest"], self.metadata["foldername"])
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        source_dir = os.path.dirname(os.path.abspath(self.filepath))

        for f in os.listdir(source_dir):
            if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.ico')):
                shutil.copy(os.path.join(source_dir, f), os.path.join(target_dir, f))

    def update_blog_index(self):
        marker = ""
        entry = f'\t\t<li><a href="{self.metadata["filename_html"]}" class="article">{self.metadata["title"]}</a></li>'

        print(f"Updating {PATHS['blog_index']}...")

        try:
            with open(PATHS["blog_index"], "r", encoding="utf-8") as f:
                content = f.read()

            if self.metadata["filename_html"] in content and self.metadata["title"] in content:
                print("Link already exists in blog.html. Skipping index update.")
                return

            marker_index = content.find(marker)
            if marker_index == -1:
                print(f"Warning: '{marker}' marker not found in blog.html")
                return

            ul_start_index = content.find("<ul>", marker_index)
            if ul_start_index == -1:
                print("Warning: No <ul> found after the Articles marker.")
                return

            insertion_point = ul_start_index + len("<ul>")
            new_content = content[:insertion_point] + "\n" + entry + content[insertion_point:]

            with open(PATHS["blog_index"], "w", encoding="utf-8") as f:
                f.write(new_content)
            print("Blog index updated.")

        except FileNotFoundError:
            print(f"Error: Could not find {PATHS['blog_index']}")

    def update_rss(self):
        rss_file = os.path.join(PATHS["rss"], "feed.xml")
        rss_date = self.metadata["date_obj"].strftime("%a, %d %b %Y %H:%M:%S +0000")
        link = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

        safe_title = html.escape(self.metadata['title'])
        safe_desc = html.escape(self.metadata['description'])

        new_item = f"""
    <item>
      <title>{safe_title}</title>
      <link>{link}</link>
      <guid>{link}</guid>
      <pubDate>{rss_date}</pubDate>
      <description>{safe_desc}</description>
    </item>"""

        content = ""
        file_is_valid = False

        if os.path.exists(rss_file):
            with open(rss_file, "r", encoding="utf-8") as f:
                content = f.read()
            if content.strip() and "<rss" in content and "<channel>" in content:
                file_is_valid = True
            else:
                print("Warning: RSS file exists but is empty or invalid. Re-creating.")

        if not file_is_valid:
            print(f"Creating new RSS file at {rss_file}")
            content = f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
  <title>~Blog of a French coder~</title>
  <link>{DOMAIN_URL}</link>
  <description>Blog Posts</description>
  {new_item}
</channel>
</rss>"""
            with open(rss_file, "w", encoding="utf-8") as f:
                f.write(content)
        else:
            if link in content:
                print("RSS already contains this link. Skipping.")
            else:
                if "<item>" in content:
                    content, num_subs = re.subn(r'(<item>)', f"{new_item}\n\\1", content, count=1)
                    if num_subs == 0:
                         content = content.replace("</channel>", f"{new_item}\n</channel>")
                else:
                    content = content.replace("</channel>", f"{new_item}\n</channel>")

                with open(rss_file, "w", encoding="utf-8") as f:
                    f.write(content)
                print("RSS feed updated.")

    def update_sitemap(self):
        today_full = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        new_loc = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

        new_url_entry = f"""
<url>
  <loc>{new_loc}</loc>
  <lastmod>{today_full}</lastmod>
  <priority>0.64</priority>
</url>"""

        with open(PATHS["sitemap"], "r", encoding="utf-8") as f:
            content = f.read()

        if new_loc in content:
            print("Sitemap already contains this link.")
            return

        if "</urlset>" in content:
            content = content.replace("</urlset>", f"{new_url_entry}\n</urlset>")
            with open(PATHS["sitemap"], "w", encoding="utf-8") as f:
                f.write(content)

if __name__ == "__main__":
    for p in [PATHS["images_dest"], PATHS["rss"]]:
        if not os.path.exists(p):
            os.makedirs(p)

    parser = argparse.ArgumentParser(description="Generate a blog post from a template file.")
    parser.add_argument("filename", help="Path to the article text file")

    args = parser.parse_args()

    if os.path.isfile(args.filename):
        generator = BlogGenerator(args.filename)
        generator.run()
    else:
        print(f"Error: File '{args.filename}' not found.")