feat: improved article generation, added code blocks

2025-12-05 18:35:10 +01:00
parent ae757fbe70
commit 08a64ca6fc
5 changed files with 485 additions and 173 deletions
--- a/python_script/generate_article.py
+++ b/python_script/generate_article.py
@@ -1,181 +1,361 @@
-"""
-~foldername~template~foldername~
-~titre~Test article~titre~
-~date~ Jan 25, 2022~date~
-~section~Test~section~
-~data~data~data~
-~link~[https://www.google.com]link to google~link~
-~list~Test~list~
-~endlist~
-~image~serverlist.png~image~
-"""
-#TODO : 
-# - add code snippet
-
-
 import os
-from os import listdir
-from os.path import isfile, join
 import shutil
-from tkinter import Tk
-from tkinter.filedialog import askopenfilename
+import datetime
+import re
+import argparse
+import html
+import sys

-def write_file(text) :
-    with open("out.html", "a") as o :
-        o.write(text)
+# Directory that contains this script; every other location is derived from it.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+# Base URL used to build the absolute links written to the RSS feed and sitemap.
+DOMAIN_URL = "https://patate.dev"

-def generate_base(title) :
-    base = """
-    <!--#include virtual="header.html" -->
-  <article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">
-    <h1 itemprop="name headline">"""+title+"""</h1>    
-"""
-    write_file(base)
+# Site locations keyed by role. "images_src" is the script directory itself;
+# the remaining entries are absolute paths resolved relative to it.
+PATHS = {"images_src": SCRIPT_DIR}
+PATHS.update(
+    (key, os.path.abspath(os.path.join(SCRIPT_DIR, relative)))
+    for key, relative in (
+        ("images_dest", "../images"),
+        ("pages", "../pages"),
+        ("rss", "../rss"),
+        ("sitemap", "../sitemap.xml"),
+        ("blog_index", "../pages/blog.html"),
+        ("header", "../pages/header.html"),
+        ("footer", "../pages/footer.html"),
+    )
+)

-def generate_date(date) :
-    base = """
-<time class="mono"> """+date+"""</time>
+class BlogGenerator:
+    def __init__(self, filepath):
+        """Remember which article file to process and start with empty state.
+
+        Args:
+            filepath: Path to the tagged article text file.
+        """
+        self.filepath = filepath
+        self.filename = os.path.basename(filepath)
+        # HTML fragments of the generated page; filled in by parse_file().
+        self.content_lines = []
+        # Text fields start empty; date_obj stays None until a [date] tag is parsed.
+        self.metadata = dict.fromkeys(
+            ("foldername", "title", "description", "date_str"), ""
+        )
+        self.metadata["date_obj"] = None
+        self.metadata["filename_html"] = ""
+        
+    def run(self):
+        """Generate the article page and refresh the index, RSS feed and sitemap.
+
+        Stops early, after printing an explanation, when header.html is
+        missing or when any mandatory tag ([foldername], [title],
+        [description], [date]) was not found in the article file.
+        """
+        print(f"Processing {self.filename}...")
+        header_missing = not os.path.exists(PATHS["header"])
+        if header_missing:
+            print(f"CRITICAL ERROR: header.html not found at {PATHS['header']}")
+            return

-<main itemprop="articleBody" style="position: relative">
-"""
+        self.parse_file()
+        # Metadata keys that must be non-empty, paired with the tag shown to the user.
+        required = (
+            ("foldername", "[foldername]"),
+            ("title", "[title]"),
+            ("description", "[description]"),
+            ("date_str", "[date]"),
+        )
+        missing_fields = [tag for key, tag in required if not self.metadata[key]]

-    write_file(base)
+        if missing_fields:
+            print("Error: The following mandatory fields are missing from your text file:")
+            for tag in missing_fields:
+                print(f"  - {tag}")
+            print("Aborting generation.")
+            return

-def generate_paragraph(p) :
-    base = "<p>"+p+"</p>"
-    write_file(base)
+        # The page is written first; the site-wide files are updated afterwards.
+        publish_steps = (
+            self.write_html_output,
+            self.handle_images,
+            self.update_blog_index,
+            self.update_rss,
+            self.update_sitemap,
+        )
+        for step in publish_steps:
+            step()
+        print("Done!")

-def generate_section(s) :
-    s_2 = s.replace(" ","-")
-    base = '''
-<h2 id="'''+s_2+'''">
-    <a href="#'''+s_2+'''">'''+s+'''</a>
-</h2>
-'''
-    write_file(base)
+    def parse_date(self, date_str):
+        """Parse a date written like "Jan 25, 2025" into a datetime.
+
+        Surrounding whitespace is ignored. When the text does not match the
+        "%b %d, %Y" format, a warning is printed and the current local time
+        is returned instead of raising.
+        """
+        clean_date = date_str.strip()
+        try:
+            parsed = datetime.datetime.strptime(clean_date, "%b %d, %Y")
+        except ValueError:
+            print(f"Warning: Could not parse date '{clean_date}'. Defaulting to NOW.")
+            parsed = datetime.datetime.now()
+        return parsed

-def generate_end_file() :
-    base = '''
-    <p></p></main></article>
-    <!--#include virtual="footer.html" -->
-    </body>
-    </html>'''
-    
-    write_file(base)
+    def highlight_code(self, code):
+        """Return *code* as HTML with simple C/C++/Python/Rust highlighting.
+
+        The raw source is tokenized in a single pass, so every character
+        ends up in at most one <span>: string literals ("str"), ``//`` and
+        ``#`` line comments ("com"), decimal/hex integers ("num") and
+        keywords ("kwd"). Text is HTML-escaped only after tokenizing, so the
+        entities produced by html.escape() (such as ``&#x27;`` for an
+        apostrophe) can never be mistaken for comments, and ``//`` or
+        keywords inside a string literal stay part of that string.
+
+        Args:
+            code: Unescaped source text of one code block.
+
+        Returns:
+            The escaped text with <span class="..."> wrappers around tokens.
+        """
+        keywords = (
+            "int", "void", "char", "float", "double", "struct", "class",
+            "if", "else", "while", "for", "return", "switch", "case", "break",
+            "def", "import", "from", "fn", "let", "mut", "pub", "impl", "use",
+            "const", "static", "unsigned", "long", "true", "false", "NULL", "nullptr",
+        )

-def generate_image(path) :
-    base = '''
-<p>
-    <img class="center_image" src="'''+path+'''" alt="" />
-</p>
-'''
-    write_file(base)
+        # Alternatives are tried in order at each position: a string or
+        # comment claims its whole span before numbers/keywords are looked at.
+        # Each group name doubles as the CSS class of the emitted <span>.
+        token_re = re.compile(
+            r'(?P<str>"(?:\\.|[^"\\\n])*"|' r"'(?:\\.|[^'\\\n])*')"
+            r"|(?P<com>(?://|#)[^\n]*)"
+            r"|(?P<num>\b(?:0x[0-9a-fA-F]+|\d+)\b)"
+            r"|(?P<kwd>\b(?:" + "|".join(keywords) + r")\b)"
+        )

-def set_foldername(name) :
-    try : os.mkdir("../images/"+name)
-    except: pass
-    onlyfiles = [f for f in listdir(".") if isfile(join(".", f))]
-    for f in onlyfiles :
-        if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png") or f.endswith(".ico") or f.endswith(".gif") :
-            shutil.copy(f, "../images/"+name+"/"+f)
+        pieces = []
+        cursor = 0
+        for match in token_re.finditer(code):
+            # Plain text between two tokens is escaped but not wrapped.
+            pieces.append(html.escape(code[cursor:match.start()]))
+            pieces.append(
+                f'<span class="{match.lastgroup}">{html.escape(match.group())}</span>'
+            )
+            cursor = match.end()
+        pieces.append(html.escape(code[cursor:]))
+        return "".join(pieces)

-def main() :
-    Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
-    filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file
+    def process_inline_tags(self, text):
+        """Expand the inline [data] and [link] markup found in *text*.
+
+        ``[data]x[data]`` becomes a <code> element and
+        ``[link][url]label[link]`` becomes an anchor opening in a new tab.
+        Everything outside those tags is returned unchanged.
+        """
+        with_code = re.sub(
+            r'\[data\](.*?)\[data\]',
+            r'<code class="highlighter-rouge">\1</code>',
+            text,
+        )
+        # A callable replacement inserts the URL and label literally.
+        return re.sub(
+            r'\[link\]\[(.*?)\](.*?)\[link\]',
+            lambda m: f'<a href="{m.group(1)}" target="_blank">{m.group(2)}</a>',
+            with_code,
+        )

-    name = ""
-    titre = ""
-    date = ""
-    was_list = False
-    with open(filename, "r") as a :
-        for line in a.readlines() :
-            
-            if line.startswith("~foldername~") :
-                final_line = line.replace("~foldername~","").strip()
-                name = final_line
-                set_foldername(final_line)
+    def get_template_content(self, path):
+        """Return the text of the template at *path*, or "" when it is missing.
+
+        A warning is printed for a missing file so the caller can carry on
+        without that fragment.
+        """
+        if not os.path.exists(path):
+            print(f"Warning: Template file not found at {path}")
+            return ""
+        with open(path, "r", encoding="utf-8") as template:
+            return template.read()

-            elif line.startswith("~titre~") :
-                final_line = line.replace("~titre~","").strip()
-                titre = final_line
-                generate_base(final_line)
+    def parse_file(self):
+        """Convert the tagged article file into HTML fragments.
+
+        Reads ``self.filepath``, fills ``self.metadata`` from the
+        [foldername], [title], [description] and [date] tags, and stores the
+        page (header template, article body, footer template) as a list of
+        strings in ``self.content_lines``.
+
+        Lines between two [code] markers are kept verbatim and passed to
+        highlight_code(); every other non-empty line is either a tag line or
+        a paragraph. An open list is closed by the next line that is not a
+        [list] item, and a list or code block still open at the end of the
+        file is closed there, so the markup stays balanced even when
+        [endlist] or the closing [code] marker is missing.
+        """
+        with open(self.filepath, "r", encoding="utf-8") as f:
+            lines = f.readlines()

-            elif line.startswith("~date~") :
-                final_line = line.replace("~date~","").strip()
-                date = " " + final_line
-                generate_date(final_line)
+        is_list = False
+        in_code_block = False
+        code_buffer = []
+        html_buffer = []

-            elif line.startswith("~section~") :
-                final_line = line.replace("~section~","").strip()
-                generate_section(final_line)
-            
-            elif line.startswith("~image~") :
-                final_line = line.replace("~image~","").strip()
-                final_line = "../images/"+name+"/" + final_line
-                generate_image(final_line)
+        def tag_value(text, tag):
+            # Tags wrap their value on both sides, so every occurrence is removed.
+            return text.replace(tag, "").strip()
+
+        def render_code(buffer):
+            # The collected lines are joined and trimmed before highlighting.
+            highlighted = self.highlight_code("".join(buffer).strip())
+            return f'<pre><code>{highlighted}</code></pre>'
+
+        html_buffer.append(self.get_template_content(PATHS["header"]))
+        html_buffer.append('<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">')

-            else :
-                if line.startswith("~endlist~"):
-                    was_list = False
-                    write_file("</ul>")
-                    continue
-
-                if line.startswith("~list~") :
-                    l = line.split("~list~")
-                    res = ""
-
-                    if not was_list :
-                        res += "<ul>"
-
-                    for w in l :
-                        if not w == " " and not w == "" and not w == "\n":
-                            res += "<li>"+w+"</li>"
-                    
-                    was_list = True
-                    line = res
-
-                if "~data~" in line :
-                    l = line.split("~data~")
-                    res = ""
-                    for w in l :
-                        if not w.startswith(" ") and not w.endswith(" ") and not "<li" in w :
-                            res += '<code class="language-plaintext highlighter-rouge">'+w+'</code>'
-                        else :
-                            res += w
-                    line = res
-
-                if "~link~" in line :
-                    l = line.split("~link~")
-                    res = ""
-                    for w in l :
-                        if not w.startswith(" ") and not w.endswith(" ") and not w == "":
-                            link = w.split("]")
-                            if len(link) < 2 :
-                                continue
-
-                            res += '<a href="'+link[0][1:]+'" target="_blank">'+link[1]+'</a>'
-                        else :
-                            res += w
-                    line = res
-
-                if not was_list :
-                    write_file("<p>" + line.strip() + "</p>")
-                else :
-                    write_file(line.strip())
-    
-    generate_end_file()
-    shutil.move("out.html", "../pages/"+name+".html")
-
-    base = '''\n\t\t<li><a href="'''+name+'''.html" class="article">'''+titre+'''</a></li>\n'''
-    with open("../pages/blog.html", "r+") as f :
-        lines = f.readlines()
-        for i, line in enumerate(lines):
-            if "<!--Articles-->" in line :
-                lines[i+1] = lines[i+1].strip() + base
-                
-        f.seek(0)
         for line in lines:
-            f.write(line)
+            # A [code] marker toggles verbatim mode: the first one opens the
+            # block, the next one closes it and emits the highlighted code.
+            if line.strip().startswith("[code]"):
+                if in_code_block:
+                    html_buffer.append(render_code(code_buffer))
+                    code_buffer = []
+                elif is_list:
+                    # <pre> cannot be a direct child of <ul>; end the list first.
+                    html_buffer.append("</ul>")
+                    is_list = False
+                in_code_block = not in_code_block
+                continue

-if __name__ == "__main__" :
-    main()
+            if in_code_block:
+                # Keep the raw line, including its indentation and newline.
+                code_buffer.append(line)
+                continue
+
+            line = line.strip()
+            if not line:
+                continue
+
+            # Anything other than another [list] item ends the current list,
+            # so a forgotten [endlist] cannot leave the <ul> open.
+            if is_list and not line.startswith("[list]"):
+                html_buffer.append("</ul>")
+                is_list = False
+            if line.startswith("[endlist]"):
+                continue
+
+            if line.startswith("[foldername]"):
+                val = tag_value(line, "[foldername]")
+                self.metadata["foldername"] = val
+                self.metadata["filename_html"] = val + ".html"
+
+            elif line.startswith("[title]"):
+                val = tag_value(line, "[title]")
+                self.metadata["title"] = val
+                html_buffer.append(f'<h1 itemprop="name headline">{val}</h1>')
+
+            elif line.startswith("[description]"):
+                # Only used for the RSS feed; nothing is rendered in the page.
+                self.metadata["description"] = tag_value(line, "[description]")
+
+            elif line.startswith("[date]"):
+                val = tag_value(line, "[date]")
+                self.metadata["date_str"] = val
+                self.metadata["date_obj"] = self.parse_date(val)
+                html_buffer.append(f'<time class="mono"> {val}</time>')
+                # The article body starts right after the date line.
+                html_buffer.append('<main itemprop="articleBody" style="position: relative">')
+
+            elif line.startswith("[section]"):
+                val = tag_value(line, "[section]")
+                anchor = val.replace(" ", "-")
+                html_buffer.append(f'<h2 id="{anchor}"><a href="#{anchor}">{val}</a></h2>')
+
+            elif line.startswith("[image]"):
+                val = tag_value(line, "[image]")
+                img_path = f'../images/{self.metadata["foldername"]}/{val}'
+                html_buffer.append(f'<p><img class="center_image" src="{img_path}" alt="" /></p>')
+
+            elif line.startswith("[list]"):
+                if not is_list:
+                    html_buffer.append("<ul>")
+                    is_list = True
+                # One line may carry several items separated by [list].
+                for item in line.split("[list]"):
+                    if item.strip():
+                        html_buffer.append(f"<li>{self.process_inline_tags(item)}</li>")
+
+            else:
+                html_buffer.append(f"<p>{self.process_inline_tags(line)}</p>")
+
+        if in_code_block:
+            # An unterminated [code] block would otherwise be dropped silently.
+            print("Warning: [code] block is never closed; closing it at end of file.")
+            html_buffer.append(render_code(code_buffer))
+        if is_list:
+            html_buffer.append("</ul>")
+
+        html_buffer.append('</main></article>')
+
+        footer_content = self.get_template_content(PATHS["footer"])
+        html_buffer.append(footer_content)
+
+        # Close the document only when the footer template does not do it.
+        if "</body>" not in footer_content:
+            html_buffer.append('</body>')
+        if "</html>" not in footer_content:
+            html_buffer.append('</html>')
+
+        self.content_lines = html_buffer
+
+    def write_html_output(self):
+        """Write the rendered fragments, one per line, to the article's page file.
+
+        The file is named after metadata["filename_html"] inside the pages
+        directory and is overwritten when it already exists.
+        """
+        destination = os.path.join(PATHS["pages"], self.metadata["filename_html"])
+        with open(destination, "w", encoding="utf-8") as page:
+            page.write("\n".join(self.content_lines))
+        print(f"Generated page: {destination}")
+
+    def handle_images(self):
+        """Copy the images stored next to the article into its image folder.
+
+        Every .png, .jpg, .jpeg, .gif or .ico file (case-insensitive) found
+        in the article's directory is copied to images/<foldername>/, which
+        is created when it does not exist yet.
+        """
+        target_dir = os.path.join(PATHS["images_dest"], self.metadata["foldername"])
+        if not os.path.exists(target_dir):
+            os.makedirs(target_dir)
+
+        source_dir = os.path.dirname(os.path.abspath(self.filepath))
+        image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.ico')
+        for name in os.listdir(source_dir):
+            if not name.lower().endswith(image_suffixes):
+                continue
+            shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))
+
+    def update_blog_index(self):
+        """Insert a link to the new article at the top of the list in blog.html.
+
+        The entry is added right after the first ``<ul>`` that follows the
+        ``<!--Articles-->`` marker. Nothing is written when the page already
+        mentions both the article's file name and its title, when the marker
+        or the list cannot be found, or when blog.html does not exist; each
+        of those cases prints an explanation instead.
+        """
+        # The marker must not be empty: str.find("") always returns 0, which
+        # would silently pick the first <ul> of the whole page.
+        marker = "<!--Articles-->"
+        entry = f'\t\t<li><a href="{self.metadata["filename_html"]}" class="article">{self.metadata["title"]}</a></li>'
+
+        print(f"Updating {PATHS['blog_index']}...")
+
+        try:
+            with open(PATHS["blog_index"], "r", encoding="utf-8") as f:
+                content = f.read()
+
+            if self.metadata["filename_html"] in content and self.metadata["title"] in content:
+                print("Link already exists in blog.html. Skipping index update.")
+                return
+
+            marker_index = content.find(marker)
+            if marker_index == -1:
+                print(f"Warning: '{marker}' marker not found in blog.html")
+                return
+
+            ul_start_index = content.find("<ul>", marker_index)
+            if ul_start_index == -1:
+                print("Warning: No <ul> found after the Articles marker.")
+                return
+
+            # Insert directly after the opening tag so the newest article is first.
+            insertion_point = ul_start_index + len("<ul>")
+            new_content = content[:insertion_point] + "\n" + entry + content[insertion_point:]
+
+            with open(PATHS["blog_index"], "w", encoding="utf-8") as f:
+                f.write(new_content)
+            print("Blog index updated.")
+
+        except FileNotFoundError:
+            print(f"Error: Could not find {PATHS['blog_index']}")
+
+    def update_rss(self):
+        """Add the article to rss/feed.xml, creating the feed when needed.
+
+        A new feed containing only this article is written when the file is
+        missing, empty, or lacks the <rss>/<channel> elements. Otherwise the
+        item is inserted before the first existing <item> (newest first), or
+        before </channel> when the feed has no items yet. Nothing changes
+        when the article's link is already present in the feed.
+        """
+        rss_file = os.path.join(PATHS["rss"], "feed.xml")
+        rss_date = self.metadata["date_obj"].strftime("%a, %d %b %Y %H:%M:%S +0000")
+        link = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"
+
+        # Title and description are free text; escape them for use inside XML.
+        safe_title = html.escape(self.metadata['title'])
+        safe_desc = html.escape(self.metadata['description'])
+
+        new_item = f"""
+    <item>
+      <title>{safe_title}</title>
+      <link>{link}</link>
+      <guid>{link}</guid>
+      <pubDate>{rss_date}</pubDate>
+      <description>{safe_desc}</description>
+    </item>"""
+
+        content = ""
+        file_is_valid = False
+
+        if os.path.exists(rss_file):
+            with open(rss_file, "r", encoding="utf-8") as f:
+                content = f.read()
+            if content.strip() and "<rss" in content and "<channel>" in content:
+                file_is_valid = True
+            else:
+                print("Warning: RSS file exists but is empty or invalid. Re-creating.")
+
+        if not file_is_valid:
+            print(f"Creating new RSS file at {rss_file}")
+            content = f"""<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+<channel>
+  <title>~Blog of a French coder~</title>
+  <link>{DOMAIN_URL}</link>
+  <description>Blog Posts</description>
+  {new_item}
+</channel>
+</rss>"""
+            with open(rss_file, "w", encoding="utf-8") as f:
+                f.write(content)
+            return
+
+        if link in content:
+            print("RSS already contains this link. Skipping.")
+            return
+
+        # Plain str.replace is used on purpose: with re.sub the item text
+        # would act as a replacement template, so a backslash in the title or
+        # description (e.g. a literal "\n") would be rewritten or rejected.
+        if "<item>" in content:
+            content = content.replace("<item>", f"{new_item}\n<item>", 1)
+        else:
+            content = content.replace("</channel>", f"{new_item}\n</channel>")
+
+        with open(rss_file, "w", encoding="utf-8") as f:
+            f.write(content)
+        print("RSS feed updated.")
+
+    def update_sitemap(self):
+        """Append the article's URL to sitemap.xml unless it is already listed.
+
+        The new <url> entry is inserted before the closing </urlset> tag with
+        the current UTC time as <lastmod>. A missing sitemap file or a file
+        without </urlset> is reported and left untouched instead of raising,
+        so a problem here does not abort a run that already wrote the page.
+        """
+        # The timestamp is labelled +00:00, so it has to be taken in UTC
+        # rather than from the machine's local clock.
+        now_utc = datetime.datetime.now(datetime.timezone.utc)
+        today_full = now_utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
+        new_loc = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"
+
+        new_url_entry = f"""
+<url>
+  <loc>{new_loc}</loc>
+  <lastmod>{today_full}</lastmod>
+  <priority>0.64</priority>
+</url>"""
+
+        try:
+            with open(PATHS["sitemap"], "r", encoding="utf-8") as f:
+                content = f.read()
+        except FileNotFoundError:
+            print(f"Error: Could not find {PATHS['sitemap']}")
+            return
+
+        if new_loc in content:
+            print("Sitemap already contains this link.")
+            return
+
+        if "</urlset>" not in content:
+            print("Warning: No </urlset> tag found in sitemap.xml. Skipping sitemap update.")
+            return
+
+        content = content.replace("</urlset>", f"{new_url_entry}\n</urlset>")
+        with open(PATHS["sitemap"], "w", encoding="utf-8") as f:
+            f.write(content)
+        print("Sitemap updated.")
+
+if __name__ == "__main__":
+    # Command-line entry point: generate one article from the given text file.
+    parser = argparse.ArgumentParser(description="Generate a blog post from a template file.")
+    parser.add_argument("filename", help="Path to the article text file")
+
+    args = parser.parse_args()
+
+    if not os.path.isfile(args.filename):
+        print(f"Error: File '{args.filename}' not found.")
+        # A non-zero status lets shells and scripts detect the failure.
+        sys.exit(1)
+
+    # Output folders are created only once the arguments are known to be
+    # valid; exist_ok avoids failing when a folder appears in the meantime.
+    for p in [PATHS["images_dest"], PATHS["rss"]]:
+        os.makedirs(p, exist_ok=True)
+
+    generator = BlogGenerator(args.filename)
+    generator.run()
--- a/python_script/screenshot.png
+++ b/python_script/screenshot.png
--- a/python_script/template
+++ b/python_script/template
@@ -1,11 +1,110 @@
-~foldername~template~foldername~
-~titre~Example~titre~
-~date~Jan 25, 2022~date~
-This is a paragraph
-This is another one with ~data~data text~data~ and ~link~[https://www.youtube.com/watch?v=UaH8cAGdjzw]a link.~link~
-~section~Title 1~section~
-Paragraph of Title 1
-~list~list element~list~
-~list~~data~data element in list~data~~list~
-~endlist~
-~image~serverlist.png~image~
+[foldername]my_new_post_another[foldername]
+[title]My Article 2[title]
+[description]hello its me\n uwu[description]
+[date] Jan 25, 2025[date]
+[section]Introduction[section]
+[data]print("Hello World")[data]
+
+[link][https://google.com]Link to Google[link]
+
+[list]First item[list]
+[list]Second item[list]
+[endlist]
+
+[image]screenshot.png[image]
+
+[code]
+int main(int argc, char* argv[])
+{
+    const char* infile = nullptr;
+    bool marshalled = false;
+    const char* version = nullptr;
+    std::ostream* pyc_output = &std::cout;
+    std::ofstream out_file;
+
+    for (int arg = 1; arg < argc; ++arg) {
+        if (strcmp(argv[arg], "-o") == 0) {
+            if (arg + 1 < argc) {
+                const char* filename = argv[++arg];
+                out_file.open(filename, std::ios_base::out);
+                if (out_file.fail()) {
+                    fprintf(stderr, "Error opening file '%s' for writing\n",
+                            filename);
+                    return 1;
+                }
+                pyc_output = &out_file;
+            } else {
+                fputs("Option '-o' requires a filename\n", stderr);
+                return 1;
+            }
+        } else if (strcmp(argv[arg], "-c") == 0) {
+            marshalled = true;
+        } else if (strcmp(argv[arg], "-v") == 0) {
+            if (arg + 1 < argc) {
+                version = argv[++arg];
+            } else {
+                fputs("Option '-v' requires a version\n", stderr);
+                return 1;
+            }
+        } else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) {
+            fprintf(stderr, "Usage:  %s [options] input.pyc\n\n", argv[0]);
+            fputs("Options:\n", stderr);
+            fputs("  -o <filename>  Write output to <filename> (default: stdout)\n", stderr);
+            fputs("  -c             Specify loading a compiled code object. Requires the version to be set\n", stderr);
+            fputs("  -v <x.y>       Specify a Python version for loading a compiled code object\n", stderr);
+            fputs("  --help         Show this help text and then exit\n", stderr);
+            return 0;
+        } else {
+            infile = argv[arg];
+        }
+    }
+
+    if (!infile) {
+        fputs("No input file specified\n", stderr);
+        return 1;
+    }
+
+    PycModule mod;
+    if (!marshalled) {
+        try {
+            mod.loadFromFile(infile);
+        } catch (std::exception& ex) {
+            fprintf(stderr, "Error loading file %s: %s\n", infile, ex.what());
+            return 1;
+        }
+    } else {
+        if (!version) {
+            fputs("Opening raw code objects requires a version to be specified\n", stderr);
+            return 1;
+        }
+        std::string s(version);
+        auto dot = s.find('.');
+        if (dot == std::string::npos || dot == s.size()-1) {
+            fputs("Unable to parse version string (use the format x.y)\n", stderr);
+            return 1;
+        }
+        int major = std::stoi(s.substr(0, dot));
+        int minor = std::stoi(s.substr(dot+1, s.size()));
+        mod.loadFromMarshalledFile(infile, major, minor);
+    }
+
+    if (!mod.isValid()) {
+        fprintf(stderr, "Could not load file %s\n", infile);
+        return 1;
+    }
+    const char* dispname = strrchr(infile, PATHSEP);
+    dispname = (dispname == NULL) ? infile : dispname + 1;
+    *pyc_output << "# Source Generated with Decompyle++\n";
+    formatted_print(*pyc_output, "# File: %s (Python %d.%d%s)\n\n", dispname,
+                    mod.majorVer(), mod.minorVer(),
+                    (mod.majorVer() < 3 && mod.isUnicode()) ? " Unicode" : "");
+    try {
+        decompyle(mod.code(), &mod, *pyc_output);
+    } catch (std::exception& ex) {
+        fprintf(stderr, "Error decompyling %s: %s\n", infile, ex.what());
+        return 1;
+    }
+
+    return 0;
+}
+[code]