# Files
# patate.dev/python_script/generate_article.py
#
# 362 lines
# 13 KiB
# Python
import os
import shutil
import datetime
import re
import argparse
import html
import sys
# Directory containing this script; every site path below is derived from it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Public base URL used when building absolute links for the RSS feed and sitemap.
DOMAIN_URL = "https://patate.dev"
# Site layout, resolved to absolute paths relative to the script location.
# NOTE: "images_src" is not referenced by any method in this file.
PATHS = {
"images_src": SCRIPT_DIR,
"images_dest": os.path.abspath(os.path.join(SCRIPT_DIR, "../images")),
"pages": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages")),
"rss": os.path.abspath(os.path.join(SCRIPT_DIR, "../rss")),
"sitemap": os.path.abspath(os.path.join(SCRIPT_DIR, "../sitemap.xml")),
"blog_index": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/blog.html")),
"header": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/header.html")),
"footer": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/footer.html"))
}
class BlogGenerator:
    """Turns one tag-annotated text file into a blog page plus site updates."""

    def __init__(self, filepath):
        """Remember the source file and start with empty parse state.

        filepath: path to the article text file to be processed.
        """
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        # Rendered HTML lines; populated by parse_file().
        self.content_lines = []
        # Front-matter collected while parsing; empty string / None
        # means the corresponding tag has not been seen yet.
        self.metadata = dict(
            foldername="",
            title="",
            description="",
            date_str="",
            date_obj=None,
            filename_html="",
        )
def run(self):
    """Full pipeline: parse the article, then emit page, images, index, RSS, sitemap.

    Aborts early (with a message) when the header template is missing or
    any mandatory front-matter tag is absent.
    """
    print(f"Processing {self.filename}...")
    if not os.path.exists(PATHS["header"]):
        print(f"CRITICAL ERROR: header.html not found at {PATHS['header']}")
        return
    self.parse_file()
    # All four front-matter tags are mandatory; collect every absent one
    # so the author sees the full list instead of fixing them one by one.
    required = [
        ("foldername", "[foldername]"),
        ("title", "[title]"),
        ("description", "[description]"),
        ("date_str", "[date]"),
    ]
    missing_fields = [tag for key, tag in required if not self.metadata[key]]
    if missing_fields:
        print("Error: The following mandatory fields are missing from your text file:")
        for field in missing_fields:
            print(f" - {field}")
        print("Aborting generation.")
        return
    self.write_html_output()
    self.handle_images()
    self.update_blog_index()
    self.update_rss()
    self.update_sitemap()
    print("Done!")
def parse_date(self, date_str):
    """Parse a 'Mon DD, YYYY' string (e.g. 'Mar 05, 2021') into a datetime.

    Surrounding whitespace is ignored.  When the string does not match the
    expected format, a warning is printed and the current time is returned.
    """
    clean_date = date_str.strip()
    try:
        parsed = datetime.datetime.strptime(clean_date, "%b %d, %Y")
    except ValueError:
        print(f"Warning: Could not parse date '{clean_date}'. Defaulting to NOW.")
        parsed = datetime.datetime.now()
    return parsed
def highlight_code(self, code):
    """Simple regex-based syntax highlighter for C/C++/Python/Rust.

    The text is HTML-escaped first, so every later rule must match the
    escaped entities, not the raw characters.  Returns HTML where strings,
    comments, keywords and numbers are wrapped in
    <span class="str|com|kwd|num"> tags.
    """
    code = html.escape(code)
    # String literals.  html.escape turns '"' into &quot; and "'" into
    # &#x27;, so the single-quote rule must target &#x27; -- the previous
    # pattern ('.*?') could never match post-escape (dead rule).
    code = re.sub(r'(&quot;.*?&quot;)', r'<span class="str">\1</span>', code)
    code = re.sub(r'(&#x27;.*?&#x27;)', r'<span class="str">\1</span>', code)
    # Comments.  The negative lookbehind stops the '#' rule from firing
    # inside numeric character references such as &#x27;, which previously
    # wrapped half an entity in a span and corrupted the markup.
    code = re.sub(r'(//.*)', r'<span class="com">\1</span>', code)
    code = re.sub(r'(?<!&)(#.*)', r'<span class="com">\1</span>', code)
    keywords = [
        "int", "void", "char", "float", "double", "struct", "class",
        "if", "else", "while", "for", "return", "switch", "case", "break",
        "def", "import", "from", "fn", "let", "mut", "pub", "impl", "use",
        "const", "static", "unsigned", "long", "true", "false", "NULL", "nullptr"
    ]
    # (?![^<]*>) skips matches sitting inside an already-emitted tag
    # (e.g. the word "class" inside class="str").
    for kw in keywords:
        pattern = r'\b(' + kw + r')\b(?![^<]*>)'
        code = re.sub(pattern, r'<span class="kwd">\1</span>', code)
    code = re.sub(r'\b(0x[0-9a-fA-F]+|\d+)\b(?![^<]*>)',
                  r'<span class="num">\1</span>', code)
    return code
def process_inline_tags(self, text):
    """Expand the article's inline markup into HTML.

    [data]x[data]           -> inline <code> snippet
    [link][url]label[link]  -> external <a> opening in a new tab
    """
    text = re.sub(
        r'\[data\](.*?)\[data\]',
        r'<code class="highlighter-rouge">\1</code>',
        text,
    )
    text = re.sub(
        r'\[link\]\[(.*?)\](.*?)\[link\]',
        lambda m: f'<a href="{m.group(1)}" target="_blank">{m.group(2)}</a>',
        text,
    )
    return text
def get_template_content(self, path):
    """Return the text of the template file at *path*, or "" if it is missing.

    A missing template only triggers a warning so generation can continue
    with a partial page.
    """
    # Guard clause first; note the original returned f"" here, a pointless
    # f-string with no placeholders -- a plain literal says the same thing.
    if not os.path.exists(path):
        print(f"Warning: Template file not found at {path}")
        return ""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
def parse_file(self):
    """Translate the article text file into HTML lines in self.content_lines.

    Processes the file line by line, recognising the [tag] mini-markup,
    and builds the page between the shared header and footer templates.
    Front-matter tags ([foldername], [title], [description], [date]) also
    fill in self.metadata as a side effect.
    """
    with open(self.filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()
    is_list = False        # currently inside a <ul> built from [list] lines
    in_code_block = False  # currently between a [code] ... [code] pair
    code_buffer = []       # raw lines buffered for the current code block
    html_buffer = []       # output lines; joined later by write_html_output()
    header_content = self.get_template_content(PATHS["header"])
    html_buffer.append(header_content)
    html_buffer.append('<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">')
    for line in lines:
        # [code] toggles code mode: the first occurrence opens a block, the
        # second flushes the buffered lines through the highlighter.
        if line.strip().startswith("[code]"):
            if in_code_block:
                raw_code = "".join(code_buffer).strip()
                highlighted_code = self.highlight_code(raw_code)
                html_buffer.append(f'<pre><code>{highlighted_code}</code></pre>')
                code_buffer = []
                in_code_block = False
            else:
                in_code_block = True
            continue
        if in_code_block:
            # Inside a code block: keep lines verbatim (indentation intact).
            code_buffer.append(line)
            continue
        line = line.strip()
        if not line: continue
        if line.startswith("[foldername]"):
            # Folder name doubles as the output page's base filename.
            val = line.replace("[foldername]", "").strip()
            self.metadata["foldername"] = val
            self.metadata["filename_html"] = val + ".html"
        elif line.startswith("[title]"):
            val = line.replace("[title]", "").strip()
            self.metadata["title"] = val
            html_buffer.append(f'<h1 itemprop="name headline">{val}</h1>')
        elif line.startswith("[description]"):
            # Used only in metadata (RSS); nothing is emitted into the page.
            val = line.replace("[description]", "").strip()
            self.metadata["description"] = val
        elif line.startswith("[date]"):
            val = line.replace("[date]", "").strip()
            self.metadata["date_str"] = val
            self.metadata["date_obj"] = self.parse_date(val)
            html_buffer.append(f'<time class="mono"> {val}</time>')
            # The [date] tag also marks where the article body begins.
            html_buffer.append('<main itemprop="articleBody" style="position: relative">')
        elif line.startswith("[section]"):
            # Section heading with a self-link anchor (spaces become dashes).
            val = line.replace("[section]", "").strip()
            anchor = val.replace(" ", "-")
            html_buffer.append(f'<h2 id="{anchor}"><a href="#{anchor}">{val}</a></h2>')
        elif line.startswith("[image]"):
            # Images resolve to ../images/<foldername>/ (see handle_images).
            val = line.replace("[image]", "").strip()
            img_path = f'../images/{self.metadata["foldername"]}/{val}'
            html_buffer.append(f'<p><img class="center_image" src="{img_path}" alt="" /></p>')
        elif line.startswith("[list]"):
            # One physical line may carry several [list]-separated items.
            parts = line.split("[list]")
            items = [p for p in parts if p.strip()]
            if not is_list:
                html_buffer.append("<ul>")
                is_list = True
            for item in items:
                parsed_item = self.process_inline_tags(item)
                html_buffer.append(f"<li>{parsed_item}</li>")
        elif line.startswith("[endlist]"):
            if is_list:
                html_buffer.append("</ul>")
                is_list = False
        else:
            # Plain paragraph; also closes a <ul> left open without [endlist].
            # (The startswith("[list]") check is always true here -- this
            # branch is only reached when no tag matched.)
            if is_list and not line.startswith("[list]"):
                html_buffer.append("</ul>")
                is_list = False
            processed_line = self.process_inline_tags(line)
            html_buffer.append(f"<p>{processed_line}</p>")
    html_buffer.append('</main></article>')
    footer_content = self.get_template_content(PATHS["footer"])
    html_buffer.append(footer_content)
    # Close the document ourselves if the footer template does not.
    if "</body>" not in footer_content:
        html_buffer.append('</body>')
    if "</html>" not in footer_content:
        html_buffer.append('</html>')
    self.content_lines = html_buffer
def write_html_output(self):
    """Join the generated lines and write them to pages/<foldername>.html."""
    output_path = os.path.join(PATHS["pages"], self.metadata["filename_html"])
    page = "\n".join(self.content_lines)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(page)
    print(f"Generated page: {output_path}")
def handle_images(self):
    """Copy the article's images into images/<foldername>/.

    Every image file sitting in the same directory as the source text file
    is copied, so the relative src paths emitted by parse_file resolve.
    """
    target_dir = os.path.join(PATHS["images_dest"], self.metadata["foldername"])
    # exist_ok=True replaces the racy exists()-then-makedirs() sequence.
    os.makedirs(target_dir, exist_ok=True)
    source_dir = os.path.dirname(os.path.abspath(self.filepath))
    image_exts = ('.png', '.jpg', '.jpeg', '.gif', '.ico')
    for name in os.listdir(source_dir):
        if name.lower().endswith(image_exts):
            shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))
def update_blog_index(self):
    """Insert a link to the new article into blog.html's first <ul>.

    NOTE(review): ``marker`` is the empty string, so ``content.find(marker)``
    always returns 0 and the "marker not found" branch below is dead code;
    the entry is effectively inserted after the FIRST <ul> in the file.
    The second warning mentions an "Articles marker", so a real marker
    string was probably intended here -- confirm against blog.html.
    """
    marker = ""
    entry = f'\t\t<li><a href="{self.metadata["filename_html"]}" class="article">{self.metadata["title"]}</a></li>'
    print(f"Updating {PATHS['blog_index']}...")
    try:
        with open(PATHS["blog_index"], "r", encoding="utf-8") as f:
            content = f.read()
        # Idempotence: skip when this page is already linked.
        if self.metadata["filename_html"] in content and self.metadata["title"] in content:
            print("Link already exists in blog.html. Skipping index update.")
            return
        marker_index = content.find(marker)
        if marker_index == -1:
            print(f"Warning: '{marker}' marker not found in blog.html")
            return
        ul_start_index = content.find("<ul>", marker_index)
        if ul_start_index == -1:
            print("Warning: No <ul> found after the Articles marker.")
            return
        # Newest entries go right after the opening <ul>.
        insertion_point = ul_start_index + len("<ul>")
        new_content = content[:insertion_point] + "\n" + entry + content[insertion_point:]
        with open(PATHS["blog_index"], "w", encoding="utf-8") as f:
            f.write(new_content)
        print("Blog index updated.")
    except FileNotFoundError:
        print(f"Error: Could not find {PATHS['blog_index']}")
def update_rss(self):
    """Create rss/feed.xml if needed, then add an <item> for this article.

    New items are inserted before the first existing <item> so the feed
    stays newest-first; an item whose link is already present is skipped.
    """
    rss_file = os.path.join(PATHS["rss"], "feed.xml")
    rss_date = self.metadata["date_obj"].strftime("%a, %d %b %Y %H:%M:%S +0000")
    link = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"
    # Title/description are author-supplied text: escape them for XML.
    safe_title = html.escape(self.metadata['title'])
    safe_desc = html.escape(self.metadata['description'])
    new_item = f"""
<item>
<title>{safe_title}</title>
<link>{link}</link>
<guid>{link}</guid>
<pubDate>{rss_date}</pubDate>
<description>{safe_desc}</description>
</item>"""
    content = ""
    file_is_valid = False
    if os.path.exists(rss_file):
        with open(rss_file, "r", encoding="utf-8") as f:
            content = f.read()
        if content.strip() and "<rss" in content and "<channel>" in content:
            file_is_valid = True
        else:
            print("Warning: RSS file exists but is empty or invalid. Re-creating.")
    if not file_is_valid:
        print(f"Creating new RSS file at {rss_file}")
        content = f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>~Blog of a French coder~</title>
<link>{DOMAIN_URL}</link>
<description>Blog Posts</description>
{new_item}
</channel>
</rss>"""
        with open(rss_file, "w", encoding="utf-8") as f:
            f.write(content)
    else:
        if link in content:
            print("RSS already contains this link. Skipping.")
        else:
            # BUG FIX: the old code passed new_item to re.subn as the
            # replacement template; any backslash in the user-supplied
            # title/description would be treated as a regex escape and
            # raise re.error or corrupt the feed.  Plain string slicing
            # inserts the item verbatim.  (The old num_subs == 0 fallback
            # was unreachable: "<item>" was already known to be present.)
            first_item = content.find("<item>")
            if first_item != -1:
                content = content[:first_item] + f"{new_item}\n" + content[first_item:]
            else:
                content = content.replace("</channel>", f"{new_item}\n</channel>")
            with open(rss_file, "w", encoding="utf-8") as f:
                f.write(content)
            print("RSS feed updated.")
def update_sitemap(self):
    """Append a <url> entry for the new page to sitemap.xml (if absent).

    Does nothing when the page is already listed or when the file has no
    closing </urlset> tag to anchor the insertion.
    """
    today_full = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
    new_loc = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"
    new_url_entry = f"""
<url>
<loc>{new_loc}</loc>
<lastmod>{today_full}</lastmod>
<priority>0.64</priority>
</url>"""
    with open(PATHS["sitemap"], "r", encoding="utf-8") as f:
        content = f.read()
    # Guard clauses: already listed, or nowhere to insert.
    if new_loc in content:
        print("Sitemap already contains this link.")
        return
    if "</urlset>" not in content:
        return
    content = content.replace("</urlset>", f"{new_url_entry}\n</urlset>")
    with open(PATHS["sitemap"], "w", encoding="utf-8") as f:
        f.write(content)
if __name__ == "__main__":
    # Make sure the output directories exist before any generation step runs;
    # exist_ok=True replaces the racy exists()-then-makedirs() check.
    for p in [PATHS["images_dest"], PATHS["rss"]]:
        os.makedirs(p, exist_ok=True)
    parser = argparse.ArgumentParser(description="Generate a blog post from a template file.")
    parser.add_argument("filename", help="Path to the article text file")
    args = parser.parse_args()
    if os.path.isfile(args.filename):
        generator = BlogGenerator(args.filename)
        generator.run()
    else:
        print(f"Error: File '{args.filename}' not found.")