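"""Static blog generator for patate.dev.

Converts a lightweight tag-based article file into an HTML page, then updates
the blog index, RSS feed, and sitemap. A minimal example article, inferred from
the tags this parser recognizes (the field values are illustrative, not from a
real post):

    [foldername] my-first-post
    [title] My First Post
    [description] A short summary used for the RSS feed.
    [date] Jan 01, 2024

    [section] Introduction
    Plain lines become paragraphs. Inline code uses [data]snprintf[data] and
    links use [link][https://example.com]a label[link].

    [list] first item [list] second item
    [endlist]

    [code]
    int main(void) { return 0; }
    [code]
"""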
import os
import shutil
import datetime
import re
import argparse
import html

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DOMAIN_URL = "https://patate.dev"

PATHS = {
    "images_src": SCRIPT_DIR,
    "images_dest": os.path.abspath(os.path.join(SCRIPT_DIR, "../images")),
    "pages": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages")),
    "rss": os.path.abspath(os.path.join(SCRIPT_DIR, "../rss")),
    "sitemap": os.path.abspath(os.path.join(SCRIPT_DIR, "../sitemap.xml")),
    "blog_index": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/blog.html")),
    "header": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/header.html")),
    "footer": os.path.abspath(os.path.join(SCRIPT_DIR, "../pages/footer.html")),
}

class BlogGenerator:
    def __init__(self, filepath):
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        self.content_lines = []
        self.metadata = {
            "foldername": "",
            "title": "",
            "description": "",
            "date_str": "",
            "date_obj": None,
            "filename_html": "",
        }

    def run(self):
        print(f"Processing {self.filename}...")

        if not os.path.exists(PATHS["header"]):
            print(f"CRITICAL ERROR: header.html not found at {PATHS['header']}")
            return

        self.parse_file()

        missing_fields = []
        if not self.metadata["foldername"]: missing_fields.append("[foldername]")
        if not self.metadata["title"]: missing_fields.append("[title]")
        if not self.metadata["description"]: missing_fields.append("[description]")
        if not self.metadata["date_str"]: missing_fields.append("[date]")

        if missing_fields:
            print("Error: The following mandatory fields are missing from your text file:")
            for field in missing_fields:
                print(f"  - {field}")
            print("Aborting generation.")
            return

        self.write_html_output()
        self.handle_images()
        self.update_blog_index()
        self.update_rss()
        self.update_sitemap()
        print("Done!")

    def parse_date(self, date_str):
        """Parse a date like 'Jan 05, 2024'; fall back to the current time."""
        clean_date = date_str.strip()
        try:
            return datetime.datetime.strptime(clean_date, "%b %d, %Y")
        except ValueError:
            print(f"Warning: Could not parse date '{clean_date}'. Defaulting to NOW.")
            return datetime.datetime.now()

    def highlight_code(self, code):
        """Simple regex-based syntax highlighter for C/C++/Python/Rust."""
        # quote=False keeps ' and " literal so the string patterns below can
        # match (with the default quote=True they become &quot;/&#x27; and the
        # string rules never fire). Safe here: the output is element text, not
        # an attribute value.
        code = html.escape(code, quote=False)

        code = re.sub(r'(".*?")', r'<span class="str">\1</span>', code)
        code = re.sub(r"('.*?')", r'<span class="str">\1</span>', code)
        code = re.sub(r'(//.*)', r'<span class="com">\1</span>', code)
        code = re.sub(r'(#.*)', r'<span class="com">\1</span>', code)

        keywords = [
            "int", "void", "char", "float", "double", "struct", "class",
            "if", "else", "while", "for", "return", "switch", "case", "break",
            "def", "import", "from", "fn", "let", "mut", "pub", "impl", "use",
            "const", "static", "unsigned", "long", "true", "false", "NULL", "nullptr",
        ]

        for kw in keywords:
            # (?![^<]*>) skips matches sitting inside a previously inserted tag.
            pattern = r'\b(' + kw + r')\b(?![^<]*>)'
            code = re.sub(pattern, r'<span class="kwd">\1</span>', code)

        code = re.sub(r'\b(0x[0-9a-fA-F]+|\d+)\b(?![^<]*>)', r'<span class="num">\1</span>', code)
        return code

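    # Quick sanity check (illustrative, not from the original repo):
    #   highlight_code('int x = 0; // init')
    #   -> '<span class="kwd">int</span> x = <span class="num">0</span>; '
    #      '<span class="com">// init</span>'
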
    def process_inline_tags(self, text):
        """Expand [data]...[data] inline code and [link][url]label[link] anchors."""
        text = re.sub(r'\[data\](.*?)\[data\]',
                      r'<code class="highlighter-rouge">\1</code>', text)

        def replace_link(match):
            url = match.group(1)
            label = match.group(2)
            return f'<a href="{url}" target="_blank">{label}</a>'

        text = re.sub(r'\[link\]\[(.*?)\](.*?)\[link\]', replace_link, text)

        return text

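    # Examples (values illustrative):
    #   '[data]memcpy[data]' -> '<code class="highlighter-rouge">memcpy</code>'
    #   '[link][https://example.com]a label[link]'
    #     -> '<a href="https://example.com" target="_blank">a label</a>'
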
    def get_template_content(self, path):
        if os.path.exists(path):
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        else:
            print(f"Warning: Template file not found at {path}")
            return ""

    def parse_file(self):
        with open(self.filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()

        is_list = False
        in_code_block = False
        code_buffer = []
        html_buffer = []

        header_content = self.get_template_content(PATHS["header"])
        html_buffer.append(header_content)

        html_buffer.append('<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">')

        for line in lines:
            # [code] toggles a verbatim block; its contents bypass all other tags.
            if line.strip().startswith("[code]"):
                if in_code_block:
                    raw_code = "".join(code_buffer).strip()
                    highlighted_code = self.highlight_code(raw_code)
                    html_buffer.append(f'<pre><code>{highlighted_code}</code></pre>')
                    code_buffer = []
                    in_code_block = False
                else:
                    in_code_block = True
                continue

            if in_code_block:
                code_buffer.append(line)
                continue

            line = line.strip()
            if not line: continue

            if line.startswith("[foldername]"):
                val = line.replace("[foldername]", "").strip()
                self.metadata["foldername"] = val
                self.metadata["filename_html"] = val + ".html"

            elif line.startswith("[title]"):
                val = line.replace("[title]", "").strip()
                self.metadata["title"] = val
                html_buffer.append(f'<h1 itemprop="name headline">{val}</h1>')

            elif line.startswith("[description]"):
                val = line.replace("[description]", "").strip()
                self.metadata["description"] = val

            elif line.startswith("[date]"):
                val = line.replace("[date]", "").strip()
                self.metadata["date_str"] = val
                self.metadata["date_obj"] = self.parse_date(val)
                html_buffer.append(f'<time class="mono"> {val}</time>')
                html_buffer.append('<main itemprop="articleBody" style="position: relative">')

            elif line.startswith("[section]"):
                val = line.replace("[section]", "").strip()
                anchor = val.replace(" ", "-")
                html_buffer.append(f'<h2 id="{anchor}"><a href="#{anchor}">{val}</a></h2>')

            elif line.startswith("[image]"):
                val = line.replace("[image]", "").strip()
                img_path = f'../images/{self.metadata["foldername"]}/{val}'
                html_buffer.append(f'<p><img class="center_image" src="{img_path}" alt="" /></p>')

            elif line.startswith("[list]"):
                # One line may hold several items: [list] item one [list] item two
                parts = line.split("[list]")
                items = [p for p in parts if p.strip()]

                if not is_list:
                    html_buffer.append("<ul>")
                    is_list = True

                for item in items:
                    parsed_item = self.process_inline_tags(item)
                    html_buffer.append(f"<li>{parsed_item}</li>")

            elif line.startswith("[endlist]"):
                if is_list:
                    html_buffer.append("</ul>")
                    is_list = False

            else:
                if is_list:
                    html_buffer.append("</ul>")
                    is_list = False

                processed_line = self.process_inline_tags(line)
                html_buffer.append(f"<p>{processed_line}</p>")

        # Close a list left open at end-of-file (missing [endlist]).
        if is_list:
            html_buffer.append("</ul>")

        html_buffer.append('</main></article>')

        footer_content = self.get_template_content(PATHS["footer"])
        html_buffer.append(footer_content)

        if "</body>" not in footer_content:
            html_buffer.append('</body>')
        if "</html>" not in footer_content:
            html_buffer.append('</html>')

        self.content_lines = html_buffer

    def write_html_output(self):
        output_path = os.path.join(PATHS["pages"], self.metadata["filename_html"])
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(self.content_lines))
        print(f"Generated page: {output_path}")

    def handle_images(self):
        target_dir = os.path.join(PATHS["images_dest"], self.metadata["foldername"])
        os.makedirs(target_dir, exist_ok=True)

        # Copy every image sitting next to the article file into the post's folder.
        source_dir = os.path.dirname(os.path.abspath(self.filepath))

        for f in os.listdir(source_dir):
            if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.ico')):
                shutil.copy(os.path.join(source_dir, f), os.path.join(target_dir, f))

    def update_blog_index(self):
        # NOTE: the original marker string was lost in extraction; an HTML
        # comment like this is assumed here. Adjust it to whatever marker
        # blog.html actually uses.
        marker = "<!-- Articles -->"
        entry = f'\t\t<li><a href="{self.metadata["filename_html"]}" class="article">{self.metadata["title"]}</a></li>'

        print(f"Updating {PATHS['blog_index']}...")

        try:
            with open(PATHS["blog_index"], "r", encoding="utf-8") as f:
                content = f.read()

            if self.metadata["filename_html"] in content and self.metadata["title"] in content:
                print("Link already exists in blog.html. Skipping index update.")
                return

            marker_index = content.find(marker)
            if marker_index == -1:
                print(f"Warning: '{marker}' marker not found in blog.html")
                return

            ul_start_index = content.find("<ul>", marker_index)
            if ul_start_index == -1:
                print("Warning: No <ul> found after the Articles marker.")
                return

            insertion_point = ul_start_index + len("<ul>")
            new_content = content[:insertion_point] + "\n" + entry + content[insertion_point:]

            with open(PATHS["blog_index"], "w", encoding="utf-8") as f:
                f.write(new_content)
            print("Blog index updated.")

        except FileNotFoundError:
            print(f"Error: Could not find {PATHS['blog_index']}")

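    # For reference, blog.html is expected to look roughly like this near the
    # marker (assumed layout, reconstructed from the search logic above):
    #   <!-- Articles -->
    #   <ul>
    #       <li><a href="previous-post.html" class="article">Previous Post</a></li>
    #   </ul>
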
    def update_rss(self):
        rss_file = os.path.join(PATHS["rss"], "feed.xml")
        rss_date = self.metadata["date_obj"].strftime("%a, %d %b %Y %H:%M:%S +0000")
        link = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

        safe_title = html.escape(self.metadata['title'])
        safe_desc = html.escape(self.metadata['description'])

        new_item = f"""
    <item>
        <title>{safe_title}</title>
        <link>{link}</link>
        <guid>{link}</guid>
        <pubDate>{rss_date}</pubDate>
        <description>{safe_desc}</description>
    </item>"""

        content = ""
        file_is_valid = False

        if os.path.exists(rss_file):
            with open(rss_file, "r", encoding="utf-8") as f:
                content = f.read()
            if content.strip() and "<rss" in content and "<channel>" in content:
                file_is_valid = True
            else:
                print("Warning: RSS file exists but is empty or invalid. Re-creating.")

        if not file_is_valid:
            print(f"Creating new RSS file at {rss_file}")
            content = f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
    <title>~Blog of a French coder~</title>
    <link>{DOMAIN_URL}</link>
    <description>Blog Posts</description>
    {new_item}
</channel>
</rss>"""
            with open(rss_file, "w", encoding="utf-8") as f:
                f.write(content)
        else:
            if link in content:
                print("RSS already contains this link. Skipping.")
            else:
                # Insert the new item before the first existing <item> so the
                # feed stays newest-first; fall back to appending at </channel>.
                if "<item>" in content:
                    content, num_subs = re.subn(r'(<item>)', f"{new_item}\n\\1", content, count=1)
                    if num_subs == 0:
                        content = content.replace("</channel>", f"{new_item}\n</channel>")
                else:
                    content = content.replace("</channel>", f"{new_item}\n</channel>")

                with open(rss_file, "w", encoding="utf-8") as f:
                    f.write(content)
                print("RSS feed updated.")

    def update_sitemap(self):
        today_full = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        new_loc = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

        new_url_entry = f"""
    <url>
        <loc>{new_loc}</loc>
        <lastmod>{today_full}</lastmod>
        <priority>0.64</priority>
    </url>"""

        if not os.path.exists(PATHS["sitemap"]):
            print(f"Warning: sitemap not found at {PATHS['sitemap']}. Skipping.")
            return

        with open(PATHS["sitemap"], "r", encoding="utf-8") as f:
            content = f.read()

        if new_loc in content:
            print("Sitemap already contains this link.")
            return

        if "</urlset>" in content:
            content = content.replace("</urlset>", f"{new_url_entry}\n</urlset>")
            with open(PATHS["sitemap"], "w", encoding="utf-8") as f:
                f.write(content)
            print("Sitemap updated.")

if __name__ == "__main__":
    for p in [PATHS["images_dest"], PATHS["rss"]]:
        if not os.path.exists(p):
            os.makedirs(p)

    parser = argparse.ArgumentParser(description="Generate a blog post from a template file.")
    parser.add_argument("filename", help="Path to the article text file")

    args = parser.parse_args()

    if os.path.isfile(args.filename):
        generator = BlogGenerator(args.filename)
        generator.run()
    else:
        print(f"Error: File '{args.filename}' not found.")
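# Example invocation (the script filename here is hypothetical):
#   python generate_blog.py my-first-post.txt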