feat: improved article generation, added code blocks

This commit is contained in:
2025-12-05 18:35:10 +01:00
parent ae757fbe70
commit 08a64ca6fc
5 changed files with 485 additions and 173 deletions

View File

@@ -1,181 +1,361 @@
"""
~foldername~template~foldername~
~titre~Test article~titre~
~date~ Jan 25, 2022~date~
~section~Test~section~
~data~data~data~
~link~[https://www.google.com]link to google~link~
~list~Test~list~
~endlist~
~image~serverlist.png~image~
"""
#TODO :
# - add code snippet
import os
from os import listdir
from os.path import isfile, join
import shutil
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import datetime
import re
import argparse
import html
import sys
def write_file(text):
    """Append ``text`` to ``out.html`` in the current working directory.

    The file is created on first use and every call adds to its end.
    It is opened as UTF-8 explicitly so that non-ASCII article text is
    written the same way on every platform instead of depending on the
    locale's default encoding.

    Args:
        text: The string to append.
    """
    with open("out.html", "a", encoding="utf-8") as out:
        out.write(text)
# Absolute directory that contains this script; other paths are derived from it.
SCRIPT_DIR = os.path.split(os.path.abspath(__file__))[0]
# Public base URL of the site, without a trailing slash.
DOMAIN_URL = "https://patate.dev"
def generate_base(title):
    """Append the page opening to the output: header include, <article> tag and <h1> title.

    Args:
        title: Text placed inside the <h1> element (inserted as-is).
    """
    pieces = (
        '\n<!--#include virtual="header.html" -->\n',
        '<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">\n',
        '<h1 itemprop="name headline">',
        title,
        '</h1>\n',
    )
    write_file("".join(pieces))
# Locations read or written by the generator, all derived from SCRIPT_DIR.
# "images_src" is the script directory itself; every other entry is given
# relative to it and normalised to an absolute path.
PATHS = {
    "images_src": SCRIPT_DIR,
    **{
        key: os.path.abspath(os.path.join(SCRIPT_DIR, relative))
        for key, relative in (
            ("images_dest", "../images"),
            ("pages", "../pages"),
            ("rss", "../rss"),
            ("sitemap", "../sitemap.xml"),
            ("blog_index", "../pages/blog.html"),
            ("header", "../pages/header.html"),
            ("footer", "../pages/footer.html"),
        )
    },
}
def generate_date(date) :
base = """
<time class="mono"> """+date+"""</time>
class BlogGenerator:
def __init__(self, filepath):
self.filepath = filepath
self.filename = os.path.basename(filepath)
self.content_lines = []
self.metadata = {
"foldername": "",
"title": "",
"description": "",
"date_str": "",
"date_obj": None,
"filename_html": ""
}
def run(self):
print(f"Processing {self.filename}...")
if not os.path.exists(PATHS["header"]):
print(f"CRITICAL ERROR: header.html not found at {PATHS['header']}")
return
<main itemprop="articleBody" style="position: relative">
"""
self.parse_file()
missing_fields = []
if not self.metadata["foldername"]: missing_fields.append("[foldername]")
if not self.metadata["title"]: missing_fields.append("[title]")
if not self.metadata["description"]: missing_fields.append("[description]")
if not self.metadata["date_str"]: missing_fields.append("[date]")
write_file(base)
if missing_fields:
print("Error: The following mandatory fields are missing from your text file:")
for field in missing_fields:
print(f" - {field}")
print("Aborting generation.")
return
def generate_paragraph(p):
    """Append ``p`` to the output wrapped in a <p> element.

    Args:
        p: Paragraph content (inserted as-is).
    """
    write_file("".join(("<p>", p, "</p>")))
self.write_html_output()
self.handle_images()
self.update_blog_index()
self.update_rss()
self.update_sitemap()
print("Done!")
def generate_section(s):
    """Append an <h2> section heading that links to its own anchor.

    The anchor id is the heading text with every space replaced by a hyphen.

    Args:
        s: Heading text.
    """
    anchor = s.replace(" ", "-")
    pieces = (
        '\n<h2 id="', anchor, '">\n',
        '<a href="#', anchor, '">', s, '</a>\n',
        '</h2>\n',
    )
    write_file("".join(pieces))
def parse_date(self, date_str):
    """Convert a date such as ``Jan 25, 2022`` into a ``datetime``.

    Surrounding whitespace is ignored. When the text does not match the
    ``%b %d, %Y`` format, a warning is printed and the current local time
    is returned instead.

    Args:
        date_str: Date text taken from the article file.

    Returns:
        The parsed ``datetime.datetime``, or ``datetime.datetime.now()``
        when parsing fails.
    """
    trimmed = date_str.strip()
    try:
        parsed = datetime.datetime.strptime(trimmed, "%b %d, %Y")
    except ValueError:
        print(f"Warning: Could not parse date '{trimmed}'. Defaulting to NOW.")
        parsed = datetime.datetime.now()
    return parsed
def generate_end_file():
    """Append the page closing to the output: end of article, footer include, </body>, </html>."""
    closing_lines = (
        "",
        "<p></p></main></article>",
        '<!--#include virtual="footer.html" -->',
        "</body>",
        "</html>",
    )
    write_file("\n".join(closing_lines))
def highlight_code(self, code):
"""Simple regex-based syntax highlighter for C/C++/Python/Rust."""
code = html.escape(code)
code = re.sub(r'(&quot;.*?&quot;)', r'<span class="str">\1</span>', code)
code = re.sub(r"('.*?')", r'<span class="str">\1</span>', code)
code = re.sub(r'(//.*)', r'<span class="com">\1</span>', code)
code = re.sub(r'(#.*)', r'<span class="com">\1</span>', code)
def generate_image(path):
    """Append a paragraph containing a centered <img> element to the output.

    Args:
        path: Value used for the image ``src`` attribute (inserted as-is).
    """
    pieces = (
        '\n<p>\n',
        '<img class="center_image" src="', path, '" alt="" />\n',
        '</p>\n',
    )
    write_file("".join(pieces))
keywords = [
"int", "void", "char", "float", "double", "struct", "class",
"if", "else", "while", "for", "return", "switch", "case", "break",
"def", "import", "from", "fn", "let", "mut", "pub", "impl", "use",
"const", "static", "unsigned", "long", "true", "false", "NULL", "nullptr"
]
for kw in keywords:
pattern = r'\b(' + kw + r')\b(?![^<]*>)'
code = re.sub(pattern, r'<span class="kwd">\1</span>', code)
def set_foldername(name):
    """Create ``../images/<name>`` and copy the current directory's images into it.

    Only regular files directly inside the current working directory whose
    names end in ``.jpg``, ``.jpeg``, ``.png``, ``.ico`` or ``.gif`` are copied.

    The target directory is created together with any missing parents, and an
    already existing directory is accepted. Other failures (for example a
    permission error) are raised instead of being silently ignored, because
    the copies that follow could not succeed anyway.

    Args:
        name: Folder name of the article, used as the sub-directory of ``../images``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".ico", ".gif")
    target_dir = "../images/" + name
    os.makedirs(target_dir, exist_ok=True)
    for entry in listdir("."):
        # str.endswith accepts a tuple, so one call covers every extension.
        if isfile(join(".", entry)) and entry.endswith(image_suffixes):
            shutil.copy(entry, target_dir + "/" + entry)
code = re.sub(r'\b(0x[0-9a-fA-F]+|\d+)\b(?![^<]*>)', r'<span class="num">\1</span>', code)
return code
def main() :
Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file
def process_inline_tags(self, text):
    """Expand the inline ``[data]`` and ``[link]`` tags of one line into HTML.

    ``[data]x[data]`` becomes an inline ``<code>`` element and
    ``[link][url]label[link]`` becomes an anchor that opens in a new tab.
    Tag contents are inserted as-is.

    Args:
        text: One line (or list item) of article text.

    Returns:
        The text with every complete inline tag pair replaced.
    """
    def as_inline_code(found):
        return '<code class="highlighter-rouge">' + found.group(1) + '</code>'

    with_code = re.sub(r'\[data\](.*?)\[data\]', as_inline_code, text)
    # Group 1 is the URL between the inner brackets, group 2 the visible label.
    return re.sub(
        r'\[link\]\[(.*?)\](.*?)\[link\]',
        r'<a href="\1" target="_blank">\2</a>',
        with_code,
    )
name = ""
titre = ""
date = ""
was_list = False
with open(filename, "r") as a :
for line in a.readlines() :
if line.startswith("~foldername~") :
final_line = line.replace("~foldername~","").strip()
name = final_line
set_foldername(final_line)
def get_template_content(self, path):
    """Read a template file and return its text.

    Args:
        path: Location of the template file.

    Returns:
        The file content decoded as UTF-8, or an empty string (after
        printing a warning) when nothing exists at ``path``.
    """
    if not os.path.exists(path):
        print(f"Warning: Template file not found at {path}")
        return ""
    with open(path, "r", encoding="utf-8") as template:
        return template.read()
elif line.startswith("~titre~") :
final_line = line.replace("~titre~","").strip()
titre = final_line
generate_base(final_line)
def parse_file(self):
with open(self.filepath, "r", encoding="utf-8") as f:
lines = f.readlines()
elif line.startswith("~date~") :
final_line = line.replace("~date~","").strip()
date = " " + final_line
generate_date(final_line)
is_list = False
in_code_block = False
code_buffer = []
html_buffer = []
elif line.startswith("~section~") :
final_line = line.replace("~section~","").strip()
generate_section(final_line)
elif line.startswith("~image~") :
final_line = line.replace("~image~","").strip()
final_line = "../images/"+name+"/" + final_line
generate_image(final_line)
header_content = self.get_template_content(PATHS["header"])
html_buffer.append(header_content)
html_buffer.append('<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">')
else :
if line.startswith("~endlist~"):
was_list = False
write_file("</ul>")
continue
if line.startswith("~list~") :
l = line.split("~list~")
res = ""
if not was_list :
res += "<ul>"
for w in l :
if not w == " " and not w == "" and not w == "\n":
res += "<li>"+w+"</li>"
was_list = True
line = res
if "~data~" in line :
l = line.split("~data~")
res = ""
for w in l :
if not w.startswith(" ") and not w.endswith(" ") and not "<li" in w :
res += '<code class="language-plaintext highlighter-rouge">'+w+'</code>'
else :
res += w
line = res
if "~link~" in line :
l = line.split("~link~")
res = ""
for w in l :
if not w.startswith(" ") and not w.endswith(" ") and not w == "":
link = w.split("]")
if len(link) < 2 :
continue
res += '<a href="'+link[0][1:]+'" target="_blank">'+link[1]+'</a>'
else :
res += w
line = res
if not was_list :
write_file("<p>" + line.strip() + "</p>")
else :
write_file(line.strip())
generate_end_file()
shutil.move("out.html", "../pages/"+name+".html")
base = '''\n\t\t<li><a href="'''+name+'''.html" class="article">'''+titre+'''</a></li>\n'''
with open("../pages/blog.html", "r+") as f :
lines = f.readlines()
for i, line in enumerate(lines):
if "<!--Articles-->" in line :
lines[i+1] = lines[i+1].strip() + base
f.seek(0)
for line in lines:
f.write(line)
if line.strip().startswith("[code]"):
if in_code_block:
raw_code = "".join(code_buffer).strip()
highlighted_code = self.highlight_code(raw_code)
html_buffer.append(f'<pre><code>{highlighted_code}</code></pre>')
code_buffer = []
in_code_block = False
else:
in_code_block = True
continue
if __name__ == "__main__" :
main()
if in_code_block:
code_buffer.append(line)
continue
line = line.strip()
if not line: continue
if line.startswith("[foldername]"):
val = line.replace("[foldername]", "").strip()
self.metadata["foldername"] = val
self.metadata["filename_html"] = val + ".html"
elif line.startswith("[title]"):
val = line.replace("[title]", "").strip()
self.metadata["title"] = val
html_buffer.append(f'<h1 itemprop="name headline">{val}</h1>')
elif line.startswith("[description]"):
val = line.replace("[description]", "").strip()
self.metadata["description"] = val
elif line.startswith("[date]"):
val = line.replace("[date]", "").strip()
self.metadata["date_str"] = val
self.metadata["date_obj"] = self.parse_date(val)
html_buffer.append(f'<time class="mono"> {val}</time>')
html_buffer.append('<main itemprop="articleBody" style="position: relative">')
elif line.startswith("[section]"):
val = line.replace("[section]", "").strip()
anchor = val.replace(" ", "-")
html_buffer.append(f'<h2 id="{anchor}"><a href="#{anchor}">{val}</a></h2>')
elif line.startswith("[image]"):
val = line.replace("[image]", "").strip()
img_path = f'../images/{self.metadata["foldername"]}/{val}'
html_buffer.append(f'<p><img class="center_image" src="{img_path}" alt="" /></p>')
elif line.startswith("[list]"):
parts = line.split("[list]")
items = [p for p in parts if p.strip()]
if not is_list:
html_buffer.append("<ul>")
is_list = True
for item in items:
parsed_item = self.process_inline_tags(item)
html_buffer.append(f"<li>{parsed_item}</li>")
elif line.startswith("[endlist]"):
if is_list:
html_buffer.append("</ul>")
is_list = False
else:
if is_list and not line.startswith("[list]"):
html_buffer.append("</ul>")
is_list = False
processed_line = self.process_inline_tags(line)
html_buffer.append(f"<p>{processed_line}</p>")
html_buffer.append('</main></article>')
footer_content = self.get_template_content(PATHS["footer"])
html_buffer.append(footer_content)
if "</body>" not in footer_content:
html_buffer.append('</body>')
if "</html>" not in footer_content:
html_buffer.append('</html>')
self.content_lines = html_buffer
def write_html_output(self):
    """Write the rendered page into the pages directory.

    The file name comes from ``metadata["filename_html"]``; the content is
    ``content_lines`` joined with newlines, encoded as UTF-8. An existing
    file of the same name is overwritten.
    """
    destination = os.path.join(PATHS["pages"], self.metadata["filename_html"])
    with open(destination, "w", encoding="utf-8") as page:
        page.write("\n".join(self.content_lines))
    print(f"Generated page: {destination}")
def handle_images(self):
    """Copy the images stored next to the article file into the post's image folder.

    The destination is ``<images_dest>/<foldername>`` and is created when it
    does not exist yet. Files are selected by extension, ignoring case.
    """
    destination = os.path.join(PATHS["images_dest"], self.metadata["foldername"])
    if not os.path.exists(destination):
        os.makedirs(destination)

    article_dir = os.path.dirname(os.path.abspath(self.filepath))
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.ico')
    pictures = (
        name for name in os.listdir(article_dir)
        if name.lower().endswith(image_suffixes)
    )
    for picture in pictures:
        shutil.copy(
            os.path.join(article_dir, picture),
            os.path.join(destination, picture),
        )
def update_blog_index(self):
    """Insert a link to the generated page at the top of the blog index list.

    The new ``<li>`` entry is placed right after the first ``<ul>`` that
    follows the ``<!--Articles-->`` marker in the blog index file. Nothing
    is written when the index already mentions both the page file name and
    the title, when the marker or the ``<ul>`` cannot be found, or when the
    index file does not exist; each of these cases prints a message.
    """
    # The marker must be a non-empty string: str.find("") always returns 0,
    # which would make the "marker not found" check below unreachable and
    # would pick the first <ul> of the whole file instead of the article list.
    marker = "<!--Articles-->"
    entry = f'\t\t<li><a href="{self.metadata["filename_html"]}" class="article">{self.metadata["title"]}</a></li>'

    print(f"Updating {PATHS['blog_index']}...")

    try:
        with open(PATHS["blog_index"], "r", encoding="utf-8") as f:
            content = f.read()

        if self.metadata["filename_html"] in content and self.metadata["title"] in content:
            print("Link already exists in blog.html. Skipping index update.")
            return

        marker_index = content.find(marker)
        if marker_index == -1:
            print(f"Warning: '{marker}' marker not found in blog.html")
            return

        # Only look for the list after the marker, not anywhere in the file.
        ul_start_index = content.find("<ul>", marker_index)
        if ul_start_index == -1:
            print("Warning: No <ul> found after the Articles marker.")
            return

        # Insert just after the opening tag so the newest article is listed first.
        insertion_point = ul_start_index + len("<ul>")
        new_content = content[:insertion_point] + "\n" + entry + content[insertion_point:]

        with open(PATHS["blog_index"], "w", encoding="utf-8") as f:
            f.write(new_content)
        print("Blog index updated.")
    except FileNotFoundError:
        print(f"Error: Could not find {PATHS['blog_index']}")
def update_rss(self):
    """Add the article to ``feed.xml`` in the RSS directory.

    When the feed file is missing, empty, or lacks ``<rss`` / ``<channel>``,
    a new feed containing only this article is written. Otherwise the new
    ``<item>`` is inserted before the first existing ``<item>`` (or before
    ``</channel>`` when the feed has no items yet). A feed that already
    contains the article link is left untouched.

    The title and description are HTML-escaped before being embedded.
    """
    rss_file = os.path.join(PATHS["rss"], "feed.xml")
    rss_date = self.metadata["date_obj"].strftime("%a, %d %b %Y %H:%M:%S +0000")
    link = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

    safe_title = html.escape(self.metadata['title'])
    safe_desc = html.escape(self.metadata['description'])

    new_item = f"""
<item>
<title>{safe_title}</title>
<link>{link}</link>
<guid>{link}</guid>
<pubDate>{rss_date}</pubDate>
<description>{safe_desc}</description>
</item>"""

    content = ""
    file_is_valid = False

    if os.path.exists(rss_file):
        with open(rss_file, "r", encoding="utf-8") as f:
            content = f.read()
        if content.strip() and "<rss" in content and "<channel>" in content:
            file_is_valid = True
        else:
            print("Warning: RSS file exists but is empty or invalid. Re-creating.")

    if not file_is_valid:
        print(f"Creating new RSS file at {rss_file}")
        content = f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>~Blog of a French coder~</title>
<link>{DOMAIN_URL}</link>
<description>Blog Posts</description>
{new_item}
</channel>
</rss>"""
        with open(rss_file, "w", encoding="utf-8") as f:
            f.write(content)
        return

    if link in content:
        print("RSS already contains this link. Skipping.")
        return

    # Plain string replacement on purpose: the item text comes from the
    # article and may contain backslashes, which a regex replacement
    # template would interpret as escapes or group references.
    if "<item>" in content:
        # Newest article goes in front of the first existing item.
        content = content.replace("<item>", f"{new_item}\n<item>", 1)
    else:
        content = content.replace("</channel>", f"{new_item}\n</channel>")

    with open(rss_file, "w", encoding="utf-8") as f:
        f.write(content)
    print("RSS feed updated.")
def update_sitemap(self):
    """Append a ``<url>`` entry for the generated page to the sitemap.

    The entry is inserted before ``</urlset>`` with the current UTC time as
    ``<lastmod>``. Nothing is written when the sitemap already contains the
    page URL, when it has no ``</urlset>`` tag, or when the sitemap file
    does not exist; each of these cases prints a message.
    """
    # The timestamp is labelled "+00:00", so it has to be taken in UTC
    # rather than in the machine's local time.
    today_full = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
    new_loc = f"{DOMAIN_URL}/pages/{self.metadata['filename_html']}"

    new_url_entry = f"""
<url>
<loc>{new_loc}</loc>
<lastmod>{today_full}</lastmod>
<priority>0.64</priority>
</url>"""

    try:
        with open(PATHS["sitemap"], "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        # Same reporting style as the blog index update: report and carry on.
        print(f"Error: Could not find {PATHS['sitemap']}")
        return

    if new_loc in content:
        print("Sitemap already contains this link.")
        return

    if "</urlset>" not in content:
        print("Warning: No </urlset> found in sitemap. Skipping sitemap update.")
        return

    content = content.replace("</urlset>", f"{new_url_entry}\n</urlset>")
    with open(PATHS["sitemap"], "w", encoding="utf-8") as f:
        f.write(content)
if __name__ == "__main__":
    # Make sure the image and RSS output directories exist before anything else.
    for p in (PATHS["images_dest"], PATHS["rss"]):
        if os.path.exists(p):
            continue
        os.makedirs(p)

    # The only command-line argument is the article text file to convert.
    parser = argparse.ArgumentParser(description="Generate a blog post from a template file.")
    parser.add_argument("filename", help="Path to the article text file")
    args = parser.parse_args()

    if not os.path.isfile(args.filename):
        print(f"Error: File '{args.filename}' not found.")
    else:
        generator = BlogGenerator(args.filename)
        generator.run()

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

View File

@@ -1,11 +1,110 @@
~foldername~template~foldername~
~titre~Example~titre~
~date~Jan 25, 2022~date~
This is a paragraph
This is another one with ~data~data text~data~ and ~link~[https://www.youtube.com/watch?v=UaH8cAGdjzw]a link.~link~
~section~Title 1~section~
Paragraph of Title 1
~list~list element~list~
~list~~data~data element in list~data~~list~
~endlist~
~image~serverlist.png~image~
[foldername]my_new_post_another[foldername]
[title]My Article 2[title]
[description]hello its me\n uwu[description]
[date] Jan 25, 2025[date]
[section]Introduction[section]
[data]print("Hello World")[data]
[link][https://google.com]Link to Google[link]
[list]First item[list]
[list]Second item[list]
[endlist]
[image]screenshot.png[image]
[code]
int main(int argc, char* argv[])
{
const char* infile = nullptr;
bool marshalled = false;
const char* version = nullptr;
std::ostream* pyc_output = &std::cout;
std::ofstream out_file;
for (int arg = 1; arg < argc; ++arg) {
if (strcmp(argv[arg], "-o") == 0) {
if (arg + 1 < argc) {
const char* filename = argv[++arg];
out_file.open(filename, std::ios_base::out);
if (out_file.fail()) {
fprintf(stderr, "Error opening file '%s' for writing\n",
filename);
return 1;
}
pyc_output = &out_file;
} else {
fputs("Option '-o' requires a filename\n", stderr);
return 1;
}
} else if (strcmp(argv[arg], "-c") == 0) {
marshalled = true;
} else if (strcmp(argv[arg], "-v") == 0) {
if (arg + 1 < argc) {
version = argv[++arg];
} else {
fputs("Option '-v' requires a version\n", stderr);
return 1;
}
} else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) {
fprintf(stderr, "Usage: %s [options] input.pyc\n\n", argv[0]);
fputs("Options:\n", stderr);
fputs(" -o <filename> Write output to <filename> (default: stdout)\n", stderr);
fputs(" -c Specify loading a compiled code object. Requires the version to be set\n", stderr);
fputs(" -v <x.y> Specify a Python version for loading a compiled code object\n", stderr);
fputs(" --help Show this help text and then exit\n", stderr);
return 0;
} else {
infile = argv[arg];
}
}
if (!infile) {
fputs("No input file specified\n", stderr);
return 1;
}
PycModule mod;
if (!marshalled) {
try {
mod.loadFromFile(infile);
} catch (std::exception& ex) {
fprintf(stderr, "Error loading file %s: %s\n", infile, ex.what());
return 1;
}
} else {
if (!version) {
fputs("Opening raw code objects requires a version to be specified\n", stderr);
return 1;
}
std::string s(version);
auto dot = s.find('.');
if (dot == std::string::npos || dot == s.size()-1) {
fputs("Unable to parse version string (use the format x.y)\n", stderr);
return 1;
}
int major = std::stoi(s.substr(0, dot));
int minor = std::stoi(s.substr(dot+1, s.size()));
mod.loadFromMarshalledFile(infile, major, minor);
}
if (!mod.isValid()) {
fprintf(stderr, "Could not load file %s\n", infile);
return 1;
}
const char* dispname = strrchr(infile, PATHSEP);
dispname = (dispname == NULL) ? infile : dispname + 1;
*pyc_output << "# Source Generated with Decompyle++\n";
formatted_print(*pyc_output, "# File: %s (Python %d.%d%s)\n\n", dispname,
mod.majorVer(), mod.minorVer(),
(mod.majorVer() < 3 && mod.isUnicode()) ? " Unicode" : "");
try {
decompyle(mod.code(), &mod, *pyc_output);
} catch (std::exception& ex) {
fprintf(stderr, "Error decompyling %s: %s\n", infile, ex.what());
return 1;
}
return 0;
}
[code]