vault backup: 2025-12-09 22:35:53
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 1m17s
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 1m17s
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
# python
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
import csv
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from markdown import markdown
|
||||
|
||||
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.S)
|
||||
FENCE_RE = re.compile(r"^```([^\n]*)\n(.*?)\n```", re.S | re.M)
|
||||
DATE_DIR_RE = re.compile(r"\d{4}-\d{2}-\d{2}")
|
||||
@@ -17,7 +20,7 @@ def find_date(path: Path):
|
||||
def parse_frontmatter(text: str):
|
||||
m = FRONTMATTER_RE.match(text)
|
||||
if not m:
|
||||
return {}, text
|
||||
return {"tags": [], "date": ""}, text
|
||||
fm_raw = m.group(1)
|
||||
rest = text[m.end():]
|
||||
tags = []
|
||||
@@ -36,10 +39,21 @@ def parse_frontmatter(text: str):
|
||||
break
|
||||
# also try single-line tags: tags: [a, b]
|
||||
if not tags:
|
||||
m2 = re.search(r"tags\s*:\s*\[([^\]]+)\]", fm_raw)
|
||||
if m2:
|
||||
tags = [t.strip() for t in m2.group(1).split(",")]
|
||||
return {"tags": tags}, rest
|
||||
# handle single-line tags like: tags: [a, b]
|
||||
idx = fm_raw.find("tags:")
|
||||
if idx != -1:
|
||||
# look for first '[' and ']' after the 'tags:' token on the same or next line
|
||||
br_start = fm_raw.find("[", idx)
|
||||
br_end = fm_raw.find("]", br_start + 1) if br_start != -1 else -1
|
||||
if br_start != -1 and br_end != -1:
|
||||
inner = fm_raw[br_start+1:br_end]
|
||||
tags = [t.strip().strip('"\'') for t in inner.split(",") if t.strip()]
|
||||
# parse date from frontmatter if present
|
||||
date_val = ""
|
||||
mdate = re.search(r"^date\s*:\s*(.+)$", fm_raw, re.M)
|
||||
if mdate:
|
||||
date_val = mdate.group(1).strip().strip('"\'')
|
||||
return {"tags": tags, "date": date_val}, rest
|
||||
|
||||
def extract_question_answer(body: str):
|
||||
# find first fenced block (prefer spoiler)
|
||||
@@ -64,29 +78,41 @@ def extract_question_answer(body: str):
|
||||
def main(root: Path, out: Path):
|
||||
rows = []
|
||||
for md in root.rglob("*.md"):
|
||||
rel = md.relative_to(root)
|
||||
date = find_date(md.parent)
|
||||
qnum = md.stem
|
||||
# process each markdown file
|
||||
if len(md.stem) > 2:
|
||||
continue
|
||||
text = md.read_text(encoding="utf-8")
|
||||
fm, body = parse_frontmatter(text)
|
||||
date = fm.get("date") or find_date(md.parent)
|
||||
qnum = md.stem
|
||||
tags = fm.get("tags", [])
|
||||
# choose first tag that's not biokemi or provfråga
|
||||
category = ""
|
||||
for t in tags:
|
||||
if t.lower() not in ("biokemi", "provfråga"):
|
||||
if t and t.lower() not in ("biokemi", "provfråga"):
|
||||
category = t
|
||||
break
|
||||
question, answer = extract_question_answer(body)
|
||||
# normalize whitespace
|
||||
question = re.sub(r"\s+", " ", question).strip()
|
||||
answer = re.sub(r"\s+", " ", answer).strip()
|
||||
details = f"{category}; {date} {qnum} {answer}"
|
||||
rows.append((question, details))
|
||||
# write CSV with semicolon delimiter and quoting
|
||||
# keep original markdown (preserve line breaks) so markdown can render properly
|
||||
question_md = question.strip()
|
||||
answer_md = answer.strip()
|
||||
|
||||
# Render question and answer markdown to HTML. Enable common extensions.
|
||||
question_html = markdown(question_md, extensions=["fenced_code", "tables"])
|
||||
answer_html = markdown(answer_md, extensions=["fenced_code", "tables"])
|
||||
|
||||
# metadata as simple HTML paragraphs so CSV consumer can display it
|
||||
meta_html = f"<p>kategory: {category}</p><p>prov: {date}</p><p>fråga: {qnum}</p>"
|
||||
|
||||
# second column contains the rendered answer followed by metadata HTML
|
||||
details = answer_html + "\n\n" + meta_html
|
||||
|
||||
rows.append((question_html, details, category))
|
||||
# write CSV with semicolon delimiter
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
with out.open("w", encoding="utf-8", newline="") as f:
|
||||
writer = csv.writer(f)
|
||||
writer.writerow(["question", "details"])
|
||||
writer = csv.writer(f, delimiter=";", quoting=csv.QUOTE_ALL)
|
||||
#writer.writerow(["fråga", "svar", "kategori"])
|
||||
for r in rows:
|
||||
writer.writerow(r)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user