#!/usr/bin/env python3
"""Export question/answer markdown files to a semicolon-delimited CSV.

Every ``*.md`` file below a root folder is split into YAML-like frontmatter,
a question (the text before the answer fence) and an answer (the content of
the first ``spoiler`` fenced block, or of the first fenced block of any kind).

Two output modes are supported:

* ``anki``  - question and answer rendered to HTML, plus category and deck
  name, preceded by an Anki ``#deck column:4`` file header.
* ``excel`` - plain-text columns with a header row.
"""
import re
import csv
import argparse
from pathlib import Path

try:
    from markdown import markdown
except ImportError:
    # Only the Anki export renders HTML; the Excel export must keep working
    # when the third-party package is not installed.
    markdown = None

FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.S)
FENCE_RE = re.compile(r"^```([^\n]*)\n(.*?)\n```", re.S | re.M)
DATE_DIR_RE = re.compile(r"\d{4}-\d{2}-\d{2}")

# Frontmatter helpers. Horizontal whitespace is spelled ``[ \t]`` wherever a
# match must not spill over onto the following line.
_TAGS_KEY_RE = re.compile(r"^\s*tags\s*:\s*$")
_LIST_ITEM_RE = re.compile(r"^\s*-\s*(.+)$")
_INLINE_TAGS_RE = re.compile(r"^[ \t]*tags[ \t]*:\s*\[([^\]]*)\]", re.M)
_DATE_RE = re.compile(r"^date[ \t]*:[ \t]*(.+)$", re.M)
_MAXPOINTS_RE = re.compile(r"^(?:max_points|maxpoints)[ \t]*:[ \t]*(.+)$", re.M)

# Plain-text cleanup patterns used for the Excel export.
_FENCED_BLOCK_RE = re.compile(r"```.*?```", re.S)
_MD_PUNCT_RE = re.compile(r"[#*_`]+")
_LIST_MARKER_RE = re.compile(r"^\s*-\s+", re.M)
_WHITESPACE_RE = re.compile(r"\s+")

# Tags that never count as a question's category.
_GENERIC_TAGS = frozenset(("biokemi", "provfråga"))

# Bold labels removed from the question text before export.
_QUESTION_LABELS = ("**Uppgift**", "**Rätt svar**", "**Svar**", "**Answer**")

_EXCEL_HEADER = [
    "date", "question_number", "category", "question",
    "maxpoints", "question", "answer",
]


def find_date(path: Path):
    """Return the name of the nearest ``YYYY-MM-DD`` directory, or ``""``.

    ``path`` itself is checked first and then each of its ancestors.
    ``Path.parents`` does not include the path itself, so without the explicit
    first check a file located directly inside a date directory would never
    get a date when the caller passes that directory.
    """
    for candidate in (path, *path.parents):
        if DATE_DIR_RE.fullmatch(candidate.name):
            return candidate.name
    return ""


def _unquote(value: str) -> str:
    """Strip surrounding whitespace and then surrounding quote characters."""
    return value.strip().strip('"\'')


def _parse_block_tags(fm_raw: str):
    """Collect the ``- item`` lines that directly follow a bare ``tags:`` line."""
    tags = []
    in_tags = False
    for line in fm_raw.splitlines():
        if not in_tags:
            in_tags = bool(_TAGS_KEY_RE.match(line))
            continue
        item = _LIST_ITEM_RE.match(line)
        if not item:
            # The list ends at the first line that is not a list item.
            break
        tags.append(_unquote(item.group(1)))
    return [t for t in tags if t]


def _parse_inline_tags(fm_raw: str):
    """Parse ``tags: [a, b]``; the bracket may also open on the next line."""
    found = _INLINE_TAGS_RE.search(fm_raw)
    if not found:
        return []
    cleaned = (_unquote(part) for part in found.group(1).split(","))
    return [t for t in cleaned if t]


def _scalar(pattern, fm_raw: str) -> str:
    """Return the unquoted first group of ``pattern`` in ``fm_raw``, or ``""``."""
    found = pattern.search(fm_raw)
    return _unquote(found.group(1)) if found else ""


def parse_frontmatter(text: str):
    """Split ``text`` into frontmatter metadata and the remaining body.

    Returns a ``(metadata, body)`` tuple. ``metadata`` always has the keys
    ``"tags"`` (list of str), ``"date"`` (str) and ``"maxpoints"`` (str);
    missing values are ``[]`` / ``""``. When ``text`` does not start with a
    ``---`` delimited frontmatter block, ``body`` is ``text`` unchanged.

    Tags are read from a block list (``tags:`` followed by ``- item`` lines)
    or, if that yields nothing, from an inline list (``tags: [a, b]``).
    ``maxpoints`` accepts the key spelled ``maxpoints`` or ``max_points``.
    """
    m = FRONTMATTER_RE.match(text)
    if not m:
        return {"tags": [], "date": "", "maxpoints": ""}, text

    fm_raw = m.group(1)
    rest = text[m.end():]
    tags = _parse_block_tags(fm_raw) or _parse_inline_tags(fm_raw)
    metadata = {
        "tags": tags,
        "date": _scalar(_DATE_RE, fm_raw),
        "maxpoints": _scalar(_MAXPOINTS_RE, fm_raw),
    }
    return metadata, rest


def extract_question_answer(body: str):
    """Split ``body`` into ``(question, answer)``.

    The answer is the content of the first fenced block whose info string
    contains ``spoiler`` (case-insensitive); without such a block the first
    fenced block of any kind is used. The question is everything before the
    chosen fence. With no fenced block at all the whole body is the question
    and the answer is ``""``. Both parts are stripped.
    """
    fences = list(FENCE_RE.finditer(body))
    chosen = next(
        (f for f in fences if "spoiler" in (f.group(1) or "").lower()),
        None,
    )
    if chosen is None and fences:
        chosen = fences[0]
    if chosen is None:
        return body.strip(), ""
    return body[:chosen.start()].strip(), chosen.group(2).strip()


def _plain_text_from_md(md_text: str) -> str:
    """Reduce markdown to a single line of plain text for spreadsheet export.

    Removes fenced code blocks, the characters ``# * _ ``` wherever they
    occur, and leading ``-`` list markers, then collapses all whitespace runs
    to single spaces. This is intentionally lightweight and not a full
    markdown-to-text renderer.
    """
    txt = _FENCED_BLOCK_RE.sub("", md_text)
    txt = _MD_PUNCT_RE.sub("", txt)
    txt = _LIST_MARKER_RE.sub("", txt)
    txt = _WHITESPACE_RE.sub(" ", txt)
    return txt.strip()


def _pick_category(tags) -> str:
    """Return the first non-empty tag that is not a generic tag, or ``""``."""
    for tag in tags:
        if tag and tag.lower() not in _GENERIC_TAGS:
            return tag
    return ""


def _strip_labels(question_md: str) -> str:
    """Remove the known bold label markers from the question markdown."""
    for label in _QUESTION_LABELS:
        question_md = question_md.replace(label, "")
    return question_md


def main(root: Path, out: Path, mode: str = "anki"):
    """Collect all questions below ``root`` and write them to ``out`` as CSV.

    Args:
        root: Folder that is searched recursively for ``*.md`` files.
        out: Destination CSV file; missing parent folders are created.
        mode: ``"excel"`` writes plain-text rows with a header row; any other
            value (default ``"anki"``) writes HTML rows for Anki import.

    Raises:
        ImportError: In Anki mode when the ``markdown`` package is missing.
    """
    excel = mode == "excel"
    if not excel and markdown is None:
        raise ImportError("the 'markdown' package is required for the Anki export")

    rows = []
    # Sorted so that the row order does not depend on the file system.
    for md in sorted(root.rglob("*.md")):
        # Files whose stem is longer than two characters are skipped;
        # presumably only numbered question files such as 1.md or 12.md are
        # meant to be exported - TODO confirm.
        if len(md.stem) > 2:
            continue

        text = md.read_text(encoding="utf-8")
        fm, body = parse_frontmatter(text)
        date = fm.get("date") or find_date(md.parent)
        qnum = md.stem
        maxpoints = fm.get("maxpoints", "")
        category = _pick_category(fm.get("tags", []))

        question, answer = extract_question_answer(body)
        # Line breaks are kept so the markdown renders properly later on.
        question_md = _strip_labels(question.strip())
        answer_md = answer.strip()

        if excel:
            q_plain = _plain_text_from_md(question_md)
            a_plain = _plain_text_from_md(answer_md)
            # Column order matches _EXCEL_HEADER; the question text is
            # deliberately emitted in both "question" columns.
            rows.append(
                (date or "", qnum, category, q_plain, maxpoints, q_plain, a_plain)
            )
            continue

        # Render question and answer markdown to HTML with common extensions.
        question_html = markdown(question_md, extensions=["fenced_code", "tables"])
        answer_html = markdown(answer_md, extensions=["fenced_code", "tables"])

        # Metadata as simple HTML paragraphs so the CSV consumer can show it.
        # NOTE(review): the <p> tags are reconstructed from this stated
        # intent - confirm against the card template in use.
        meta_html = (
            f"\n<p>kategory: {category}</p>"
            f"\n<p>prov: {date}</p>"
            f"\n<p>fråga: {qnum}</p>\n"
        )
        # Second column: rendered answer followed by the metadata.
        details = answer_html + "\n\n" + meta_html

        # Deck name uses the category when present and falls back to "?".
        deck_suffix = category if category else "?"
        deck_name = f"Biokemi::Johan D - Tentafrågor::{deck_suffix.capitalize()}"

        # The deck name is the fourth column, as announced by the
        # "#deck column:4" file header written below.
        rows.append((question_html, details, category, deck_name))

    out.parent.mkdir(parents=True, exist_ok=True)
    with out.open("w", encoding="utf-8", newline="") as f:
        if not excel:
            # Anki-specific file header; in a spreadsheet it would show up as
            # a bogus first row above the column headers.
            f.write("#deck column:4\n")
        writer = csv.writer(f, delimiter=";", quoting=csv.QUOTE_ALL)
        if excel:
            writer.writerow(_EXCEL_HEADER)
        writer.writerows(rows)


if __name__ == "__main__":
    ap = argparse.ArgumentParser(description="Extract questions+answers to CSV")
    ap.add_argument("root", nargs="?", default="content", help="root content folder (default: content)")
    ap.add_argument("-o", "--out", default="output.csv", help="output CSV file (default: output.csv)")
    ap.add_argument("--mode", choices=["anki", "excel"], default="anki", help="output mode: 'anki' (default) or 'excel' (date;question_number;category;question;maxpoints;question;answer)")
    args = ap.parse_args()
    main(Path(args.root), Path(args.out), args.mode)