import dataclasses import pathlib import frontmatter import mistune from quiz.utils.obsidian_embed_plugin import obsidian_embed markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed]) class Node: def __init__(self, token): self.type = token["type"] self.raw = token.get("raw", "") self.attrs = token.get("attrs", {}) self.children = [Node(token=child) for child in token.get("children", [])] def __repr__(self) -> str: attrs = [] if self.raw: attrs.append(f"raw={self.raw!r}") if self.attrs: attrs.append(f"attrs={self.attrs!r}") if self.children: attrs.append(f"children={self.children!r}") # block_text -> BlockText pretty = self.type.replace("_", " ").title().replace(" ", "") return f"{pretty}(" + ", ".join(attrs) + ")" @property def text(self) -> str: if self.type == "text": return self.raw texts = [] for child in self.children: texts.append(child.text) return "".join(texts) @dataclasses.dataclass class ParsedQuestion: metadata: dict = dataclasses.field(default_factory=dict) raw_content: str = "" nodes: list[Node] = dataclasses.field(default_factory=list) def parse_question(path: pathlib.Path): raw = path.read_text(encoding="utf-8") return parse_question_from_content(raw) def parse_question_from_content(content_str: str): """Parse question from a content string instead of a file.""" metadata, content = frontmatter.parse(content_str) tokens = markdown(content) question = ParsedQuestion( metadata=metadata, raw_content=content, nodes=[Node(token=token) for token in tokens], ) return question def main(): root = pathlib.Path(__file__).parent.parent.parent.parent print(root) exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor" for file in sorted(exams.glob("*/*.md")): if len(file.stem) > 2: continue question = parse_question(file) print(question.metadata, repr(question.raw_content)) continue for node in question.nodes: match node.type: case "heading": print("Heading:", repr(node.text)) case "paragraph": print("Paragraph:", repr(node.text)) case "list": print("List:") for child in node.children: print(" - List item:", repr(child.text)) case "block_code" if node.attrs["info"] == "spoiler-block:": print("Spoiler:", repr(node.raw.rstrip())) if __name__ == "__main__": main()