1
0

vault backup: 2025-12-23 10:55:37
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m10s

This commit is contained in:
2025-12-23 10:55:37 +01:00
parent 68079d368d
commit 3b2751808e
9 changed files with 877 additions and 368 deletions

View File

@@ -30,7 +30,7 @@
"state": { "state": {
"file": "Anatomi & Histologi 2/Statistik.md", "file": "Anatomi & Histologi 2/Statistik.md",
"mode": "source", "mode": "source",
"source": true, "source": false,
"backlinks": false "backlinks": false
}, },
"icon": "lucide-file", "icon": "lucide-file",

View File

@@ -1,18 +0,0 @@
# Matching Questions Format Analysis
Based on reviewing the 17 matching questions:
## Key Finding:
Only **1 question has an answer** (2023-05-31/3.md), the rest have TODO.
**That question uses this format:**
- Two separate bullet lists
- Answer: "ItemName: MatchName" format
## Proposed Implementation:
1. Support two-list format (most flexible)
2. Parse answer as "Item: Match" pairs
3. Store as JSON with 0-indexed pairs
4. Render as n×n table with radio buttons
## Next: Implement based on this one working example.

View File

@@ -1,15 +0,0 @@
import pathlib
import pprint

import mistune

# AST-only markdown parser: renderer=None makes mistune return the token tree.
markdown = mistune.create_markdown(renderer=None)

root = pathlib.Path(__file__).parent.parent
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
print(exams.absolute())

for file in sorted(exams.glob("*/*.md")):
    # Question files are named like "1.md".."99.md"; skip anything longer.
    if len(file.stem) > 2:
        continue
    print(f"Parsing {file}")
    tokens = markdown(file.read_text(encoding="utf-8"))
    # pprint hoisted to the top of the file; the original re-imported it on
    # every loop iteration.
    pprint.pprint(tokens)

View File

@@ -13,5 +13,6 @@ markers =
admin: Admin interface tests admin: Admin interface tests
import: Import and parsing tests import: Import and parsing tests
import_tests: Import and parsing tests import_tests: Import and parsing tests
parser: Question parser tests
slow: Slow running tests slow: Slow running tests

View File

@@ -0,0 +1,537 @@
"""
Comprehensive test suite for the question_parser module.
This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:
1. Node class:
- Initialization with different token types
- Attribute handling
- Children node processing
- String representation (__repr__)
- Text extraction from nested structures
2. parse_question function:
- Metadata parsing (tags, dates, etc.)
- Raw content extraction
- Different question types (MCQ, SCQ, text field, matching)
- Questions with images
- Edge cases (empty content, missing frontmatter)
- Document structure preservation
3. ParsedQuestion dataclass:
- Default values
- Initialization with custom values
4. Real exam questions:
- Parsing actual exam questions from the content directory
- Validation of all short-named question files
Test execution:
pytest tests/test_question_parser.py -v # Verbose output
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
pytest tests/test_question_parser.py --collect-only # List all tests
"""
import pathlib
import tempfile
import pytest
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
@pytest.fixture
def temp_dir():
    """Provide a throwaway directory as a pathlib.Path, removed after the test."""
    with tempfile.TemporaryDirectory() as scratch:
        yield pathlib.Path(scratch)
@pytest.fixture
def create_question_file(temp_dir):
    """Return a factory that writes a markdown question file into temp_dir."""

    def _create_file(filename: str, content: str) -> pathlib.Path:
        target = temp_dir / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _create_file
class TestNode:
    """Tests for the Node wrapper around mistune tokens."""

    @pytest.mark.parametrize("token,expected_type,expected_raw", [
        ({"type": "paragraph"}, "paragraph", ""),
        ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
        ({"type": "text", "raw": "Some text"}, "text", "Some text"),
        ({"type": "list"}, "list", ""),
    ])
    def test_node_initialization(self, token, expected_type, expected_raw):
        """A Node exposes the token's type; raw defaults to an empty string."""
        node = Node(token)
        assert node.type == expected_type
        assert node.raw == expected_raw

    @pytest.mark.parametrize("token,expected_attrs", [
        ({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
        ({"type": "paragraph"}, {}),
        ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
    ])
    def test_node_attributes(self, token, expected_attrs):
        """Token attrs are exposed as-is, defaulting to an empty dict."""
        node = Node(token)
        assert node.attrs == expected_attrs

    def test_node_children(self):
        """Child tokens are wrapped recursively as Node instances."""
        token = {
            "type": "paragraph",
            "children": [
                {"type": "text", "raw": "Hello "},
                {"type": "text", "raw": "World"},
            ],
        }
        node = Node(token)
        assert len(node.children) == 2
        assert node.children[0].type == "text"
        assert node.children[0].raw == "Hello "
        assert node.children[1].type == "text"
        assert node.children[1].raw == "World"

    @pytest.mark.parametrize("token,expected_repr_contains", [
        ({"type": "text", "raw": "test"}, "Text(raw='test')"),
        ({"type": "paragraph"}, "Paragraph()"),
        ({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
    ])
    def test_node_repr(self, token, expected_repr_contains):
        """__repr__ renders a CamelCase type name plus any non-default fields."""
        node = Node(token)
        assert repr(node) == expected_repr_contains

    @pytest.mark.parametrize("token,expected_text", [
        ({"type": "text", "raw": "Simple text"}, "Simple text"),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Hello "},
                    {"type": "text", "raw": "World"},
                ],
            },
            "Hello World",
        ),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Nested "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                    {"type": "text", "raw": " text"},
                ],
            },
            "Nested bold text",
        ),
    ])
    def test_node_text_property(self, token, expected_text):
        """The text property concatenates raw text across nested children."""
        node = Node(token)
        assert node.text == expected_text
class TestParseQuestion:
    """Tests for the parse_question entry point."""

    @pytest.mark.parametrize("content,expected_tags", [
        (
            """---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
            ["ah2", "provfråga", "frågetyp/mcq"],
        ),
        (
            """---
tags:
  - ah2
  - provfråga
  - frågetyp/scq
date: 2023-05-31
---
Question content""",
            ["ah2", "provfråga", "frågetyp/scq"],
        ),
    ])
    def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
        """Tags parse identically from flow-style and block-style YAML lists."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.metadata["tags"] == expected_tags

    @pytest.mark.parametrize("content,expected_date", [
        (
            """---
tags: [ah2]
date: 2022-01-15
---
Content""",
            "2022-01-15",
        ),
        (
            """---
tags: [ah2]
date: 2023-05-31
---
Content""",
            "2023-05-31",
        ),
    ])
    def test_parse_metadata_date(self, create_question_file, content, expected_date):
        """The frontmatter date round-trips through str() unchanged."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert str(question.metadata["date"]) == expected_date

    @pytest.mark.parametrize("content,expected_raw", [
        (
            """---
tags: [ah2]
---
Simple question""",
            "Simple question",
        ),
        (
            """---
tags: [ah2]
---
Question with **bold** text""",
            "Question with **bold** text",
        ),
    ])
    def test_parse_raw_content(self, create_question_file, content, expected_raw):
        """raw_content keeps the markdown body with the frontmatter stripped."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.raw_content.strip() == expected_raw

    def test_parse_mcq_question(self, create_question_file):
        """A full MCQ file yields paragraphs, an option list and a spoiler block."""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**

- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        file_path = create_question_file("mcq.md", content)
        question = parse_question(file_path)

        assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
        assert len(question.nodes) > 0

        # Question text and instruction line become paragraph nodes.
        paragraphs = [n for n in question.nodes if n.type == "paragraph"]
        assert len(paragraphs) > 0

        # The answer options become a list node.
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

        # The answer lives in the fenced spoiler block.
        code_blocks = [n for n in question.nodes if n.type == "block_code"]
        assert len(code_blocks) > 0
        spoiler = code_blocks[0]
        assert spoiler.attrs.get("info") == "spoiler-block:"
        assert "A och D" in spoiler.raw

    def test_parse_scq_question(self, create_question_file):
        """A single-choice question keeps its tag and its option list."""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?

- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons

```spoiler-block:
A
```
"""
        file_path = create_question_file("scq.md", content)
        question = parse_question(file_path)
        assert "frågetyp/scq" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_text_field_question(self, create_question_file):
        """Text-field questions parse to a non-empty node list."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]

**Fyll i rätt siffra!**
(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        file_path = create_question_file("textfield.md", content)
        question = parse_question(file_path)
        assert "frågetyp/textfält" in question.metadata["tags"]
        assert len(question.nodes) > 0

    def test_parse_matching_question(self, create_question_file):
        """Matching questions use two separate bullet lists (items, then choices)."""
        content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)

- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller

- JA, finn i CNS
- NEJ, finns inte i CNS

```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
        file_path = create_question_file("matching.md", content)
        question = parse_question(file_path)
        assert "frågetyp/matching" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_question_with_image(self, create_question_file):
        """Obsidian embeds survive in raw_content and parse to embed nodes."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]

**Fyll i rätt siffra !**
(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)

```spoiler-block:
a) 7
b) 18
```
"""
        file_path = create_question_file("image_q.md", content)
        question = parse_question(file_path)
        assert "bild" in question.metadata["tags"]
        assert "![[image-4.png" in question.raw_content
        # The first node is a paragraph whose first child is the embed token.
        embed = question.nodes[0].children[0]
        assert embed.type == "embed"
        assert embed.attrs == {
            "filename": "image-4.png",
            "width": 292,
            "height": 316,
        }

    @pytest.mark.parametrize("invalid_content", [
        "",                # empty file
        "No frontmatter",  # body without frontmatter
        "---\n---\n",      # frontmatter with no keys
    ])
    def test_parse_edge_cases(self, create_question_file, invalid_content):
        """Degenerate inputs still produce a ParsedQuestion, not an exception."""
        file_path = create_question_file("edge.md", invalid_content)
        question = parse_question(file_path)
        assert isinstance(question, ParsedQuestion)

    def test_parse_question_preserves_structure(self, create_question_file):
        """Heading, paragraph, list and code block all survive parsing."""
        content = """---
tags: [ah2]
---
# Heading

Paragraph text

- List item 1
- List item 2

```spoiler-block:
Answer
```
"""
        file_path = create_question_file("structure.md", content)
        question = parse_question(file_path)
        node_types = [n.type for n in question.nodes]
        assert "heading" in node_types
        assert "paragraph" in node_types
        assert "list" in node_types
        assert "block_code" in node_types
class TestParsedQuestionDataclass:
    """Tests for the ParsedQuestion dataclass itself."""

    def test_parsed_question_defaults(self):
        """A bare ParsedQuestion starts out completely empty."""
        question = ParsedQuestion()
        assert question.metadata == {}
        assert question.raw_content == ""
        assert question.nodes == []

    def test_parsed_question_initialization(self):
        """Explicitly passed fields are stored unchanged."""
        metadata = {"tags": ["test"], "date": "2022-01-15"}
        content = "Test content"
        nodes = [Node({"type": "paragraph"})]
        question = ParsedQuestion(
            metadata=metadata,
            raw_content=content,
            nodes=nodes,
        )
        assert question.metadata == metadata
        assert question.raw_content == content
        assert question.nodes == nodes
class TestRealQuestions:
    """Smoke tests against the real exam files in the content tree."""

    @pytest.fixture
    def exam_dir(self):
        """Locate the exam directory, skipping these tests when it is absent."""
        root = pathlib.Path(__file__).parent.parent.parent
        exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
        if exam_path.exists():
            return exam_path
        pytest.skip("Exam directory not found")

    @pytest.mark.parametrize("exam_date,question_num", [
        ("2022-01-15", "1"),
        ("2022-01-15", "2"),
        ("2022-01-15", "3"),
        ("2022-01-15", "4"),
        ("2022-06-01", "8"),
    ])
    def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
        """Known exam questions parse with the expected base tags and content."""
        file_path = exam_dir / exam_date / f"{question_num}.md"
        if not file_path.exists():
            pytest.skip(f"Question file {file_path} not found")
        question = parse_question(file_path)
        # Required metadata fields.
        assert "tags" in question.metadata
        assert isinstance(question.metadata["tags"], list)
        assert "ah2" in question.metadata["tags"]
        assert "provfråga" in question.metadata["tags"]
        # Some body content was actually parsed.
        assert len(question.raw_content) > 0
        assert len(question.nodes) > 0

    def test_parse_all_short_named_questions(self, exam_dir):
        """Every numerically named question file (1-2 digits) parses cleanly."""
        questions_found = 0
        for file in sorted(exam_dir.glob("*/*.md")):
            if len(file.stem) <= 2 and file.stem.isdigit():
                question = parse_question(file)
                assert isinstance(question, ParsedQuestion)
                assert "tags" in question.metadata
                questions_found += 1
        # Guard against the glob silently matching nothing.
        assert questions_found > 0, "No exam questions found to test"
class TestNodeTextExtraction:
    """Tests for Node.text over increasingly nested token trees."""

    @pytest.mark.parametrize("token,expected_text", [
        # Bare text token.
        ({"type": "text", "raw": "Hello"}, "Hello"),
        # Paragraph with several text children.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ],
            },
            "A B C",
        ),
        # Mixed emphasis/strong nesting.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}],
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                ],
            },
            "Normal italic bold",
        ),
        # Paragraph with no children at all.
        ({"type": "paragraph", "children": []}, ""),
    ])
    def test_complex_text_extraction(self, token, expected_text):
        """text concatenates raw fragments depth-first across all children."""
        node = Node(token)
        assert node.text == expected_text

View File

@@ -1,8 +1,13 @@
import re import re
from pathlib import Path
from collections import defaultdict from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Tuple from typing import Tuple
from quiz.models import Question, Option
from django.conf import settings
from quiz.models import Course, Exam, Question, Option
from quiz.utils.question_parser import parse_question_from_content, Node
class ImportStats: class ImportStats:
@@ -70,44 +75,188 @@ class ImportStats:
return "\n".join(lines) return "\n".join(lines)
def parse_matching_question(content: str) -> Tuple[bool, dict]:
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
""" """
Parse matching question from markdown. Parse a markdown file and extract question data using the new question_parser.
Returns:
(is_mcq, question_data) where question_data contains:
- text: question text
- options: list of (letter, text) tuples
- correct_answer: the correct answer letter(s)
- has_answer: whether it has an answer (not TODO)
- tags: list of tag strings
- question_type: type of question (mcq, scq, matching, etc.)
"""
# Parse from content string (works for both test cases and real files)
parsed = parse_question_from_content(content)
# Extract metadata
metadata = parsed.metadata
tags = metadata.get('tags', [])
# Check for question type in tags
question_type = None
is_question = False
for tag in tags:
if 'frågetyp/' in tag:
is_question = True
if 'frågetyp/mcq' in tag:
question_type = 'mcq'
elif 'frågetyp/scq' in tag:
question_type = 'scq'
elif 'frågetyp/matching' in tag:
question_type = 'matching'
elif 'frågetyp/textalternativ' in tag:
question_type = 'textalternativ'
elif 'frågetyp/textfält' in tag:
question_type = 'textfält'
if not is_question:
return False, {}
# Handle matching questions separately
if question_type == 'matching':
return parse_matching_question_from_nodes(parsed.nodes, tags)
# Extract question text from first paragraph (skip images and special instructions)
question_text = None
for node in parsed.nodes:
if node.type != "paragraph":
continue
text = node.text.strip()
# Skip empty paragraphs
if not text:
continue
# Remove inline images from text first
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
# Skip if paragraph was only an image reference
if not text:
continue
# Skip "Välj X alternativ" instructions
if 'Välj' in text and 'alternativ' in text:
continue
# Clean up bold markers
text = text.replace('**', '')
if text:
question_text = text
break
if not question_text:
return True, {
'text': None,
'options': [],
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract options from list nodes
options_data = []
for node in parsed.nodes:
if node.type != "list":
continue
for item in node.children:
# Get the text of the list item
if item.type != "list_item":
continue
item_text = item.text.strip()
# Match "A: text" or just "A"
match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
if match:
letter = match.group(1)
text = match.group(2).strip()
options_data.append((letter, text))
elif re.match(r'^([A-Z])$', item_text):
letter = item_text
options_data.append((letter, ''))
elif question_type in ['textalternativ', 'textfält']:
# For text-based questions, use incrementing letters
if not re.match(r'^[a-z]\)', item_text): # Skip sub-question markers
letter = chr(ord('A') + len(options_data))
options_data.append((letter, item_text))
# For text-based questions, options are optional
if not options_data:
options_data = [('A', '')]
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
return True, {
'text': question_text,
'options': options_data,
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract answer from spoiler block
correct_answer = None
has_answer = False
for node in parsed.nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
answer_text = node.raw.strip()
# Check for TODO
if 'TODO' in answer_text.upper():
has_answer = False
else:
has_answer = True
# For MCQ/SCQ: Extract capital letters
if question_type in ['mcq', 'scq']:
letters = re.findall(r'\b([A-Z])\b', answer_text)
if letters:
correct_answer = ','.join(sorted(set(letters)))
else:
# For text-based questions: Store the full answer text
correct_answer = answer_text[:200] # Limit to 200 chars for database field
break
return True, {
'text': question_text,
'options': options_data,
'correct_answer': correct_answer,
'has_answer': has_answer,
'question_type': question_type,
'tags': tags
}
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
"""
Parse matching question from parsed nodes.
Expected format: Expected format:
- Two consecutive bullet lists (with "- " prefix) - Two consecutive bullet lists
- First list = left column items (rows) - First list = left column items (rows)
- Second list = top row items (columns) - Second list = top row items (columns)
- Answer format: "LeftItem: TopItem" pairs - Answer format: "LeftItem: TopItem" pairs
Returns: Returns:
(is_matching, question_data) where question_data contains: (is_matching, question_data)
- text: question text
- left_items: list of left column items
- top_items: list of top row items
- correct_pairs: list of [left_idx, top_idx] pairs (0-indexed)
- has_answer: whether it has an answer (not TODO)
- question_type: 'matching'
""" """
lines = content.split('\n') # Extract question text
# Extract question text (first non-empty line after frontmatter)
question_text = None question_text = None
in_frontmatter = False for node in nodes:
frontmatter_done = False if node.type == "paragraph":
text = node.text.strip()
for line in lines: # Remove inline images
if line.strip() == '---': text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
if not in_frontmatter: # Skip if empty after removing images
in_frontmatter = True if not text:
else:
in_frontmatter = False
frontmatter_done = True
continue continue
question_text = text.replace('**', '')
if frontmatter_done and line.strip() and not line.startswith('![['):
if not line.startswith('-') and not line.startswith('```'):
question_text = line.strip().replace('**', '')
break break
if not question_text: if not question_text:
@@ -117,94 +266,45 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
'top_items': [], 'top_items': [],
'correct_pairs': [], 'correct_pairs': [],
'has_answer': False, 'has_answer': False,
'question_type': 'matching' 'question_type': 'matching',
'tags': tags
} }
# Extract two consecutive bullet lists # Extract two consecutive lists
left_items = [] left_items = []
top_items = [] top_items = []
in_first_list = False list_nodes = [node for node in nodes if node.type == "list"]
in_second_list = False
in_frontmatter = False
frontmatter_done = False
found_question_text = False
for line in lines: if len(list_nodes) >= 2:
# Track frontmatter # First list = left items
if line.strip() == '---': for item in list_nodes[0].children:
if not in_frontmatter: if item.type == "list_item":
in_frontmatter = True left_items.append(item.text.strip())
else:
in_frontmatter = False
frontmatter_done = True
continue
if in_frontmatter or not frontmatter_done: # Second list = top items
continue for item in list_nodes[1].children:
if item.type == "list_item":
# Skip spoiler blocks top_items.append(item.text.strip())
if line.strip().startswith('```'):
break
# Found question text
if not found_question_text and question_text in line:
found_question_text = True
continue
if not found_question_text:
continue
# Look for bullet lists
if line.strip().startswith('- '):
item = line.strip()[2:].strip()
if not item: # Empty bullet
continue
if not in_first_list and not in_second_list:
in_first_list = True
left_items.append(item)
elif in_first_list:
left_items.append(item)
elif in_second_list:
top_items.append(item)
elif line.strip() == '':
# Empty line - transition from first list to second
if in_first_list and left_items:
in_first_list = False
in_second_list = True
elif not line.strip().startswith('-') and (in_first_list or in_second_list):
# Non-bullet line after starting lists - end of lists
break
# Parse answer from spoiler block # Parse answer from spoiler block
correct_pairs = [] correct_pairs = []
has_answer = False has_answer = False
in_spoiler = False
answer_lines = []
for line in lines: for node in nodes:
if line.strip().startswith('```spoiler-block'): if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
in_spoiler = True answer_text = node.raw.strip()
continue
if in_spoiler:
if line.strip() == '```':
break
stripped = line.strip()
if stripped:
answer_lines.append(stripped)
if answer_lines:
full_answer = ' '.join(answer_lines)
# Check for TODO # Check for TODO
if 'TODO' in full_answer.upper(): if 'TODO' in answer_text.upper():
has_answer = False has_answer = False
else: break
has_answer = True has_answer = True
# Parse "Item: Match" format # Parse "Item: Match" format
# Example: "Smak: Lobus Insularis" answer_lines = answer_text.split('\n')
for line in answer_lines: for line in answer_lines:
if ':' in line: line = line.strip()
if ':' not in line:
continue
left_part, top_part = line.split(':', 1) left_part, top_part = line.split(':', 1)
left_part = left_part.strip() left_part = left_part.strip()
top_part = top_part.strip() top_part = top_part.strip()
@@ -225,6 +325,7 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
if left_idx is not None and top_idx is not None: if left_idx is not None and top_idx is not None:
correct_pairs.append([left_idx, top_idx]) correct_pairs.append([left_idx, top_idx])
break
return True, { return True, {
'text': question_text, 'text': question_text,
@@ -232,215 +333,8 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
'top_items': top_items, 'top_items': top_items,
'correct_pairs': correct_pairs, 'correct_pairs': correct_pairs,
'has_answer': has_answer, 'has_answer': has_answer,
'question_type': 'matching' 'question_type': 'matching',
} 'tags': tags
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
"""
Parse a markdown file and extract question data.
Returns:
(is_mcq, question_data) where question_data contains:
- text: question text
- options: list of (letter, text) tuples
- correct_answer: the correct answer letter(s)
- has_answer: whether it has an answer (not TODO)
- tags: list of tag strings
"""
lines = content.split('\n')
# Check for question tags in frontmatter
# Accept: frågetyp/mcq, frågetyp/scq, frågetyp/textalternativ, frågetyp/textfält
is_question = False
question_type = None
in_frontmatter = False
for line in lines:
if line.strip() == '---':
if in_frontmatter:
# End of frontmatter
in_frontmatter = False
break
else:
in_frontmatter = True
continue
if in_frontmatter:
if 'frågetyp/' in line:
is_question = True
# Extract question type
if 'frågetyp/mcq' in line:
question_type = 'mcq'
elif 'frågetyp/scq' in line:
question_type = 'scq'
elif 'frågetyp/matching' in line:
question_type = 'matching'
elif 'frågetyp/textalternativ' in line:
question_type = 'textalternativ'
elif 'frågetyp/textfält' in line:
question_type = 'textfält'
if line.strip().lower().startswith('tags:'):
# Extract tags
# Handle: tags: [tag1, tag2] or tags: tag1, tag2
tag_content = line.split(':', 1)[1].strip()
# Remove brackets if present
tag_content = tag_content.strip('[]')
# Split by comma
tags = [t.strip() for t in tag_content.split(',') if t.strip()]
# If it's a matching question, use the matching parser
if question_type == 'matching':
is_matching, matching_data = parse_matching_question(content)
if is_matching:
# Add tags to the data
matching_data['tags'] = tags if 'tags' in locals() else []
return True, matching_data
if not is_question:
return False, {}
# Extract question text (first non-empty line after frontmatter)
question_text = None
in_frontmatter = False
frontmatter_done = False
for line in lines:
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
if frontmatter_done and line.strip() and not line.startswith('![['):
# Skip "Välj ett/två alternativ:" lines
if 'Välj' in line and 'alternativ' in line:
continue
if not line.startswith('-') and not line.startswith('```'):
question_text = line.strip().replace('**', '')
break
# Return early if no question text found, but include has_answer field
if not question_text:
return True, {
'text': None,
'options': [],
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags if 'tags' in locals() else []
}
# Extract options (pattern: "- A:" or "- A" for MCQ, or text for textalternativ)
options_data = []
in_frontmatter = False
frontmatter_done = False
in_spoiler = False
for line in lines:
# Track frontmatter to skip it
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
# Skip frontmatter and spoiler blocks
if in_frontmatter or not frontmatter_done:
continue
if line.strip().startswith('```spoiler-block:'):
in_spoiler = True
continue
if in_spoiler:
if line.strip() == '```':
in_spoiler = False
continue
# Match "- A: text" or "- A: " or just "- A"
match = re.match(r'^-\s*([A-Z]):\s*(.*)$', line.strip())
if not match:
# Also try "- A" without colon
match = re.match(r'^-\s*([A-Z])$', line.strip())
if match:
letter = match.group(1)
text = match.group(2) if len(match.groups()) > 1 else ""
options_data.append((letter, text.strip()))
else:
# For textalternativ, options might be plain text items
if question_type in ['textalternativ', 'textfält'] and line.strip().startswith('-') and not line.strip().startswith('--'):
# Extract text after dash
option_text = line.strip()[1:].strip()
# Skip if it's a sub-question marker like "a)" or "b)"
if option_text and not re.match(r'^[a-z]\)', option_text):
# Use incrementing letters for text options
letter = chr(ord('A') + len(options_data))
options_data.append((letter, option_text))
# For text-based questions, options are optional
if not options_data:
# At least return something for single-option questions
options_data = [('A', '')]
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
return True, {
'text': question_text,
'options': options_data,
'correct_answer': '',
'has_answer': False,
'question_type': question_type
}
# Extract answer from spoiler block
correct_answer = None
has_answer = False
in_spoiler = False
answer_lines = []
for line in lines:
if line.strip().startswith('```spoiler-block:'):
in_spoiler = True
continue
if in_spoiler:
if line.strip() == '```':
break
stripped = line.strip()
if stripped:
answer_lines.append(stripped)
# Process collected answer lines
if answer_lines:
full_answer = ' '.join(answer_lines)
# Check for TODO
if 'TODO' in full_answer.upper():
has_answer = False
else:
has_answer = True
# For MCQ/SCQ: Extract capital letters
if question_type in ['mcq', 'scq']:
letters = re.findall(r'\b([A-Z])\b', full_answer)
if letters:
correct_answer = ','.join(sorted(set(letters)))
else:
# For text-based questions: Store the full answer text
correct_answer = full_answer[:200] # Limit to 200 chars for database field
return True, {
'text': question_text,
'options': options_data,
'correct_answer': correct_answer,
'has_answer': has_answer,
'question_type': question_type,
'tags': tags if 'tags' in locals() else []
} }
@@ -460,7 +354,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
file_mtime = file_path.stat().st_mtime file_mtime = file_path.stat().st_mtime
# Calculate path relative to project root # Calculate path relative to project root
from django.conf import settings
project_root = settings.BASE_DIR.parent project_root = settings.BASE_DIR.parent
try: try:
file_path_str = str(file_path.relative_to(project_root)) file_path_str = str(file_path.relative_to(project_root))
@@ -518,9 +411,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
# Try to parse as date # Try to parse as date
if exam_folder and '-' in exam_folder: if exam_folder and '-' in exam_folder:
try: try:
from datetime import datetime
from quiz.models import Course, Exam
exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date() exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()
# Get or create course (default to "Anatomi & Histologi 2") # Get or create course (default to "Anatomi & Histologi 2")
@@ -610,17 +500,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats: def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
"""
Import all questions from a folder.
Args:
folder_path: Path to the folder containing question markdown files
base_path: Base path for relative path calculations (defaults to folder_path)
force: If True, import all files regardless of mtime (for initial import)
Returns:
ImportStats object with import statistics
"""
if base_path is None: if base_path is None:
base_path = folder_path base_path = folder_path
@@ -634,9 +513,7 @@ def import_questions(folder_path: Path, base_path: Path = None, force: bool = Fa
def delete_question_by_path(file_path: Path): def delete_question_by_path(file_path: Path):
"""Delete a question from the database by file path"""
try: try:
from django.conf import settings
project_root = settings.BASE_DIR.parent project_root = settings.BASE_DIR.parent
file_path_str = str(file_path.relative_to(project_root)) file_path_str = str(file_path.relative_to(project_root))
deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete() deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()

View File

@@ -0,0 +1,38 @@
__all__ = ["obsidian_embed"]
# https://help.obsidian.md/embeds
# Supported:
# ![[image-4.png|292x316]]
def parse_embed(inline, match, state):
    """Parse an Obsidian ``![[target|size]]`` embed into an ``embed`` token.

    The optional ``|size`` suffix is either ``WIDTHxHEIGHT`` or a bare
    ``WIDTH``.  A non-numeric suffix (Obsidian also allows a display alias,
    e.g. ``![[note|My Alias]]``) is ignored instead of raising ValueError.

    Returns the match end offset, as mistune inline parsers must.
    """
    filename = match.group("filename")
    attrs = {}
    if "|" in filename:
        filename, size = filename.split("|", 1)
    else:
        size = None
    attrs["filename"] = filename
    if size:
        width, _, height = size.partition("x")
        # Only treat the suffix as dimensions when the parts are numeric;
        # otherwise it is a display alias, not a size, and is dropped.
        if width.isdigit():
            attrs["width"] = int(width)
        if height.isdigit():
            attrs["height"] = int(height)
    state.append_token({"type": "embed", "attrs": attrs})
    return match.end()
# Inline regex for Obsidian embeds: ``![[target]]``.  The target must not
# start with whitespace (lookahead) nor end with whitespace (lookbehind).
# NOTE: the original used a look*ahead* ``(?!\s)`` before ``\]\]``, which is
# a no-op — the next character there is always ``]`` — so trailing
# whitespace was never actually rejected; a lookbehind is what was intended.
INLINE_EMBED_PATTERN = (
    r'!\[\['               # begins with ![[
    r'(?!\s)'              # first char of target is not whitespace
    r'(?P<filename>.+?)'   # content between `![[xx]]`
    r'(?<!\s)'             # last char of target is not whitespace
    r'\]\]'                # closing ]]
)
def obsidian_embed(md: "Markdown") -> None:
    """Mistune plugin entry point: register the ``![[...]]`` embed syntax.

    Registered *before* the ``link`` rule so the ``![[`` prefix is not
    consumed by the standard image/link inline parsers.
    """
    md.inline.register('embed', INLINE_EMBED_PATTERN, parse_embed, before="link")

View File

@@ -0,0 +1,89 @@
import dataclasses
import pathlib
import frontmatter
import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
class Node:
    """Lightweight wrapper around a mistune AST token dict."""

    def __init__(self, token):
        self.type = token["type"]
        self.raw = token.get("raw", "")
        self.attrs = token.get("attrs", {})
        self.children = [Node(token=child_token) for child_token in token.get("children", [])]

    def __repr__(self) -> str:
        parts = []
        if self.raw:
            parts.append(f"raw={self.raw!r}")
        if self.attrs:
            parts.append(f"attrs={self.attrs!r}")
        if self.children:
            parts.append(f"children={self.children!r}")
        # CamelCase the token type, e.g. block_text -> BlockText
        camel = "".join(word.title() for word in self.type.split("_"))
        return f"{camel}({', '.join(parts)})"

    @property
    def text(self) -> str:
        """Concatenated raw text of this node's leaf ``text`` descendants."""
        if self.type == "text":
            return self.raw
        return "".join(child.text for child in self.children)
@dataclasses.dataclass
class ParsedQuestion:
    """Result of parsing a question markdown file."""
    # YAML frontmatter key/value pairs from the top of the file.
    metadata: dict = dataclasses.field(default_factory=dict)
    # Markdown body with the frontmatter stripped.
    raw_content: str = ""
    # Top-level mistune AST tokens wrapped in Node objects.
    nodes: list[Node] = dataclasses.field(default_factory=list)
nodes: list[Node] = dataclasses.field(default_factory=list)
def parse_question(path: pathlib.Path):
    """Read *path* as UTF-8 and parse it as a question file."""
    return parse_question_from_content(path.read_text(encoding="utf-8"))
def parse_question_from_content(content_str: str):
    """Split the frontmatter off *content_str* and parse the markdown body.

    Returns a ParsedQuestion holding the frontmatter metadata, the raw
    body text, and the mistune AST wrapped in Node objects.
    """
    metadata, body = frontmatter.parse(content_str)
    ast_tokens = markdown(body)
    return ParsedQuestion(
        metadata=metadata,
        raw_content=body,
        nodes=[Node(token=tok) for tok in ast_tokens],
    )
def main():
root = pathlib.Path(__file__).parent.parent.parent.parent
print(root)
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
for file in sorted(exams.glob("*/*.md")):
if len(file.stem) > 2:
continue
question = parse_question(file)
print(question.metadata, repr(question.raw_content))
continue
for node in question.nodes:
match node.type:
case "heading":
print("Heading:", repr(node.text))
case "paragraph":
print("Paragraph:", repr(node.text))
case "list":
print("List:")
for child in node.children:
print(" - List item:", repr(child.text))
case "block_code" if node.attrs["info"] == "spoiler-block:":
print("Spoiler:", repr(node.raw.rstrip()))
if __name__ == "__main__":
main()