1
0

vault backup: 2025-12-23 10:55:37
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m10s

This commit is contained in:
2025-12-23 10:55:37 +01:00
parent 68079d368d
commit 3b2751808e
9 changed files with 877 additions and 368 deletions

View File

@@ -30,7 +30,7 @@
"state": { "state": {
"file": "Anatomi & Histologi 2/Statistik.md", "file": "Anatomi & Histologi 2/Statistik.md",
"mode": "source", "mode": "source",
"source": true, "source": false,
"backlinks": false "backlinks": false
}, },
"icon": "lucide-file", "icon": "lucide-file",

View File

@@ -1,18 +0,0 @@
# Matching Questions Format Analysis
Based on reviewing the 17 matching questions:
## Key Finding:
Only **1 question has an answer** (2023-05-31/3.md), the rest have TODO.
**That question uses this format:**
- Two separate bullet lists
- Answer: "ItemName: MatchName" format
## Proposed Implementation:
1. Support two-list format (most flexible)
2. Parse answer as "Item: Match" pairs
3. Store as JSON with 0-indexed pairs
4. Render as n×n table with radio buttons
## Next: Implement based on this one working example.

View File

@@ -1,15 +0,0 @@
import pathlib
import pprint

import mistune

# AST-only markdown parser: renderer=None makes mistune return the token tree.
markdown = mistune.create_markdown(renderer=None)

root = pathlib.Path(__file__).parent.parent
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
print(exams.absolute())

for file in sorted(exams.glob("*/*.md")):
    # Question files are named like "1.md".."99.md"; skip anything longer.
    if len(file.stem) > 2:
        continue
    print(f"Parsing {file}")
    tokens = markdown(file.read_text(encoding="utf-8"))
    # pprint hoisted to the top of the file; the original re-imported it on
    # every loop iteration.
    pprint.pprint(tokens)

View File

@@ -13,5 +13,6 @@ markers =
admin: Admin interface tests admin: Admin interface tests
import: Import and parsing tests import: Import and parsing tests
import_tests: Import and parsing tests import_tests: Import and parsing tests
parser: Question parser tests
slow: Slow running tests slow: Slow running tests

View File

@@ -0,0 +1,537 @@
"""
Comprehensive test suite for the question_parser module.
This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:
1. Node class:
- Initialization with different token types
- Attribute handling
- Children node processing
- String representation (__repr__)
- Text extraction from nested structures
2. parse_question function:
- Metadata parsing (tags, dates, etc.)
- Raw content extraction
- Different question types (MCQ, SCQ, text field, matching)
- Questions with images
- Edge cases (empty content, missing frontmatter)
- Document structure preservation
3. ParsedQuestion dataclass:
- Default values
- Initialization with custom values
4. Real exam questions:
- Parsing actual exam questions from the content directory
- Validation of all short-named question files
Test execution:
pytest tests/test_question_parser.py -v # Verbose output
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
pytest tests/test_question_parser.py --collect-only # List all tests
"""
import pathlib
import tempfile
import pytest
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
@pytest.fixture
def temp_dir():
    """Provide a throwaway directory as a pathlib.Path, removed after the test."""
    with tempfile.TemporaryDirectory() as scratch:
        yield pathlib.Path(scratch)
@pytest.fixture
def create_question_file(temp_dir):
    """Return a factory that writes a markdown question file into temp_dir."""

    def _create_file(filename: str, content: str) -> pathlib.Path:
        target = temp_dir / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _create_file
class TestNode:
    """Tests for the Node wrapper around mistune tokens."""

    @pytest.mark.parametrize("token,expected_type,expected_raw", [
        ({"type": "paragraph"}, "paragraph", ""),
        ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
        ({"type": "text", "raw": "Some text"}, "text", "Some text"),
        ({"type": "list"}, "list", ""),
    ])
    def test_node_initialization(self, token, expected_type, expected_raw):
        """A Node exposes the token's type; raw defaults to an empty string."""
        node = Node(token)
        assert node.type == expected_type
        assert node.raw == expected_raw

    @pytest.mark.parametrize("token,expected_attrs", [
        ({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
        ({"type": "paragraph"}, {}),
        ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
    ])
    def test_node_attributes(self, token, expected_attrs):
        """Token attrs are exposed as-is, defaulting to an empty dict."""
        node = Node(token)
        assert node.attrs == expected_attrs

    def test_node_children(self):
        """Child tokens are wrapped recursively as Node instances."""
        token = {
            "type": "paragraph",
            "children": [
                {"type": "text", "raw": "Hello "},
                {"type": "text", "raw": "World"},
            ],
        }
        node = Node(token)
        assert len(node.children) == 2
        assert node.children[0].type == "text"
        assert node.children[0].raw == "Hello "
        assert node.children[1].type == "text"
        assert node.children[1].raw == "World"

    @pytest.mark.parametrize("token,expected_repr_contains", [
        ({"type": "text", "raw": "test"}, "Text(raw='test')"),
        ({"type": "paragraph"}, "Paragraph()"),
        ({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
    ])
    def test_node_repr(self, token, expected_repr_contains):
        """__repr__ renders a CamelCase type name plus any non-default fields."""
        node = Node(token)
        assert repr(node) == expected_repr_contains

    @pytest.mark.parametrize("token,expected_text", [
        ({"type": "text", "raw": "Simple text"}, "Simple text"),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Hello "},
                    {"type": "text", "raw": "World"},
                ],
            },
            "Hello World",
        ),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Nested "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                    {"type": "text", "raw": " text"},
                ],
            },
            "Nested bold text",
        ),
    ])
    def test_node_text_property(self, token, expected_text):
        """The text property concatenates raw text across nested children."""
        node = Node(token)
        assert node.text == expected_text
class TestParseQuestion:
    """Tests for the parse_question entry point."""

    @pytest.mark.parametrize("content,expected_tags", [
        (
            """---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
            ["ah2", "provfråga", "frågetyp/mcq"],
        ),
        (
            """---
tags:
  - ah2
  - provfråga
  - frågetyp/scq
date: 2023-05-31
---
Question content""",
            ["ah2", "provfråga", "frågetyp/scq"],
        ),
    ])
    def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
        """Tags parse identically from flow-style and block-style YAML lists."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.metadata["tags"] == expected_tags

    @pytest.mark.parametrize("content,expected_date", [
        (
            """---
tags: [ah2]
date: 2022-01-15
---
Content""",
            "2022-01-15",
        ),
        (
            """---
tags: [ah2]
date: 2023-05-31
---
Content""",
            "2023-05-31",
        ),
    ])
    def test_parse_metadata_date(self, create_question_file, content, expected_date):
        """The frontmatter date round-trips through str() unchanged."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert str(question.metadata["date"]) == expected_date

    @pytest.mark.parametrize("content,expected_raw", [
        (
            """---
tags: [ah2]
---
Simple question""",
            "Simple question",
        ),
        (
            """---
tags: [ah2]
---
Question with **bold** text""",
            "Question with **bold** text",
        ),
    ])
    def test_parse_raw_content(self, create_question_file, content, expected_raw):
        """raw_content keeps the markdown body with the frontmatter stripped."""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.raw_content.strip() == expected_raw

    def test_parse_mcq_question(self, create_question_file):
        """A full MCQ file yields paragraphs, an option list and a spoiler block."""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**

- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        file_path = create_question_file("mcq.md", content)
        question = parse_question(file_path)

        assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
        assert len(question.nodes) > 0

        # Question text and instruction line become paragraph nodes.
        paragraphs = [n for n in question.nodes if n.type == "paragraph"]
        assert len(paragraphs) > 0

        # The answer options become a list node.
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

        # The answer lives in the fenced spoiler block.
        code_blocks = [n for n in question.nodes if n.type == "block_code"]
        assert len(code_blocks) > 0
        spoiler = code_blocks[0]
        assert spoiler.attrs.get("info") == "spoiler-block:"
        assert "A och D" in spoiler.raw

    def test_parse_scq_question(self, create_question_file):
        """A single-choice question keeps its tag and its option list."""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?

- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons

```spoiler-block:
A
```
"""
        file_path = create_question_file("scq.md", content)
        question = parse_question(file_path)
        assert "frågetyp/scq" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_text_field_question(self, create_question_file):
        """Text-field questions parse to a non-empty node list."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]

**Fyll i rätt siffra!**
(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        file_path = create_question_file("textfield.md", content)
        question = parse_question(file_path)
        assert "frågetyp/textfält" in question.metadata["tags"]
        assert len(question.nodes) > 0

    def test_parse_matching_question(self, create_question_file):
        """Matching questions use two separate bullet lists (items, then choices)."""
        content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)

- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller

- JA, finn i CNS
- NEJ, finns inte i CNS

```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
        file_path = create_question_file("matching.md", content)
        question = parse_question(file_path)
        assert "frågetyp/matching" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_question_with_image(self, create_question_file):
        """Obsidian embeds survive in raw_content and parse to embed nodes."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]

**Fyll i rätt siffra !**
(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)

```spoiler-block:
a) 7
b) 18
```
"""
        file_path = create_question_file("image_q.md", content)
        question = parse_question(file_path)
        assert "bild" in question.metadata["tags"]
        assert "![[image-4.png" in question.raw_content
        # The first node is a paragraph whose first child is the embed token.
        embed = question.nodes[0].children[0]
        assert embed.type == "embed"
        assert embed.attrs == {
            "filename": "image-4.png",
            "width": 292,
            "height": 316,
        }

    @pytest.mark.parametrize("invalid_content", [
        "",                # empty file
        "No frontmatter",  # body without frontmatter
        "---\n---\n",      # frontmatter with no keys
    ])
    def test_parse_edge_cases(self, create_question_file, invalid_content):
        """Degenerate inputs still produce a ParsedQuestion, not an exception."""
        file_path = create_question_file("edge.md", invalid_content)
        question = parse_question(file_path)
        assert isinstance(question, ParsedQuestion)

    def test_parse_question_preserves_structure(self, create_question_file):
        """Heading, paragraph, list and code block all survive parsing."""
        content = """---
tags: [ah2]
---
# Heading

Paragraph text

- List item 1
- List item 2

```spoiler-block:
Answer
```
"""
        file_path = create_question_file("structure.md", content)
        question = parse_question(file_path)
        node_types = [n.type for n in question.nodes]
        assert "heading" in node_types
        assert "paragraph" in node_types
        assert "list" in node_types
        assert "block_code" in node_types
class TestParsedQuestionDataclass:
    """Tests for the ParsedQuestion dataclass itself."""

    def test_parsed_question_defaults(self):
        """A bare ParsedQuestion starts out completely empty."""
        question = ParsedQuestion()
        assert question.metadata == {}
        assert question.raw_content == ""
        assert question.nodes == []

    def test_parsed_question_initialization(self):
        """Explicitly passed fields are stored unchanged."""
        metadata = {"tags": ["test"], "date": "2022-01-15"}
        content = "Test content"
        nodes = [Node({"type": "paragraph"})]
        question = ParsedQuestion(
            metadata=metadata,
            raw_content=content,
            nodes=nodes,
        )
        assert question.metadata == metadata
        assert question.raw_content == content
        assert question.nodes == nodes
class TestRealQuestions:
    """Smoke tests against the real exam files in the content tree."""

    @pytest.fixture
    def exam_dir(self):
        """Locate the exam directory, skipping these tests when it is absent."""
        root = pathlib.Path(__file__).parent.parent.parent
        exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
        if exam_path.exists():
            return exam_path
        pytest.skip("Exam directory not found")

    @pytest.mark.parametrize("exam_date,question_num", [
        ("2022-01-15", "1"),
        ("2022-01-15", "2"),
        ("2022-01-15", "3"),
        ("2022-01-15", "4"),
        ("2022-06-01", "8"),
    ])
    def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
        """Known exam questions parse with the expected base tags and content."""
        file_path = exam_dir / exam_date / f"{question_num}.md"
        if not file_path.exists():
            pytest.skip(f"Question file {file_path} not found")
        question = parse_question(file_path)
        # Required metadata fields.
        assert "tags" in question.metadata
        assert isinstance(question.metadata["tags"], list)
        assert "ah2" in question.metadata["tags"]
        assert "provfråga" in question.metadata["tags"]
        # Some body content was actually parsed.
        assert len(question.raw_content) > 0
        assert len(question.nodes) > 0

    def test_parse_all_short_named_questions(self, exam_dir):
        """Every numerically named question file (1-2 digits) parses cleanly."""
        questions_found = 0
        for file in sorted(exam_dir.glob("*/*.md")):
            if len(file.stem) <= 2 and file.stem.isdigit():
                question = parse_question(file)
                assert isinstance(question, ParsedQuestion)
                assert "tags" in question.metadata
                questions_found += 1
        # Guard against the glob silently matching nothing.
        assert questions_found > 0, "No exam questions found to test"
class TestNodeTextExtraction:
    """Tests for Node.text over increasingly nested token trees."""

    @pytest.mark.parametrize("token,expected_text", [
        # Bare text token.
        ({"type": "text", "raw": "Hello"}, "Hello"),
        # Paragraph with several text children.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ],
            },
            "A B C",
        ),
        # Mixed emphasis/strong nesting.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}],
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                ],
            },
            "Normal italic bold",
        ),
        # Paragraph with no children at all.
        ({"type": "paragraph", "children": []}, ""),
    ])
    def test_complex_text_extraction(self, token, expected_text):
        """text concatenates raw fragments depth-first across all children."""
        node = Node(token)
        assert node.text == expected_text

View File

@@ -1,8 +1,13 @@
import re import re
from pathlib import Path
from collections import defaultdict from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Tuple from typing import Tuple
from quiz.models import Question, Option
from django.conf import settings
from quiz.models import Course, Exam, Question, Option
from quiz.utils.question_parser import parse_question_from_content, Node
class ImportStats: class ImportStats:
@@ -70,44 +75,188 @@ class ImportStats:
return "\n".join(lines) return "\n".join(lines)
def parse_matching_question(content: str) -> Tuple[bool, dict]:
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
""" """
Parse matching question from markdown. Parse a markdown file and extract question data using the new question_parser.
Returns:
(is_mcq, question_data) where question_data contains:
- text: question text
- options: list of (letter, text) tuples
- correct_answer: the correct answer letter(s)
- has_answer: whether it has an answer (not TODO)
- tags: list of tag strings
- question_type: type of question (mcq, scq, matching, etc.)
"""
# Parse from content string (works for both test cases and real files)
parsed = parse_question_from_content(content)
# Extract metadata
metadata = parsed.metadata
tags = metadata.get('tags', [])
# Check for question type in tags
question_type = None
is_question = False
for tag in tags:
if 'frågetyp/' in tag:
is_question = True
if 'frågetyp/mcq' in tag:
question_type = 'mcq'
elif 'frågetyp/scq' in tag:
question_type = 'scq'
elif 'frågetyp/matching' in tag:
question_type = 'matching'
elif 'frågetyp/textalternativ' in tag:
question_type = 'textalternativ'
elif 'frågetyp/textfält' in tag:
question_type = 'textfält'
if not is_question:
return False, {}
# Handle matching questions separately
if question_type == 'matching':
return parse_matching_question_from_nodes(parsed.nodes, tags)
# Extract question text from first paragraph (skip images and special instructions)
question_text = None
for node in parsed.nodes:
if node.type != "paragraph":
continue
text = node.text.strip()
# Skip empty paragraphs
if not text:
continue
# Remove inline images from text first
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
# Skip if paragraph was only an image reference
if not text:
continue
# Skip "Välj X alternativ" instructions
if 'Välj' in text and 'alternativ' in text:
continue
# Clean up bold markers
text = text.replace('**', '')
if text:
question_text = text
break
if not question_text:
return True, {
'text': None,
'options': [],
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract options from list nodes
options_data = []
for node in parsed.nodes:
if node.type != "list":
continue
for item in node.children:
# Get the text of the list item
if item.type != "list_item":
continue
item_text = item.text.strip()
# Match "A: text" or just "A"
match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
if match:
letter = match.group(1)
text = match.group(2).strip()
options_data.append((letter, text))
elif re.match(r'^([A-Z])$', item_text):
letter = item_text
options_data.append((letter, ''))
elif question_type in ['textalternativ', 'textfält']:
# For text-based questions, use incrementing letters
if not re.match(r'^[a-z]\)', item_text): # Skip sub-question markers
letter = chr(ord('A') + len(options_data))
options_data.append((letter, item_text))
# For text-based questions, options are optional
if not options_data:
options_data = [('A', '')]
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
return True, {
'text': question_text,
'options': options_data,
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract answer from spoiler block
correct_answer = None
has_answer = False
for node in parsed.nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
answer_text = node.raw.strip()
# Check for TODO
if 'TODO' in answer_text.upper():
has_answer = False
else:
has_answer = True
# For MCQ/SCQ: Extract capital letters
if question_type in ['mcq', 'scq']:
letters = re.findall(r'\b([A-Z])\b', answer_text)
if letters:
correct_answer = ','.join(sorted(set(letters)))
else:
# For text-based questions: Store the full answer text
correct_answer = answer_text[:200] # Limit to 200 chars for database field
break
return True, {
'text': question_text,
'options': options_data,
'correct_answer': correct_answer,
'has_answer': has_answer,
'question_type': question_type,
'tags': tags
}
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
"""
Parse matching question from parsed nodes.
Expected format: Expected format:
- Two consecutive bullet lists (with "- " prefix) - Two consecutive bullet lists
- First list = left column items (rows) - First list = left column items (rows)
- Second list = top row items (columns) - Second list = top row items (columns)
- Answer format: "LeftItem: TopItem" pairs - Answer format: "LeftItem: TopItem" pairs
Returns: Returns:
(is_matching, question_data) where question_data contains: (is_matching, question_data)
- text: question text
- left_items: list of left column items
- top_items: list of top row items
- correct_pairs: list of [left_idx, top_idx] pairs (0-indexed)
- has_answer: whether it has an answer (not TODO)
- question_type: 'matching'
""" """
lines = content.split('\n') # Extract question text
# Extract question text (first non-empty line after frontmatter)
question_text = None question_text = None
in_frontmatter = False for node in nodes:
frontmatter_done = False if node.type == "paragraph":
text = node.text.strip()
for line in lines: # Remove inline images
if line.strip() == '---': text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
if not in_frontmatter: # Skip if empty after removing images
in_frontmatter = True if not text:
else:
in_frontmatter = False
frontmatter_done = True
continue continue
question_text = text.replace('**', '')
if frontmatter_done and line.strip() and not line.startswith('![['):
if not line.startswith('-') and not line.startswith('```'):
question_text = line.strip().replace('**', '')
break break
if not question_text: if not question_text:
@@ -117,94 +266,45 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
'top_items': [], 'top_items': [],
'correct_pairs': [], 'correct_pairs': [],
'has_answer': False, 'has_answer': False,
'question_type': 'matching' 'question_type': 'matching',
'tags': tags
} }
# Extract two consecutive bullet lists # Extract two consecutive lists
left_items = [] left_items = []
top_items = [] top_items = []
in_first_list = False list_nodes = [node for node in nodes if node.type == "list"]
in_second_list = False
in_frontmatter = False
frontmatter_done = False
found_question_text = False
for line in lines: if len(list_nodes) >= 2:
# Track frontmatter # First list = left items
if line.strip() == '---': for item in list_nodes[0].children:
if not in_frontmatter: if item.type == "list_item":
in_frontmatter = True left_items.append(item.text.strip())
else:
in_frontmatter = False
frontmatter_done = True
continue
if in_frontmatter or not frontmatter_done: # Second list = top items
continue for item in list_nodes[1].children:
if item.type == "list_item":
# Skip spoiler blocks top_items.append(item.text.strip())
if line.strip().startswith('```'):
break
# Found question text
if not found_question_text and question_text in line:
found_question_text = True
continue
if not found_question_text:
continue
# Look for bullet lists
if line.strip().startswith('- '):
item = line.strip()[2:].strip()
if not item: # Empty bullet
continue
if not in_first_list and not in_second_list:
in_first_list = True
left_items.append(item)
elif in_first_list:
left_items.append(item)
elif in_second_list:
top_items.append(item)
elif line.strip() == '':
# Empty line - transition from first list to second
if in_first_list and left_items:
in_first_list = False
in_second_list = True
elif not line.strip().startswith('-') and (in_first_list or in_second_list):
# Non-bullet line after starting lists - end of lists
break
# Parse answer from spoiler block # Parse answer from spoiler block
correct_pairs = [] correct_pairs = []
has_answer = False has_answer = False
in_spoiler = False
answer_lines = []
for line in lines: for node in nodes:
if line.strip().startswith('```spoiler-block'): if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
in_spoiler = True answer_text = node.raw.strip()
continue
if in_spoiler:
if line.strip() == '```':
break
stripped = line.strip()
if stripped:
answer_lines.append(stripped)
if answer_lines:
full_answer = ' '.join(answer_lines)
# Check for TODO # Check for TODO
if 'TODO' in full_answer.upper(): if 'TODO' in answer_text.upper():
has_answer = False has_answer = False
else: break
has_answer = True has_answer = True
# Parse "Item: Match" format # Parse "Item: Match" format
# Example: "Smak: Lobus Insularis" answer_lines = answer_text.split('\n')
for line in answer_lines: for line in answer_lines:
if ':' in line: line = line.strip()
if ':' not in line:
continue
left_part, top_part = line.split(':', 1) left_part, top_part = line.split(':', 1)
left_part = left_part.strip() left_part = left_part.strip()
top_part = top_part.strip() top_part = top_part.strip()
@@ -225,6 +325,7 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
if left_idx is not None and top_idx is not None: if left_idx is not None and top_idx is not None:
correct_pairs.append([left_idx, top_idx]) correct_pairs.append([left_idx, top_idx])
break
return True, { return True, {
'text': question_text, 'text': question_text,
@@ -232,215 +333,8 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
'top_items': top_items, 'top_items': top_items,
'correct_pairs': correct_pairs, 'correct_pairs': correct_pairs,
'has_answer': has_answer, 'has_answer': has_answer,
'question_type': 'matching' 'question_type': 'matching',
} 'tags': tags
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
"""
Parse a markdown file and extract question data.
Returns:
(is_mcq, question_data) where question_data contains:
- text: question text
- options: list of (letter, text) tuples
- correct_answer: the correct answer letter(s)
- has_answer: whether it has an answer (not TODO)
- tags: list of tag strings
"""
lines = content.split('\n')
# Check for question tags in frontmatter
# Accept: frågetyp/mcq, frågetyp/scq, frågetyp/textalternativ, frågetyp/textfält
is_question = False
question_type = None
in_frontmatter = False
for line in lines:
if line.strip() == '---':
if in_frontmatter:
# End of frontmatter
in_frontmatter = False
break
else:
in_frontmatter = True
continue
if in_frontmatter:
if 'frågetyp/' in line:
is_question = True
# Extract question type
if 'frågetyp/mcq' in line:
question_type = 'mcq'
elif 'frågetyp/scq' in line:
question_type = 'scq'
elif 'frågetyp/matching' in line:
question_type = 'matching'
elif 'frågetyp/textalternativ' in line:
question_type = 'textalternativ'
elif 'frågetyp/textfält' in line:
question_type = 'textfält'
if line.strip().lower().startswith('tags:'):
# Extract tags
# Handle: tags: [tag1, tag2] or tags: tag1, tag2
tag_content = line.split(':', 1)[1].strip()
# Remove brackets if present
tag_content = tag_content.strip('[]')
# Split by comma
tags = [t.strip() for t in tag_content.split(',') if t.strip()]
# If it's a matching question, use the matching parser
if question_type == 'matching':
is_matching, matching_data = parse_matching_question(content)
if is_matching:
# Add tags to the data
matching_data['tags'] = tags if 'tags' in locals() else []
return True, matching_data
if not is_question:
return False, {}
# Extract question text (first non-empty line after frontmatter)
question_text = None
in_frontmatter = False
frontmatter_done = False
for line in lines:
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
if frontmatter_done and line.strip() and not line.startswith('![['):
# Skip "Välj ett/två alternativ:" lines
if 'Välj' in line and 'alternativ' in line:
continue
if not line.startswith('-') and not line.startswith('```'):
question_text = line.strip().replace('**', '')
break
# Return early if no question text found, but include has_answer field
if not question_text:
return True, {
'text': None,
'options': [],
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags if 'tags' in locals() else []
}
# Extract options (pattern: "- A:" or "- A" for MCQ, or text for textalternativ)
options_data = []
in_frontmatter = False
frontmatter_done = False
in_spoiler = False
for line in lines:
# Track frontmatter to skip it
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
# Skip frontmatter and spoiler blocks
if in_frontmatter or not frontmatter_done:
continue
if line.strip().startswith('```spoiler-block:'):
in_spoiler = True
continue
if in_spoiler:
if line.strip() == '```':
in_spoiler = False
continue
# Match "- A: text" or "- A: " or just "- A"
match = re.match(r'^-\s*([A-Z]):\s*(.*)$', line.strip())
if not match:
# Also try "- A" without colon
match = re.match(r'^-\s*([A-Z])$', line.strip())
if match:
letter = match.group(1)
text = match.group(2) if len(match.groups()) > 1 else ""
options_data.append((letter, text.strip()))
else:
# For textalternativ, options might be plain text items
if question_type in ['textalternativ', 'textfält'] and line.strip().startswith('-') and not line.strip().startswith('--'):
# Extract text after dash
option_text = line.strip()[1:].strip()
# Skip if it's a sub-question marker like "a)" or "b)"
if option_text and not re.match(r'^[a-z]\)', option_text):
# Use incrementing letters for text options
letter = chr(ord('A') + len(options_data))
options_data.append((letter, option_text))
# For text-based questions, options are optional
if not options_data:
# At least return something for single-option questions
options_data = [('A', '')]
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
return True, {
'text': question_text,
'options': options_data,
'correct_answer': '',
'has_answer': False,
'question_type': question_type
}
# Extract answer from spoiler block
correct_answer = None
has_answer = False
in_spoiler = False
answer_lines = []
for line in lines:
if line.strip().startswith('```spoiler-block:'):
in_spoiler = True
continue
if in_spoiler:
if line.strip() == '```':
break
stripped = line.strip()
if stripped:
answer_lines.append(stripped)
# Process collected answer lines
if answer_lines:
full_answer = ' '.join(answer_lines)
# Check for TODO
if 'TODO' in full_answer.upper():
has_answer = False
else:
has_answer = True
# For MCQ/SCQ: Extract capital letters
if question_type in ['mcq', 'scq']:
letters = re.findall(r'\b([A-Z])\b', full_answer)
if letters:
correct_answer = ','.join(sorted(set(letters)))
else:
# For text-based questions: Store the full answer text
correct_answer = full_answer[:200] # Limit to 200 chars for database field
return True, {
'text': question_text,
'options': options_data,
'correct_answer': correct_answer,
'has_answer': has_answer,
'question_type': question_type,
'tags': tags if 'tags' in locals() else []
} }
@@ -460,7 +354,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
file_mtime = file_path.stat().st_mtime file_mtime = file_path.stat().st_mtime
# Calculate path relative to project root # Calculate path relative to project root
from django.conf import settings
project_root = settings.BASE_DIR.parent project_root = settings.BASE_DIR.parent
try: try:
file_path_str = str(file_path.relative_to(project_root)) file_path_str = str(file_path.relative_to(project_root))
@@ -518,9 +411,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
# Try to parse as date # Try to parse as date
if exam_folder and '-' in exam_folder: if exam_folder and '-' in exam_folder:
try: try:
from datetime import datetime
from quiz.models import Course, Exam
exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date() exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()
# Get or create course (default to "Anatomi & Histologi 2") # Get or create course (default to "Anatomi & Histologi 2")
@@ -610,17 +500,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats: def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
"""
Import all questions from a folder.
Args:
folder_path: Path to the folder containing question markdown files
base_path: Base path for relative path calculations (defaults to folder_path)
force: If True, import all files regardless of mtime (for initial import)
Returns:
ImportStats object with import statistics
"""
if base_path is None: if base_path is None:
base_path = folder_path base_path = folder_path
@@ -634,9 +513,7 @@ def import_questions(folder_path: Path, base_path: Path = None, force: bool = Fa
def delete_question_by_path(file_path: Path): def delete_question_by_path(file_path: Path):
"""Delete a question from the database by file path"""
try: try:
from django.conf import settings
project_root = settings.BASE_DIR.parent project_root = settings.BASE_DIR.parent
file_path_str = str(file_path.relative_to(project_root)) file_path_str = str(file_path.relative_to(project_root))
deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete() deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()

View File

@@ -0,0 +1,38 @@
__all__ = ["obsidian_embed"]
# https://help.obsidian.md/embeds
# Supported:
# ![[image-4.png|292x316]]
def parse_embed(inline, match, state):
    """Parse an Obsidian ``![[target|size]]`` embed into an ``embed`` token.

    The optional ``|size`` suffix is either ``WIDTHxHEIGHT`` or a bare
    ``WIDTH``.  A non-numeric suffix (Obsidian also allows a display alias,
    e.g. ``![[note|My Alias]]``) is ignored instead of raising ValueError.

    Returns the match end offset, as mistune inline parsers must.
    """
    filename = match.group("filename")
    attrs = {}
    if "|" in filename:
        filename, size = filename.split("|", 1)
    else:
        size = None
    attrs["filename"] = filename
    if size:
        width, _, height = size.partition("x")
        # Only treat the suffix as dimensions when the parts are numeric;
        # otherwise it is a display alias, not a size, and is dropped.
        if width.isdigit():
            attrs["width"] = int(width)
        if height.isdigit():
            attrs["height"] = int(height)
    state.append_token({"type": "embed", "attrs": attrs})
    return match.end()
# Inline regex for Obsidian embeds: ``![[target]]``.  The target must not
# start with whitespace (lookahead) nor end with whitespace (lookbehind).
# NOTE: the original used a look*ahead* ``(?!\s)`` before ``\]\]``, which is
# a no-op — the next character there is always ``]`` — so trailing
# whitespace was never actually rejected; a lookbehind is what was intended.
INLINE_EMBED_PATTERN = (
    r'!\[\['               # begins with ![[
    r'(?!\s)'              # first char of target is not whitespace
    r'(?P<filename>.+?)'   # content between `![[xx]]`
    r'(?<!\s)'             # last char of target is not whitespace
    r'\]\]'                # closing ]]
)
def obsidian_embed(md: "Markdown") -> None:
    """Mistune plugin entry point: register the ``![[...]]`` embed syntax.

    Registered *before* the ``link`` rule so the ``![[`` prefix is not
    consumed by the standard image/link inline parsers.
    """
    md.inline.register('embed', INLINE_EMBED_PATTERN, parse_embed, before="link")

View File

@@ -0,0 +1,89 @@
import dataclasses
import pathlib
import frontmatter
import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
class Node:
    """Lightweight wrapper around a mistune AST token dict."""

    def __init__(self, token):
        self.type = token["type"]
        self.raw = token.get("raw", "")
        self.attrs = token.get("attrs", {})
        self.children = [Node(token=child_token) for child_token in token.get("children", [])]

    def __repr__(self) -> str:
        parts = []
        if self.raw:
            parts.append(f"raw={self.raw!r}")
        if self.attrs:
            parts.append(f"attrs={self.attrs!r}")
        if self.children:
            parts.append(f"children={self.children!r}")
        # CamelCase the token type, e.g. block_text -> BlockText
        camel = "".join(word.title() for word in self.type.split("_"))
        return f"{camel}({', '.join(parts)})"

    @property
    def text(self) -> str:
        """Concatenated raw text of this node's leaf ``text`` descendants."""
        if self.type == "text":
            return self.raw
        return "".join(child.text for child in self.children)
@dataclasses.dataclass
class ParsedQuestion:
    """Result of parsing a question markdown file."""
    # YAML frontmatter key/value pairs from the top of the file.
    metadata: dict = dataclasses.field(default_factory=dict)
    # Markdown body with the frontmatter stripped.
    raw_content: str = ""
    # Top-level mistune AST tokens wrapped in Node objects.
    nodes: list[Node] = dataclasses.field(default_factory=list)
nodes: list[Node] = dataclasses.field(default_factory=list)
def parse_question(path: pathlib.Path):
    """Read *path* as UTF-8 and parse it as a question file."""
    return parse_question_from_content(path.read_text(encoding="utf-8"))
def parse_question_from_content(content_str: str):
    """Split the frontmatter off *content_str* and parse the markdown body.

    Returns a ParsedQuestion holding the frontmatter metadata, the raw
    body text, and the mistune AST wrapped in Node objects.
    """
    metadata, body = frontmatter.parse(content_str)
    ast_tokens = markdown(body)
    return ParsedQuestion(
        metadata=metadata,
        raw_content=body,
        nodes=[Node(token=tok) for tok in ast_tokens],
    )
def main():
root = pathlib.Path(__file__).parent.parent.parent.parent
print(root)
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
for file in sorted(exams.glob("*/*.md")):
if len(file.stem) > 2:
continue
question = parse_question(file)
print(question.metadata, repr(question.raw_content))
continue
for node in question.nodes:
match node.type:
case "heading":
print("Heading:", repr(node.text))
case "paragraph":
print("Paragraph:", repr(node.text))
case "list":
print("List:")
for child in node.children:
print(" - List item:", repr(child.text))
case "block_code" if node.attrs["info"] == "spoiler-block:":
print("Spoiler:", repr(node.raw.rstrip()))
if __name__ == "__main__":
main()