vault backup: 2025-12-23 10:55:37
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m10s

2025-12-23 10:55:37 +01:00
parent 68079d368d
commit 3b2751808e
9 changed files with 877 additions and 368 deletions

View File

@@ -30,7 +30,7 @@
"state": { "state": {
"file": "Anatomi & Histologi 2/Statistik.md", "file": "Anatomi & Histologi 2/Statistik.md",
"mode": "source", "mode": "source",
"source": true, "source": false,
"backlinks": false "backlinks": false
}, },
"icon": "lucide-file", "icon": "lucide-file",

View File

@@ -1,11 +1,11 @@
### Tentor
| Datum      | OCR | Split | Bilder | Hotspot | Taggad | Svar | Granska |
| ---------- | :-: | :---: | :----: | :-----: | :----: | :--: | ------- |
| 2022-01-15 | ✅  |  ✅   |   ✅   |         |   ✅   |  ✅  |         |
| 2022-06-01 | ✅  |  ✅   |   ✅   |         |   ✅   |  ✅  |         |
| 2023-01-11 | ✅  |  ✅   |   ✅   |         |   ✅   |  ✅  |         |
| 2023-05-31 | ✅  |  ✅   |   ✅   |         |   ✅   |  ✅  |         |
| 2024-01-10 | ✅  |  ✅   |   ✅   |         |   ✅   |      |         |
| 2024-05-29 | ✅  |  ✅   |   ✅   |         |   ✅   |      |         |
| 2025-01-15 | ✅  |  ✅   |   ✅   |         |   ✅   |      |         |

View File

@@ -1,18 +0,0 @@
# Matching Questions Format Analysis
Based on reviewing the 17 matching questions:
## Key Finding:
Only **1 question has an answer** (2023-05-31/3.md); the rest are marked TODO.
**That question uses this format:**
- Two separate bullet lists
- Answer: "ItemName: MatchName" format
## Proposed Implementation:
1. Support two-list format (most flexible)
2. Parse answer as "Item: Match" pairs
3. Store as JSON with 0-indexed pairs
4. Render as n×n table with radio buttons
## Next: Implement based on this one working example.
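
For illustration, a minimal sketch (not part of the vault or this commit) of how the proposed "Item: Match" answer format maps onto 0-indexed pairs stored as JSON; the item lists below are hypothetical, and the answer line mirrors the "Smak: Lobus Insularis" example referenced elsewhere in this commit:

import json

# Hypothetical two-list matching question: first list = rows, second list = columns.
left_items = ["Smak", "Syn"]
top_items = ["Lobus Insularis", "Lobus Occipitalis"]
# Answer lines in the proposed "Item: Match" format.
answer_lines = ["Smak: Lobus Insularis", "Syn: Lobus Occipitalis"]

pairs = []
for line in answer_lines:
    left, top = (part.strip() for part in line.split(":", 1))
    pairs.append([left_items.index(left), top_items.index(top)])

# Stored as JSON with 0-indexed pairs, as proposed above.
print(json.dumps({"correct_pairs": pairs}))  # {"correct_pairs": [[0, 0], [1, 1]]}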

View File

@@ -1,15 +0,0 @@
import pathlib
import mistune
markdown = mistune.create_markdown(renderer=None)
root = pathlib.Path(__file__).parent.parent
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
print(exams.absolute())
for file in sorted(exams.glob("*/*.md")):
if len(file.stem) > 2:
continue
print(f"Parsing {file}")
tokens = markdown(file.read_text(encoding="utf-8"))
import pprint
pprint.pprint(tokens)

View File

@@ -13,5 +13,6 @@ markers =
    admin: Admin interface tests
    import: Import and parsing tests
    import_tests: Import and parsing tests
+    parser: Question parser tests
    slow: Slow running tests
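
As a usage note (the test module below is hypothetical), registering the `parser` marker here lets the new parser tests be tagged and selected from the command line:

import pytest

# Hypothetical test module tagged with the newly registered marker.
pytestmark = pytest.mark.parser

def test_example():
    assert True

# Run only parser tests:        pytest -m parser
# Everything except slow ones:  pytest -m "parser and not slow"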

View File

@@ -0,0 +1,537 @@
"""
Comprehensive test suite for the question_parser module.
This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:
1. Node class:
- Initialization with different token types
- Attribute handling
- Children node processing
- String representation (__repr__)
- Text extraction from nested structures
2. parse_question function:
- Metadata parsing (tags, dates, etc.)
- Raw content extraction
- Different question types (MCQ, SCQ, text field, matching)
- Questions with images
- Edge cases (empty content, missing frontmatter)
- Document structure preservation
3. ParsedQuestion dataclass:
- Default values
- Initialization with custom values
4. Real exam questions:
- Parsing actual exam questions from the content directory
- Validation of all short-named question files
Test execution:
pytest tests/test_question_parser.py -v # Verbose output
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
pytest tests/test_question_parser.py --collect-only # List all tests
"""
import pathlib
import tempfile
import pytest
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
@pytest.fixture
def temp_dir():
"""Create a temporary directory for test files"""
with tempfile.TemporaryDirectory() as tmpdir:
yield pathlib.Path(tmpdir)
@pytest.fixture
def create_question_file(temp_dir):
"""Factory fixture to create question files"""
def _create_file(filename: str, content: str) -> pathlib.Path:
file_path = temp_dir / filename
file_path.write_text(content, encoding="utf-8")
return file_path
return _create_file
class TestNode:
"""Test the Node class"""
@pytest.mark.parametrize("token,expected_type,expected_raw", [
({"type": "paragraph"}, "paragraph", ""),
({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
({"type": "text", "raw": "Some text"}, "text", "Some text"),
({"type": "list"}, "list", ""),
])
def test_node_initialization(self, token, expected_type, expected_raw):
"""Test Node initialization with different token types"""
node = Node(token)
assert node.type == expected_type
assert node.raw == expected_raw
@pytest.mark.parametrize("token,expected_attrs", [
({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
({"type": "paragraph"}, {}),
({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
])
def test_node_attributes(self, token, expected_attrs):
"""Test Node attributes handling"""
node = Node(token)
assert node.attrs == expected_attrs
def test_node_children(self):
"""Test Node children handling"""
token = {
"type": "paragraph",
"children": [
{"type": "text", "raw": "Hello "},
{"type": "text", "raw": "World"},
]
}
node = Node(token)
assert len(node.children) == 2
assert node.children[0].type == "text"
assert node.children[0].raw == "Hello "
assert node.children[1].type == "text"
assert node.children[1].raw == "World"
@pytest.mark.parametrize("token,expected_repr_contains", [
({"type": "text", "raw": "test"}, "Text(raw='test')"),
({"type": "paragraph"}, "Paragraph()"),
({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
])
def test_node_repr(self, token, expected_repr_contains):
"""Test Node __repr__ method"""
node = Node(token)
assert repr(node) == expected_repr_contains
@pytest.mark.parametrize("token,expected_text", [
({"type": "text", "raw": "Simple text"}, "Simple text"),
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Hello "},
{"type": "text", "raw": "World"},
]
},
"Hello World"
),
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Nested "},
{
"type": "strong",
"children": [{"type": "text", "raw": "bold"}]
},
{"type": "text", "raw": " text"},
]
},
"Nested bold text"
),
])
def test_node_text_property(self, token, expected_text):
"""Test Node text property extraction"""
node = Node(token)
assert node.text == expected_text
class TestParseQuestion:
"""Test the parse_question function"""
@pytest.mark.parametrize("content,expected_tags", [
(
"""---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
["ah2", "provfråga", "frågetyp/mcq"]
),
(
"""---
tags:
- ah2
- provfråga
- frågetyp/scq
date: 2023-05-31
---
Question content""",
["ah2", "provfråga", "frågetyp/scq"]
),
])
def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
"""Test parsing of metadata tags in different formats"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert question.metadata["tags"] == expected_tags
@pytest.mark.parametrize("content,expected_date", [
(
"""---
tags: [ah2]
date: 2022-01-15
---
Content""",
"2022-01-15"
),
(
"""---
tags: [ah2]
date: 2023-05-31
---
Content""",
"2023-05-31"
),
])
def test_parse_metadata_date(self, create_question_file, content, expected_date):
"""Test parsing of metadata date"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert str(question.metadata["date"]) == expected_date
@pytest.mark.parametrize("content,expected_raw", [
(
"""---
tags: [ah2]
---
Simple question""",
"Simple question"
),
(
"""---
tags: [ah2]
---
Question with **bold** text""",
"Question with **bold** text"
),
])
def test_parse_raw_content(self, create_question_file, content, expected_raw):
"""Test parsing of raw content"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert question.raw_content.strip() == expected_raw
def test_parse_mcq_question(self, create_question_file):
"""Test parsing a complete MCQ question"""
content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?
**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus
```spoiler-block:
A och D
```
"""
file_path = create_question_file("mcq.md", content)
question = parse_question(file_path)
assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
assert len(question.nodes) > 0
# Find paragraph nodes
paragraphs = [n for n in question.nodes if n.type == "paragraph"]
assert len(paragraphs) > 0
# Find list nodes
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
# Find spoiler block
code_blocks = [n for n in question.nodes if n.type == "block_code"]
assert len(code_blocks) > 0
spoiler = code_blocks[0]
assert spoiler.attrs.get("info") == "spoiler-block:"
assert "A och D" in spoiler.raw
def test_parse_scq_question(self, create_question_file):
"""Test parsing a single choice question"""
content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?
- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons
```spoiler-block:
A
```
"""
file_path = create_question_file("scq.md", content)
question = parse_question(file_path)
assert "frågetyp/scq" in question.metadata["tags"]
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
def test_parse_text_field_question(self, create_question_file):
"""Test parsing a text field question"""
content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]
**Fyll i rätt siffra!**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?
```spoiler-block:
a) 7
b) 6
```
"""
file_path = create_question_file("textfield.md", content)
question = parse_question(file_path)
assert "frågetyp/textfält" in question.metadata["tags"]
assert len(question.nodes) > 0
def test_parse_matching_question(self, create_question_file):
"""Test parsing a matching question"""
content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)
- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller
- JA, finn i CNS
- NEJ, finns inte i CNS
```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
file_path = create_question_file("matching.md", content)
question = parse_question(file_path)
assert "frågetyp/matching" in question.metadata["tags"]
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
def test_parse_question_with_image(self, create_question_file):
"""Test parsing a question with embedded images"""
content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]
**Fyll i rätt siffra !**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)
```spoiler-block:
a) 7
b) 18
```
"""
file_path = create_question_file("image_q.md", content)
question = parse_question(file_path)
assert "bild" in question.metadata["tags"]
assert "![[image-4.png" in question.raw_content
embed = question.nodes[0].children[0]
assert embed.type == "embed"
assert embed.attrs == {
"filename": "image-4.png",
"width": 292,
"height": 316
}
@pytest.mark.parametrize("invalid_content", [
"", # Empty content
"No frontmatter", # No frontmatter
"---\n---\n", # Empty frontmatter
])
def test_parse_edge_cases(self, create_question_file, invalid_content):
"""Test parsing edge cases"""
file_path = create_question_file("edge.md", invalid_content)
question = parse_question(file_path)
assert isinstance(question, ParsedQuestion)
def test_parse_question_preserves_structure(self, create_question_file):
"""Test that parsing preserves the document structure"""
content = """---
tags: [ah2]
---
# Heading
Paragraph text
- List item 1
- List item 2
```spoiler-block:
Answer
```
"""
file_path = create_question_file("structure.md", content)
question = parse_question(file_path)
node_types = [n.type for n in question.nodes]
assert "heading" in node_types
assert "paragraph" in node_types
assert "list" in node_types
assert "block_code" in node_types
class TestParsedQuestionDataclass:
"""Test the ParsedQuestion dataclass"""
def test_parsed_question_defaults(self):
"""Test ParsedQuestion default values"""
question = ParsedQuestion()
assert question.metadata == {}
assert question.raw_content == ""
assert question.nodes == []
def test_parsed_question_initialization(self):
"""Test ParsedQuestion initialization with values"""
metadata = {"tags": ["test"], "date": "2022-01-15"}
content = "Test content"
nodes = [Node({"type": "paragraph"})]
question = ParsedQuestion(
metadata=metadata,
raw_content=content,
nodes=nodes
)
assert question.metadata == metadata
assert question.raw_content == content
assert question.nodes == nodes
class TestRealQuestions:
"""Test parsing real questions from the exam files"""
@pytest.fixture
def exam_dir(self):
"""Get the real exam directory"""
root = pathlib.Path(__file__).parent.parent.parent
exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
if exam_path.exists():
return exam_path
pytest.skip("Exam directory not found")
@pytest.mark.parametrize("exam_date,question_num", [
("2022-01-15", "1"),
("2022-01-15", "2"),
("2022-01-15", "3"),
("2022-01-15", "4"),
("2022-06-01", "8"),
])
def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
"""Test parsing real exam questions"""
file_path = exam_dir / exam_date / f"{question_num}.md"
if not file_path.exists():
pytest.skip(f"Question file {file_path} not found")
question = parse_question(file_path)
# Verify metadata exists and has required fields
assert "tags" in question.metadata
assert isinstance(question.metadata["tags"], list)
assert "ah2" in question.metadata["tags"]
assert "provfråga" in question.metadata["tags"]
# Verify content was parsed
assert len(question.raw_content) > 0
assert len(question.nodes) > 0
def test_parse_all_short_named_questions(self, exam_dir):
"""Test parsing all questions with short filenames (1-2 chars)"""
questions_found = 0
for file in sorted(exam_dir.glob("*/*.md")):
if len(file.stem) <= 2 and file.stem.isdigit():
question = parse_question(file)
assert isinstance(question, ParsedQuestion)
assert "tags" in question.metadata
questions_found += 1
# Ensure we found at least some questions
assert questions_found > 0, "No exam questions found to test"
class TestNodeTextExtraction:
"""Test text extraction from complex node structures"""
@pytest.mark.parametrize("token,expected_text", [
# Simple text
({"type": "text", "raw": "Hello"}, "Hello"),
# Paragraph with multiple text children
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "A "},
{"type": "text", "raw": "B "},
{"type": "text", "raw": "C"},
]
},
"A B C"
),
# Nested formatting
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Normal "},
{
"type": "emphasis",
"children": [{"type": "text", "raw": "italic"}]
},
{"type": "text", "raw": " "},
{
"type": "strong",
"children": [{"type": "text", "raw": "bold"}]
},
]
},
"Normal italic bold"
),
# Empty node
({"type": "paragraph", "children": []}, ""),
])
def test_complex_text_extraction(self, token, expected_text):
"""Test text extraction from complex nested structures"""
node = Node(token)
assert node.text == expected_text

View File

@@ -1,8 +1,13 @@
import re
-from pathlib import Path
from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
from typing import Tuple
-from quiz.models import Question, Option
+from django.conf import settings
+from quiz.models import Course, Exam, Question, Option
+from quiz.utils.question_parser import parse_question_from_content, Node

class ImportStats:
@@ -70,175 +75,10 @@ class ImportStats:
return "\n".join(lines) return "\n".join(lines)
def parse_matching_question(content: str) -> Tuple[bool, dict]:
"""
Parse matching question from markdown.
Expected format:
- Two consecutive bullet lists (with "- " prefix)
- First list = left column items (rows)
- Second list = top row items (columns)
- Answer format: "LeftItem: TopItem" pairs
Returns:
(is_matching, question_data) where question_data contains:
- text: question text
- left_items: list of left column items
- top_items: list of top row items
- correct_pairs: list of [left_idx, top_idx] pairs (0-indexed)
- has_answer: whether it has an answer (not TODO)
- question_type: 'matching'
"""
lines = content.split('\n')
# Extract question text (first non-empty line after frontmatter)
question_text = None
in_frontmatter = False
frontmatter_done = False
for line in lines:
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
if frontmatter_done and line.strip() and not line.startswith('![['):
if not line.startswith('-') and not line.startswith('```'):
question_text = line.strip().replace('**', '')
break
if not question_text:
return True, {
'text': None,
'left_items': [],
'top_items': [],
'correct_pairs': [],
'has_answer': False,
'question_type': 'matching'
}
# Extract two consecutive bullet lists
left_items = []
top_items = []
in_first_list = False
in_second_list = False
in_frontmatter = False
frontmatter_done = False
found_question_text = False
for line in lines:
# Track frontmatter
if line.strip() == '---':
if not in_frontmatter:
in_frontmatter = True
else:
in_frontmatter = False
frontmatter_done = True
continue
if in_frontmatter or not frontmatter_done:
continue
# Skip spoiler blocks
if line.strip().startswith('```'):
break
# Found question text
if not found_question_text and question_text in line:
found_question_text = True
continue
if not found_question_text:
continue
# Look for bullet lists
if line.strip().startswith('- '):
item = line.strip()[2:].strip()
if not item: # Empty bullet
continue
if not in_first_list and not in_second_list:
in_first_list = True
left_items.append(item)
elif in_first_list:
left_items.append(item)
elif in_second_list:
top_items.append(item)
elif line.strip() == '':
# Empty line - transition from first list to second
if in_first_list and left_items:
in_first_list = False
in_second_list = True
elif not line.strip().startswith('-') and (in_first_list or in_second_list):
# Non-bullet line after starting lists - end of lists
break
# Parse answer from spoiler block
correct_pairs = []
has_answer = False
in_spoiler = False
answer_lines = []
for line in lines:
if line.strip().startswith('```spoiler-block'):
in_spoiler = True
continue
if in_spoiler:
if line.strip() == '```':
break
stripped = line.strip()
if stripped:
answer_lines.append(stripped)
if answer_lines:
full_answer = ' '.join(answer_lines)
# Check for TODO
if 'TODO' in full_answer.upper():
has_answer = False
else:
has_answer = True
# Parse "Item: Match" format
# Example: "Smak: Lobus Insularis"
for line in answer_lines:
if ':' in line:
left_part, top_part = line.split(':', 1)
left_part = left_part.strip()
top_part = top_part.strip()
# Find indices
left_idx = None
top_idx = None
for idx, item in enumerate(left_items):
if left_part.lower() in item.lower() or item.lower() in left_part.lower():
left_idx = idx
break
for idx, item in enumerate(top_items):
if top_part.lower() in item.lower() or item.lower() in top_part.lower():
top_idx = idx
break
if left_idx is not None and top_idx is not None:
correct_pairs.append([left_idx, top_idx])
return True, {
'text': question_text,
'left_items': left_items,
'top_items': top_items,
'correct_pairs': correct_pairs,
'has_answer': has_answer,
'question_type': 'matching'
}
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
    """
-    Parse a markdown file and extract question data.
+    Parse a markdown file and extract question data using the new question_parser.
    Returns:
        (is_mcq, question_data) where question_data contains:
@@ -247,84 +87,67 @@ def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
        - correct_answer: the correct answer letter(s)
        - has_answer: whether it has an answer (not TODO)
        - tags: list of tag strings
+        - question_type: type of question (mcq, scq, matching, etc.)
    """
-    lines = content.split('\n')
-    # Check for question tags in frontmatter
-    # Accept: frågetyp/mcq, frågetyp/scq, frågetyp/textalternativ, frågetyp/textfält
-    is_question = False
+    # Parse from content string (works for both test cases and real files)
+    parsed = parse_question_from_content(content)
+    # Extract metadata
+    metadata = parsed.metadata
+    tags = metadata.get('tags', [])
+    # Check for question type in tags
    question_type = None
-    in_frontmatter = False
-    for line in lines:
-        if line.strip() == '---':
-            if in_frontmatter:
-                # End of frontmatter
-                in_frontmatter = False
-                break
-            else:
-                in_frontmatter = True
-            continue
-        if in_frontmatter:
-            if 'frågetyp/' in line:
-                is_question = True
-                # Extract question type
-                if 'frågetyp/mcq' in line:
-                    question_type = 'mcq'
-                elif 'frågetyp/scq' in line:
-                    question_type = 'scq'
-                elif 'frågetyp/matching' in line:
-                    question_type = 'matching'
-                elif 'frågetyp/textalternativ' in line:
-                    question_type = 'textalternativ'
-                elif 'frågetyp/textfält' in line:
-                    question_type = 'textfält'
-            if line.strip().lower().startswith('tags:'):
-                # Extract tags
-                # Handle: tags: [tag1, tag2] or tags: tag1, tag2
-                tag_content = line.split(':', 1)[1].strip()
-                # Remove brackets if present
-                tag_content = tag_content.strip('[]')
-                # Split by comma
-                tags = [t.strip() for t in tag_content.split(',') if t.strip()]
-    # If it's a matching question, use the matching parser
-    if question_type == 'matching':
-        is_matching, matching_data = parse_matching_question(content)
-        if is_matching:
-            # Add tags to the data
-            matching_data['tags'] = tags if 'tags' in locals() else []
-            return True, matching_data
+    is_question = False
+    for tag in tags:
+        if 'frågetyp/' in tag:
+            is_question = True
+            if 'frågetyp/mcq' in tag:
+                question_type = 'mcq'
+            elif 'frågetyp/scq' in tag:
+                question_type = 'scq'
+            elif 'frågetyp/matching' in tag:
+                question_type = 'matching'
+            elif 'frågetyp/textalternativ' in tag:
+                question_type = 'textalternativ'
+            elif 'frågetyp/textfält' in tag:
+                question_type = 'textfält'
    if not is_question:
        return False, {}
-    # Extract question text (first non-empty line after frontmatter)
-    question_text = None
-    in_frontmatter = False
-    frontmatter_done = False
-    for line in lines:
-        if line.strip() == '---':
-            if not in_frontmatter:
-                in_frontmatter = True
-            else:
-                in_frontmatter = False
-                frontmatter_done = True
-            continue
-        if frontmatter_done and line.strip() and not line.startswith('![['):
-            # Skip "Välj ett/två alternativ:" lines
-            if 'Välj' in line and 'alternativ' in line:
-                continue
-            if not line.startswith('-') and not line.startswith('```'):
-                question_text = line.strip().replace('**', '')
-                break
+    # Handle matching questions separately
+    if question_type == 'matching':
+        return parse_matching_question_from_nodes(parsed.nodes, tags)
+    # Extract question text from first paragraph (skip images and special instructions)
+    question_text = None
+    for node in parsed.nodes:
+        if node.type != "paragraph":
+            continue
+        text = node.text.strip()
+        # Skip empty paragraphs
+        if not text:
+            continue
+        # Remove inline images from text first
+        text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
+        # Skip if paragraph was only an image reference
+        if not text:
+            continue
+        # Skip "Välj X alternativ" instructions
+        if 'Välj' in text and 'alternativ' in text:
+            continue
+        # Clean up bold markers
+        text = text.replace('**', '')
+        if text:
+            question_text = text
+            break
+    # Return early if no question text found, but include has_answer field
    if not question_text:
        return True, {
            'text': None,
@@ -332,62 +155,38 @@ def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
            'correct_answer': '',
            'has_answer': False,
            'question_type': question_type,
-            'tags': tags if 'tags' in locals() else []
+            'tags': tags
        }
-    # Extract options (pattern: "- A:" or "- A" for MCQ, or text for textalternativ)
+    # Extract options from list nodes
    options_data = []
-    in_frontmatter = False
-    frontmatter_done = False
-    in_spoiler = False
-    for line in lines:
-        # Track frontmatter to skip it
-        if line.strip() == '---':
-            if not in_frontmatter:
-                in_frontmatter = True
-            else:
-                in_frontmatter = False
-                frontmatter_done = True
-            continue
-        # Skip frontmatter and spoiler blocks
-        if in_frontmatter or not frontmatter_done:
-            continue
-        if line.strip().startswith('```spoiler-block:'):
-            in_spoiler = True
-            continue
-        if in_spoiler:
-            if line.strip() == '```':
-                in_spoiler = False
-            continue
-        # Match "- A: text" or "- A: " or just "- A"
-        match = re.match(r'^-\s*([A-Z]):\s*(.*)$', line.strip())
-        if not match:
-            # Also try "- A" without colon
-            match = re.match(r'^-\s*([A-Z])$', line.strip())
-        if match:
-            letter = match.group(1)
-            text = match.group(2) if len(match.groups()) > 1 else ""
-            options_data.append((letter, text.strip()))
-        else:
-            # For textalternativ, options might be plain text items
-            if question_type in ['textalternativ', 'textfält'] and line.strip().startswith('-') and not line.strip().startswith('--'):
-                # Extract text after dash
-                option_text = line.strip()[1:].strip()
-                # Skip if it's a sub-question marker like "a)" or "b)"
-                if option_text and not re.match(r'^[a-z]\)', option_text):
-                    # Use incrementing letters for text options
-                    letter = chr(ord('A') + len(options_data))
-                    options_data.append((letter, option_text))
+    for node in parsed.nodes:
+        if node.type != "list":
+            continue
+        for item in node.children:
+            # Get the text of the list item
+            if item.type != "list_item":
+                continue
+            item_text = item.text.strip()
+            # Match "A: text" or just "A"
+            match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
+            if match:
+                letter = match.group(1)
+                text = match.group(2).strip()
+                options_data.append((letter, text))
+            elif re.match(r'^([A-Z])$', item_text):
+                letter = item_text
+                options_data.append((letter, ''))
+            elif question_type in ['textalternativ', 'textfält']:
+                # For text-based questions, use incrementing letters
+                if not re.match(r'^[a-z]\)', item_text):  # Skip sub-question markers
+                    letter = chr(ord('A') + len(options_data))
+                    options_data.append((letter, item_text))
    # For text-based questions, options are optional
    if not options_data:
-        # At least return something for single-option questions
        options_data = [('A', '')]
    elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
        return True, {
@@ -395,44 +194,34 @@ def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
            'options': options_data,
            'correct_answer': '',
            'has_answer': False,
-            'question_type': question_type
+            'question_type': question_type,
+            'tags': tags
        }

    # Extract answer from spoiler block
    correct_answer = None
    has_answer = False
-    in_spoiler = False
-    answer_lines = []
-    for line in lines:
-        if line.strip().startswith('```spoiler-block:'):
-            in_spoiler = True
-            continue
-        if in_spoiler:
-            if line.strip() == '```':
-                break
-            stripped = line.strip()
-            if stripped:
-                answer_lines.append(stripped)
-    # Process collected answer lines
-    if answer_lines:
-        full_answer = ' '.join(answer_lines)
-        # Check for TODO
-        if 'TODO' in full_answer.upper():
-            has_answer = False
-        else:
-            has_answer = True
-            # For MCQ/SCQ: Extract capital letters
-            if question_type in ['mcq', 'scq']:
-                letters = re.findall(r'\b([A-Z])\b', full_answer)
-                if letters:
-                    correct_answer = ','.join(sorted(set(letters)))
-            else:
-                # For text-based questions: Store the full answer text
-                correct_answer = full_answer[:200]  # Limit to 200 chars for database field
+    for node in parsed.nodes:
+        if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
+            answer_text = node.raw.strip()
+            # Check for TODO
+            if 'TODO' in answer_text.upper():
+                has_answer = False
+            else:
+                has_answer = True
+                # For MCQ/SCQ: Extract capital letters
+                if question_type in ['mcq', 'scq']:
+                    letters = re.findall(r'\b([A-Z])\b', answer_text)
+                    if letters:
+                        correct_answer = ','.join(sorted(set(letters)))
+                else:
+                    # For text-based questions: Store the full answer text
+                    correct_answer = answer_text[:200]  # Limit to 200 chars for database field
+            break

    return True, {
        'text': question_text,
@@ -440,7 +229,112 @@ def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
        'correct_answer': correct_answer,
        'has_answer': has_answer,
        'question_type': question_type,
-        'tags': tags if 'tags' in locals() else []
+        'tags': tags
    }

def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
"""
Parse matching question from parsed nodes.
Expected format:
- Two consecutive bullet lists
- First list = left column items (rows)
- Second list = top row items (columns)
- Answer format: "LeftItem: TopItem" pairs
Returns:
(is_matching, question_data)
"""
# Extract question text
question_text = None
for node in nodes:
if node.type == "paragraph":
text = node.text.strip()
# Remove inline images
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
# Skip if empty after removing images
if not text:
continue
question_text = text.replace('**', '')
break
if not question_text:
return True, {
'text': None,
'left_items': [],
'top_items': [],
'correct_pairs': [],
'has_answer': False,
'question_type': 'matching',
'tags': tags
}
# Extract two consecutive lists
left_items = []
top_items = []
list_nodes = [node for node in nodes if node.type == "list"]
if len(list_nodes) >= 2:
# First list = left items
for item in list_nodes[0].children:
if item.type == "list_item":
left_items.append(item.text.strip())
# Second list = top items
for item in list_nodes[1].children:
if item.type == "list_item":
top_items.append(item.text.strip())
# Parse answer from spoiler block
correct_pairs = []
has_answer = False
for node in nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
answer_text = node.raw.strip()
# Check for TODO
if 'TODO' in answer_text.upper():
has_answer = False
break
has_answer = True
# Parse "Item: Match" format
answer_lines = answer_text.split('\n')
for line in answer_lines:
line = line.strip()
if ':' not in line:
continue
left_part, top_part = line.split(':', 1)
left_part = left_part.strip()
top_part = top_part.strip()
# Find indices
left_idx = None
top_idx = None
for idx, item in enumerate(left_items):
if left_part.lower() in item.lower() or item.lower() in left_part.lower():
left_idx = idx
break
for idx, item in enumerate(top_items):
if top_part.lower() in item.lower() or item.lower() in top_part.lower():
top_idx = idx
break
if left_idx is not None and top_idx is not None:
correct_pairs.append([left_idx, top_idx])
break
return True, {
'text': question_text,
'left_items': left_items,
'top_items': top_items,
'correct_pairs': correct_pairs,
'has_answer': has_answer,
'question_type': 'matching',
'tags': tags
    }
@@ -460,7 +354,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
    file_mtime = file_path.stat().st_mtime

    # Calculate path relative to project root
-    from django.conf import settings
    project_root = settings.BASE_DIR.parent
    try:
        file_path_str = str(file_path.relative_to(project_root))
@@ -518,9 +411,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
    # Try to parse as date
    if exam_folder and '-' in exam_folder:
        try:
-            from datetime import datetime
-            from quiz.models import Course, Exam
            exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()

            # Get or create course (default to "Anatomi & Histologi 2")
@@ -610,17 +500,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
-    """
-    Import all questions from a folder.
-    Args:
-        folder_path: Path to the folder containing question markdown files
-        base_path: Base path for relative path calculations (defaults to folder_path)
-        force: If True, import all files regardless of mtime (for initial import)
-    Returns:
-        ImportStats object with import statistics
-    """
    if base_path is None:
        base_path = folder_path
@@ -634,9 +513,7 @@ def import_questions(folder_path: Path, base_path: Path = None, force: bool = Fa
def delete_question_by_path(file_path: Path):
-    """Delete a question from the database by file path"""
    try:
-        from django.conf import settings
        project_root = settings.BASE_DIR.parent
        file_path_str = str(file_path.relative_to(project_root))
        deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()
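
As a standalone sketch of the answer-extraction step above (the sample answer string is the spoiler-block content from the MCQ test fixture), the MCQ/SCQ branch reduces the spoiler text to a comma-joined, de-duplicated set of answer letters:

import re

# Spoiler-block content as it appears in the MCQ fixture ("A och D").
answer_text = "A och D"
letters = re.findall(r'\b([A-Z])\b', answer_text)
correct_answer = ','.join(sorted(set(letters)))
print(correct_answer)  # -> A,D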

View File

@@ -0,0 +1,38 @@
__all__ = ["obsidian_embed"]
# https://help.obsidian.md/embeds
# Supported:
# ![[image-4.png|292x316]]
def parse_embed(inline, match, state):
filename = match.group("filename")
attrs = {}
if "|" in filename:
filename, size = filename.split("|", 1)
else:
size = None
attrs["filename"] = filename
if size:
if "x" in size:
width, height = size.split("x", 1)
if width:
attrs["width"] = int(width)
if height:
attrs["height"] = int(height)
else:
attrs["width"] = int(size)
state.append_token({"type": "embed", "attrs": attrs})
return match.end()
INLINE_EMBED_PATTERN = (
    r'!\[\['              # opens with ![[
    r'(?!\s)'             # filename must not start with whitespace
    r'(?P<filename>.+?)'  # content between ![[ and ]]
    r'(?<!\s)'            # filename must not end with whitespace
    r'\]\]'               # closes with ]]
)
def obsidian_embed(md: "Markdown") -> None:
md.inline.register('embed', INLINE_EMBED_PATTERN, parse_embed, before="link")
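
A quick, isolated check of the plugin (mirroring the attrs asserted in the test suite); it assumes the module is importable as quiz.utils.obsidian_embed_plugin, as in question_parser below:

import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed

md = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
tokens = md("![[image-4.png|292x316]]")
# The paragraph's first child is the parsed embed token:
# {'type': 'embed', 'attrs': {'filename': 'image-4.png', 'width': 292, 'height': 316}}
print(tokens[0]["children"][0])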

View File

@@ -0,0 +1,89 @@
import dataclasses
import pathlib
import frontmatter
import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
class Node:
def __init__(self, token):
self.type = token["type"]
self.raw = token.get("raw", "")
self.attrs = token.get("attrs", {})
self.children = [Node(token=child) for child in token.get("children", [])]
def __repr__(self) -> str:
attrs = []
if self.raw:
attrs.append(f"raw={self.raw!r}")
if self.attrs:
attrs.append(f"attrs={self.attrs!r}")
if self.children:
attrs.append(f"children={self.children!r}")
# block_text -> BlockText
pretty = self.type.replace("_", " ").title().replace(" ", "")
return f"{pretty}(" + ", ".join(attrs) + ")"
@property
def text(self) -> str:
if self.type == "text":
return self.raw
texts = []
for child in self.children:
texts.append(child.text)
return "".join(texts)
@dataclasses.dataclass
class ParsedQuestion:
metadata: dict = dataclasses.field(default_factory=dict)
raw_content: str = ""
nodes: list[Node] = dataclasses.field(default_factory=list)
def parse_question(path: pathlib.Path):
raw = path.read_text(encoding="utf-8")
return parse_question_from_content(raw)
def parse_question_from_content(content_str: str):
"""Parse question from a content string instead of a file."""
metadata, content = frontmatter.parse(content_str)
tokens = markdown(content)
question = ParsedQuestion(
metadata=metadata,
raw_content=content,
nodes=[Node(token=token) for token in tokens],
)
return question
def main():
root = pathlib.Path(__file__).parent.parent.parent.parent
print(root)
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
for file in sorted(exams.glob("*/*.md")):
if len(file.stem) > 2:
continue
question = parse_question(file)
print(question.metadata, repr(question.raw_content))
continue
for node in question.nodes:
match node.type:
case "heading":
print("Heading:", repr(node.text))
case "paragraph":
print("Paragraph:", repr(node.text))
case "list":
print("List:")
for child in node.children:
print(" - List item:", repr(child.text))
case "block_code" if node.attrs["info"] == "spoiler-block:":
print("Spoiler:", repr(node.raw.rstrip()))
if __name__ == "__main__":
main()
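
For reference, a small usage sketch of parse_question_from_content on an inline string shaped like the test fixtures (the sample question text itself is made up):

from quiz.utils.question_parser import parse_question_from_content

sample = """---
tags: [ah2, provfråga, frågetyp/scq]
date: 2022-06-01
---
Vilken del av CNS syns i bild?

- A: Cerebellum
- B: Pons
"""

question = parse_question_from_content(sample)
print(question.metadata["tags"])  # ['ah2', 'provfråga', 'frågetyp/scq']
node_types = [node.type for node in question.nodes]
print("paragraph" in node_types, "list" in node_types)  # True True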