
vault backup: 2025-12-26 02:09:22

2025-12-26 02:09:22 +01:00
parent 3fddadfe50
commit 50366b9b9c
288 changed files with 58893 additions and 750 deletions


@@ -0,0 +1,527 @@
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Tuple
import django.db.utils
from django.conf import settings
from quiz.models import Course, Exam, Question, Option
from quiz.utils.question_parser import parse_question_from_content, Node
class ImportStats:
"""Track import statistics by exam folder"""
def __init__(self):
self.total_files = 0
self.mcq_questions = 0
self.non_mcq_skipped = 0
self.questions_with_answers = 0
self.questions_with_todo = 0
self.created = 0
self.updated = 0
self.errors = 0
self.by_folder = defaultdict(lambda: {
'total': 0,
'mcq': 0,
'answered': 0,
'todo': 0
})
def has_changes(self) -> bool:
"""Check if there were any actual changes"""
return self.created > 0 or self.updated > 0 or self.errors > 0
def format_output(self, show_if_no_changes: bool = True) -> str:
"""
Format statistics for console output
Args:
show_if_no_changes: If False, returns empty string when no changes
"""
if not show_if_no_changes and not self.has_changes():
return ""
lines = []
lines.append("\n" + "="*70)
lines.append("QUESTION IMPORT STATISTICS")
lines.append("="*70)
lines.append(f"Total .md files found: {self.total_files}")
lines.append(f"MCQ questions found: {self.mcq_questions}")
lines.append(f"Non-MCQ skipped: {self.non_mcq_skipped}")
lines.append(f"Questions with answers: {self.questions_with_answers}")
lines.append(f"Questions with TODO: {self.questions_with_todo}")
lines.append(f"Created in database: {self.created}")
lines.append(f"Updated in database: {self.updated}")
if self.errors > 0:
lines.append(f"Errors: {self.errors}")
if self.mcq_questions > 0:
completion_pct = (self.questions_with_answers / self.mcq_questions * 100)
lines.append(f"Overall completion: {completion_pct:.1f}%")
lines.append("\n" + "-"*70)
lines.append("COMPLETION BY EXAM FOLDER")
lines.append("-"*70)
sorted_folders = sorted(self.by_folder.items())
for folder, stats in sorted_folders:
if stats['mcq'] > 0:
pct = (stats['answered'] / stats['mcq'] * 100)
lines.append(f"{folder:20} {stats['answered']:3}/{stats['mcq']:3} MCQ ({pct:5.1f}%)")
lines.append("="*70 + "\n")
return "\n".join(lines)
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
"""
Parse a markdown file and extract question data using the new question_parser.
Returns:
(is_mcq, question_data) where question_data contains:
- text: question text
- options: list of (letter, text) tuples
- correct_answer: the correct answer letter(s)
- has_answer: whether it has an answer (not TODO)
- tags: list of tag strings
- question_type: type of question (mcq, scq, matching, etc.)
"""
# Parse from content string (works for both test cases and real files)
parsed = parse_question_from_content(content)
# Extract metadata
metadata = parsed.metadata
tags = metadata.get('tags', [])
# Check for question type in tags
question_type = None
is_question = False
for tag in tags:
if 'frågetyp/' in tag:
is_question = True
if 'frågetyp/mcq' in tag:
question_type = 'mcq'
elif 'frågetyp/scq' in tag:
question_type = 'scq'
elif 'frågetyp/matching' in tag:
question_type = 'matching'
elif 'frågetyp/textalternativ' in tag:
question_type = 'textalternativ'
elif 'frågetyp/textfält' in tag:
question_type = 'textfält'
if not is_question:
return False, {}
# Handle matching questions separately
if question_type == 'matching':
return parse_matching_question_from_nodes(parsed.nodes, tags)
# Extract question text from first paragraph (skip images and special instructions)
question_text = None
for node in parsed.nodes:
if node.type != "paragraph":
continue
text = node.text.strip()
# Skip empty paragraphs
if not text:
continue
# Remove inline images from text first
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
# Skip if paragraph was only an image reference
if not text:
continue
# Skip "Välj X alternativ" instructions
if 'Välj' in text and 'alternativ' in text:
continue
# Clean up bold markers
text = text.replace('**', '')
if text:
question_text = text
break
if not question_text:
return True, {
'text': None,
'options': [],
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract options from list nodes
options_data = []
for node in parsed.nodes:
if node.type != "list":
continue
for item in node.children:
# Get the text of the list item
if item.type != "list_item":
continue
item_text = item.text.strip()
# Match "A: text" or just "A"
match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
if match:
letter = match.group(1)
text = match.group(2).strip()
options_data.append((letter, text))
elif re.match(r'^([A-Z])$', item_text):
letter = item_text
options_data.append((letter, ''))
elif question_type in ['textalternativ', 'textfält']:
# For text-based questions, use incrementing letters
if not re.match(r'^[a-z]\)', item_text): # Skip sub-question markers
letter = chr(ord('A') + len(options_data))
options_data.append((letter, item_text))
# For text-based questions, options are optional
if not options_data and question_type in ['textalternativ', 'textfält']:
options_data = [('A', '')]
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
return True, {
'text': question_text,
'options': options_data,
'correct_answer': '',
'has_answer': False,
'question_type': question_type,
'tags': tags
}
# Extract answer from spoiler block
correct_answer = None
has_answer = False
for node in parsed.nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
answer_text = node.raw.strip()
# Check for TODO
if 'TODO' in answer_text.upper():
has_answer = False
else:
has_answer = True
# For MCQ/SCQ: Extract capital letters
if question_type in ['mcq', 'scq']:
letters = re.findall(r'\b([A-Z])\b', answer_text)
if letters:
correct_answer = ','.join(sorted(set(letters)))
else:
# For text-based questions: Store the full answer text
correct_answer = answer_text[:200] # Limit to 200 chars for database field
break
return True, {
'text': question_text,
'options': options_data,
'correct_answer': correct_answer,
'has_answer': has_answer,
'question_type': question_type,
'tags': tags
}
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
"""
Parse matching question from parsed nodes.
Expected format:
- Two consecutive bullet lists
- First list = left column items (rows)
- Second list = top row items (columns)
- Answer format: "LeftItem: TopItem" pairs
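Example: an answer line "Smak: Lobus Insularis" resolves to the pair
[left_idx, top_idx], where left_idx is the index of "Smak" in the left
list and top_idx is the index of "Lobus Insularis" in the top list.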
Returns:
(is_matching, question_data)
"""
# Extract question text
question_text = None
for node in nodes:
if node.type == "paragraph":
text = node.text.strip()
# Remove inline images
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
# Skip if empty after removing images
if not text:
continue
question_text = text.replace('**', '')
break
if not question_text:
return True, {
'text': None,
'left_items': [],
'top_items': [],
'correct_pairs': [],
'has_answer': False,
'question_type': 'matching',
'tags': tags
}
# Extract two consecutive lists
left_items = []
top_items = []
list_nodes = [node for node in nodes if node.type == "list"]
if len(list_nodes) >= 2:
# First list = left items
for item in list_nodes[0].children:
if item.type == "list_item":
left_items.append(item.text.strip())
# Second list = top items
for item in list_nodes[1].children:
if item.type == "list_item":
top_items.append(item.text.strip())
# Parse answer from spoiler block
correct_pairs = []
has_answer = False
for node in nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
answer_text = node.raw.strip()
# Check for TODO
if 'TODO' in answer_text.upper():
has_answer = False
break
has_answer = True
# Parse "Item: Match" format
answer_lines = answer_text.split('\n')
for line in answer_lines:
line = line.strip()
if ':' not in line:
continue
left_part, top_part = line.split(':', 1)
left_part = left_part.strip()
top_part = top_part.strip()
# Find indices
left_idx = None
top_idx = None
for idx, item in enumerate(left_items):
if left_part.lower() in item.lower() or item.lower() in left_part.lower():
left_idx = idx
break
for idx, item in enumerate(top_items):
if top_part.lower() in item.lower() or item.lower() in top_part.lower():
top_idx = idx
break
if left_idx is not None and top_idx is not None:
correct_pairs.append([left_idx, top_idx])
break
return True, {
'text': question_text,
'left_items': left_items,
'top_items': top_items,
'correct_pairs': correct_pairs,
'has_answer': has_answer,
'question_type': 'matching',
'tags': tags
}
def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, force: bool = False):
"""
Import a single question file, checking modification time to avoid unnecessary updates.
Args:
file_path: Path to the question file
base_path: Base path for relative calculations
stats: ImportStats object to track statistics
force: If True, import regardless of mtime (for initial import)
"""
try:
# Get file modification time
file_mtime = file_path.stat().st_mtime
# Calculate path relative to project root
project_root = settings.BASE_DIR.parent
try:
file_path_str = str(file_path.relative_to(project_root))
except ValueError:
file_path_str = str(file_path.relative_to(base_path))
# Check if file has changed by comparing mtime
if not force:
try:
existing_question = Question.objects.get(file_path=file_path_str)
if existing_question.file_mtime and existing_question.file_mtime >= file_mtime:
# File hasn't changed, skip
return 'skipped_unchanged'
except Question.DoesNotExist:
pass # New file, will import
content = file_path.read_text(encoding='utf-8')
is_mcq, question_data = parse_markdown_question(file_path, content)
# Track folder stats
relative_path = file_path.relative_to(base_path)
folder_name = relative_path.parts[0] if len(relative_path.parts) > 1 else 'root'
stats.by_folder[folder_name]['total'] += 1
if not is_mcq:
stats.non_mcq_skipped += 1
return 'skipped_not_mcq'
stats.mcq_questions += 1
stats.by_folder[folder_name]['mcq'] += 1
if not question_data or not question_data.get('text'):
stats.non_mcq_skipped += 1
return 'skipped_invalid'
if not question_data['has_answer']:
stats.questions_with_todo += 1
stats.by_folder[folder_name]['todo'] += 1
return 'skipped_todo'
stats.questions_with_answers += 1
stats.by_folder[folder_name]['answered'] += 1
# Extract exam information from folder structure
# Expected path: content/Anatomi & Histologi 2/Gamla tentor/2022-01-15/question.md
exam = None
relative_path = file_path.relative_to(base_path)
path_parts = relative_path.parts
# Try to extract exam date from folder structure
if len(path_parts) >= 2:
# Get the parent folder name which should be the exam date (e.g., "2022-01-15")
exam_folder = path_parts[-2] if len(path_parts) > 1 else None
# Try to parse as date
if exam_folder and '-' in exam_folder:
try:
exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()
# Get or create course (default to "Anatomi & Histologi 2")
# Extract course name from path if available
course_name = "Anatomi & Histologi 2"
if len(path_parts) >= 3 and 'Anatomi' in ' '.join(path_parts):
# Try to find course name in path
for part in path_parts:
if 'Anatomi' in part or 'Histologi' in part:
course_name = part
break
course, _ = Course.objects.get_or_create(
name=course_name,
defaults={'code': 'AH2'}
)
# Get or create exam
exam, _ = Exam.objects.get_or_create(
course=course,
date=exam_date,
defaults={
'name': exam_folder,
'folder_path': '/'.join(path_parts[:-1])
}
)
except ValueError:
pass # If date parsing fails, exam remains None
# Import to database with mtime tracking
# Prepare defaults dict
defaults = {
'exam': exam,
'text': question_data['text'],
'correct_answer': question_data.get('correct_answer', ''),
'file_mtime': file_mtime,
'question_type': question_data.get('question_type', 'mcq'),
}
# Add matching_data if it's a matching question
if question_data.get('question_type') == 'matching':
defaults['matching_data'] = {
'left_items': question_data.get('left_items', []),
'top_items': question_data.get('top_items', []),
'correct_pairs': question_data.get('correct_pairs', [])
}
question, created = Question.objects.update_or_create(
file_path=file_path_str,
defaults=defaults
)
if created:
stats.created += 1
else:
stats.updated += 1
# Update tags
from django.utils.text import slugify
from quiz.models import Tag
question.tags.clear()
for tag_name in question_data.get('tags', []):
tag_slug = slugify(tag_name)
tag, _ = Tag.objects.get_or_create(
slug=tag_slug,
defaults={'name': tag_name}
)
question.tags.add(tag)
# Update options (only for MCQ/SCQ questions)
if question_data.get('question_type') not in ['matching']:
question.options.all().delete()
# Deduplicate options by letter (keep first occurrence)
seen_letters = set()
for letter, text in question_data.get('options', []):
if letter not in seen_letters:
Option.objects.create(question=question, letter=letter, text=text)
seen_letters.add(letter)
return 'imported' if created else 'updated'
except (OSError, ValueError, django.db.utils.Error) as e:
stats.errors += 1
print(f"Error importing {file_path}: {e}")
return 'error'
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
if base_path is None:
base_path = folder_path
stats = ImportStats()
for md_file in folder_path.rglob('*.md'):
stats.total_files += 1
import_question_file(md_file, base_path, stats, force=force)
return stats
def delete_question_by_path(file_path: Path):
try:
project_root = settings.BASE_DIR.parent
file_path_str = str(file_path.relative_to(project_root))
deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()
if deleted_count > 0:
print(f"[Auto-delete] ✓ Deleted question: {file_path.name}")
return deleted_count > 0
except (OSError, django.db.utils.Error) as e:
print(f"[Auto-delete] ✗ Error deleting question {file_path}: {e}")
return False
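
For orientation, a minimal sketch of how this importer could be driven, e.g. from a Django management command. The command wrapper itself is an assumption; import_questions and format_output are as defined above.

# Hypothetical management command (sketch, not part of this commit):
from pathlib import Path
from django.core.management.base import BaseCommand
from quiz.utils.importer import import_questions

class Command(BaseCommand):
    help = "Import exam questions from markdown files"

    def add_arguments(self, parser):
        parser.add_argument("folder", type=Path)
        parser.add_argument("--force", action="store_true")

    def handle(self, *args, **options):
        # force=True bypasses the mtime check and re-imports everything
        stats = import_questions(options["folder"], force=options["force"])
        self.stdout.write(stats.format_output())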


@@ -0,0 +1,38 @@
__all__ = ["obsidian_embed"]
# https://help.obsidian.md/embeds
# Supported:
# ![[image-4.png|292x316]]
def parse_embed(inline, match, state):
filename = match.group("filename")
attrs = {}
if "|" in filename:
filename, size = filename.split("|", 1)
else:
size = None
attrs["filename"] = filename
if size:
if "x" in size:
width, height = size.split("x", 1)
if width:
attrs["width"] = int(width)
if height:
attrs["height"] = int(height)
else:
attrs["width"] = int(size)
state.append_token({"type": "embed", "attrs": attrs})
return match.end()
INLINE_EMBED_PATTERN = (
r'!\[\[' # begins with ![[
r'(?!\s)' # filename must not start with whitespace
r'(?P<filename>.+?)' # content between `![[xx]]`
r'(?<!\s)' # filename must not end with whitespace (lookbehind)
r'\]\]' # closing ]]
)
def obsidian_embed(md: "Markdown") -> None:
md.inline.register('embed', INLINE_EMBED_PATTERN, parse_embed, before="link")
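
As a quick sanity check, the plugin can be exercised directly against mistune's AST renderer, using the same wiring that question_parser.py sets up below; the expected token shape matches the embed test in the test suite.

# Minimal sketch: register the plugin and inspect the resulting AST token.
import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed

md = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
tokens = md("![[image-4.png|292x316]]")
embed = tokens[0]["children"][0]  # paragraph -> inline embed token
assert embed["type"] == "embed"
assert embed["attrs"] == {"filename": "image-4.png", "width": 292, "height": 316}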


@@ -0,0 +1,89 @@
import dataclasses
import pathlib
import frontmatter
import mistune
from quiz.utils.obsidian_embed_plugin import obsidian_embed
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
class Node:
def __init__(self, token):
self.type = token["type"]
self.raw = token.get("raw", "")
self.attrs = token.get("attrs", {})
self.children = [Node(token=child) for child in token.get("children", [])]
def __repr__(self) -> str:
attrs = []
if self.raw:
attrs.append(f"raw={self.raw!r}")
if self.attrs:
attrs.append(f"attrs={self.attrs!r}")
if self.children:
attrs.append(f"children={self.children!r}")
# block_text -> BlockText
pretty = self.type.replace("_", " ").title().replace(" ", "")
return f"{pretty}(" + ", ".join(attrs) + ")"
@property
def text(self) -> str:
if self.type == "text":
return self.raw
texts = []
for child in self.children:
texts.append(child.text)
return "".join(texts)
@dataclasses.dataclass
class ParsedQuestion:
metadata: dict = dataclasses.field(default_factory=dict)
raw_content: str = ""
nodes: list[Node] = dataclasses.field(default_factory=list)
def parse_question(path: pathlib.Path):
raw = path.read_text(encoding="utf-8")
return parse_question_from_content(raw)
def parse_question_from_content(content_str: str):
"""Parse question from a content string instead of a file."""
metadata, content = frontmatter.parse(content_str)
tokens = markdown(content)
question = ParsedQuestion(
metadata=metadata,
raw_content=content,
nodes=[Node(token=token) for token in tokens],
)
return question
def main():
root = pathlib.Path(__file__).parent.parent.parent.parent
print(root)
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
for file in sorted(exams.glob("*/*.md")):
if len(file.stem) > 2:
continue
question = parse_question(file)
print(question.metadata, repr(question.raw_content))
for node in question.nodes:
match node.type:
case "heading":
print("Heading:", repr(node.text))
case "paragraph":
print("Paragraph:", repr(node.text))
case "list":
print("List:")
for child in node.children:
print(" - List item:", repr(child.text))
case "block_code" if node.attrs["info"] == "spoiler-block:":
print("Spoiler:", repr(node.raw.rstrip()))
if __name__ == "__main__":
main()
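
For reference, a minimal round trip through the parser, mirroring the fixtures in the test suites below; the assertions only check coarse structure.

# Sketch: parse an in-memory question and inspect the node types.
from quiz.utils.question_parser import parse_question_from_content

content = """---
tags: [frågetyp/scq]
---
Pick one:
- A: One
- B: Two
```spoiler-block:
B
```
"""
parsed = parse_question_from_content(content)
assert parsed.metadata["tags"] == ["frågetyp/scq"]
types = [n.type for n in parsed.nodes]
assert "paragraph" in types and "list" in types and "block_code" in types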


@@ -0,0 +1,261 @@
import pytest
import time
from pathlib import Path
from quiz.utils.importer import parse_markdown_question, import_question_file, ImportStats
from quiz.models import Question, Option
@pytest.mark.django_db
@pytest.mark.import_tests
class TestMarkdownParsing:
"""Test parsing of various Obsidian markdown question formats"""
def test_parse_single_choice_question(self):
"""Test parsing standard single choice question (SCQ)"""
content = """---
tags: [ah2, provfråga, frågetyp/scq, anatomi]
date: 2022-01-15
---
What is the correct answer?
**Välj ett alternativ:**
- A: Wrong answer
- B: Correct answer
- C: Another wrong
```spoiler-block:
B
```
"""
is_question, data = parse_markdown_question(Path("test.md"), content)
assert is_question is True
assert data['text'] == 'What is the correct answer?'
assert data['correct_answer'] == 'B'
assert data['has_answer'] is True
assert data['question_type'] == 'scq'
assert len(data['options']) == 3
assert data['options'][0] == ('A', 'Wrong answer')
assert data['options'][1] == ('B', 'Correct answer')
def test_parse_multiple_choice_question(self):
"""Test parsing multiple choice question (MCQ) with 'och' separator"""
content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?
**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus
```spoiler-block:
A och D
```
"""
is_question, data = parse_markdown_question(Path("test.md"), content)
assert is_question is True
assert 'Vilka av följande' in data['text']
assert data['correct_answer'] == 'A,D' # Normalized to comma-separated
assert data['has_answer'] is True
assert data['question_type'] == 'mcq'
assert len(data['options']) == 4
def test_parse_multiple_choice_comma_separated(self):
"""Test MCQ with comma-separated answer"""
content = """---
tags: [frågetyp/mcq]
---
Select two options:
- A: Option A
- B: Option B
- C: Option C
- D: Option D
```spoiler-block:
B, C
```
"""
is_question, data = parse_markdown_question(Path("test.md"), content)
assert data['correct_answer'] == 'B,C'
assert data['has_answer'] is True
def test_parse_matching_question(self):
"""Test parsing matching question (DND/Matching)"""
content = """---
tags: [ah2, provfråga, frågetyp/matching, anatomi, öra]
date: 2023-05-31
---
**Matcha rätt funktion med rätt lob:**
(1p för alla rätt, inga delpoäng)
- Smak
- Syn
- Somatosensorik
- Motorik
- Hörsel
**Alternativ:**
- Lobus frontalis
- Lobus Insularis
- Lobus temporalis
- Lobus parietalis
- Lobus occipitalis
```spoiler-block:
Smak: Lobus Insularis
Syn: Lobus occipitalis
Somatosensorik: Lobus parietalis
Motorik: Lobus frontalis
Hörsel: Lobus temporalis
```
"""
is_matching, data = parse_markdown_question(Path("test.md"), content)
assert is_matching is True
assert data['question_type'] == 'matching'
assert data['has_answer'] is True
assert len(data['left_items']) == 5
assert len(data['top_items']) == 5
assert len(data['correct_pairs']) == 5
def test_parse_textalternativ_question(self):
"""Test text alternative question type"""
content = """---
tags: [frågetyp/textalternativ, öga, anatomi]
---
Svara på följande frågor:
a) Bokstaven B sitter i en lob, vilken?
- Lobus temporalis
- Lobus frontalis
- Lobus parietalis
b) Vilket funktionellt centra återfinns där?
- Syncentrum
- Motorcentrum
- Somatosensoriskt centrum
```spoiler-block:
a) Lobus parietalis
b) Somatosensoriskt centrum
```
"""
is_question, data = parse_markdown_question(Path("test.md"), content)
assert is_question is True
assert data['question_type'] == 'textalternativ'
assert data['has_answer'] is True
assert 'Lobus parietalis' in data['correct_answer']
assert 'Somatosensoriskt centrum' in data['correct_answer']
def test_parse_textfalt_question(self):
"""Test text field (fill-in) question type"""
content = """---
tags: [frågetyp/textfält, öga]
---
**Fyll i rätt siffra!**
a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?
```spoiler-block:
a) 7
b) 6
```
"""
is_question, data = parse_markdown_question(Path("test.md"), content)
assert is_question is True
assert data['question_type'] == 'textfält'
assert data['has_answer'] is True
assert '7' in data['correct_answer']
assert '6' in data['correct_answer']
@pytest.mark.django_db
@pytest.mark.import_tests
class TestQuestionImport:
"""Test actual import of questions to database"""
def test_import_single_question(self, tmp_path):
"""Test importing a single question file"""
question_file = tmp_path / "question1.md"
question_file.write_text("""---
tags: [frågetyp/scq]
---
Test question?
- A: Correct
- B: Wrong
```spoiler-block:
A
```
""")
stats = ImportStats()
result = import_question_file(question_file, tmp_path, stats, force=True)
assert result in ['imported', 'updated']
assert stats.questions_with_answers == 1
# Verify in database
question = Question.objects.get(text='Test question?')
assert question.correct_answer == 'A'
assert question.options.count() == 2
def test_mtime_tracking(self, tmp_path):
"""Test that file modification time is tracked"""
question_file = tmp_path / "question4.md"
question_file.write_text("""---
tags: [frågetyp/scq]
---
What is the correct answer?
```spoiler-block:
A
```
""")
stats = ImportStats()
import_question_file(question_file, tmp_path, stats, force=True)
question = Question.objects.get(text='What is the correct answer?')
assert question.file_mtime == question_file.stat().st_mtime
def test_update_existing_question(self, tmp_path):
"""Test updating an existing question"""
question_file = tmp_path / "question5.md"
# Initial import
question_file.write_text("""---
tags: [frågetyp/scq]
---
Question to update?
```spoiler-block:
A
```
""")
import_question_file(question_file, tmp_path, ImportStats(), force=True)
# Update the file
time.sleep(0.1)
question_file.write_text("""---
tags: [frågetyp/scq]
---
Question to update?
```spoiler-block:
B
```
""")
stats = ImportStats()
result = import_question_file(question_file, tmp_path, stats, force=False)
assert result == 'updated'
assert Question.objects.get(text='Question to update?').correct_answer == 'B'


@@ -0,0 +1,537 @@
"""
Comprehensive test suite for the question_parser module.
This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:
1. Node class:
- Initialization with different token types
- Attribute handling
- Children node processing
- String representation (__repr__)
- Text extraction from nested structures
2. parse_question function:
- Metadata parsing (tags, dates, etc.)
- Raw content extraction
- Different question types (MCQ, SCQ, text field, matching)
- Questions with images
- Edge cases (empty content, missing frontmatter)
- Document structure preservation
3. ParsedQuestion dataclass:
- Default values
- Initialization with custom values
4. Real exam questions:
- Parsing actual exam questions from the content directory
- Validation of all short-named question files
Test execution:
pytest tests/test_question_parser.py -v # Verbose output
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
pytest tests/test_question_parser.py --collect-only # List all tests
"""
import pathlib
import tempfile
import pytest
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
@pytest.fixture
def temp_dir():
"""Create a temporary directory for test files"""
with tempfile.TemporaryDirectory() as tmpdir:
yield pathlib.Path(tmpdir)
@pytest.fixture
def create_question_file(temp_dir):
"""Factory fixture to create question files"""
def _create_file(filename: str, content: str) -> pathlib.Path:
file_path = temp_dir / filename
file_path.write_text(content, encoding="utf-8")
return file_path
return _create_file
class TestNode:
"""Test the Node class"""
@pytest.mark.parametrize("token,expected_type,expected_raw", [
({"type": "paragraph"}, "paragraph", ""),
({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
({"type": "text", "raw": "Some text"}, "text", "Some text"),
({"type": "list"}, "list", ""),
])
def test_node_initialization(self, token, expected_type, expected_raw):
"""Test Node initialization with different token types"""
node = Node(token)
assert node.type == expected_type
assert node.raw == expected_raw
@pytest.mark.parametrize("token,expected_attrs", [
({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
({"type": "paragraph"}, {}),
({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
])
def test_node_attributes(self, token, expected_attrs):
"""Test Node attributes handling"""
node = Node(token)
assert node.attrs == expected_attrs
def test_node_children(self):
"""Test Node children handling"""
token = {
"type": "paragraph",
"children": [
{"type": "text", "raw": "Hello "},
{"type": "text", "raw": "World"},
]
}
node = Node(token)
assert len(node.children) == 2
assert node.children[0].type == "text"
assert node.children[0].raw == "Hello "
assert node.children[1].type == "text"
assert node.children[1].raw == "World"
@pytest.mark.parametrize("token,expected_repr_contains", [
({"type": "text", "raw": "test"}, "Text(raw='test')"),
({"type": "paragraph"}, "Paragraph()"),
({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
])
def test_node_repr(self, token, expected_repr_contains):
"""Test Node __repr__ method"""
node = Node(token)
assert repr(node) == expected_repr_contains
@pytest.mark.parametrize("token,expected_text", [
({"type": "text", "raw": "Simple text"}, "Simple text"),
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Hello "},
{"type": "text", "raw": "World"},
]
},
"Hello World"
),
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Nested "},
{
"type": "strong",
"children": [{"type": "text", "raw": "bold"}]
},
{"type": "text", "raw": " text"},
]
},
"Nested bold text"
),
])
def test_node_text_property(self, token, expected_text):
"""Test Node text property extraction"""
node = Node(token)
assert node.text == expected_text
class TestParseQuestion:
"""Test the parse_question function"""
@pytest.mark.parametrize("content,expected_tags", [
(
"""---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
["ah2", "provfråga", "frågetyp/mcq"]
),
(
"""---
tags:
- ah2
- provfråga
- frågetyp/scq
date: 2023-05-31
---
Question content""",
["ah2", "provfråga", "frågetyp/scq"]
),
])
def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
"""Test parsing of metadata tags in different formats"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert question.metadata["tags"] == expected_tags
@pytest.mark.parametrize("content,expected_date", [
(
"""---
tags: [ah2]
date: 2022-01-15
---
Content""",
"2022-01-15"
),
(
"""---
tags: [ah2]
date: 2023-05-31
---
Content""",
"2023-05-31"
),
])
def test_parse_metadata_date(self, create_question_file, content, expected_date):
"""Test parsing of metadata date"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert str(question.metadata["date"]) == expected_date
@pytest.mark.parametrize("content,expected_raw", [
(
"""---
tags: [ah2]
---
Simple question""",
"Simple question"
),
(
"""---
tags: [ah2]
---
Question with **bold** text""",
"Question with **bold** text"
),
])
def test_parse_raw_content(self, create_question_file, content, expected_raw):
"""Test parsing of raw content"""
file_path = create_question_file("test.md", content)
question = parse_question(file_path)
assert question.raw_content.strip() == expected_raw
def test_parse_mcq_question(self, create_question_file):
"""Test parsing a complete MCQ question"""
content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?
**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus
```spoiler-block:
A och D
```
"""
file_path = create_question_file("mcq.md", content)
question = parse_question(file_path)
assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
assert len(question.nodes) > 0
# Find paragraph nodes
paragraphs = [n for n in question.nodes if n.type == "paragraph"]
assert len(paragraphs) > 0
# Find list nodes
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
# Find spoiler block
code_blocks = [n for n in question.nodes if n.type == "block_code"]
assert len(code_blocks) > 0
spoiler = code_blocks[0]
assert spoiler.attrs.get("info") == "spoiler-block:"
assert "A och D" in spoiler.raw
def test_parse_scq_question(self, create_question_file):
"""Test parsing a single choice question"""
content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?
- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons
```spoiler-block:
A
```
"""
file_path = create_question_file("scq.md", content)
question = parse_question(file_path)
assert "frågetyp/scq" in question.metadata["tags"]
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
def test_parse_text_field_question(self, create_question_file):
"""Test parsing a text field question"""
content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]
**Fyll i rätt siffra!**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?
```spoiler-block:
a) 7
b) 6
```
"""
file_path = create_question_file("textfield.md", content)
question = parse_question(file_path)
assert "frågetyp/textfält" in question.metadata["tags"]
assert len(question.nodes) > 0
def test_parse_matching_question(self, create_question_file):
"""Test parsing a matching question"""
content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)
- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller
- JA, finn i CNS
- NEJ, finns inte i CNS
```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
file_path = create_question_file("matching.md", content)
question = parse_question(file_path)
assert "frågetyp/matching" in question.metadata["tags"]
lists = [n for n in question.nodes if n.type == "list"]
assert len(lists) > 0
def test_parse_question_with_image(self, create_question_file):
"""Test parsing a question with embedded images"""
content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]
**Fyll i rätt siffra !**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)
```spoiler-block:
a) 7
b) 18
```
"""
file_path = create_question_file("image_q.md", content)
question = parse_question(file_path)
assert "bild" in question.metadata["tags"]
assert "![[image-4.png" in question.raw_content
embed = question.nodes[0].children[0]
assert embed.type == "embed"
assert embed.attrs == {
"filename": "image-4.png",
"width": 292,
"height": 316
}
@pytest.mark.parametrize("invalid_content", [
"", # Empty content
"No frontmatter", # No frontmatter
"---\n---\n", # Empty frontmatter
])
def test_parse_edge_cases(self, create_question_file, invalid_content):
"""Test parsing edge cases"""
file_path = create_question_file("edge.md", invalid_content)
question = parse_question(file_path)
assert isinstance(question, ParsedQuestion)
def test_parse_question_preserves_structure(self, create_question_file):
"""Test that parsing preserves the document structure"""
content = """---
tags: [ah2]
---
# Heading
Paragraph text
- List item 1
- List item 2
```spoiler-block:
Answer
```
"""
file_path = create_question_file("structure.md", content)
question = parse_question(file_path)
node_types = [n.type for n in question.nodes]
assert "heading" in node_types
assert "paragraph" in node_types
assert "list" in node_types
assert "block_code" in node_types
class TestParsedQuestionDataclass:
"""Test the ParsedQuestion dataclass"""
def test_parsed_question_defaults(self):
"""Test ParsedQuestion default values"""
question = ParsedQuestion()
assert question.metadata == {}
assert question.raw_content == ""
assert question.nodes == []
def test_parsed_question_initialization(self):
"""Test ParsedQuestion initialization with values"""
metadata = {"tags": ["test"], "date": "2022-01-15"}
content = "Test content"
nodes = [Node({"type": "paragraph"})]
question = ParsedQuestion(
metadata=metadata,
raw_content=content,
nodes=nodes
)
assert question.metadata == metadata
assert question.raw_content == content
assert question.nodes == nodes
class TestRealQuestions:
"""Test parsing real questions from the exam files"""
@pytest.fixture
def exam_dir(self):
"""Get the real exam directory"""
root = pathlib.Path(__file__).parent.parent.parent
exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
if exam_path.exists():
return exam_path
pytest.skip("Exam directory not found")
@pytest.mark.parametrize("exam_date,question_num", [
("2022-01-15", "1"),
("2022-01-15", "2"),
("2022-01-15", "3"),
("2022-01-15", "4"),
("2022-06-01", "8"),
])
def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
"""Test parsing real exam questions"""
file_path = exam_dir / exam_date / f"{question_num}.md"
if not file_path.exists():
pytest.skip(f"Question file {file_path} not found")
question = parse_question(file_path)
# Verify metadata exists and has required fields
assert "tags" in question.metadata
assert isinstance(question.metadata["tags"], list)
assert "ah2" in question.metadata["tags"]
assert "provfråga" in question.metadata["tags"]
# Verify content was parsed
assert len(question.raw_content) > 0
assert len(question.nodes) > 0
def test_parse_all_short_named_questions(self, exam_dir):
"""Test parsing all questions with short filenames (1-2 chars)"""
questions_found = 0
for file in sorted(exam_dir.glob("*/*.md")):
if len(file.stem) <= 2 and file.stem.isdigit():
question = parse_question(file)
assert isinstance(question, ParsedQuestion)
assert "tags" in question.metadata
questions_found += 1
# Ensure we found at least some questions
assert questions_found > 0, "No exam questions found to test"
class TestNodeTextExtraction:
"""Test text extraction from complex node structures"""
@pytest.mark.parametrize("token,expected_text", [
# Simple text
({"type": "text", "raw": "Hello"}, "Hello"),
# Paragraph with multiple text children
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "A "},
{"type": "text", "raw": "B "},
{"type": "text", "raw": "C"},
]
},
"A B C"
),
# Nested formatting
(
{
"type": "paragraph",
"children": [
{"type": "text", "raw": "Normal "},
{
"type": "emphasis",
"children": [{"type": "text", "raw": "italic"}]
},
{"type": "text", "raw": " "},
{
"type": "strong",
"children": [{"type": "text", "raw": "bold"}]
},
]
},
"Normal italic bold"
),
# Empty node
({"type": "paragraph", "children": []}, ""),
])
def test_complex_text_extraction(self, token, expected_text):
"""Test text extraction from complex nested structures"""
node = Node(token)
assert node.text == expected_text


@@ -0,0 +1,187 @@
import datetime
from quiz.utils.unified_parser import UnifiedParser, QuestionType
def test_parse_mcq_question():
content = """---
tags: [frågetyp/mcq, ah2]
date: 2024-03-21
---
Question?
- A: Yes
- B: No
- C: Maybe
- D: Never
```spoiler-block:
A och D
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.MCQ
assert data.question == "Question?"
assert data.answer == ["A", "D"]
assert data.num_questions == 1
assert data.is_complete is True
assert data.options == ["A: Yes", "B: No", "C: Maybe", "D: Never"]
assert data.metadata == {"tags": ["frågetyp/mcq", "ah2"], "date": datetime.date(2024, 3, 21)}
assert not data.sub_questions
def test_parse_scq_question():
content = """---
tags: [frågetyp/scq]
---
Pick one:
- A: One
- B: Two
```spoiler-block:
B
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.SCQ
assert data.question == "Pick one:"
assert data.answer == "B"
assert data.num_questions == 1
assert data.is_complete is True
assert data.options == ["A: One", "B: Two"]
assert not data.sub_questions
def test_parse_textfält_question():
content = """---
tags: [frågetyp/textfält]
---
Name these:
a) Part 1
b) Part 2
```spoiler-block:
a) Left
b) Right
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.TEXTFÄLT
assert data.question == "Name these:"
assert data.answer == ["a) Left", "b) Right"]
assert data.num_questions == 2
assert len(data.sub_questions) == 2
assert data.sub_questions[0].id == "a"
assert data.sub_questions[0].text == "Part 1"
assert data.sub_questions[0].answer == "a) Left"
assert data.sub_questions[0].options is None
def test_parse_matching_question():
content = """---
tags: [frågetyp/matching]
---
Match:
- 1
- 2
- A
- B
```spoiler-block:
1: A
2: B
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.MATCHING
assert data.question == "Match:"
assert data.answer == [["1", "A"], ["2", "B"]]
assert data.num_questions == 1
assert data.options == ["1", "2", "A", "B"]
assert not data.sub_questions
def test_parse_question_with_image_and_instruction():
content = """---
tags: [frågetyp/scq]
---
**Välj ett alternativ:**
![[brain.png|300]]
What is this?
- A: Brain
- B: Heart
```spoiler-block:
A
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.SCQ
assert data.question == "What is this?"
assert data.instruction == "Välj ett alternativ:"
assert data.image == "![[brain.png]]"
assert data.is_complete is True
def test_parse_field_question_with_ranges():
content = """---
tags: [frågetyp/sifferfält]
---
Identify the structures:
a) Arachnoidea? (1..10)
(0.5 p)
b) Cortex cerebri (1..10)
(0.5 p)
```spoiler-block:
a) 7
b) 3
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.SIFFERFÄLT
assert data.num_questions == 2
assert len(data.sub_questions) == 2
# Part A
assert data.sub_questions[0].id == "a"
assert data.sub_questions[0].text == "Arachnoidea?"
assert data.sub_questions[0].options == [str(x) for x in range(1, 11)]
assert data.sub_questions[0].answer == "a) 7"
# Part B
assert data.sub_questions[1].id == "b"
assert data.sub_questions[1].text == "Cortex cerebri"
assert data.sub_questions[1].options == [str(x) for x in range(1, 11)]
assert data.sub_questions[1].answer == "b) 3"
def test_parse_field_question_with_list_options():
content = """---
tags: [frågetyp/sifferfält]
---
a) First (A, B, C)
b) Second (1, 2, 3)
```spoiler-block:
a) A
b) 2
```"""
data = UnifiedParser(content).parse()
assert data.sub_questions[0].options == ["A", "B", "C"]
assert data.sub_questions[1].options == ["1", "2", "3"]
def test_parse_hotspot_question():
content = """---
tags: [frågetyp/hotspot]
---
Klicka på hippocampus!
```spoiler-block:
![[brain_atlas.png]]
Det här är hippocampus.
```"""
data = UnifiedParser(content).parse()
assert data.type == QuestionType.HOTSPOT
assert data.answer == "Det här är hippocampus."
assert data.answer_image == "![[brain_atlas.png]]"
assert data.is_complete is True
def test_completeness_missing_sub_questions():
content = """---
tags: [frågetyp/textfält]
---
a) one
b) two
```spoiler-block:
a) found
```"""
data = UnifiedParser(content).parse()
assert data.num_questions == 2
assert data.is_complete is False
assert len(data.sub_questions) == 2
assert data.sub_questions[0].answer == "a) found"
assert data.sub_questions[1].answer is None


@@ -0,0 +1,465 @@
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
from quiz.utils.question_parser import Node, parse_question_from_content
# === REGEX PATTERNS ===
# Matches Obsidian-style embeds like ![[image.png]] or ![[image.png|300]]
EMBED_RE = re.compile(
r"!\[\[" # Start of embed
r".*?" # Content (filename and optional pipes)
r"\]\]" # End of embed
)
# Captures the filename from an Obsidian embed, ignoring dimensions
IMAGE_RE = re.compile(
r"!\[\[" # Start of embed
r"([^|\]]+)" # Group 1: Filename (everything before | or ])
r"(?:\|.*?)?" # Optional dimension part starting with |
r"\]\]" # End of embed
)
# Matches lettered options at the start of a line, e.g., "A: Text" or "B. Text"
OPTION_LETTER_RE = re.compile(
r"^([A-Z])" # Group 1: Single uppercase letter at start
r"[:\.]?" # Optional colon or period
r"\s*" # Optional whitespace
r"(.*)$" # Group 2: The rest of the text
)
# Matches standalone uppercase letters used for answers, e.g., "A", "A och B"
ANSWER_LETTER_RE = re.compile(
r"\b" # Word boundary
r"([A-Z])" # Group 1: Single uppercase letter
r"\b" # Word boundary
)
# Matches sub-question markers like a), b) at the start of a line
SUB_QUESTION_LETTER_RE = re.compile(
r"^\s*" # Start of line and optional whitespace
r"([a-z])" # Group 1: Single lowercase letter
r"\)" # Closing parenthesis
, re.MULTILINE)
# Matches numbered sub-question markers like 1), 2) at the start of a line
SUB_QUESTION_NUMBER_RE = re.compile(
r"^\s*" # Start of line and optional whitespace
r"(\d+)" # Group 1: One or more digits
r"\)" # Closing parenthesis
, re.MULTILINE)
# Matches select range patterns like (1..10)
SELECT_RANGE_RE = re.compile(
r"\(" # Opening parenthesis
r"(\d+)" # Group 1: Start number
r"\.\." # Range dots
r"(\d+)" # Group 2: End number
r"\)" # Closing parenthesis
)
# Matches letter range patterns like (A..H)
SELECT_LETTER_RANGE_RE = re.compile(
r"\(" # Opening parenthesis
r"([A-Z])" # Group 1: Start letter
r"\.\." # Range dots
r"([A-Z])" # Group 2: End letter
r"\)" # Closing parenthesis
)
# Matches select list patterns like (A, B, C)
SELECT_LIST_RE = re.compile(
r"\(" # Opening parenthesis
r"(" # Group 1: The list content
r"[^)]+" # Anything but closing parenthesis
r"," # At least one comma
r"[^)]+" # Anything but closing parenthesis
r")"
r"\)" # Closing parenthesis
)
# Matches sub-question markers in mid-text (used for splitting intro text)
FIELD_MARKER_RE = re.compile(
r"\b" # Word boundary
r"([a-z]|\d+)" # Group 1: Letter or digit
r"\)" # Closing parenthesis
)
# Matches sub-question markers (a, b or 1, 2) at start of line for splitting
SUB_QUESTION_SPLIT_RE = re.compile(
r"^\s*" # Start of line and optional whitespace
r"([a-z]|\d+)" # Group 1: Single letter or one or more digits
r"\)" # Closing parenthesis
r"\s*" # Optional trailing whitespace
, re.MULTILINE)
# Matches point markers like (0.5 p) or (1 p)
POINTS_RE = re.compile(
r"\(" # Opening parenthesis
r"\d+" # One or more digits
r"(?:\.\d+)?" # Optional decimal part
r"\s*" # Optional whitespace
r"p" # Literal 'p'
r"\)" # Closing parenthesis
)
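# Illustrative matches for the marker/select/point patterns (comments only):
#   SUB_QUESTION_LETTER_RE:  "a) Vilken siffra ..."  -> "a"
#   SELECT_RANGE_RE:         "(1..10)"   -> ("1", "10")
#   SELECT_LETTER_RANGE_RE:  "(A..H)"    -> ("A", "H")
#   SELECT_LIST_RE:          "(A, B, C)" -> "A, B, C"
#   POINTS_RE:               "(0.5 p)" or "(1 p)", stripped from sub-question text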
class QuestionType(Enum):
MCQ = "mcq"
SCQ = "scq"
MATCHING = "matching"
TEXTALTERNATIV = "textalternativ"
TEXTFÄLT = "textfält"
SIFFERFÄLT = "sifferfält"
HOTSPOT = "hotspot"
SAMMANSATT = "sammansatt"
DND_TEXT = "dnd-text"
DND_BILD = "dnd-bild"
SANT_FALSKT = "sant-falskt"
@dataclass
class SubQuestion:
id: str # 'a', 'b', etc.
text: str # Text for this part
answer: Any = None
options: list[str] | None = None # None if text input
@dataclass
class QuestionData:
type: QuestionType
question: str
answer: Any # str | list[str] | list[list[str]]
num_questions: int = 1 # Total sub-questions (a, b, c...)
is_complete: bool = False
options: list[str] = field(default_factory=list)
image: str | None = None
answer_image: str | None = None
instruction: str | None = None
metadata: dict = field(default_factory=dict)
sub_questions: list[SubQuestion] = field(default_factory=list)
class UnifiedParser:
def __init__(self, content: str):
self.content = content
self.parsed = parse_question_from_content(content)
self.metadata = self.parsed.metadata
self.nodes = self.parsed.nodes
# Pre-extract common fields
self.type = self._extract_type()
self.question = self._extract_question_text()
self.instruction = self._extract_instruction()
self.image = self._extract_image()
self.num_questions = self._count_sub_questions()
def parse(self) -> QuestionData:
match self.type:
case QuestionType.MCQ | QuestionType.SCQ:
data = self._parse_choice_question()
case QuestionType.MATCHING:
data = self._create_question(
answer=self._extract_answer_pairs(),
options=self._extract_bullet_list_options()
)
case QuestionType.TEXTALTERNATIV:
data = self._create_question(
answer=self._extract_raw_answer(),
options=self._extract_bullet_list_options()
)
case QuestionType.TEXTFÄLT:
data = self._parse_text_field()
case QuestionType.SIFFERFÄLT:
data = self._create_question(answer=self._extract_raw_answer())
case QuestionType.HOTSPOT:
data = self._parse_hotspot()
case QuestionType.SAMMANSATT:
data = self._create_question(answer=self._extract_answer_lines())
case QuestionType.DND_TEXT:
data = self._create_question(answer=self._extract_answer_lines())
case QuestionType.DND_BILD:
data = self._create_question(answer=self._extract_answer_lines())
case QuestionType.SANT_FALSKT:
data = self._create_question(answer=self._extract_answer_pairs())
case _:
raise ValueError(f"Unsupported question type: {self.type}")
data.num_questions = self.num_questions
data.sub_questions = self._extract_sub_questions(data)
data.is_complete = self._check_completeness(data)
return data
def _check_completeness(self, data: QuestionData) -> bool:
"""Verify if the answer is complete (no TODOs, matches sub-question count)."""
content = self._extract_raw_answer()
if not content or "TODO" in content:
return False
# If we have sub-questions, ensure we have enough answer lines/parts
if data.num_questions > 1:
if isinstance(data.answer, list):
if data.type in [QuestionType.MCQ, QuestionType.SCQ]:
return len(data.answer) > 0
return len(data.answer) >= data.num_questions
else:
return False
return True
def _count_sub_questions(self) -> int:
"""Count sub-questions like a), b), c) or 1), 2) in the question text."""
md_content = self.parsed.raw_content
# Count lettered sub-questions: a), b), c)...
letters = SUB_QUESTION_LETTER_RE.findall(md_content)
if letters:
unique_letters = sorted(list(set(letters)))
if "a" in unique_letters:
max_letter = max(unique_letters)
return ord(max_letter) - ord("a") + 1
# Count numbered sub-questions: 1), 2), 3)...
numbers = SUB_QUESTION_NUMBER_RE.findall(md_content)
if numbers:
unique_numbers = sorted(list(set(map(int, numbers))))
if 1 in unique_numbers:
return max(unique_numbers)
return 1
def _create_question(
self,
answer: Any,
options: list[str] = None,
answer_image: str | None = None
) -> QuestionData:
"""Create a QuestionData object with common fields pre-populated."""
return QuestionData(
type=self.type,
question=self.question,
answer=answer,
options=options or [],
image=self.image,
answer_image=answer_image,
instruction=self.instruction,
metadata=self.metadata
)
# === Extraction Helpers ===
def _extract_type(self) -> QuestionType:
tags = self.metadata.get("tags", [])
for tag in tags:
if tag.startswith("frågetyp/"):
type_str = tag.split("/", 1)[1]
try:
return QuestionType(type_str)
except ValueError:
continue
return QuestionType.MCQ # Default
def _extract_question_text(self) -> str:
texts = []
for node in self.nodes:
if node.type == "paragraph":
text = node.text.strip()
# Skip instructions
if text.startswith("Välj") and "alternativ" in text:
continue
# If paragraph contains a sub-question marker, stop there
# We use a more liberal search here because mistune might have joined lines
first_marker = FIELD_MARKER_RE.search(text)
if first_marker:
text = text[:first_marker.start()].strip()
if text:
# Only add if it doesn't look like an instruction we already skipped
if not (text.startswith("Välj") and "alternativ" in text):
texts.append(text)
break # Stop collecting intro text once we hit a sub-question
# Clean and collect
text = EMBED_RE.sub("", text).strip()
text = text.replace("**", "")
if text:
texts.append(text)
return "\n".join(texts)
def _extract_instruction(self) -> str | None:
for node in self.nodes:
if node.type == "paragraph":
text = node.text.strip()
if "Välj" in text and "alternativ" in text:
return text.replace("**", "")
return None
def _extract_image(self) -> str | None:
for node in self.nodes:
# Check for direct embed nodes
if node.type == "embed":
return f"![[{node.attrs['filename']}]]"
# Check inside paragraphs/lists for inline embeds
if node.type in ["paragraph", "list"]:
for child in node.children:
if child.type == "embed":
return f"![[{child.attrs['filename']}]]"
if node.raw:
match = IMAGE_RE.search(node.raw)
if match:
return f"![[{match.group(1)}]]"
return None
def _extract_sub_questions(self, data: QuestionData) -> list[SubQuestion]:
# Only split the text BEFORE the spoiler block to avoid misidentifying markers in answers
full_raw = self.parsed.raw_content
parts = full_raw.split("```", 1)
question_portion = parts[0]
# Split by sub-question markers at the start of lines: a), b) or 1), 2)
segments = SUB_QUESTION_SPLIT_RE.split(question_portion)[1:]
sub_questions = []
# segments will be [id1, text1, id2, text2, ...]
for i in range(0, len(segments), 2):
q_id = segments[i]
q_full_text = segments[i+1].strip()
# Extract options if any (for select fields)
options = self._extract_select_options(q_full_text)
# Clean text (remove point markers like (0.5 p) and select patterns)
clean_text = SELECT_RANGE_RE.sub("", q_full_text)
clean_text = SELECT_LETTER_RANGE_RE.sub("", clean_text)
clean_text = SELECT_LIST_RE.sub("", clean_text)
clean_text = POINTS_RE.sub("", clean_text).strip()
# Extract answer for this part
answer = None
if isinstance(data.answer, list) and i//2 < len(data.answer):
answer = data.answer[i//2]
elif isinstance(data.answer, str):
lines = [l.strip() for l in data.answer.split("\n") if l.strip()]
if i//2 < len(lines):
answer = lines[i//2]
elif data.num_questions == 1:
answer = data.answer
sub_questions.append(SubQuestion(
id=q_id,
text=clean_text,
answer=answer,
options=options
))
return sub_questions
def _extract_select_options(self, text: str) -> list[str] | None:
"""Extract options from patterns like (1..10), (A..D), or (A, B, C)."""
# Numerical range (1..10)
match = SELECT_RANGE_RE.search(text)
if match:
start, end = map(int, match.groups())
return [str(x) for x in range(start, end + 1)]
# Letter range (A..H)
match = SELECT_LETTER_RANGE_RE.search(text)
if match:
start, end = match.groups()
return [chr(x) for x in range(ord(start), ord(end) + 1)]
# Comma-separated list (A, B, C)
match = SELECT_LIST_RE.search(text)
if match:
items = match.group(1).split(",")
return [item.strip() for item in items]
return None
def _extract_lettered_options(self) -> list[str]:
options = []
for node in self.nodes:
if node.type == "list":
for item in node.children:
item_text = item.text.strip()
if OPTION_LETTER_RE.match(item_text):
options.append(item_text)
return options
def _extract_bullet_list_options(self) -> list[str]:
options = []
for node in self.nodes:
if node.type == "list":
for item in node.children:
options.append(item.text.strip())
return options
def _extract_raw_answer(self) -> str:
for node in self.nodes:
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
return node.raw.strip()
return ""
def _extract_answer_letters(self) -> list[str]:
content = self._extract_raw_answer()
if not content or content == "TODO":
return []
return ANSWER_LETTER_RE.findall(content)
def _extract_answer_lines(self) -> list[str]:
content = self._extract_raw_answer()
if not content or content == "TODO":
return []
return [line.strip() for line in content.split("\n") if line.strip()]
def _extract_answer_pairs(self) -> list[list[str]]:
lines = self._extract_answer_lines()
pairs = []
for line in lines:
if ":" in line:
key, value = line.split(":", 1)
pairs.append([key.strip(), value.strip()])
return pairs
# === Question Type Handlers ===
def _parse_choice_question(self) -> QuestionData:
answer_letters = self._extract_answer_letters()
if self.type == QuestionType.MCQ:
answer = answer_letters
else:
answer = answer_letters[0] if answer_letters else ""
return self._create_question(
answer=answer,
options=self._extract_lettered_options()
)
def _parse_text_field(self) -> QuestionData:
lines = self._extract_answer_lines()
return self._create_question(
answer=lines if len(lines) > 1 else (lines[0] if lines else "")
)
def _parse_hotspot(self) -> QuestionData:
content = self._extract_raw_answer()
answer_image = None
match = IMAGE_RE.search(content)
if match:
answer_image = f"![[{match.group(1)}]]"
answer_text = EMBED_RE.sub("", content).strip()
else:
answer_text = content
return self._create_question(
answer=answer_text,
answer_image=answer_image
)


@@ -0,0 +1,150 @@
import threading
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileSystemEvent
from django.conf import settings
from quiz.utils.importer import import_question_file, delete_question_by_path, ImportStats
class QuestionFileHandler(FileSystemEventHandler):
"""Handle file system events for question markdown files with mtime checking"""
def __init__(self, base_path: Path, watch_path: Path):
super().__init__()
self.base_path = base_path
self.watch_path = watch_path
self.pending_events = {}
self.debounce_seconds = 2
self.lock = threading.Lock()
def _debounced_import(self, file_path: Path, event_type: str):
"""Import a file once its debounce timer fires, checking mtime for actual changes"""
# The threading.Timer in _handle_file_change already provides the
# debounce delay, so no extra sleep is needed here.
with self.lock:
if file_path in self.pending_events:
del self.pending_events[file_path]
if not file_path.exists():
return
# Import with mtime checking (force=False means only import if changed)
stats = ImportStats()
result = import_question_file(file_path, self.watch_path, stats, force=False)
# Provide feedback based on result
if result == 'imported':
print(f"\n[Auto-import] ✓ Created: {file_path.name}")
elif result == 'updated':
print(f"\n[Auto-import] ✓ Updated: {file_path.name}")
elif result == 'skipped_unchanged':
# File hasn't actually changed (same mtime), no output
pass
elif result == 'skipped_todo':
print(f"\n[Auto-import] ⊘ Skipped: {file_path.name} (TODO answer)")
elif result == 'skipped_not_mcq':
# Silently skip non-MCQ files
pass
elif result == 'error':
print(f"\n[Auto-import] ✗ Error: {file_path.name}")
def _handle_file_change(self, file_path: Path, event_type: str = 'modified'):
"""Handle file creation or modification with debouncing"""
if not file_path.suffix == '.md':
return
with self.lock:
# Cancel pending import if exists
if file_path in self.pending_events:
self.pending_events[file_path].cancel()
# Schedule new import
timer = threading.Timer(
self.debounce_seconds,
self._debounced_import,
args=[file_path, event_type]
)
self.pending_events[file_path] = timer
timer.start()
def on_created(self, event: FileSystemEvent):
"""Handle file creation"""
if not event.is_directory:
self._handle_file_change(Path(event.src_path), 'created')
def on_modified(self, event: FileSystemEvent):
"""Handle file modification"""
if not event.is_directory:
self._handle_file_change(Path(event.src_path), 'modified')
def on_deleted(self, event: FileSystemEvent):
"""Handle file deletion"""
if not event.is_directory and event.src_path.endswith('.md'):
file_path = Path(event.src_path)
delete_question_by_path(file_path)
class QuestionWatcher:
"""Watch for changes in question markdown files and auto-import"""
def __init__(self, watch_path: Path, base_path: Path = None):
self.watch_path = watch_path
self.base_path = base_path or watch_path
self.observer = None
self.running = False
def start(self):
"""Start watching for file changes"""
if self.running:
return
self.observer = Observer()
event_handler = QuestionFileHandler(self.base_path, self.watch_path)
self.observer.schedule(event_handler, str(self.watch_path), recursive=True)
self.observer.start()
self.running = True
print(f"[QuestionWatcher] Started watching: {self.watch_path}")
def stop(self):
"""Stop watching for file changes"""
if self.observer and self.running:
self.observer.stop()
self.observer.join()
self.running = False
print("[QuestionWatcher] Stopped")
def start_watcher_thread():
"""Start the question watcher in a background thread"""
from quiz.utils.importer import import_questions
def run_watcher():
# Get watch path from settings
watch_path_str = getattr(settings, 'QUESTION_WATCH_PATH', 'content/Anatomi & Histologi 2/Gamla tentor')
watch_path = settings.BASE_DIR.parent / watch_path_str
if not watch_path.exists():
print(f"[QuestionWatcher] Warning: Watch path does not exist: {watch_path}")
return
# Initial import with mtime checking (force=False to only import changed files)
print("\n[QuestionWatcher] Checking for changes...")
stats = import_questions(watch_path, watch_path, force=False)
# Only show stats if there were changes
output = stats.format_output(show_if_no_changes=False)
if output:
print(output)
else:
print(f"[QuestionWatcher] ✓ All files up to date")
# Start watching for changes
watcher = QuestionWatcher(watch_path, watch_path)
watcher.start()
# Start in daemon thread so it doesn't block shutdown
thread = threading.Thread(target=run_watcher, name="QuestionWatcher", daemon=True)
thread.start()
print("[QuestionWatcher] Background thread started")