vault backup: 2025-12-26 02:09:22
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m29s
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m29s
This commit is contained in:
0
stroma/quiz/utils/__init__.py
Normal file
0
stroma/quiz/utils/__init__.py
Normal file
527
stroma/quiz/utils/importer.py
Normal file
527
stroma/quiz/utils/importer.py
Normal file
@@ -0,0 +1,527 @@
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
import django.db.utils
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from quiz.models import Course, Exam, Question, Option
|
||||
from quiz.utils.question_parser import parse_question_from_content, Node
|
||||
|
||||
|
||||
class ImportStats:
    """Accumulate statistics for a question-import run, broken down by exam folder.

    All counters are plain public attributes that the importer increments while
    walking the content tree. ``by_folder`` maps a folder name to a dict with
    ``'total'``/``'mcq'``/``'answered'``/``'todo'`` counters, created lazily.
    """

    def __init__(self):
        # Run-wide counters.
        self.total_files = 0
        self.mcq_questions = 0
        self.non_mcq_skipped = 0
        self.questions_with_answers = 0
        self.questions_with_todo = 0
        self.created = 0
        self.updated = 0
        self.errors = 0
        # Per-folder counters, materialized on first access.
        self.by_folder = defaultdict(lambda: dict(total=0, mcq=0, answered=0, todo=0))

    def has_changes(self) -> bool:
        """Return True when the run created, updated, or errored on anything."""
        return bool(self.created or self.updated or self.errors)

    def format_output(self, show_if_no_changes: bool = True) -> str:
        """
        Render the statistics as a human-readable multi-line report.

        Args:
            show_if_no_changes: If False, returns empty string when no changes
        """
        if not (show_if_no_changes or self.has_changes()):
            return ""

        rule = "=" * 70
        report = [
            "\n" + rule,
            "QUESTION IMPORT STATISTICS",
            rule,
            f"Total .md files found: {self.total_files}",
            f"MCQ questions found: {self.mcq_questions}",
            f"Non-MCQ skipped: {self.non_mcq_skipped}",
            f"Questions with answers: {self.questions_with_answers}",
            f"Questions with TODO: {self.questions_with_todo}",
            f"Created in database: {self.created}",
            f"Updated in database: {self.updated}",
        ]
        if self.errors > 0:
            report.append(f"Errors: {self.errors}")

        if self.mcq_questions > 0:
            # Share of MCQ-style questions that already have an answer recorded.
            completion_pct = self.questions_with_answers / self.mcq_questions * 100
            report.append(f"Overall completion: {completion_pct:.1f}%")

        thin_rule = "-" * 70
        report.append("\n" + thin_rule)
        report.append("COMPLETION BY EXAM FOLDER")
        report.append(thin_rule)

        # Deterministic, alphabetical folder listing.
        for folder, counts in sorted(self.by_folder.items()):
            if counts['mcq'] > 0:
                pct = counts['answered'] / counts['mcq'] * 100
                report.append(f"{folder:20} {counts['answered']:3}/{counts['mcq']:3} MCQ ({pct:5.1f}%)")

        report.append(rule + "\n")
        return "\n".join(report)
|
||||
|
||||
|
||||
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
    """
    Parse a markdown file and extract question data using the new question_parser.

    Args:
        file_path: Path of the source file. Not read here (the caller supplies
            the text in *content*); kept in the signature for callers/tests.
        content: Full markdown text of the question, frontmatter included.

    Returns:
        (is_mcq, question_data) where question_data contains:
        - text: question text
        - options: list of (letter, text) tuples
        - correct_answer: the correct answer letter(s)
        - has_answer: whether it has an answer (not TODO)
        - tags: list of tag strings
        - question_type: type of question (mcq, scq, matching, etc.)

        The first element is False (with an empty dict) when the file carries
        no "frågetyp/" tag, i.e. is not a question at all.
    """
    # Parse from content string (works for both test cases and real files)
    parsed = parse_question_from_content(content)

    # Extract metadata
    metadata = parsed.metadata
    tags = metadata.get('tags', [])

    # Check for question type in tags.  The presence of any "frågetyp/" tag
    # marks the file as a question; the suffix selects the concrete type.
    question_type = None
    is_question = False

    for tag in tags:
        if 'frågetyp/' in tag:
            is_question = True
            if 'frågetyp/mcq' in tag:
                question_type = 'mcq'
            elif 'frågetyp/scq' in tag:
                question_type = 'scq'
            elif 'frågetyp/matching' in tag:
                question_type = 'matching'
            elif 'frågetyp/textalternativ' in tag:
                question_type = 'textalternativ'
            elif 'frågetyp/textfält' in tag:
                question_type = 'textfält'

    if not is_question:
        return False, {}

    # Handle matching questions separately
    if question_type == 'matching':
        return parse_matching_question_from_nodes(parsed.nodes, tags)

    # Extract question text from first paragraph (skip images and special instructions)
    question_text = None
    for node in parsed.nodes:
        if node.type != "paragraph":
            continue
        text = node.text.strip()
        # Skip empty paragraphs
        if not text:
            continue

        # Remove inline images from text first
        text = re.sub(r'!\[\[.*?\]\]', '', text).strip()

        # Skip if paragraph was only an image reference
        if not text:
            continue

        # Skip "Välj X alternativ" instructions
        if 'Välj' in text and 'alternativ' in text:
            continue

        # Clean up bold markers
        text = text.replace('**', '')
        if text:
            question_text = text
            break

    if not question_text:
        # Still a question file, but unusable: report it with text=None so the
        # caller can count it as invalid.
        return True, {
            'text': None,
            'options': [],
            'correct_answer': '',
            'has_answer': False,
            'question_type': question_type,
            'tags': tags
        }

    # Extract options from list nodes
    options_data = []

    for node in parsed.nodes:
        if node.type != "list":
            continue
        for item in node.children:
            # Get the text of the list item
            if item.type != "list_item":
                continue
            item_text = item.text.strip()

            # Match "A: text" or just "A"
            match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
            if match:
                letter = match.group(1)
                text = match.group(2).strip()
                options_data.append((letter, text))
            elif re.match(r'^([A-Z])$', item_text):
                letter = item_text
                options_data.append((letter, ''))
            elif question_type in ['textalternativ', 'textfält']:
                # For text-based questions, use incrementing letters
                if not re.match(r'^[a-z]\)', item_text):  # Skip sub-question markers
                    letter = chr(ord('A') + len(options_data))
                    options_data.append((letter, item_text))

    # For text-based questions, options are optional
    if not options_data:
        # NOTE(review): this placeholder also fires for option-less MCQ/SCQ
        # files, bypassing the <2-options check below — confirm intended.
        options_data = [('A', '')]
    elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
        # A choice question with a single option cannot be answered.
        return True, {
            'text': question_text,
            'options': options_data,
            'correct_answer': '',
            'has_answer': False,
            'question_type': question_type,
            'tags': tags
        }

    # Extract answer from spoiler block (first ```spoiler-block: fence wins)
    correct_answer = None
    has_answer = False

    for node in parsed.nodes:
        if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
            answer_text = node.raw.strip()

            # Check for TODO
            if 'TODO' in answer_text.upper():
                has_answer = False
            else:
                has_answer = True

                # For MCQ/SCQ: Extract capital letters
                if question_type in ['mcq', 'scq']:
                    # "A och D" / "B, C" both normalize to sorted "A,D" / "B,C".
                    letters = re.findall(r'\b([A-Z])\b', answer_text)
                    if letters:
                        correct_answer = ','.join(sorted(set(letters)))
                else:
                    # For text-based questions: Store the full answer text
                    correct_answer = answer_text[:200]  # Limit to 200 chars for database field

            break

    return True, {
        'text': question_text,
        'options': options_data,
        'correct_answer': correct_answer,
        'has_answer': has_answer,
        'question_type': question_type,
        'tags': tags
    }
||||
|
||||
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
    """
    Parse matching question from parsed nodes.

    Expected format:
    - Two consecutive bullet lists
    - First list = left column items (rows)
    - Second list = top row items (columns)
    - Answer format: "LeftItem: TopItem" pairs

    Args:
        nodes: Top-level AST nodes of the question body.
        tags: Tag strings from the file's frontmatter (passed through).

    Returns:
        (is_matching, question_data) — the first element is always True; the
        dict contains text, left_items, top_items, correct_pairs (index pairs),
        has_answer, question_type='matching', and tags.
    """
    # Extract question text: first paragraph that is not only an image embed.
    question_text = None
    for node in nodes:
        if node.type == "paragraph":
            text = node.text.strip()
            # Remove inline images
            text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
            # Skip if empty after removing images
            if not text:
                continue
            question_text = text.replace('**', '')
            break

    if not question_text:
        # Unusable file: signal with text=None so the caller counts it invalid.
        return True, {
            'text': None,
            'left_items': [],
            'top_items': [],
            'correct_pairs': [],
            'has_answer': False,
            'question_type': 'matching',
            'tags': tags
        }

    # Extract two consecutive lists
    left_items = []
    top_items = []
    list_nodes = [node for node in nodes if node.type == "list"]

    if len(list_nodes) >= 2:
        # First list = left items
        for item in list_nodes[0].children:
            if item.type == "list_item":
                left_items.append(item.text.strip())

        # Second list = top items
        for item in list_nodes[1].children:
            if item.type == "list_item":
                top_items.append(item.text.strip())

    # Parse answer from spoiler block
    correct_pairs = []
    has_answer = False

    for node in nodes:
        if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
            answer_text = node.raw.strip()

            # Check for TODO
            if 'TODO' in answer_text.upper():
                has_answer = False
                break
            has_answer = True
            # Parse "Item: Match" format
            answer_lines = answer_text.split('\n')
            for line in answer_lines:
                line = line.strip()
                if ':' not in line:
                    continue
                left_part, top_part = line.split(':', 1)
                left_part = left_part.strip()
                top_part = top_part.strip()

                # Find indices by fuzzy containment match in either direction,
                # so minor wording differences between list and answer still pair up.
                left_idx = None
                top_idx = None

                for idx, item in enumerate(left_items):
                    if left_part.lower() in item.lower() or item.lower() in left_part.lower():
                        left_idx = idx
                        break

                for idx, item in enumerate(top_items):
                    if top_part.lower() in item.lower() or item.lower() in top_part.lower():
                        top_idx = idx
                        break

                if left_idx is not None and top_idx is not None:
                    correct_pairs.append([left_idx, top_idx])
            # Only the first spoiler block is consulted.
            break

    return True, {
        'text': question_text,
        'left_items': left_items,
        'top_items': top_items,
        'correct_pairs': correct_pairs,
        'has_answer': has_answer,
        'question_type': 'matching',
        'tags': tags
    }
||||
|
||||
|
||||
def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, force: bool = False):
    """
    Import a single question file, checking modification time to avoid unnecessary updates.

    Args:
        file_path: Path to the question file
        base_path: Base path for relative calculations
        stats: ImportStats object to track statistics
        force: If True, import regardless of mtime (for initial import)

    Returns:
        A status string: 'imported', 'updated', 'error', or one of
        'skipped_unchanged' / 'skipped_not_mcq' / 'skipped_invalid' /
        'skipped_todo'.
    """
    try:
        # Get file modification time
        file_mtime = file_path.stat().st_mtime

        # Calculate path relative to project root
        project_root = settings.BASE_DIR.parent
        try:
            file_path_str = str(file_path.relative_to(project_root))
        except ValueError:
            # File lives outside the project root (e.g. pytest tmp dirs):
            # fall back to the import base path.
            file_path_str = str(file_path.relative_to(base_path))

        # Check if file has changed by comparing mtime
        if not force:
            try:
                existing_question = Question.objects.get(file_path=file_path_str)
                if existing_question.file_mtime and existing_question.file_mtime >= file_mtime:
                    # File hasn't changed, skip
                    return 'skipped_unchanged'
            except Question.DoesNotExist:
                pass  # New file, will import

        content = file_path.read_text(encoding='utf-8')
        is_mcq, question_data = parse_markdown_question(file_path, content)

        # Track folder stats
        relative_path = file_path.relative_to(base_path)
        folder_name = relative_path.parts[0] if len(relative_path.parts) > 1 else 'root'
        stats.by_folder[folder_name]['total'] += 1

        if not is_mcq:
            stats.non_mcq_skipped += 1
            return 'skipped_not_mcq'

        stats.mcq_questions += 1
        stats.by_folder[folder_name]['mcq'] += 1

        if not question_data or not question_data.get('text'):
            stats.non_mcq_skipped += 1
            return 'skipped_invalid'

        if not question_data['has_answer']:
            stats.questions_with_todo += 1
            stats.by_folder[folder_name]['todo'] += 1
            return 'skipped_todo'

        stats.questions_with_answers += 1
        stats.by_folder[folder_name]['answered'] += 1

        # Extract exam information from folder structure
        # Expected path: content/Anatomi & Histologi 2/Gamla tentor/2022-01-15/question.md
        exam = None
        relative_path = file_path.relative_to(base_path)
        path_parts = relative_path.parts

        # Try to extract exam date from folder structure
        if len(path_parts) >= 2:
            # Get the parent folder name which should be the exam date (e.g., "2022-01-15")
            exam_folder = path_parts[-2] if len(path_parts) > 1 else None

            # Try to parse as date
            if exam_folder and '-' in exam_folder:
                try:
                    exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()

                    # Get or create course (default to "Anatomi & Histologi 2")
                    # Extract course name from path if available
                    course_name = "Anatomi & Histologi 2"
                    if len(path_parts) >= 3 and 'Anatomi' in ' '.join(path_parts):
                        # Try to find course name in path
                        for part in path_parts:
                            if 'Anatomi' in part or 'Histologi' in part:
                                course_name = part
                                break

                    course, _ = Course.objects.get_or_create(
                        name=course_name,
                        defaults={'code': 'AH2'}
                    )

                    # Get or create exam
                    exam, _ = Exam.objects.get_or_create(
                        course=course,
                        date=exam_date,
                        defaults={
                            'name': exam_folder,
                            'folder_path': '/'.join(path_parts[:-1])
                        }
                    )
                except (ValueError, ImportError):
                    # NOTE(review): nothing in this block imports, so
                    # ImportError looks unreachable; ValueError from
                    # strptime seems to be the intended case — confirm.
                    pass  # If date parsing fails, exam remains None

        # Import to database with mtime tracking
        # Prepare defaults dict
        defaults = {
            'exam': exam,
            'text': question_data['text'],
            'correct_answer': question_data.get('correct_answer', ''),
            'file_mtime': file_mtime,
            'question_type': question_data.get('question_type', 'mcq'),
        }

        # Add matching_data if it's a matching question
        if question_data.get('question_type') == 'matching':
            defaults['matching_data'] = {
                'left_items': question_data.get('left_items', []),
                'top_items': question_data.get('top_items', []),
                'correct_pairs': question_data.get('correct_pairs', [])
            }

        # file_path is the natural key: re-imports update in place.
        question, created = Question.objects.update_or_create(
            file_path=file_path_str,
            defaults=defaults
        )

        if created:
            stats.created += 1
        else:
            stats.updated += 1

        # Update tags (full replace: clear then re-add from frontmatter)
        from django.utils.text import slugify
        from quiz.models import Tag

        question.tags.clear()
        for tag_name in question_data.get('tags', []):
            tag_slug = slugify(tag_name)
            tag, _ = Tag.objects.get_or_create(
                slug=tag_slug,
                defaults={'name': tag_name}
            )
            question.tags.add(tag)

        # Update options (only for MCQ/SCQ questions)
        if question_data.get('question_type') not in ['matching']:
            question.options.all().delete()
            # Deduplicate options by letter (keep first occurrence)
            seen_letters = set()
            for letter, text in question_data.get('options', []):
                if letter not in seen_letters:
                    Option.objects.create(question=question, letter=letter, text=text)
                    seen_letters.add(letter)

        return 'imported' if created else 'updated'

    except (OSError, ValueError, django.db.utils.Error) as e:
        # Count and report, but keep the batch import going.
        stats.errors += 1
        print(f"Error importing {file_path}: {e}")
        return 'error'
||||
|
||||
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
    """Recursively import every ``.md`` file found under *folder_path*.

    Args:
        folder_path: Directory tree to scan for question files.
        base_path: Root used for relative-path/folder statistics; defaults
            to *folder_path* itself.
        force: Passed through to import_question_file to bypass mtime checks.

    Returns:
        The ImportStats object populated by this run.
    """
    base = folder_path if base_path is None else base_path
    stats = ImportStats()

    for markdown_file in folder_path.rglob('*.md'):
        stats.total_files += 1
        import_question_file(markdown_file, base, stats, force=force)

    return stats
||||
|
||||
def delete_question_by_path(file_path: Path):
    """Delete the Question row that was imported from *file_path*.

    Returns:
        True when at least one row was deleted, False otherwise
        (including on database/OS errors, which are printed and swallowed).
    """
    try:
        # Stored file_path values are relative to the project root.
        root = settings.BASE_DIR.parent
        relative = str(file_path.relative_to(root))
        removed, _ = Question.objects.filter(file_path=relative).delete()
        if removed > 0:
            print(f"[Auto-delete] ✓ Deleted question: {file_path.name}")
        return removed > 0
    except (OSError, django.db.utils.Error) as e:
        print(f"[Auto-delete] ✗ Error deleting question {file_path}: {e}")
        return False
|
||||
38
stroma/quiz/utils/obsidian_embed_plugin.py
Normal file
38
stroma/quiz/utils/obsidian_embed_plugin.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# Public API of this module: the mistune plugin registration hook.
__all__ = ["obsidian_embed"]

# Obsidian embed syntax reference: https://help.obsidian.md/embeds
||||
|
||||
# Supported:
# ![[image-4.png|292x316]]   (width x height)
# ![[image-4.png|292]]       (width only)
# ![[image-4.png]]           (no size)
def parse_embed(inline, match, state):
    """mistune inline-parser callback for an Obsidian ``![[...]]`` embed.

    Splits an optional ``|suffix`` off the matched filename; when the suffix
    is numeric (``300`` or ``292x316``) it is stored as width/height attrs.
    A non-numeric suffix (Obsidian also allows ``|alt text``) is ignored
    instead of raising ValueError as the previous int() call did.

    Args:
        inline: mistune inline parser (unused).
        match: regex match with a named group ``filename``.
        state: mistune parsing state; receives the ``embed`` token.

    Returns:
        The match end offset, as mistune's register protocol requires.
    """
    filename = match.group("filename")
    attrs = {}
    if "|" in filename:
        filename, size = filename.split("|", 1)
    else:
        size = None
    attrs["filename"] = filename
    if size:
        if "x" in size:
            width, height = size.split("x", 1)
            # Only accept purely numeric dimensions; anything else is alt text.
            if width.isdigit():
                attrs["width"] = int(width)
            if height.isdigit():
                attrs["height"] = int(height)
        elif size.isdigit():
            attrs["width"] = int(size)
    state.append_token({"type": "embed", "attrs": attrs})
    return match.end()
||||
|
||||
|
||||
# Inline regex for Obsidian embeds: `![[filename]]`, optionally with a
# `|suffix` that parse_embed splits off the captured filename.
INLINE_EMBED_PATTERN = (
    r'!\[\[' # opening "![[" delimiter
    r'(?!\s)' # filename must not start with whitespace
    r'(?P<filename>.+?)' # content between `![[xx]]` (non-greedy)
    r'(?!\s)' # NOTE(review): lookahead placed before "]]" is always true here;
              # a lookbehind (?<!\s) was probably intended — confirm
    r'\]\]' # closing "]]" delimiter
)
||||
|
||||
|
||||
def obsidian_embed(md: "Markdown") -> None:
    """mistune plugin hook: register Obsidian ``![[...]]`` embed syntax.

    Registered before the built-in 'link' rule so `![[...]]` is not consumed
    as a regular image/link first.
    """
    md.inline.register('embed', INLINE_EMBED_PATTERN, parse_embed, before="link")
||||
89
stroma/quiz/utils/question_parser.py
Normal file
89
stroma/quiz/utils/question_parser.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import dataclasses
|
||||
import pathlib
|
||||
|
||||
import frontmatter
|
||||
import mistune
|
||||
|
||||
from quiz.utils.obsidian_embed_plugin import obsidian_embed
|
||||
|
||||
# Module-level Markdown parser producing an AST (token dicts) rather than
# HTML, with the Obsidian `![[...]]` embed plugin enabled.
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
||||
|
||||
|
||||
class Node:
    """Lightweight recursive wrapper around a mistune AST token dict.

    Exposes the token's ``type``, ``raw`` text, ``attrs`` dict and wrapped
    ``children``, plus a ``text`` property that flattens nested text leaves.
    """

    def __init__(self, token):
        self.type = token["type"]
        self.raw = token.get("raw", "")
        self.attrs = token.get("attrs", {})
        self.children = [Node(token=child) for child in token.get("children", [])]

    def __repr__(self) -> str:
        # Show only the fields that are non-empty, in a fixed order.
        fields = (("raw", self.raw), ("attrs", self.attrs), ("children", self.children))
        shown = [f"{label}={value!r}" for label, value in fields if value]
        # Token type rendered as a class-like name: block_text -> BlockText.
        pretty = self.type.replace("_", " ").title().replace(" ", "")
        return f"{pretty}({', '.join(shown)})"

    @property
    def text(self) -> str:
        """Concatenated raw text of this node's text leaves."""
        if self.type == "text":
            return self.raw
        return "".join(child.text for child in self.children)
|
||||
|
||||
@dataclasses.dataclass
class ParsedQuestion:
    """Result of parsing one question markdown file."""

    # Frontmatter key/value pairs (e.g. tags, date).
    metadata: dict = dataclasses.field(default_factory=dict)
    # Markdown body with the frontmatter stripped off.
    raw_content: str = ""
    # Top-level mistune AST nodes of the body, wrapped in Node.
    nodes: list[Node] = dataclasses.field(default_factory=list)
|
||||
|
||||
def parse_question(path: pathlib.Path):
    """Read the file at *path* and parse it as a question document."""
    return parse_question_from_content(path.read_text(encoding="utf-8"))
|
||||
|
||||
def parse_question_from_content(content_str: str):
    """Parse question from a content string instead of a file.

    Splits the YAML frontmatter off with python-frontmatter, renders the
    remaining body to a mistune AST, and wraps each top-level token in a Node.

    Args:
        content_str: Full file contents, frontmatter included.

    Returns:
        ParsedQuestion with metadata dict, frontmatter-stripped body, and
        wrapped AST nodes.
    """
    metadata, content = frontmatter.parse(content_str)
    tokens = markdown(content)
    question = ParsedQuestion(
        metadata=metadata,
        raw_content=content,
        nodes=[Node(token=token) for token in tokens],
    )
    return question
|
||||
|
||||
def main():
    """Ad-hoc debugging entry point: parse and dump real exam questions.

    Walks content/"Anatomi & Histologi 2"/"Gamla tentor" relative to the
    repository root and prints metadata for every short-named (<= 2 chars)
    question file.
    """
    # Repository root: four levels up from this file.
    root = pathlib.Path(__file__).parent.parent.parent.parent
    print(root)
    exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
    for file in sorted(exams.glob("*/*.md")):
        if len(file.stem) > 2:
            continue
        question = parse_question(file)
        print(question.metadata, repr(question.raw_content))
        # NOTE(review): this `continue` makes the node dump below dead code —
        # presumably a debugging toggle; remove it to print the AST walk.
        continue
        for node in question.nodes:
            match node.type:
                case "heading":
                    print("Heading:", repr(node.text))
                case "paragraph":
                    print("Paragraph:", repr(node.text))
                case "list":
                    print("List:")
                    for child in node.children:
                        print(" - List item:", repr(child.text))
                case "block_code" if node.attrs["info"] == "spoiler-block:":
                    print("Spoiler:", repr(node.raw.rstrip()))


if __name__ == "__main__":
    main()
|
||||
0
stroma/quiz/utils/tests/__init__.py
Normal file
0
stroma/quiz/utils/tests/__init__.py
Normal file
261
stroma/quiz/utils/tests/test_importer.py
Normal file
261
stroma/quiz/utils/tests/test_importer.py
Normal file
@@ -0,0 +1,261 @@
|
||||
import pytest
|
||||
import time
|
||||
from pathlib import Path
|
||||
from quiz.utils.importer import parse_markdown_question, import_question_file, ImportStats
|
||||
from quiz.models import Question, Option
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.import_tests
class TestMarkdownParsing:
    """Test parsing of various Obsidian markdown question formats.

    Each test feeds a literal Obsidian markdown document (frontmatter +
    body + ```spoiler-block: answer fence) through parse_markdown_question
    and checks the extracted question dict.
    """

    def test_parse_single_choice_question(self):
        """Test parsing standard single choice question (SCQ)"""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, anatomi]
date: 2022-01-15
---
What is the correct answer?

**Välj ett alternativ:**
- A: Wrong answer
- B: Correct answer
- C: Another wrong

```spoiler-block:
B
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['text'] == 'What is the correct answer?'
        assert data['correct_answer'] == 'B'
        assert data['has_answer'] is True
        assert data['question_type'] == 'scq'
        assert len(data['options']) == 3
        assert data['options'][0] == ('A', 'Wrong answer')
        assert data['options'][1] == ('B', 'Correct answer')

    def test_parse_multiple_choice_question(self):
        """Test parsing multiple choice question (MCQ) with 'och' separator"""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert 'Vilka av följande' in data['text']
        assert data['correct_answer'] == 'A,D'  # Normalized to comma-separated
        assert data['has_answer'] is True
        assert data['question_type'] == 'mcq'
        assert len(data['options']) == 4

    def test_parse_multiple_choice_comma_separated(self):
        """Test MCQ with comma-separated answer"""
        content = """---
tags: [frågetyp/mcq]
---
Select two options:

- A: Option A
- B: Option B
- C: Option C
- D: Option D

```spoiler-block:
B, C
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        # Comma-separated answers normalize the same way as "och"-separated ones.
        assert data['correct_answer'] == 'B,C'
        assert data['has_answer'] is True

    def test_parse_matching_question(self):
        """Test parsing matching question (DND/Matching)"""
        content = """---
tags: [ah2, provfråga, frågetyp/matching, anatomi, öra]
date: 2023-05-31
---
**Matcha rätt funktion med rätt lob:**
(1p för alla rätt, inga delpoäng)
- Smak
- Syn
- Somatosensorik
- Motorik
- Hörsel

**Alternativ:**

- Lobus frontalis
- Lobus Insularis
- Lobus temporalis
- Lobus parietalis
- Lobus occidentalis
```spoiler-block:
Smak: Lobus Insularis
Syn: Lobus occipitalis
Somatosensorik: Lobus parietalis
Motorik: Lobus frontalis
Hörsel: Lobus temporalis
```
"""
        is_matching, data = parse_markdown_question(Path("test.md"), content)

        assert is_matching is True
        assert data['question_type'] == 'matching'
        assert data['has_answer'] is True
        assert len(data['left_items']) == 5
        assert len(data['top_items']) == 5
        assert len(data['correct_pairs']) == 5

    def test_parse_textalternativ_question(self):
        """Test text alternative question type"""
        content = """---
tags: [frågetyp/textalternativ, öga, anatomi]
---
Svara på följande frågor:

a) Bokstaven B sitter i en lob, vilken?
- Lobus temporalis
- Lobus frontalis
- Lobus parietalis

b) Vilket funktionellt centra återfinns där?
- Syncentrum
- Motorcentrum
- Somatosensoriskt centrum

```spoiler-block:
a) Lobus parietalis
b) Somatosensoriskt centrum
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['question_type'] == 'textalternativ'
        assert data['has_answer'] is True
        # Text-based answers keep the full spoiler text (truncated to 200 chars).
        assert 'Lobus parietalis' in data['correct_answer']
        assert 'Somatosensoriskt centrum' in data['correct_answer']

    def test_parse_textfalt_question(self):
        """Test text field (fill-in) question type"""
        content = """---
tags: [frågetyp/textfält, öga]
---
**Fyll i rätt siffra!**

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['question_type'] == 'textfält'
        assert data['has_answer'] is True
        assert '7' in data['correct_answer']
        assert '6' in data['correct_answer']
||||
|
||||
@pytest.mark.django_db
@pytest.mark.import_tests
class TestQuestionImport:
    """Test actual import of questions to database.

    Uses pytest's tmp_path for question files and asserts on the Question /
    Option rows created by import_question_file.
    """

    def test_import_single_question(self, tmp_path):
        """Test importing a single question file"""
        question_file = tmp_path / "question1.md"
        question_file.write_text("""---
tags: [frågetyp/scq]
---
Test question?

- A: Correct
- B: Wrong

```spoiler-block:
A
```
""")

        stats = ImportStats()
        result = import_question_file(question_file, tmp_path, stats, force=True)

        assert result in ['imported', 'updated']
        assert stats.questions_with_answers == 1

        # Verify in database
        question = Question.objects.get(text='Test question?')
        assert question.correct_answer == 'A'
        assert question.options.count() == 2

    def test_mtime_tracking(self, tmp_path):
        """Test that file modification time is tracked"""
        question_file = tmp_path / "question4.md"
        question_file.write_text("""---
tags: [frågetyp/scq]
---
What is the correct answer?
```spoiler-block:
A
```
""")

        stats = ImportStats()
        import_question_file(question_file, tmp_path, stats, force=True)

        # The stored mtime must match the file's current stat value.
        question = Question.objects.get(text='What is the correct answer?')
        assert question.file_mtime == question_file.stat().st_mtime

    def test_update_existing_question(self, tmp_path):
        """Test updating an existing question"""
        question_file = tmp_path / "question5.md"

        # Initial import
        question_file.write_text("""---
tags: [frågetyp/scq]
---
Question to update?
```spoiler-block:
A
```
""")

        import_question_file(question_file, tmp_path, ImportStats(), force=True)

        # Update the file
        # (sleep so the rewritten file gets a strictly newer mtime)
        time.sleep(0.1)
        question_file.write_text("""---
tags: [frågetyp/scq]
---
Question to update?
```spoiler-block:
B
```
""")

        stats = ImportStats()
        result = import_question_file(question_file, tmp_path, stats, force=False)

        assert result == 'updated'
        assert Question.objects.get(text='Question to update?').correct_answer == 'B'
||||
537
stroma/quiz/utils/tests/test_question_parser.py
Normal file
537
stroma/quiz/utils/tests/test_question_parser.py
Normal file
@@ -0,0 +1,537 @@
|
||||
"""
|
||||
Comprehensive test suite for the question_parser module.
|
||||
|
||||
This test suite uses pytest's parametrize decorator to test multiple scenarios
|
||||
with minimal code duplication. It covers:
|
||||
|
||||
1. Node class:
|
||||
- Initialization with different token types
|
||||
- Attribute handling
|
||||
- Children node processing
|
||||
- String representation (__repr__)
|
||||
- Text extraction from nested structures
|
||||
|
||||
2. parse_question function:
|
||||
- Metadata parsing (tags, dates, etc.)
|
||||
- Raw content extraction
|
||||
- Different question types (MCQ, SCQ, text field, matching)
|
||||
- Questions with images
|
||||
- Edge cases (empty content, missing frontmatter)
|
||||
- Document structure preservation
|
||||
|
||||
3. ParsedQuestion dataclass:
|
||||
- Default values
|
||||
- Initialization with custom values
|
||||
|
||||
4. Real exam questions:
|
||||
- Parsing actual exam questions from the content directory
|
||||
- Validation of all short-named question files
|
||||
|
||||
Test execution:
|
||||
pytest tests/test_question_parser.py -v # Verbose output
|
||||
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
|
||||
pytest tests/test_question_parser.py --collect-only # List all tests
|
||||
"""
|
||||
|
||||
import pathlib
|
||||
import tempfile
|
||||
import pytest
|
||||
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
|
||||
|
||||
|
||||
@pytest.fixture
def temp_dir():
    """Yield a throwaway directory as a pathlib.Path, removed after the test."""
    with tempfile.TemporaryDirectory() as scratch:
        yield pathlib.Path(scratch)
|
||||
|
||||
@pytest.fixture
def create_question_file(temp_dir):
    """Factory fixture: write a UTF-8 markdown file into temp_dir and return its path."""
    def _write(filename: str, content: str) -> pathlib.Path:
        target = temp_dir / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _write
|
||||
|
||||
|
||||
class TestNode:
    """Test the Node class"""

    # Tokens mirror the dict shape produced by the markdown tokenizer:
    # a "type" key plus optional "raw", "attrs" and "children".
    @pytest.mark.parametrize("token,expected_type,expected_raw", [
        ({"type": "paragraph"}, "paragraph", ""),
        ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
        ({"type": "text", "raw": "Some text"}, "text", "Some text"),
        ({"type": "list"}, "list", ""),
    ])
    def test_node_initialization(self, token, expected_type, expected_raw):
        """Test Node initialization with different token types"""
        node = Node(token)
        assert node.type == expected_type
        # A missing "raw" key defaults to the empty string.
        assert node.raw == expected_raw

    @pytest.mark.parametrize("token,expected_attrs", [
        ({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
        ({"type": "paragraph"}, {}),
        ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
    ])
    def test_node_attributes(self, token, expected_attrs):
        """Test Node attributes handling"""
        node = Node(token)
        # A missing "attrs" key defaults to an empty dict.
        assert node.attrs == expected_attrs

    def test_node_children(self):
        """Test Node children handling"""
        token = {
            "type": "paragraph",
            "children": [
                {"type": "text", "raw": "Hello "},
                {"type": "text", "raw": "World"},
            ]
        }
        node = Node(token)
        # Child tokens are recursively wrapped as Node instances.
        assert len(node.children) == 2
        assert node.children[0].type == "text"
        assert node.children[0].raw == "Hello "
        assert node.children[1].type == "text"
        assert node.children[1].raw == "World"

    @pytest.mark.parametrize("token,expected_repr_contains", [
        ({"type": "text", "raw": "test"}, "Text(raw='test')"),
        ({"type": "paragraph"}, "Paragraph()"),
        ({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
    ])
    def test_node_repr(self, token, expected_repr_contains):
        """Test Node __repr__ method"""
        # repr uses a CamelCased type name and only shows non-empty fields.
        node = Node(token)
        assert repr(node) == expected_repr_contains

    @pytest.mark.parametrize("token,expected_text", [
        ({"type": "text", "raw": "Simple text"}, "Simple text"),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Hello "},
                    {"type": "text", "raw": "World"},
                ]
            },
            "Hello World"
        ),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Nested "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}]
                    },
                    {"type": "text", "raw": " text"},
                ]
            },
            "Nested bold text"
        ),
    ])
    def test_node_text_property(self, token, expected_text):
        """Test Node text property extraction"""
        # .text flattens the subtree, dropping formatting markers like strong.
        node = Node(token)
        assert node.text == expected_text
|
||||
|
||||
|
||||
class TestParseQuestion:
    """Test the parse_question function"""

    # Frontmatter tags are accepted both as an inline YAML list and as a
    # block-style list; both must parse to the same Python list.
    @pytest.mark.parametrize("content,expected_tags", [
        (
            """---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
            ["ah2", "provfråga", "frågetyp/mcq"]
        ),
        (
            """---
tags:
- ah2
- provfråga
- frågetyp/scq
date: 2023-05-31
---
Question content""",
            ["ah2", "provfråga", "frågetyp/scq"]
        ),
    ])
    def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
        """Test parsing of metadata tags in different formats"""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.metadata["tags"] == expected_tags

    @pytest.mark.parametrize("content,expected_date", [
        (
            """---
tags: [ah2]
date: 2022-01-15
---
Content""",
            "2022-01-15"
        ),
        (
            """---
tags: [ah2]
date: 2023-05-31
---
Content""",
            "2023-05-31"
        ),
    ])
    def test_parse_metadata_date(self, create_question_file, content, expected_date):
        """Test parsing of metadata date"""
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        # str() comparison works whether the parser yields a date or a string.
        assert str(question.metadata["date"]) == expected_date

    @pytest.mark.parametrize("content,expected_raw", [
        (
            """---
tags: [ah2]
---
Simple question""",
            "Simple question"
        ),
        (
            """---
tags: [ah2]
---
Question with **bold** text""",
            "Question with **bold** text"
        ),
    ])
    def test_parse_raw_content(self, create_question_file, content, expected_raw):
        """Test parsing of raw content"""
        # raw_content is the markdown body with the frontmatter stripped.
        file_path = create_question_file("test.md", content)
        question = parse_question(file_path)
        assert question.raw_content.strip() == expected_raw

    def test_parse_mcq_question(self, create_question_file):
        """Test parsing a complete MCQ question"""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        file_path = create_question_file("mcq.md", content)
        question = parse_question(file_path)

        assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
        assert len(question.nodes) > 0

        # Find paragraph nodes
        paragraphs = [n for n in question.nodes if n.type == "paragraph"]
        assert len(paragraphs) > 0

        # Find list nodes
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

        # Find spoiler block (the fenced code block holding the answer)
        code_blocks = [n for n in question.nodes if n.type == "block_code"]
        assert len(code_blocks) > 0
        spoiler = code_blocks[0]
        assert spoiler.attrs.get("info") == "spoiler-block:"
        assert "A och D" in spoiler.raw

    def test_parse_scq_question(self, create_question_file):
        """Test parsing a single choice question"""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?
- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons

```spoiler-block:
A
```
"""
        file_path = create_question_file("scq.md", content)
        question = parse_question(file_path)

        assert "frågetyp/scq" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_text_field_question(self, create_question_file):
        """Test parsing a text field question"""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]
**Fyll i rätt siffra!**

(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        file_path = create_question_file("textfield.md", content)
        question = parse_question(file_path)

        assert "frågetyp/textfält" in question.metadata["tags"]
        assert len(question.nodes) > 0

    def test_parse_matching_question(self, create_question_file):
        """Test parsing a matching question"""
        content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)

- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller

- JA, finn i CNS
- NEJ, finns inte i CNS

```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
        file_path = create_question_file("matching.md", content)
        question = parse_question(file_path)

        assert "frågetyp/matching" in question.metadata["tags"]
        lists = [n for n in question.nodes if n.type == "list"]
        assert len(lists) > 0

    def test_parse_question_with_image(self, create_question_file):
        """Test parsing a question with embedded images"""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]
**Fyll i rätt siffra !**

(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)

```spoiler-block:
a) 7
b) 18
```
"""
        file_path = create_question_file("image_q.md", content)
        question = parse_question(file_path)

        assert "bild" in question.metadata["tags"]
        assert "![[image-4.png" in question.raw_content
        # The Obsidian embed becomes an "embed" child with parsed dimensions.
        embed = question.nodes[0].children[0]
        assert embed.type == "embed"
        assert embed.attrs == {
            "filename": "image-4.png",
            "width": 292,
            "height": 316
        }

    @pytest.mark.parametrize("invalid_content", [
        "",  # Empty content
        "No frontmatter",  # No frontmatter
        "---\n---\n",  # Empty frontmatter
    ])
    def test_parse_edge_cases(self, create_question_file, invalid_content):
        """Test parsing edge cases"""
        # Malformed input must not raise; a ParsedQuestion is always returned.
        file_path = create_question_file("edge.md", invalid_content)
        question = parse_question(file_path)
        assert isinstance(question, ParsedQuestion)

    def test_parse_question_preserves_structure(self, create_question_file):
        """Test that parsing preserves the document structure"""
        content = """---
tags: [ah2]
---
# Heading

Paragraph text

- List item 1
- List item 2

```spoiler-block:
Answer
```
"""
        file_path = create_question_file("structure.md", content)
        question = parse_question(file_path)

        node_types = [n.type for n in question.nodes]
        assert "heading" in node_types
        assert "paragraph" in node_types
        assert "list" in node_types
        assert "block_code" in node_types
|
||||
|
||||
|
||||
class TestParsedQuestionDataclass:
    """Tests for the ParsedQuestion dataclass container."""

    def test_parsed_question_defaults(self):
        """A bare ParsedQuestion starts with empty metadata, content and nodes."""
        parsed = ParsedQuestion()
        assert parsed.metadata == {}
        assert parsed.raw_content == ""
        assert parsed.nodes == []

    def test_parsed_question_initialization(self):
        """Explicitly supplied field values are stored unchanged."""
        meta = {"tags": ["test"], "date": "2022-01-15"}
        body = "Test content"
        node_list = [Node({"type": "paragraph"})]

        parsed = ParsedQuestion(metadata=meta, raw_content=body, nodes=node_list)

        assert parsed.metadata == meta
        assert parsed.raw_content == body
        assert parsed.nodes == node_list
|
||||
|
||||
|
||||
class TestRealQuestions:
    """Test parsing real questions from the exam files"""

    @pytest.fixture
    def exam_dir(self):
        """Get the real exam directory"""
        # Three levels up from this test file is the app root holding content/.
        root = pathlib.Path(__file__).parent.parent.parent
        exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
        if exam_path.exists():
            return exam_path
        # Not a failure: the content vault may be absent in CI checkouts.
        pytest.skip("Exam directory not found")

    @pytest.mark.parametrize("exam_date,question_num", [
        ("2022-01-15", "1"),
        ("2022-01-15", "2"),
        ("2022-01-15", "3"),
        ("2022-01-15", "4"),
        ("2022-06-01", "8"),
    ])
    def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
        """Test parsing real exam questions"""
        file_path = exam_dir / exam_date / f"{question_num}.md"
        if not file_path.exists():
            pytest.skip(f"Question file {file_path} not found")

        question = parse_question(file_path)

        # Verify metadata exists and has required fields
        assert "tags" in question.metadata
        assert isinstance(question.metadata["tags"], list)
        assert "ah2" in question.metadata["tags"]
        assert "provfråga" in question.metadata["tags"]

        # Verify content was parsed
        assert len(question.raw_content) > 0
        assert len(question.nodes) > 0

    def test_parse_all_short_named_questions(self, exam_dir):
        """Test parsing all questions with short filenames (1-2 chars)"""
        questions_found = 0

        # Only purely numeric stems like 1.md .. 99.md are exam questions.
        for file in sorted(exam_dir.glob("*/*.md")):
            if len(file.stem) <= 2 and file.stem.isdigit():
                question = parse_question(file)
                assert isinstance(question, ParsedQuestion)
                assert "tags" in question.metadata
                questions_found += 1

        # Ensure we found at least some questions
        assert questions_found > 0, "No exam questions found to test"
|
||||
|
||||
|
||||
class TestNodeTextExtraction:
    """Test text extraction from complex node structures"""

    @pytest.mark.parametrize("token,expected_text", [
        # Simple text
        ({"type": "text", "raw": "Hello"}, "Hello"),

        # Paragraph with multiple text children
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ]
            },
            "A B C"
        ),

        # Nested formatting: emphasis/strong wrappers contribute only their
        # inner text, with no markers added.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}]
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}]
                    },
                ]
            },
            "Normal italic bold"
        ),

        # Empty node
        ({"type": "paragraph", "children": []}, ""),
    ])
    def test_complex_text_extraction(self, token, expected_text):
        """Test text extraction from complex nested structures"""
        node = Node(token)
        assert node.text == expected_text
|
||||
|
||||
187
stroma/quiz/utils/tests/test_unified_parser.py
Normal file
187
stroma/quiz/utils/tests/test_unified_parser.py
Normal file
@@ -0,0 +1,187 @@
|
||||
import datetime
|
||||
from quiz.utils.unified_parser import UnifiedParser, QuestionType
|
||||
|
||||
def test_parse_mcq_question():
    """An MCQ file yields type MCQ, a letter-list answer, and full metadata."""
    content = """---
tags: [frågetyp/mcq, ah2]
date: 2024-03-21
---
Question?
- A: Yes
- B: No
- C: Maybe
- D: Never
```spoiler-block:
A och D
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.MCQ
    assert data.question == "Question?"
    # "A och D" in the spoiler block is split into the selected letters.
    assert data.answer == ["A", "D"]
    assert data.num_questions == 1
    assert data.is_complete is True
    assert data.options == ["A: Yes", "B: No", "C: Maybe", "D: Never"]
    assert data.metadata == {"tags": ["frågetyp/mcq", "ah2"], "date": datetime.date(2024, 3, 21)}
    assert not data.sub_questions
|
||||
|
||||
def test_parse_scq_question():
    """A single-choice file yields type SCQ and a single-letter answer."""
    content = """---
tags: [frågetyp/scq]
---
Pick one:
- A: One
- B: Two
```spoiler-block:
B
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.SCQ
    assert data.question == "Pick one:"
    # SCQ answers stay a plain string, unlike the MCQ letter list.
    assert data.answer == "B"
    assert data.num_questions == 1
    assert data.is_complete is True
    assert data.options == ["A: One", "B: Two"]
    assert not data.sub_questions
|
||||
|
||||
def test_parse_textfält_question():
    """A text-field file is split into lettered sub-questions with text answers."""
    content = """---
tags: [frågetyp/textfält]
---
Name these:
a) Part 1
b) Part 2
```spoiler-block:
a) Left
b) Right
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.TEXTFÄLT
    assert data.question == "Name these:"
    assert data.answer == ["a) Left", "b) Right"]
    assert data.num_questions == 2
    assert len(data.sub_questions) == 2
    assert data.sub_questions[0].id == "a"
    assert data.sub_questions[0].text == "Part 1"
    assert data.sub_questions[0].answer == "a) Left"
    # Free-text sub-questions carry no option list.
    assert data.sub_questions[0].options is None
|
||||
|
||||
def test_parse_matching_question():
    """A matching file pairs bullet items via 'left: right' spoiler lines."""
    content = """---
tags: [frågetyp/matching]
---
Match:
- 1
- 2
- A
- B
```spoiler-block:
1: A
2: B
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.MATCHING
    assert data.question == "Match:"
    # Each spoiler line becomes a [left, right] pair.
    assert data.answer == [["1", "A"], ["2", "B"]]
    assert data.num_questions == 1
    assert data.options == ["1", "2", "A", "B"]
    assert not data.sub_questions
|
||||
|
||||
def test_parse_question_with_image_and_instruction():
    """Instruction and image are extracted separately from the question text."""
    content = """---
tags: [frågetyp/scq]
---
**Välj ett alternativ:**

![[brain.png|300]]

What is this?
- A: Brain
- B: Heart
```spoiler-block:
A
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.SCQ
    # Neither the instruction nor the embed leaks into .question.
    assert data.question == "What is this?"
    assert data.instruction == "Välj ett alternativ:"
    # The |300 width suffix is dropped when the image is normalized.
    assert data.image == "![[brain.png]]"
    assert data.is_complete is True
|
||||
|
||||
def test_parse_field_question_with_ranges():
    """(1..10)-style ranges become per-sub-question numeric option lists."""
    content = """---
tags: [frågetyp/sifferfält]
---
Identify the structures:

a) Arachnoidea? (1..10)
(0.5 p)
b) Cortex cerebri (1..10)
(0.5 p)

```spoiler-block:
a) 7
b) 3
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.SIFFERFÄLT
    assert data.num_questions == 2
    assert len(data.sub_questions) == 2

    # Part A: range and point markers are stripped from the text.
    assert data.sub_questions[0].id == "a"
    assert data.sub_questions[0].text == "Arachnoidea?"
    assert data.sub_questions[0].options == [str(x) for x in range(1, 11)]
    assert data.sub_questions[0].answer == "a) 7"

    # Part B
    assert data.sub_questions[1].id == "b"
    assert data.sub_questions[1].text == "Cortex cerebri"
    assert data.sub_questions[1].options == [str(x) for x in range(1, 11)]
    assert data.sub_questions[1].answer == "b) 3"
|
||||
|
||||
def test_parse_field_question_with_list_options():
    """(A, B, C)-style inline lists become per-sub-question option lists."""
    content = """---
tags: [frågetyp/sifferfält]
---
a) First (A, B, C)
b) Second (1, 2, 3)

```spoiler-block:
a) A
b) 2
```"""
    data = UnifiedParser(content).parse()
    assert data.sub_questions[0].options == ["A", "B", "C"]
    assert data.sub_questions[1].options == ["1", "2", "3"]
|
||||
|
||||
def test_parse_hotspot_question():
    """A hotspot file yields an answer image plus the explanatory answer text."""
    content = """---
tags: [frågetyp/hotspot]
---
Klicka på hippocampus!

```spoiler-block:
![[brain_atlas.png]]
Det här är hippocampus.
```"""
    data = UnifiedParser(content).parse()
    assert data.type == QuestionType.HOTSPOT
    # The image inside the spoiler goes to answer_image, the rest to answer.
    assert data.answer == "Det här är hippocampus."
    assert data.answer_image == "![[brain_atlas.png]]"
    assert data.is_complete is True
|
||||
|
||||
def test_completeness_missing_sub_questions():
    """Fewer answers than sub-questions marks the question incomplete."""
    content = """---
tags: [frågetyp/textfält]
---
a) one
b) two
```spoiler-block:
a) found
```"""
    data = UnifiedParser(content).parse()
    assert data.num_questions == 2
    assert data.is_complete is False
    # Both sub-questions exist; only the answered one has an answer set.
    assert len(data.sub_questions) == 2
    assert data.sub_questions[0].answer == "a) found"
    assert data.sub_questions[1].answer is None
|
||||
465
stroma/quiz/utils/unified_parser.py
Normal file
465
stroma/quiz/utils/unified_parser.py
Normal file
@@ -0,0 +1,465 @@
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from quiz.utils.question_parser import Node, parse_question_from_content
|
||||
|
||||
# === REGEX PATTERNS ===
# Compiled once at import time; all parsing below reuses these.

# Matches Obsidian-style embeds like ![[image.png]] or ![[image.png|300]]
EMBED_RE = re.compile(
    r"!\[\["  # Start of embed
    r".*?"  # Content (filename and optional pipes)
    r"\]\]"  # End of embed
)

# Captures the filename from an Obsidian embed, ignoring dimensions
IMAGE_RE = re.compile(
    r"!\[\["  # Start of embed
    r"([^|\]]+)"  # Group 1: Filename (everything before | or ])
    r"(?:\|.*?)?"  # Optional dimension part starting with |
    r"\]\]"  # End of embed
)

# Matches lettered options at the start of a line, e.g., "A: Text" or "A. Text"
OPTION_LETTER_RE = re.compile(
    r"^([A-Z])"  # Group 1: Single uppercase letter at start
    r"[:\.]?"  # Optional colon or period
    r"\s*"  # Optional whitespace
    r"(.*)$"  # Group 2: The rest of the text
)

# Matches standalone uppercase letters used for answers, e.g., "A", "A och B"
ANSWER_LETTER_RE = re.compile(
    r"\b"  # Word boundary
    r"([A-Z])"  # Group 1: Single uppercase letter
    r"\b"  # Word boundary
)

# Matches sub-question markers like a), b) at the start of a line
SUB_QUESTION_LETTER_RE = re.compile(
    r"^\s*"  # Start of line and optional whitespace
    r"([a-z])"  # Group 1: Single lowercase letter
    r"\)"  # Closing parenthesis
, re.MULTILINE)

# Matches numbered sub-question markers like 1), 2) at the start of a line
SUB_QUESTION_NUMBER_RE = re.compile(
    r"^\s*"  # Start of line and optional whitespace
    r"(\d+)"  # Group 1: One or more digits
    r"\)"  # Closing parenthesis
, re.MULTILINE)

# Matches select range patterns like (1..10)
SELECT_RANGE_RE = re.compile(
    r"\("  # Opening parenthesis
    r"(\d+)"  # Group 1: Start number
    r"\.\."  # Range dots
    r"(\d+)"  # Group 2: End number
    r"\)"  # Closing parenthesis
)

# Matches letter range patterns like (A..H)
SELECT_LETTER_RANGE_RE = re.compile(
    r"\("  # Opening parenthesis
    r"([A-Z])"  # Group 1: Start letter
    r"\.\."  # Range dots
    r"([A-Z])"  # Group 2: End letter
    r"\)"  # Closing parenthesis
)

# Matches select list patterns like (A, B, C)
SELECT_LIST_RE = re.compile(
    r"\("  # Opening parenthesis
    r"("  # Group 1: The list content
    r"[^)]+"  # Anything but closing parenthesis
    r","  # At least one comma
    r"[^)]+"  # Anything but closing parenthesis
    r")"
    r"\)"  # Closing parenthesis
)

# Matches sub-question markers in mid-text (used for splitting intro text)
FIELD_MARKER_RE = re.compile(
    r"\b"  # Word boundary
    r"([a-z]|\d+)"  # Group 1: Letter or digit
    r"\)"  # Closing parenthesis
)

# Matches sub-question markers (a, b or 1, 2) at start of line for splitting
SUB_QUESTION_SPLIT_RE = re.compile(
    r"^\s*"  # Start of line and optional whitespace
    r"([a-z]|\d+)"  # Group 1: Single letter or one or more digits
    r"\)"  # Closing parenthesis
    r"\s*"  # Optional trailing whitespace
, re.MULTILINE)

# Matches point markers like (0.5 p) or (1 p)
POINTS_RE = re.compile(
    r"\("  # Opening parenthesis
    r"\d+"  # One or more digits
    r"(?:\.\d+)?"  # Optional decimal part
    r"\s*"  # Optional whitespace
    r"p"  # Literal 'p'
    r"\)"  # Closing parenthesis
)
|
||||
|
||||
|
||||
class QuestionType(Enum):
    """Question formats recognized from a ``frågetyp/<value>`` frontmatter tag."""
    MCQ = "mcq"
    SCQ = "scq"
    MATCHING = "matching"
    TEXTALTERNATIV = "textalternativ"
    TEXTFÄLT = "textfält"
    SIFFERFÄLT = "sifferfält"
    HOTSPOT = "hotspot"
    SAMMANSATT = "sammansatt"
    DND_TEXT = "dnd-text"
    DND_BILD = "dnd-bild"
    SANT_FALSKT = "sant-falskt"
|
||||
|
||||
|
||||
@dataclass
class SubQuestion:
    """One lettered/numbered part (a), b), 1), ...) of a multi-part question."""
    id: str  # 'a', 'b', etc.
    text: str  # Text for this part
    answer: Any = None  # Parsed answer for this part, None when unanswered
    options: list[str] | None = None  # None if text input
|
||||
|
||||
|
||||
@dataclass
class QuestionData:
    """Fully parsed question as produced by ``UnifiedParser.parse``."""
    type: QuestionType
    question: str
    answer: Any  # str | list[str] | list[list[str]]
    num_questions: int = 1  # Total sub-questions (a, b, c...)
    is_complete: bool = False  # True when the answer covers every sub-question
    options: list[str] = field(default_factory=list)
    image: str | None = None  # Normalized ![[filename]] embed, if any
    answer_image: str | None = None  # Image shown with the answer (hotspot)
    instruction: str | None = None  # e.g. "Välj ett alternativ:"
    metadata: dict = field(default_factory=dict)  # Frontmatter (tags, date, ...)
    sub_questions: list[SubQuestion] = field(default_factory=list)
|
||||
|
||||
|
||||
class UnifiedParser:
|
||||
    def __init__(self, content: str):
        """Tokenize *content* (markdown with YAML frontmatter) and pre-extract fields.

        Args:
            content: Raw markdown source of one question file.
        """
        self.content = content
        self.parsed = parse_question_from_content(content)
        self.metadata = self.parsed.metadata
        self.nodes = self.parsed.nodes

        # Pre-extract common fields shared by every question type
        self.type = self._extract_type()
        self.question = self._extract_question_text()
        self.instruction = self._extract_instruction()
        self.image = self._extract_image()
        self.num_questions = self._count_sub_questions()
|
||||
|
||||
    def parse(self) -> QuestionData:
        """Build the complete QuestionData for this question.

        Dispatches on the pre-extracted question type to pick an
        answer/options extraction strategy, then attaches sub-questions
        and the completeness flag.

        Raises:
            ValueError: If the question type has no parsing strategy.
        """
        match self.type:
            case QuestionType.MCQ | QuestionType.SCQ:
                data = self._parse_choice_question()
            case QuestionType.MATCHING:
                data = self._create_question(
                    answer=self._extract_answer_pairs(),
                    options=self._extract_bullet_list_options()
                )
            case QuestionType.TEXTALTERNATIV:
                data = self._create_question(
                    answer=self._extract_raw_answer(),
                    options=self._extract_bullet_list_options()
                )
            case QuestionType.TEXTFÄLT:
                data = self._parse_text_field()
            case QuestionType.SIFFERFÄLT:
                data = self._create_question(answer=self._extract_raw_answer())
            case QuestionType.HOTSPOT:
                data = self._parse_hotspot()
            case QuestionType.SAMMANSATT:
                data = self._create_question(answer=self._extract_answer_lines())
            case QuestionType.DND_TEXT:
                data = self._create_question(answer=self._extract_answer_lines())
            case QuestionType.DND_BILD:
                data = self._create_question(answer=self._extract_answer_lines())
            case QuestionType.SANT_FALSKT:
                data = self._create_question(answer=self._extract_answer_pairs())
            case _:
                raise ValueError(f"Unsupported question type: {self.type}")

        # Shared post-processing, regardless of type
        data.num_questions = self.num_questions
        data.sub_questions = self._extract_sub_questions(data)
        data.is_complete = self._check_completeness(data)
        return data
|
||||
|
||||
    def _check_completeness(self, data: QuestionData) -> bool:
        """Verify if the answer is complete (no TODOs, matches sub-question count)."""
        content = self._extract_raw_answer()
        # An empty spoiler block or a TODO marker means the answer is pending.
        if not content or "TODO" in content:
            return False

        # If we have sub-questions, ensure we have enough answer lines/parts
        if data.num_questions > 1:
            if isinstance(data.answer, list):
                # Choice questions may select fewer letters than there are
                # sub-question markers, so any non-empty selection counts.
                if data.type in [QuestionType.MCQ, QuestionType.SCQ]:
                    return len(data.answer) > 0
                return len(data.answer) >= data.num_questions
            else:
                # Multi-part question with a scalar answer cannot be complete.
                return False

        return True
|
||||
|
||||
def _count_sub_questions(self) -> int:
|
||||
"""Count sub-questions like a), b), c) or 1), 2) in the question text."""
|
||||
md_content = self.parsed.raw_content
|
||||
|
||||
# Count lettered sub-questions: a), b), c)...
|
||||
letters = SUB_QUESTION_LETTER_RE.findall(md_content)
|
||||
if letters:
|
||||
unique_letters = sorted(list(set(letters)))
|
||||
if "a" in unique_letters:
|
||||
max_letter = max(unique_letters)
|
||||
return ord(max_letter) - ord("a") + 1
|
||||
|
||||
# Count numbered sub-questions: 1), 2), 3)...
|
||||
numbers = SUB_QUESTION_NUMBER_RE.findall(md_content)
|
||||
if numbers:
|
||||
unique_numbers = sorted(list(set(map(int, numbers))))
|
||||
if 1 in unique_numbers:
|
||||
return max(unique_numbers)
|
||||
|
||||
return 1
|
||||
|
||||
def _create_question(
|
||||
self,
|
||||
answer: Any,
|
||||
options: list[str] = None,
|
||||
answer_image: str | None = None
|
||||
) -> QuestionData:
|
||||
"""Create a QuestionData object with common fields pre-populated."""
|
||||
return QuestionData(
|
||||
type=self.type,
|
||||
question=self.question,
|
||||
answer=answer,
|
||||
options=options or [],
|
||||
image=self.image,
|
||||
answer_image=answer_image,
|
||||
instruction=self.instruction,
|
||||
metadata=self.metadata
|
||||
)
|
||||
|
||||
# === Extraction Helpers ===
|
||||
|
||||
def _extract_type(self) -> QuestionType:
|
||||
tags = self.metadata.get("tags", [])
|
||||
for tag in tags:
|
||||
if tag.startswith("frågetyp/"):
|
||||
type_str = tag.split("/", 1)[1]
|
||||
try:
|
||||
return QuestionType(type_str)
|
||||
except ValueError:
|
||||
continue
|
||||
return QuestionType.MCQ # Default
|
||||
|
||||
    def _extract_question_text(self) -> str:
        """Collect the intro paragraphs that make up the question text.

        Skips instruction paragraphs ("Välj ... alternativ"), strips embeds
        and bold markers, and stops at the first sub-question marker so that
        a), b) parts are not swallowed into the question.
        """
        texts = []
        for node in self.nodes:
            if node.type == "paragraph":
                text = node.text.strip()
                # Skip instructions
                if text.startswith("Välj") and "alternativ" in text:
                    continue

                # If paragraph contains a sub-question marker, stop there
                # We use a more liberal search here because mistune might have joined lines
                first_marker = FIELD_MARKER_RE.search(text)

                if first_marker:
                    text = text[:first_marker.start()].strip()
                    if text:
                        # Only add if it doesn't look like an instruction we already skipped
                        if not (text.startswith("Välj") and "alternativ" in text):
                            texts.append(text)
                    break  # Stop collecting intro text once we hit a sub-question

                # Clean and collect
                text = EMBED_RE.sub("", text).strip()
                text = text.replace("**", "")
                if text:
                    texts.append(text)
        return "\n".join(texts)
|
||||
|
||||
def _extract_instruction(self) -> str | None:
|
||||
for node in self.nodes:
|
||||
if node.type == "paragraph":
|
||||
text = node.text.strip()
|
||||
if "Välj" in text and "alternativ" in text:
|
||||
return text.replace("**", "")
|
||||
return None
|
||||
|
||||
def _extract_image(self) -> str | None:
|
||||
for node in self.nodes:
|
||||
# Check for direct embed nodes
|
||||
if node.type == "embed":
|
||||
return f"![[{node.attrs['filename']}]]"
|
||||
|
||||
# Check inside paragraphs/lists for inline embeds
|
||||
if node.type in ["paragraph", "list"]:
|
||||
for child in node.children:
|
||||
if child.type == "embed":
|
||||
return f"![[{child.attrs['filename']}]]"
|
||||
|
||||
if node.raw:
|
||||
match = IMAGE_RE.search(node.raw)
|
||||
if match:
|
||||
return f"![[{match.group(1)}]]"
|
||||
return None
|
||||
|
||||
def _extract_sub_questions(self, data: QuestionData) -> list[SubQuestion]:
    """Split the question body into SubQuestion parts and attach answers.

    Args:
        data: Already-parsed QuestionData whose answer(s) are distributed
            positionally across the sub-questions.

    Returns:
        One SubQuestion per marker (a), b) / 1), 2), ...) found before the
        first code fence.
    """
    # Only split the text BEFORE the spoiler block to avoid misidentifying markers in answers
    full_raw = self.parsed.raw_content
    parts = full_raw.split("```", 1)
    question_portion = parts[0]

    # Split by sub-question markers at the start of lines: a), b) or 1), 2)
    segments = SUB_QUESTION_SPLIT_RE.split(question_portion)[1:]

    sub_questions = []
    # segments will be [id1, text1, id2, text2, ...]
    for i in range(0, len(segments), 2):
        q_id = segments[i]
        q_full_text = segments[i+1].strip()

        # Extract options if any (for select fields)
        options = self._extract_select_options(q_full_text)

        # Clean text (remove point markers like (0.5 p) and select patterns)
        clean_text = SELECT_RANGE_RE.sub("", q_full_text)
        clean_text = SELECT_LETTER_RANGE_RE.sub("", clean_text)
        clean_text = SELECT_LIST_RE.sub("", clean_text)
        clean_text = POINTS_RE.sub("", clean_text).strip()

        # Extract answer for this part
        answer = None
        # List answers map positionally: sub-question k gets data.answer[k].
        if isinstance(data.answer, list) and i//2 < len(data.answer):
            answer = data.answer[i//2]
        elif isinstance(data.answer, str):
            # String answers are split into non-empty lines, also positional.
            lines = [l.strip() for l in data.answer.split("\n") if l.strip()]
            if i//2 < len(lines):
                answer = lines[i//2]
            elif data.num_questions == 1:
                # Single-question fallback: reuse the whole answer string.
                answer = data.answer

        sub_questions.append(SubQuestion(
            id=q_id,
            text=clean_text,
            answer=answer,
            options=options
        ))

    return sub_questions
|
||||
|
||||
def _extract_select_options(self, text: str) -> list[str] | None:
    """Extract options from patterns like (1..10), (A..D), or (A, B, C).

    Tries, in order: a numeric range, a letter range, then a
    comma-separated list. Returns ``None`` when nothing matches.
    """
    # Numerical range, e.g. (1..10)
    numeric = SELECT_RANGE_RE.search(text)
    if numeric:
        lo, hi = (int(g) for g in numeric.groups())
        return [str(value) for value in range(lo, hi + 1)]

    # Letter range, e.g. (A..H)
    lettered = SELECT_LETTER_RANGE_RE.search(text)
    if lettered:
        first, last = lettered.groups()
        return [chr(code) for code in range(ord(first), ord(last) + 1)]

    # Comma-separated list, e.g. (A, B, C)
    listed = SELECT_LIST_RE.search(text)
    if listed:
        return [part.strip() for part in listed.group(1).split(",")]

    return None
|
||||
|
||||
def _extract_lettered_options(self) -> list[str]:
|
||||
options = []
|
||||
for node in self.nodes:
|
||||
if node.type == "list":
|
||||
for item in node.children:
|
||||
item_text = item.text.strip()
|
||||
if OPTION_LETTER_RE.match(item_text):
|
||||
options.append(item_text)
|
||||
return options
|
||||
|
||||
def _extract_bullet_list_options(self) -> list[str]:
|
||||
options = []
|
||||
for node in self.nodes:
|
||||
if node.type == "list":
|
||||
for item in node.children:
|
||||
options.append(item.text.strip())
|
||||
return options
|
||||
|
||||
def _extract_raw_answer(self) -> str:
|
||||
for node in self.nodes:
|
||||
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
|
||||
return node.raw.strip()
|
||||
return ""
|
||||
|
||||
def _extract_answer_letters(self) -> list[str]:
    """Return answer letters parsed from the spoiler block ([] when empty or TODO)."""
    raw = self._extract_raw_answer()
    if raw in ("", "TODO"):
        return []
    return ANSWER_LETTER_RE.findall(raw)
|
||||
|
||||
def _extract_answer_lines(self) -> list[str]:
    """Return non-empty, stripped lines of the spoiler block ([] when empty or TODO)."""
    raw = self._extract_raw_answer()
    if raw in ("", "TODO"):
        return []
    return [stripped for stripped in map(str.strip, raw.split("\n")) if stripped]
|
||||
|
||||
def _extract_answer_pairs(self) -> list[list[str]]:
    """Parse 'key: value' answer lines into [key, value] pairs.

    Lines without a colon are skipped; only the first colon splits the line.
    """
    pairs: list[list[str]] = []
    for line in self._extract_answer_lines():
        key, sep, value = line.partition(":")
        if sep:
            pairs.append([key.strip(), value.strip()])
    return pairs
|
||||
|
||||
# === Question Type Handlers ===
|
||||
|
||||
def _parse_choice_question(self) -> QuestionData:
    """Build QuestionData for a lettered-choice question.

    MCQ keeps every answer letter as a list; any other choice type keeps
    only the first letter (or '' when no answer was given).
    """
    letters = self._extract_answer_letters()
    if self.type == QuestionType.MCQ:
        answer = letters
    elif letters:
        answer = letters[0]
    else:
        answer = ""

    return self._create_question(
        answer=answer,
        options=self._extract_lettered_options(),
    )
|
||||
|
||||
def _parse_text_field(self) -> QuestionData:
    """Build QuestionData for a free-text question.

    A multi-line spoiler answer becomes a list of lines; a single line stays
    a plain string; no answer at all becomes ''.
    """
    answer_lines = self._extract_answer_lines()
    if len(answer_lines) > 1:
        answer = answer_lines
    elif answer_lines:
        answer = answer_lines[0]
    else:
        answer = ""
    return self._create_question(answer=answer)
|
||||
|
||||
|
||||
def _parse_hotspot(self) -> QuestionData:
    """Build QuestionData for a hotspot question.

    When the spoiler block contains an embedded image, the first embed is
    split out into ``answer_image`` and all embeds are stripped from the
    answer text; otherwise the raw spoiler text is the answer as-is.
    """
    content = self._extract_raw_answer()
    answer_image = None

    match = IMAGE_RE.search(content)
    if match:
        answer_image = f"![[{match.group(1)}]]"
        # Remove every embed so only prose remains in the answer text.
        answer_text = EMBED_RE.sub("", content).strip()
    else:
        answer_text = content

    return self._create_question(
        answer=answer_text,
        answer_image=answer_image
    )
|
||||
150
stroma/quiz/utils/watcher.py
Normal file
150
stroma/quiz/utils/watcher.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import time
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler, FileSystemEvent
|
||||
from django.conf import settings
|
||||
from quiz.utils.importer import import_question_file, delete_question_by_path, ImportStats
|
||||
|
||||
|
||||
class QuestionFileHandler(FileSystemEventHandler):
    """Handle file system events for question markdown files with mtime checking.

    Events are debounced per file: every change (re)starts a ``threading.Timer``
    and the import runs only once the file has been quiet for
    ``debounce_seconds``.
    """

    def __init__(self, base_path: Path, watch_path: Path):
        super().__init__()
        self.base_path = base_path
        self.watch_path = watch_path
        # file path -> pending threading.Timer; guarded by self.lock
        self.pending_events = {}
        self.debounce_seconds = 2
        self.lock = threading.Lock()

    def _debounced_import(self, file_path: Path, event_type: str):
        """Import *file_path* once its debounce timer fires.

        This runs on the Timer thread, so the debounce delay has already
        elapsed. The original implementation slept ``debounce_seconds`` again
        here, which doubled the effective delay and let this stale callback
        delete the bookkeeping of a freshly scheduled timer — causing
        duplicate imports. The redundant sleep is removed.
        """
        with self.lock:
            if file_path in self.pending_events:
                del self.pending_events[file_path]

        if not file_path.exists():
            return

        # Import with mtime checking (force=False means only import if changed)
        stats = ImportStats()
        result = import_question_file(file_path, self.watch_path, stats, force=False)

        # Provide feedback based on result
        if result == 'imported':
            print(f"\n[Auto-import] ✓ Created: {file_path.name}")
        elif result == 'updated':
            print(f"\n[Auto-import] ✓ Updated: {file_path.name}")
        elif result == 'skipped_unchanged':
            # File hasn't actually changed (same mtime), no output
            pass
        elif result == 'skipped_todo':
            print(f"\n[Auto-import] ⊘ Skipped: {file_path.name} (TODO answer)")
        elif result == 'skipped_not_mcq':
            # Silently skip non-MCQ files
            pass
        elif result == 'error':
            print(f"\n[Auto-import] ✗ Error: {file_path.name}")

    def _handle_file_change(self, file_path: Path, event_type: str = 'modified'):
        """Schedule a debounced import for a created or modified .md file."""
        if file_path.suffix != '.md':
            return

        with self.lock:
            # Cancel pending import if exists
            if file_path in self.pending_events:
                self.pending_events[file_path].cancel()

            # Schedule new import
            timer = threading.Timer(
                self.debounce_seconds,
                self._debounced_import,
                args=[file_path, event_type]
            )
            self.pending_events[file_path] = timer
            timer.start()

    def on_created(self, event: FileSystemEvent):
        """Handle file creation"""
        if not event.is_directory:
            self._handle_file_change(Path(event.src_path), 'created')

    def on_modified(self, event: FileSystemEvent):
        """Handle file modification"""
        if not event.is_directory:
            self._handle_file_change(Path(event.src_path), 'modified')

    def on_deleted(self, event: FileSystemEvent):
        """Handle file deletion"""
        if not event.is_directory and event.src_path.endswith('.md'):
            file_path = Path(event.src_path)
            delete_question_by_path(file_path)
|
||||
|
||||
|
||||
class QuestionWatcher:
    """Watch a directory tree of question markdown files and auto-import changes."""

    def __init__(self, watch_path: Path, base_path: Path = None):
        self.watch_path = watch_path
        self.base_path = base_path if base_path is not None else watch_path
        self.observer = None
        self.running = False

    def start(self):
        """Begin observing the watch path recursively; no-op when already running."""
        if self.running:
            return

        handler = QuestionFileHandler(self.base_path, self.watch_path)
        self.observer = Observer()
        self.observer.schedule(handler, str(self.watch_path), recursive=True)
        self.observer.start()
        self.running = True
        print(f"[QuestionWatcher] Started watching: {self.watch_path}")

    def stop(self):
        """Stop the observer thread and wait for it to finish; safe to call when idle."""
        if not (self.observer and self.running):
            return
        self.observer.stop()
        self.observer.join()
        self.running = False
        print("[QuestionWatcher] Stopped")
|
||||
|
||||
|
||||
def start_watcher_thread():
    """Start the question watcher in a background daemon thread.

    Performs an initial incremental import (mtime-based, force=False) and
    then watches the configured path for further changes. Returns
    immediately; all work happens on the daemon thread.
    """
    # Imported here rather than at module top — presumably to avoid a
    # circular import with quiz.utils.importer; confirm before moving.
    from quiz.utils.importer import import_questions

    def run_watcher():
        # Get watch path from settings
        watch_path_str = getattr(settings, 'QUESTION_WATCH_PATH', 'content/Anatomi & Histologi 2/Gamla tentor')
        watch_path = settings.BASE_DIR.parent / watch_path_str

        if not watch_path.exists():
            print(f"[QuestionWatcher] Warning: Watch path does not exist: {watch_path}")
            return

        # Initial import with mtime checking (force=False to only import changed files)
        print("\n[QuestionWatcher] Checking for changes...")
        stats = import_questions(watch_path, watch_path, force=False)

        # Only show stats if there were changes
        output = stats.format_output(show_if_no_changes=False)
        if output:
            print(output)
        else:
            # Plain literal; the original used an f-string with no placeholders.
            print("[QuestionWatcher] ✓ All files up to date")

        # Start watching for changes
        watcher = QuestionWatcher(watch_path, watch_path)
        watcher.start()

    # Start in daemon thread so it doesn't block shutdown
    thread = threading.Thread(target=run_watcher, name="QuestionWatcher", daemon=True)
    thread.start()
    print("[QuestionWatcher] Background thread started")
|
||||
|
||||
Reference in New Issue
Block a user