1
0
Files
medical-notes/quiz/quiz/utils/importer.py
Johan Dahlin 2ec904d899
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m30s
vault backup: 2025-12-21 20:21:58
2025-12-21 20:21:58 +01:00

249 lines
8.0 KiB
Python

import re
from pathlib import Path
from collections import defaultdict
from typing import Tuple
from quiz.models import Question, Option
class ImportStats:
    """Counters collected during a question import, overall and per exam folder."""

    def __init__(self):
        # Overall counters; every one starts at zero.
        self.total_files = 0
        self.mcq_questions = 0
        self.non_mcq_skipped = 0
        self.questions_with_answers = 0
        self.questions_with_todo = 0
        self.created = 0
        self.updated = 0
        self.errors = 0
        # Per-folder counters, created on first access with all values at zero.
        self.by_folder = defaultdict(
            lambda: dict(total=0, mcq=0, answered=0, todo=0)
        )

    def format_output(self) -> str:
        """Render the collected statistics as a multi-line console report.

        Returns:
            The report as one string: overall counters first, then one line
            per folder (sorted by folder name) that has at least one MCQ.
        """
        heavy_rule = "=" * 70
        light_rule = "-" * 70

        report = [
            "\n" + heavy_rule,
            "QUESTION IMPORT STATISTICS",
            heavy_rule,
            f"Total .md files found: {self.total_files}",
            f"MCQ questions found: {self.mcq_questions}",
            f"Non-MCQ skipped: {self.non_mcq_skipped}",
            f"Questions with answers: {self.questions_with_answers}",
            f"Questions with TODO: {self.questions_with_todo}",
            f"Created in database: {self.created}",
            f"Updated in database: {self.updated}",
        ]

        # The error line is only shown when something actually failed.
        if self.errors > 0:
            report.append(f"Errors: {self.errors}")

        # Guarded so that an import with no MCQs never divides by zero.
        if self.mcq_questions > 0:
            overall = self.questions_with_answers / self.mcq_questions * 100
            report.append(f"Overall completion: {overall:.1f}%")

        report += ["\n" + light_rule, "COMPLETION BY EXAM FOLDER", light_rule]

        for name, counts in sorted(self.by_folder.items()):
            mcq_total = counts['mcq']
            if mcq_total <= 0:
                # Folders without MCQs get no line (and no division by zero).
                continue
            answered = counts['answered']
            share = answered / mcq_total * 100
            report.append(f"{name:20} {answered:3}/{mcq_total:3} MCQ ({share:5.1f}%)")

        report.append(heavy_rule + "\n")
        return "\n".join(report)
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
    """
    Parse a markdown file and extract question data.

    Args:
        file_path: Path of the file being parsed (not used by the parser;
            kept for interface compatibility).
        content: Full text of the markdown file.

    Returns:
        (is_mcq, question_data). is_mcq is True when the frontmatter carries
        a 'frågetyp/mcq' or 'frågetyp/scq' tag. question_data is empty when
        the file is not an MCQ, has no question text, or has fewer than two
        options; otherwise it contains:
        - text: question text with '**' markers removed
        - options: list of (letter, text) tuples
        - correct_answer: single capital answer letter, or None
        - has_answer: whether an answer letter was found (not TODO)
    """
    lines = content.split('\n')

    if not _has_mcq_tag(lines):
        return False, {}

    question_text = _extract_question_text(lines)
    if not question_text:
        return True, {}

    options_data = _extract_options(lines)
    if len(options_data) < 2:
        return True, {}

    correct_answer = _extract_answer(lines)
    return True, {
        'text': question_text,
        'options': options_data,
        'correct_answer': correct_answer,
        'has_answer': correct_answer is not None
    }


# Option bullet: "- A: text", "- A:" or a bare "- A".
_OPTION_RE = re.compile(r'^-\s*([A-Z])(?::\s*(.*))?$')
# Answer line made of exactly one capital letter (e.g. "B").
_ANSWER_RE = re.compile(r'^([A-Z])$')


def _has_mcq_tag(lines) -> bool:
    """Return True if a line inside '---' fences carries an MCQ/SCQ tag."""
    in_frontmatter = False
    for line in lines:
        if line.strip() == '---':
            in_frontmatter = not in_frontmatter
            continue
        if in_frontmatter and ('frågetyp/mcq' in line or 'frågetyp/scq' in line):
            return True
    return False


def _extract_question_text(lines):
    """Return the first content line after the frontmatter, or None.

    Image embeds, list items, code fences and the "Välj ... alternativ"
    instruction line are skipped. Checks run on the stripped line so that
    indented embeds or bullets are not mistaken for the question text.
    """
    in_frontmatter = False
    frontmatter_done = False
    for line in lines:
        stripped = line.strip()
        if stripped == '---':
            if in_frontmatter:
                # Closing fence: everything after this is body text.
                frontmatter_done = True
            in_frontmatter = not in_frontmatter
            continue
        if not frontmatter_done or not stripped:
            continue
        if stripped.startswith(('![[', '-', '```')):
            continue
        # Skip "Välj ett/två alternativ:" lines
        if 'Välj' in stripped and 'alternativ' in stripped:
            continue
        return stripped.replace('**', '')
    return None


def _extract_options(lines) -> list:
    """Collect (letter, text) tuples from every option bullet in the file."""
    options = []
    for line in lines:
        match = _OPTION_RE.match(line.strip())
        if match:
            # group(2) is None for a bare "- A" bullet without a colon.
            options.append((match.group(1), (match.group(2) or '').strip()))
    return options


def _extract_answer(lines):
    """Return the answer letter from the spoiler block, or None.

    Scanning stops at the closing fence or at a 'TODO' line; other lines
    inside the block are skipped until a single capital letter is found.
    """
    in_spoiler = False
    for line in lines:
        stripped = line.strip()
        if stripped.startswith('```spoiler-block:'):
            in_spoiler = True
            continue
        if not in_spoiler:
            continue
        if stripped == '```' or stripped == 'TODO':
            return None
        if _ANSWER_RE.match(stripped):
            return stripped
    return None
def import_question_file(file_path: Path, base_path: Path, stats: ImportStats):
    """Parse one markdown file, update the counters and store the question.

    Files without an MCQ tag, without parseable question data, or without an
    answer are counted in ``stats`` and not written to the database. Any
    exception is counted as an error and reported with print(), not raised.
    """
    try:
        markdown = file_path.read_text(encoding='utf-8')
        is_mcq, parsed = parse_markdown_question(file_path, markdown)

        # The first path component below base_path names the folder;
        # files directly in base_path are grouped under 'root'.
        rel_parts = file_path.relative_to(base_path).parts
        folder_name = 'root' if len(rel_parts) <= 1 else rel_parts[0]
        folder_stats = stats.by_folder[folder_name]
        folder_stats['total'] += 1

        if not is_mcq:
            stats.non_mcq_skipped += 1
            return

        stats.mcq_questions += 1
        folder_stats['mcq'] += 1

        if not (parsed and parsed.get('text')):
            stats.non_mcq_skipped += 1
            return

        if not parsed['has_answer']:
            # Unanswered questions are only counted, never stored.
            stats.questions_with_todo += 1
            folder_stats['todo'] += 1
            return

        stats.questions_with_answers += 1
        folder_stats['answered'] += 1

        # The stored path is relative to the parent of base_path.
        stored_path = str(file_path.relative_to(base_path.parent))
        field_values = {
            'text': parsed['text'],
            'correct_answer': parsed['correct_answer'],
        }
        question, created = Question.objects.update_or_create(
            file_path=stored_path,
            defaults=field_values,
        )

        if created:
            stats.created += 1
        else:
            stats.updated += 1

        # Replace the options wholesale: drop the old rows, insert the new.
        question.options.all().delete()
        for letter, text in parsed['options']:
            Option.objects.create(question=question, letter=letter, text=text)

    except Exception as exc:
        stats.errors += 1
        print(f"Error importing {file_path}: {exc}")
def import_questions(folder_path: Path, base_path: Path = None) -> ImportStats:
    """
    Recursively import every ``*.md`` question file below a folder.

    Args:
        folder_path: Folder that is searched recursively for markdown files
        base_path: Base used for relative path calculations; falls back to
            folder_path when omitted
    Returns:
        ImportStats object filled in during the run
    """
    root = folder_path if base_path is None else base_path
    stats = ImportStats()

    for markdown_file in folder_path.rglob('*.md'):
        # Every file found is counted, whether or not it ends up imported.
        stats.total_files += 1
        import_question_file(markdown_file, root, stats)

    return stats
def delete_question_by_path(file_path: Path, base_path: Path):
    """Remove the question stored for a file path from the database.

    The lookup key is file_path relative to the parent of base_path.
    Failures are reported with print() and not raised.
    """
    try:
        relative = file_path.relative_to(base_path.parent)
        stored_path = str(relative)
        matching = Question.objects.filter(file_path=stored_path)
        matching.delete()
        print(f"Deleted question: {stored_path}")
    except Exception as exc:
        print(f"Error deleting question {file_path}: {exc}")