All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m29s
528 lines
18 KiB
Python
528 lines
18 KiB
Python
import re
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Tuple
|
|
import django.db.utils
|
|
|
|
from django.conf import settings
|
|
|
|
from quiz.models import Course, Exam, Question, Option
|
|
from quiz.utils.question_parser import parse_question_from_content, Node
|
|
|
|
|
|
class ImportStats:
    """Track import statistics by exam folder"""

    def __init__(self):
        # Aggregate counters for the whole import run.
        self.total_files = 0
        self.mcq_questions = 0
        self.non_mcq_skipped = 0
        self.questions_with_answers = 0
        self.questions_with_todo = 0
        self.created = 0
        self.updated = 0
        self.errors = 0
        # Per-exam-folder breakdown; entries are created lazily with
        # zeroed counters on first access.
        self.by_folder = defaultdict(
            lambda: {'total': 0, 'mcq': 0, 'answered': 0, 'todo': 0}
        )

    def has_changes(self) -> bool:
        """Check if there were any actual changes"""
        # Counters are non-negative, so truthiness equals "> 0".
        return any((self.created, self.updated, self.errors))

    def format_output(self, show_if_no_changes: bool = True) -> str:
        """
        Format statistics for console output

        Args:
            show_if_no_changes: If False, returns empty string when no changes
        """
        if not (show_if_no_changes or self.has_changes()):
            return ""

        heavy_rule = "=" * 70
        light_rule = "-" * 70

        out = [
            "\n" + heavy_rule,
            "QUESTION IMPORT STATISTICS",
            heavy_rule,
            f"Total .md files found: {self.total_files}",
            f"MCQ questions found: {self.mcq_questions}",
            f"Non-MCQ skipped: {self.non_mcq_skipped}",
            f"Questions with answers: {self.questions_with_answers}",
            f"Questions with TODO: {self.questions_with_todo}",
            f"Created in database: {self.created}",
            f"Updated in database: {self.updated}",
        ]
        if self.errors > 0:
            out.append(f"Errors: {self.errors}")

        if self.mcq_questions > 0:
            completion_pct = (self.questions_with_answers / self.mcq_questions * 100)
            out.append(f"Overall completion: {completion_pct:.1f}%")

        out += ["\n" + light_rule, "COMPLETION BY EXAM FOLDER", light_rule]

        # Folders with no MCQ questions are omitted from the breakdown.
        for folder, counts in sorted(self.by_folder.items()):
            if counts['mcq'] > 0:
                pct = (counts['answered'] / counts['mcq'] * 100)
                out.append(f"{folder:20} {counts['answered']:3}/{counts['mcq']:3} MCQ ({pct:5.1f}%)")

        out.append(heavy_rule + "\n")
        return "\n".join(out)
|
|
|
|
|
|
|
|
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
    """
    Parse a markdown file and extract question data using the new question_parser.

    Args:
        file_path: Path to the source file (kept for interface compatibility;
            parsing operates on ``content`` only).
        content: Raw markdown text of the question file.

    Returns:
        (is_question, question_data) where question_data contains:
        - text: question text
        - options: list of (letter, text) tuples
        - correct_answer: the correct answer letter(s)
        - has_answer: whether it has an answer (not TODO)
        - tags: list of tag strings
        - question_type: type of question (mcq, scq, matching, etc.)
        Returns (False, {}) when no 'frågetyp/' tag marks the file as a question.
    """
    # Parse from content string (works for both test cases and real files)
    parsed = parse_question_from_content(content)

    # Extract metadata
    metadata = parsed.metadata
    tags = metadata.get('tags', [])

    # Check for question type in tags; the presence of any 'frågetyp/' tag
    # marks the file as a question, the specific suffix selects the type.
    question_type = None
    is_question = False

    for tag in tags:
        if 'frågetyp/' in tag:
            is_question = True
        if 'frågetyp/mcq' in tag:
            question_type = 'mcq'
        elif 'frågetyp/scq' in tag:
            question_type = 'scq'
        elif 'frågetyp/matching' in tag:
            question_type = 'matching'
        elif 'frågetyp/textalternativ' in tag:
            question_type = 'textalternativ'
        elif 'frågetyp/textfält' in tag:
            question_type = 'textfält'

    if not is_question:
        return False, {}

    # Handle matching questions separately (two lists + pair-style answers)
    if question_type == 'matching':
        return parse_matching_question_from_nodes(parsed.nodes, tags)

    # Extract question text from first paragraph (skip images and special instructions)
    question_text = None
    for node in parsed.nodes:
        if node.type != "paragraph":
            continue
        text = node.text.strip()
        # Skip empty paragraphs
        if not text:
            continue

        # Remove inline images (Obsidian-style ![[...]] embeds) from text first
        text = re.sub(r'!\[\[.*?\]\]', '', text).strip()

        # Skip if paragraph was only an image reference
        if not text:
            continue

        # Skip "Välj X alternativ" instructions
        if 'Välj' in text and 'alternativ' in text:
            continue

        # Clean up bold markers
        text = text.replace('**', '')
        if text:
            question_text = text
            break

    if not question_text:
        # No usable question text: still a question file, but unusable.
        return True, {
            'text': None,
            'options': [],
            'correct_answer': '',
            'has_answer': False,
            'question_type': question_type,
            'tags': tags
        }

    # Extract options from list nodes
    options_data = []

    for node in parsed.nodes:
        if node.type != "list":
            continue
        for item in node.children:
            # Get the text of the list item
            if item.type != "list_item":
                continue
            item_text = item.text.strip()

            # Match "A: text" or just "A"
            match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
            if match:
                letter = match.group(1)
                text = match.group(2).strip()
                options_data.append((letter, text))
            elif re.match(r'^([A-Z])$', item_text):
                letter = item_text
                options_data.append((letter, ''))
            elif question_type in ['textalternativ', 'textfält']:
                # For text-based questions, use incrementing letters
                if not re.match(r'^[a-z]\)', item_text):  # Skip sub-question markers
                    letter = chr(ord('A') + len(options_data))
                    options_data.append((letter, item_text))

    if not options_data:
        # BUGFIX: the empty-options placeholder previously applied to every
        # question type, so an MCQ/SCQ with no parsed options bypassed the
        # "fewer than 2 options" guard below and was imported with a single
        # bogus option. MCQ/SCQ with no options is now treated as incomplete.
        if question_type in ['mcq', 'scq']:
            return True, {
                'text': question_text,
                'options': options_data,
                'correct_answer': '',
                'has_answer': False,
                'question_type': question_type,
                'tags': tags
            }
        # For text-based questions, options are optional
        options_data = [('A', '')]
    elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
        # A choice question needs at least two options to be answerable.
        return True, {
            'text': question_text,
            'options': options_data,
            'correct_answer': '',
            'has_answer': False,
            'question_type': question_type,
            'tags': tags
        }

    # Extract answer from spoiler block
    correct_answer = None
    has_answer = False

    for node in parsed.nodes:
        if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
            answer_text = node.raw.strip()

            # Check for TODO — marks an answer that has not been filled in yet
            if 'TODO' in answer_text.upper():
                has_answer = False
            else:
                has_answer = True

                # For MCQ/SCQ: Extract capital letters
                if question_type in ['mcq', 'scq']:
                    letters = re.findall(r'\b([A-Z])\b', answer_text)
                    if letters:
                        correct_answer = ','.join(sorted(set(letters)))
                else:
                    # For text-based questions: Store the full answer text
                    correct_answer = answer_text[:200]  # Limit to 200 chars for database field

            # Only the first spoiler block is considered
            break

    return True, {
        'text': question_text,
        'options': options_data,
        'correct_answer': correct_answer,
        'has_answer': has_answer,
        'question_type': question_type,
        'tags': tags
    }
|
|
|
|
|
|
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
    """
    Parse matching question from parsed nodes.

    Expected format:
    - Two consecutive bullet lists
    - First list = left column items (rows)
    - Second list = top row items (columns)
    - Answer format: "LeftItem: TopItem" pairs

    Returns:
        (is_matching, question_data)
    """

    def _payload(text, left, top, pairs, answered):
        # All exits share the same result dict shape.
        return True, {
            'text': text,
            'left_items': left,
            'top_items': top,
            'correct_pairs': pairs,
            'has_answer': answered,
            'question_type': 'matching',
            'tags': tags
        }

    # Question text = first paragraph that is non-empty after stripping
    # inline image embeds; bold markers are removed.
    question_text = None
    for node in nodes:
        if node.type != "paragraph":
            continue
        stripped = re.sub(r'!\[\[.*?\]\]', '', node.text.strip()).strip()
        if stripped:
            question_text = stripped.replace('**', '')
            break

    if not question_text:
        return _payload(None, [], [], [], False)

    # First bullet list supplies the left-column items, second the top-row
    # items; with fewer than two lists both stay empty.
    bullet_lists = [node for node in nodes if node.type == "list"]
    left_items = []
    top_items = []
    if len(bullet_lists) >= 2:
        left_items = [child.text.strip()
                      for child in bullet_lists[0].children
                      if child.type == "list_item"]
        top_items = [child.text.strip()
                     for child in bullet_lists[1].children
                     if child.type == "list_item"]

    # Parse the answer from the first spoiler block only.
    correct_pairs = []
    has_answer = False

    for node in nodes:
        if node.type != "block_code" or node.attrs.get("info") != "spoiler-block:":
            continue
        answer_text = node.raw.strip()

        # A TODO marker means the answer has not been written yet.
        if 'TODO' in answer_text.upper():
            has_answer = False
            break
        has_answer = True

        # Each answer line is "LeftItem: TopItem"; items are matched by
        # case-insensitive substring containment in either direction.
        for raw_line in answer_text.split('\n'):
            raw_line = raw_line.strip()
            if ':' not in raw_line:
                continue
            left_part, top_part = raw_line.split(':', 1)
            left_part = left_part.strip()
            top_part = top_part.strip()

            left_idx = next(
                (i for i, item in enumerate(left_items)
                 if left_part.lower() in item.lower() or item.lower() in left_part.lower()),
                None
            )
            top_idx = next(
                (i for i, item in enumerate(top_items)
                 if top_part.lower() in item.lower() or item.lower() in top_part.lower()),
                None
            )

            if left_idx is not None and top_idx is not None:
                correct_pairs.append([left_idx, top_idx])
        break

    return _payload(question_text, left_items, top_items, correct_pairs, has_answer)
|
|
|
|
|
|
|
|
def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, force: bool = False):
    """
    Import a single question file, checking modification time to avoid unnecessary updates.

    Args:
        file_path: Path to the question file
        base_path: Base path for relative calculations
        stats: ImportStats object to track statistics
        force: If True, import regardless of mtime (for initial import)

    Returns:
        A short status string: 'imported', 'updated', 'error', or one of the
        'skipped_*' codes ('skipped_unchanged', 'skipped_not_mcq',
        'skipped_invalid', 'skipped_todo').

    Side effects: mutates ``stats`` counters and creates/updates Question,
    Course, Exam, Tag, and Option rows in the database.
    """
    try:
        # Get file modification time (used below both for the skip check and
        # as the value stored on the Question row).
        file_mtime = file_path.stat().st_mtime

        # Calculate path relative to project root; this relative string is
        # the Question lookup key used throughout (see delete_question_by_path).
        project_root = settings.BASE_DIR.parent
        try:
            file_path_str = str(file_path.relative_to(project_root))
        except ValueError:
            # file_path lies outside the project root; fall back to base_path.
            file_path_str = str(file_path.relative_to(base_path))

        # Check if file has changed by comparing mtime; an unchanged file is
        # skipped entirely (no stats counted for it either).
        if not force:
            try:
                existing_question = Question.objects.get(file_path=file_path_str)
                if existing_question.file_mtime and existing_question.file_mtime >= file_mtime:
                    # File hasn't changed, skip
                    return 'skipped_unchanged'
            except Question.DoesNotExist:
                pass  # New file, will import

        content = file_path.read_text(encoding='utf-8')
        is_mcq, question_data = parse_markdown_question(file_path, content)

        # Track folder stats; the first path component below base_path is
        # treated as the folder bucket ('root' for files directly in base_path).
        relative_path = file_path.relative_to(base_path)
        folder_name = relative_path.parts[0] if len(relative_path.parts) > 1 else 'root'
        stats.by_folder[folder_name]['total'] += 1

        if not is_mcq:
            stats.non_mcq_skipped += 1
            return 'skipped_not_mcq'

        stats.mcq_questions += 1
        stats.by_folder[folder_name]['mcq'] += 1

        # A question with no extractable text cannot be imported.
        if not question_data or not question_data.get('text'):
            stats.non_mcq_skipped += 1
            return 'skipped_invalid'

        # Questions whose answer is still TODO are counted but not stored.
        if not question_data['has_answer']:
            stats.questions_with_todo += 1
            stats.by_folder[folder_name]['todo'] += 1
            return 'skipped_todo'

        stats.questions_with_answers += 1
        stats.by_folder[folder_name]['answered'] += 1

        # Extract exam information from folder structure
        # Expected path: content/Anatomi & Histologi 2/Gamla tentor/2022-01-15/question.md
        exam = None
        relative_path = file_path.relative_to(base_path)
        path_parts = relative_path.parts

        # Try to extract exam date from folder structure
        if len(path_parts) >= 2:
            # Get the parent folder name which should be the exam date (e.g., "2022-01-15")
            exam_folder = path_parts[-2] if len(path_parts) > 1 else None

            # Try to parse as date
            if exam_folder and '-' in exam_folder:
                try:
                    exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()

                    # Get or create course (default to "Anatomi & Histologi 2")
                    # Extract course name from path if available
                    course_name = "Anatomi & Histologi 2"
                    if len(path_parts) >= 3 and 'Anatomi' in ' '.join(path_parts):
                        # Try to find course name in path
                        for part in path_parts:
                            if 'Anatomi' in part or 'Histologi' in part:
                                course_name = part
                                break

                    course, _ = Course.objects.get_or_create(
                        name=course_name,
                        defaults={'code': 'AH2'}
                    )

                    # Get or create exam
                    exam, _ = Exam.objects.get_or_create(
                        course=course,
                        date=exam_date,
                        defaults={
                            'name': exam_folder,
                            'folder_path': '/'.join(path_parts[:-1])
                        }
                    )
                except (ValueError, ImportError):
                    pass  # If date parsing fails, exam remains None

        # Import to database with mtime tracking
        # Prepare defaults dict
        defaults = {
            'exam': exam,
            'text': question_data['text'],
            'correct_answer': question_data.get('correct_answer', ''),
            'file_mtime': file_mtime,
            'question_type': question_data.get('question_type', 'mcq'),
        }

        # Add matching_data if it's a matching question
        if question_data.get('question_type') == 'matching':
            defaults['matching_data'] = {
                'left_items': question_data.get('left_items', []),
                'top_items': question_data.get('top_items', []),
                'correct_pairs': question_data.get('correct_pairs', [])
            }

        # file_path is the natural key: re-imports update the same row.
        question, created = Question.objects.update_or_create(
            file_path=file_path_str,
            defaults=defaults
        )

        if created:
            stats.created += 1
        else:
            stats.updated += 1

        # Update tags (cleared and rebuilt on every import).
        # NOTE(review): imports are deferred to call time here — presumably to
        # avoid an import cycle with quiz.models; confirm before hoisting.
        from django.utils.text import slugify
        from quiz.models import Tag

        question.tags.clear()
        for tag_name in question_data.get('tags', []):
            tag_slug = slugify(tag_name)
            tag, _ = Tag.objects.get_or_create(
                slug=tag_slug,
                defaults={'name': tag_name}
            )
            question.tags.add(tag)

        # Update options (only for MCQ/SCQ questions); options are wiped and
        # recreated so the DB always mirrors the file.
        if question_data.get('question_type') not in ['matching']:
            question.options.all().delete()
            # Deduplicate options by letter (keep first occurrence)
            seen_letters = set()
            for letter, text in question_data.get('options', []):
                if letter not in seen_letters:
                    Option.objects.create(question=question, letter=letter, text=text)
                    seen_letters.add(letter)

        return 'imported' if created else 'updated'

    except (OSError, ValueError, django.db.utils.Error) as e:
        # File-system, path, and database failures are counted and reported
        # but never propagate to the caller.
        stats.errors += 1
        print(f"Error importing {file_path}: {e}")
        return 'error'
|
|
|
|
|
|
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
    """
    Recursively import every ``.md`` file found under ``folder_path``.

    Args:
        folder_path: Directory tree to scan for markdown question files.
        base_path: Root used for relative-path bookkeeping; defaults to
            ``folder_path`` itself when not given.
        force: Passed through to import_question_file to bypass mtime checks.

    Returns:
        An ImportStats instance summarising the run.
    """
    stats = ImportStats()
    root = folder_path if base_path is None else base_path

    for markdown_file in folder_path.rglob('*.md'):
        stats.total_files += 1
        import_question_file(markdown_file, root, stats, force=force)

    return stats
|
|
|
|
|
|
def delete_question_by_path(file_path: Path):
    """
    Delete the Question row whose stored ``file_path`` matches the given file.

    The stored key is the path relative to the project root, mirroring how
    import_question_file records it.

    Args:
        file_path: Path of the removed markdown file.

    Returns:
        True if a question was deleted, False otherwise (including on error).
    """
    try:
        project_root = settings.BASE_DIR.parent
        file_path_str = str(file_path.relative_to(project_root))
        deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()
        if deleted_count > 0:
            print(f"[Auto-delete] ✓ Deleted question: {file_path.name}")
        return deleted_count > 0
    except (OSError, ValueError, django.db.utils.Error) as e:
        # BUGFIX: ValueError added — Path.relative_to raises it when file_path
        # is outside project_root (import_question_file handles exactly this
        # case); previously it escaped this handler and crashed the caller.
        print(f"[Auto-delete] ✗ Error deleting question {file_path}: {e}")
        return False
|
|
|