vault backup: 2025-12-23 10:55:37
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m10s
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m10s
This commit is contained in:
2
content/.obsidian/workspace.json
vendored
2
content/.obsidian/workspace.json
vendored
@@ -30,7 +30,7 @@
|
|||||||
"state": {
|
"state": {
|
||||||
"file": "Anatomi & Histologi 2/Statistik.md",
|
"file": "Anatomi & Histologi 2/Statistik.md",
|
||||||
"mode": "source",
|
"mode": "source",
|
||||||
"source": true,
|
"source": false,
|
||||||
"backlinks": false
|
"backlinks": false
|
||||||
},
|
},
|
||||||
"icon": "lucide-file",
|
"icon": "lucide-file",
|
||||||
|
|||||||
@@ -1,18 +0,0 @@
|
|||||||
# Matching Questions Format Analysis
|
|
||||||
|
|
||||||
Based on reviewing the 17 matching questions:
|
|
||||||
|
|
||||||
## Key Finding:
|
|
||||||
Only **1 question has an answer** (2023-05-31/3.md), the rest have TODO.
|
|
||||||
|
|
||||||
**That question uses this format:**
|
|
||||||
- Two separate bullet lists
|
|
||||||
- Answer: "ItemName: MatchName" format
|
|
||||||
|
|
||||||
## Proposed Implementation:
|
|
||||||
1. Support two-list format (most flexible)
|
|
||||||
2. Parse answer as "Item: Match" pairs
|
|
||||||
3. Store as JSON with 0-indexed pairs
|
|
||||||
4. Render as n×n table with radio buttons
|
|
||||||
|
|
||||||
## Next: Implement based on this one working example.
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
import pathlib
|
|
||||||
|
|
||||||
import mistune
|
|
||||||
markdown = mistune.create_markdown(renderer=None)
|
|
||||||
|
|
||||||
root = pathlib.Path(__file__).parent.parent
|
|
||||||
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
|
|
||||||
print(exams.absolute())
|
|
||||||
for file in sorted(exams.glob("*/*.md")):
|
|
||||||
if len(file.stem) > 2:
|
|
||||||
continue
|
|
||||||
print(f"Parsing {file}")
|
|
||||||
tokens = markdown(file.read_text(encoding="utf-8"))
|
|
||||||
import pprint
|
|
||||||
pprint.pprint(tokens)
|
|
||||||
@@ -13,5 +13,6 @@ markers =
|
|||||||
admin: Admin interface tests
|
admin: Admin interface tests
|
||||||
import: Import and parsing tests
|
import: Import and parsing tests
|
||||||
import_tests: Import and parsing tests
|
import_tests: Import and parsing tests
|
||||||
|
parser: Question parser tests
|
||||||
slow: Slow running tests
|
slow: Slow running tests
|
||||||
|
|
||||||
|
|||||||
537
quiz/quiz/tests/test_question_parser.py
Normal file
537
quiz/quiz/tests/test_question_parser.py
Normal file
@@ -0,0 +1,537 @@
|
|||||||
|
"""
|
||||||
|
Comprehensive test suite for the question_parser module.
|
||||||
|
|
||||||
|
This test suite uses pytest's parametrize decorator to test multiple scenarios
|
||||||
|
with minimal code duplication. It covers:
|
||||||
|
|
||||||
|
1. Node class:
|
||||||
|
- Initialization with different token types
|
||||||
|
- Attribute handling
|
||||||
|
- Children node processing
|
||||||
|
- String representation (__repr__)
|
||||||
|
- Text extraction from nested structures
|
||||||
|
|
||||||
|
2. parse_question function:
|
||||||
|
- Metadata parsing (tags, dates, etc.)
|
||||||
|
- Raw content extraction
|
||||||
|
- Different question types (MCQ, SCQ, text field, matching)
|
||||||
|
- Questions with images
|
||||||
|
- Edge cases (empty content, missing frontmatter)
|
||||||
|
- Document structure preservation
|
||||||
|
|
||||||
|
3. ParsedQuestion dataclass:
|
||||||
|
- Default values
|
||||||
|
- Initialization with custom values
|
||||||
|
|
||||||
|
4. Real exam questions:
|
||||||
|
- Parsing actual exam questions from the content directory
|
||||||
|
- Validation of all short-named question files
|
||||||
|
|
||||||
|
Test execution:
|
||||||
|
pytest tests/test_question_parser.py -v # Verbose output
|
||||||
|
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
|
||||||
|
pytest tests/test_question_parser.py --collect-only # List all tests
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
import tempfile
|
||||||
|
import pytest
|
||||||
|
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def temp_dir():
|
||||||
|
"""Create a temporary directory for test files"""
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
yield pathlib.Path(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def create_question_file(temp_dir):
|
||||||
|
"""Factory fixture to create question files"""
|
||||||
|
def _create_file(filename: str, content: str) -> pathlib.Path:
|
||||||
|
file_path = temp_dir / filename
|
||||||
|
file_path.write_text(content, encoding="utf-8")
|
||||||
|
return file_path
|
||||||
|
return _create_file
|
||||||
|
|
||||||
|
|
||||||
|
class TestNode:
|
||||||
|
"""Test the Node class"""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("token,expected_type,expected_raw", [
|
||||||
|
({"type": "paragraph"}, "paragraph", ""),
|
||||||
|
({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
|
||||||
|
({"type": "text", "raw": "Some text"}, "text", "Some text"),
|
||||||
|
({"type": "list"}, "list", ""),
|
||||||
|
])
|
||||||
|
def test_node_initialization(self, token, expected_type, expected_raw):
|
||||||
|
"""Test Node initialization with different token types"""
|
||||||
|
node = Node(token)
|
||||||
|
assert node.type == expected_type
|
||||||
|
assert node.raw == expected_raw
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("token,expected_attrs", [
|
||||||
|
({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}),
|
||||||
|
({"type": "paragraph"}, {}),
|
||||||
|
({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
|
||||||
|
])
|
||||||
|
def test_node_attributes(self, token, expected_attrs):
|
||||||
|
"""Test Node attributes handling"""
|
||||||
|
node = Node(token)
|
||||||
|
assert node.attrs == expected_attrs
|
||||||
|
|
||||||
|
def test_node_children(self):
|
||||||
|
"""Test Node children handling"""
|
||||||
|
token = {
|
||||||
|
"type": "paragraph",
|
||||||
|
"children": [
|
||||||
|
{"type": "text", "raw": "Hello "},
|
||||||
|
{"type": "text", "raw": "World"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
node = Node(token)
|
||||||
|
assert len(node.children) == 2
|
||||||
|
assert node.children[0].type == "text"
|
||||||
|
assert node.children[0].raw == "Hello "
|
||||||
|
assert node.children[1].type == "text"
|
||||||
|
assert node.children[1].raw == "World"
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("token,expected_repr_contains", [
|
||||||
|
({"type": "text", "raw": "test"}, "Text(raw='test')"),
|
||||||
|
({"type": "paragraph"}, "Paragraph()"),
|
||||||
|
({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"),
|
||||||
|
])
|
||||||
|
def test_node_repr(self, token, expected_repr_contains):
|
||||||
|
"""Test Node __repr__ method"""
|
||||||
|
node = Node(token)
|
||||||
|
assert repr(node) == expected_repr_contains
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("token,expected_text", [
|
||||||
|
({"type": "text", "raw": "Simple text"}, "Simple text"),
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"children": [
|
||||||
|
{"type": "text", "raw": "Hello "},
|
||||||
|
{"type": "text", "raw": "World"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Hello World"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"children": [
|
||||||
|
{"type": "text", "raw": "Nested "},
|
||||||
|
{
|
||||||
|
"type": "strong",
|
||||||
|
"children": [{"type": "text", "raw": "bold"}]
|
||||||
|
},
|
||||||
|
{"type": "text", "raw": " text"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Nested bold text"
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_node_text_property(self, token, expected_text):
|
||||||
|
"""Test Node text property extraction"""
|
||||||
|
node = Node(token)
|
||||||
|
assert node.text == expected_text
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseQuestion:
|
||||||
|
"""Test the parse_question function"""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("content,expected_tags", [
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags: [ah2, provfråga, frågetyp/mcq]
|
||||||
|
date: 2022-01-15
|
||||||
|
---
|
||||||
|
Question content""",
|
||||||
|
["ah2", "provfråga", "frågetyp/mcq"]
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags:
|
||||||
|
- ah2
|
||||||
|
- provfråga
|
||||||
|
- frågetyp/scq
|
||||||
|
date: 2023-05-31
|
||||||
|
---
|
||||||
|
Question content""",
|
||||||
|
["ah2", "provfråga", "frågetyp/scq"]
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
|
||||||
|
"""Test parsing of metadata tags in different formats"""
|
||||||
|
file_path = create_question_file("test.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
assert question.metadata["tags"] == expected_tags
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("content,expected_date", [
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags: [ah2]
|
||||||
|
date: 2022-01-15
|
||||||
|
---
|
||||||
|
Content""",
|
||||||
|
"2022-01-15"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags: [ah2]
|
||||||
|
date: 2023-05-31
|
||||||
|
---
|
||||||
|
Content""",
|
||||||
|
"2023-05-31"
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_parse_metadata_date(self, create_question_file, content, expected_date):
|
||||||
|
"""Test parsing of metadata date"""
|
||||||
|
file_path = create_question_file("test.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
assert str(question.metadata["date"]) == expected_date
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("content,expected_raw", [
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags: [ah2]
|
||||||
|
---
|
||||||
|
Simple question""",
|
||||||
|
"Simple question"
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""---
|
||||||
|
tags: [ah2]
|
||||||
|
---
|
||||||
|
Question with **bold** text""",
|
||||||
|
"Question with **bold** text"
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_parse_raw_content(self, create_question_file, content, expected_raw):
|
||||||
|
"""Test parsing of raw content"""
|
||||||
|
file_path = create_question_file("test.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
assert question.raw_content.strip() == expected_raw
|
||||||
|
|
||||||
|
def test_parse_mcq_question(self, create_question_file):
|
||||||
|
"""Test parsing a complete MCQ question"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
|
||||||
|
date: 2022-01-15
|
||||||
|
---
|
||||||
|
Vilka av följande räknas till storhjärnans basala kärnor?
|
||||||
|
|
||||||
|
**Välj två alternativ**
|
||||||
|
- A: Putamen
|
||||||
|
- B: Nucleus Ruber
|
||||||
|
- C: Substantia nigra
|
||||||
|
- D: Nucleus caudatus
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
A och D
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("mcq.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
|
||||||
|
assert len(question.nodes) > 0
|
||||||
|
|
||||||
|
# Find paragraph nodes
|
||||||
|
paragraphs = [n for n in question.nodes if n.type == "paragraph"]
|
||||||
|
assert len(paragraphs) > 0
|
||||||
|
|
||||||
|
# Find list nodes
|
||||||
|
lists = [n for n in question.nodes if n.type == "list"]
|
||||||
|
assert len(lists) > 0
|
||||||
|
|
||||||
|
# Find spoiler block
|
||||||
|
code_blocks = [n for n in question.nodes if n.type == "block_code"]
|
||||||
|
assert len(code_blocks) > 0
|
||||||
|
spoiler = code_blocks[0]
|
||||||
|
assert spoiler.attrs.get("info") == "spoiler-block:"
|
||||||
|
assert "A och D" in spoiler.raw
|
||||||
|
|
||||||
|
def test_parse_scq_question(self, create_question_file):
|
||||||
|
"""Test parsing a single choice question"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2, provfråga, frågetyp/scq, histologi]
|
||||||
|
date: 2022-06-01
|
||||||
|
---
|
||||||
|
Vilken del av CNS syns i bild?
|
||||||
|
- A: Cerebellum
|
||||||
|
- B: Diencephalon
|
||||||
|
- C: Medulla spinalis
|
||||||
|
- D: Cerebrum
|
||||||
|
- E: Pons
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
A
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("scq.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
assert "frågetyp/scq" in question.metadata["tags"]
|
||||||
|
lists = [n for n in question.nodes if n.type == "list"]
|
||||||
|
assert len(lists) > 0
|
||||||
|
|
||||||
|
def test_parse_text_field_question(self, create_question_file):
|
||||||
|
"""Test parsing a text field question"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
|
||||||
|
date: 2022-01-15
|
||||||
|
---
|
||||||
|
![[image-2.png|301x248]]
|
||||||
|
**Fyll i rätt siffra!**
|
||||||
|
|
||||||
|
(0.5p per rätt svar, inga avdrag för fel svar):
|
||||||
|
|
||||||
|
a) Vilken siffra pekar på gula fläcken?
|
||||||
|
b) Vilken siffra pekar på choroidea?
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
a) 7
|
||||||
|
b) 6
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("textfield.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
assert "frågetyp/textfält" in question.metadata["tags"]
|
||||||
|
assert len(question.nodes) > 0
|
||||||
|
|
||||||
|
def test_parse_matching_question(self, create_question_file):
|
||||||
|
"""Test parsing a matching question"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2, provfråga, frågetyp/matching, histologi]
|
||||||
|
date: 2023-05-31
|
||||||
|
---
|
||||||
|
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
|
||||||
|
(1p för alla rätt, inga delpoäng)
|
||||||
|
|
||||||
|
- a) oligodendrocyter
|
||||||
|
- b) Astrocyter
|
||||||
|
- c) satellitceller
|
||||||
|
- d) ependymceller
|
||||||
|
- e) mikroglia
|
||||||
|
- f) Schwannceller
|
||||||
|
|
||||||
|
- JA, finn i CNS
|
||||||
|
- NEJ, finns inte i CNS
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
a) JA, finn i CNS
|
||||||
|
b) JA, finn i CNS
|
||||||
|
c) NEJ, finns inte i CNS
|
||||||
|
d) JA, finn i CNS
|
||||||
|
e) JA, finn i CNS
|
||||||
|
f) NEJ, finns inte i CNS
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("matching.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
assert "frågetyp/matching" in question.metadata["tags"]
|
||||||
|
lists = [n for n in question.nodes if n.type == "list"]
|
||||||
|
assert len(lists) > 0
|
||||||
|
|
||||||
|
def test_parse_question_with_image(self, create_question_file):
|
||||||
|
"""Test parsing a question with embedded images"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
|
||||||
|
date: 2022-01-15
|
||||||
|
---
|
||||||
|
![[image-4.png|292x316]]
|
||||||
|
**Fyll i rätt siffra !**
|
||||||
|
|
||||||
|
(0.5p per rätt svar, inga avdrag för fel svar):
|
||||||
|
|
||||||
|
a) Vilken siffra pekar på incus? (1..19)
|
||||||
|
b) Vilken siffra pekar på tuba auditiva? (1..19)
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
a) 7
|
||||||
|
b) 18
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("image_q.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
assert "bild" in question.metadata["tags"]
|
||||||
|
assert "![[image-4.png" in question.raw_content
|
||||||
|
embed = question.nodes[0].children[0]
|
||||||
|
assert embed.type == "embed"
|
||||||
|
assert embed.attrs == {
|
||||||
|
"filename": "image-4.png",
|
||||||
|
"width": 292,
|
||||||
|
"height": 316
|
||||||
|
}
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("invalid_content", [
|
||||||
|
"", # Empty content
|
||||||
|
"No frontmatter", # No frontmatter
|
||||||
|
"---\n---\n", # Empty frontmatter
|
||||||
|
])
|
||||||
|
def test_parse_edge_cases(self, create_question_file, invalid_content):
|
||||||
|
"""Test parsing edge cases"""
|
||||||
|
file_path = create_question_file("edge.md", invalid_content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
assert isinstance(question, ParsedQuestion)
|
||||||
|
|
||||||
|
def test_parse_question_preserves_structure(self, create_question_file):
|
||||||
|
"""Test that parsing preserves the document structure"""
|
||||||
|
content = """---
|
||||||
|
tags: [ah2]
|
||||||
|
---
|
||||||
|
# Heading
|
||||||
|
|
||||||
|
Paragraph text
|
||||||
|
|
||||||
|
- List item 1
|
||||||
|
- List item 2
|
||||||
|
|
||||||
|
```spoiler-block:
|
||||||
|
Answer
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
file_path = create_question_file("structure.md", content)
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
node_types = [n.type for n in question.nodes]
|
||||||
|
assert "heading" in node_types
|
||||||
|
assert "paragraph" in node_types
|
||||||
|
assert "list" in node_types
|
||||||
|
assert "block_code" in node_types
|
||||||
|
|
||||||
|
|
||||||
|
class TestParsedQuestionDataclass:
|
||||||
|
"""Test the ParsedQuestion dataclass"""
|
||||||
|
|
||||||
|
def test_parsed_question_defaults(self):
|
||||||
|
"""Test ParsedQuestion default values"""
|
||||||
|
question = ParsedQuestion()
|
||||||
|
assert question.metadata == {}
|
||||||
|
assert question.raw_content == ""
|
||||||
|
assert question.nodes == []
|
||||||
|
|
||||||
|
def test_parsed_question_initialization(self):
|
||||||
|
"""Test ParsedQuestion initialization with values"""
|
||||||
|
metadata = {"tags": ["test"], "date": "2022-01-15"}
|
||||||
|
content = "Test content"
|
||||||
|
nodes = [Node({"type": "paragraph"})]
|
||||||
|
|
||||||
|
question = ParsedQuestion(
|
||||||
|
metadata=metadata,
|
||||||
|
raw_content=content,
|
||||||
|
nodes=nodes
|
||||||
|
)
|
||||||
|
|
||||||
|
assert question.metadata == metadata
|
||||||
|
assert question.raw_content == content
|
||||||
|
assert question.nodes == nodes
|
||||||
|
|
||||||
|
|
||||||
|
class TestRealQuestions:
|
||||||
|
"""Test parsing real questions from the exam files"""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def exam_dir(self):
|
||||||
|
"""Get the real exam directory"""
|
||||||
|
root = pathlib.Path(__file__).parent.parent.parent
|
||||||
|
exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
|
||||||
|
if exam_path.exists():
|
||||||
|
return exam_path
|
||||||
|
pytest.skip("Exam directory not found")
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("exam_date,question_num", [
|
||||||
|
("2022-01-15", "1"),
|
||||||
|
("2022-01-15", "2"),
|
||||||
|
("2022-01-15", "3"),
|
||||||
|
("2022-01-15", "4"),
|
||||||
|
("2022-06-01", "8"),
|
||||||
|
])
|
||||||
|
def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
|
||||||
|
"""Test parsing real exam questions"""
|
||||||
|
file_path = exam_dir / exam_date / f"{question_num}.md"
|
||||||
|
if not file_path.exists():
|
||||||
|
pytest.skip(f"Question file {file_path} not found")
|
||||||
|
|
||||||
|
question = parse_question(file_path)
|
||||||
|
|
||||||
|
# Verify metadata exists and has required fields
|
||||||
|
assert "tags" in question.metadata
|
||||||
|
assert isinstance(question.metadata["tags"], list)
|
||||||
|
assert "ah2" in question.metadata["tags"]
|
||||||
|
assert "provfråga" in question.metadata["tags"]
|
||||||
|
|
||||||
|
# Verify content was parsed
|
||||||
|
assert len(question.raw_content) > 0
|
||||||
|
assert len(question.nodes) > 0
|
||||||
|
|
||||||
|
def test_parse_all_short_named_questions(self, exam_dir):
|
||||||
|
"""Test parsing all questions with short filenames (1-2 chars)"""
|
||||||
|
questions_found = 0
|
||||||
|
|
||||||
|
for file in sorted(exam_dir.glob("*/*.md")):
|
||||||
|
if len(file.stem) <= 2 and file.stem.isdigit():
|
||||||
|
question = parse_question(file)
|
||||||
|
assert isinstance(question, ParsedQuestion)
|
||||||
|
assert "tags" in question.metadata
|
||||||
|
questions_found += 1
|
||||||
|
|
||||||
|
# Ensure we found at least some questions
|
||||||
|
assert questions_found > 0, "No exam questions found to test"
|
||||||
|
|
||||||
|
|
||||||
|
class TestNodeTextExtraction:
|
||||||
|
"""Test text extraction from complex node structures"""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("token,expected_text", [
|
||||||
|
# Simple text
|
||||||
|
({"type": "text", "raw": "Hello"}, "Hello"),
|
||||||
|
|
||||||
|
# Paragraph with multiple text children
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"children": [
|
||||||
|
{"type": "text", "raw": "A "},
|
||||||
|
{"type": "text", "raw": "B "},
|
||||||
|
{"type": "text", "raw": "C"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"A B C"
|
||||||
|
),
|
||||||
|
|
||||||
|
# Nested formatting
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"children": [
|
||||||
|
{"type": "text", "raw": "Normal "},
|
||||||
|
{
|
||||||
|
"type": "emphasis",
|
||||||
|
"children": [{"type": "text", "raw": "italic"}]
|
||||||
|
},
|
||||||
|
{"type": "text", "raw": " "},
|
||||||
|
{
|
||||||
|
"type": "strong",
|
||||||
|
"children": [{"type": "text", "raw": "bold"}]
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Normal italic bold"
|
||||||
|
),
|
||||||
|
|
||||||
|
# Empty node
|
||||||
|
({"type": "paragraph", "children": []}, ""),
|
||||||
|
])
|
||||||
|
def test_complex_text_extraction(self, token, expected_text):
|
||||||
|
"""Test text extraction from complex nested structures"""
|
||||||
|
node = Node(token)
|
||||||
|
assert node.text == expected_text
|
||||||
|
|
||||||
@@ -1,8 +1,13 @@
|
|||||||
import re
|
import re
|
||||||
from pathlib import Path
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
from quiz.models import Question, Option
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from quiz.models import Course, Exam, Question, Option
|
||||||
|
from quiz.utils.question_parser import parse_question_from_content, Node
|
||||||
|
|
||||||
|
|
||||||
class ImportStats:
|
class ImportStats:
|
||||||
@@ -70,44 +75,188 @@ class ImportStats:
|
|||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def parse_matching_question(content: str) -> Tuple[bool, dict]:
|
|
||||||
|
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
|
||||||
"""
|
"""
|
||||||
Parse matching question from markdown.
|
Parse a markdown file and extract question data using the new question_parser.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(is_mcq, question_data) where question_data contains:
|
||||||
|
- text: question text
|
||||||
|
- options: list of (letter, text) tuples
|
||||||
|
- correct_answer: the correct answer letter(s)
|
||||||
|
- has_answer: whether it has an answer (not TODO)
|
||||||
|
- tags: list of tag strings
|
||||||
|
- question_type: type of question (mcq, scq, matching, etc.)
|
||||||
|
"""
|
||||||
|
# Parse from content string (works for both test cases and real files)
|
||||||
|
parsed = parse_question_from_content(content)
|
||||||
|
|
||||||
|
# Extract metadata
|
||||||
|
metadata = parsed.metadata
|
||||||
|
tags = metadata.get('tags', [])
|
||||||
|
|
||||||
|
# Check for question type in tags
|
||||||
|
question_type = None
|
||||||
|
is_question = False
|
||||||
|
|
||||||
|
for tag in tags:
|
||||||
|
if 'frågetyp/' in tag:
|
||||||
|
is_question = True
|
||||||
|
if 'frågetyp/mcq' in tag:
|
||||||
|
question_type = 'mcq'
|
||||||
|
elif 'frågetyp/scq' in tag:
|
||||||
|
question_type = 'scq'
|
||||||
|
elif 'frågetyp/matching' in tag:
|
||||||
|
question_type = 'matching'
|
||||||
|
elif 'frågetyp/textalternativ' in tag:
|
||||||
|
question_type = 'textalternativ'
|
||||||
|
elif 'frågetyp/textfält' in tag:
|
||||||
|
question_type = 'textfält'
|
||||||
|
|
||||||
|
if not is_question:
|
||||||
|
return False, {}
|
||||||
|
|
||||||
|
# Handle matching questions separately
|
||||||
|
if question_type == 'matching':
|
||||||
|
return parse_matching_question_from_nodes(parsed.nodes, tags)
|
||||||
|
|
||||||
|
# Extract question text from first paragraph (skip images and special instructions)
|
||||||
|
question_text = None
|
||||||
|
for node in parsed.nodes:
|
||||||
|
if node.type != "paragraph":
|
||||||
|
continue
|
||||||
|
text = node.text.strip()
|
||||||
|
# Skip empty paragraphs
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Remove inline images from text first
|
||||||
|
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
|
||||||
|
|
||||||
|
# Skip if paragraph was only an image reference
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip "Välj X alternativ" instructions
|
||||||
|
if 'Välj' in text and 'alternativ' in text:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Clean up bold markers
|
||||||
|
text = text.replace('**', '')
|
||||||
|
if text:
|
||||||
|
question_text = text
|
||||||
|
break
|
||||||
|
|
||||||
|
if not question_text:
|
||||||
|
return True, {
|
||||||
|
'text': None,
|
||||||
|
'options': [],
|
||||||
|
'correct_answer': '',
|
||||||
|
'has_answer': False,
|
||||||
|
'question_type': question_type,
|
||||||
|
'tags': tags
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract options from list nodes
|
||||||
|
options_data = []
|
||||||
|
|
||||||
|
for node in parsed.nodes:
|
||||||
|
if node.type != "list":
|
||||||
|
continue
|
||||||
|
for item in node.children:
|
||||||
|
# Get the text of the list item
|
||||||
|
if item.type != "list_item":
|
||||||
|
continue
|
||||||
|
item_text = item.text.strip()
|
||||||
|
|
||||||
|
# Match "A: text" or just "A"
|
||||||
|
match = re.match(r'^([A-Z]):\s*(.*)$', item_text)
|
||||||
|
if match:
|
||||||
|
letter = match.group(1)
|
||||||
|
text = match.group(2).strip()
|
||||||
|
options_data.append((letter, text))
|
||||||
|
elif re.match(r'^([A-Z])$', item_text):
|
||||||
|
letter = item_text
|
||||||
|
options_data.append((letter, ''))
|
||||||
|
elif question_type in ['textalternativ', 'textfält']:
|
||||||
|
# For text-based questions, use incrementing letters
|
||||||
|
if not re.match(r'^[a-z]\)', item_text): # Skip sub-question markers
|
||||||
|
letter = chr(ord('A') + len(options_data))
|
||||||
|
options_data.append((letter, item_text))
|
||||||
|
|
||||||
|
# For text-based questions, options are optional
|
||||||
|
if not options_data:
|
||||||
|
options_data = [('A', '')]
|
||||||
|
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
|
||||||
|
return True, {
|
||||||
|
'text': question_text,
|
||||||
|
'options': options_data,
|
||||||
|
'correct_answer': '',
|
||||||
|
'has_answer': False,
|
||||||
|
'question_type': question_type,
|
||||||
|
'tags': tags
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract answer from spoiler block
|
||||||
|
correct_answer = None
|
||||||
|
has_answer = False
|
||||||
|
|
||||||
|
for node in parsed.nodes:
|
||||||
|
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
|
||||||
|
answer_text = node.raw.strip()
|
||||||
|
|
||||||
|
# Check for TODO
|
||||||
|
if 'TODO' in answer_text.upper():
|
||||||
|
has_answer = False
|
||||||
|
else:
|
||||||
|
has_answer = True
|
||||||
|
|
||||||
|
# For MCQ/SCQ: Extract capital letters
|
||||||
|
if question_type in ['mcq', 'scq']:
|
||||||
|
letters = re.findall(r'\b([A-Z])\b', answer_text)
|
||||||
|
if letters:
|
||||||
|
correct_answer = ','.join(sorted(set(letters)))
|
||||||
|
else:
|
||||||
|
# For text-based questions: Store the full answer text
|
||||||
|
correct_answer = answer_text[:200] # Limit to 200 chars for database field
|
||||||
|
|
||||||
|
break
|
||||||
|
|
||||||
|
return True, {
|
||||||
|
'text': question_text,
|
||||||
|
'options': options_data,
|
||||||
|
'correct_answer': correct_answer,
|
||||||
|
'has_answer': has_answer,
|
||||||
|
'question_type': question_type,
|
||||||
|
'tags': tags
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_matching_question_from_nodes(nodes: list[Node], tags: list) -> Tuple[bool, dict]:
|
||||||
|
"""
|
||||||
|
Parse matching question from parsed nodes.
|
||||||
|
|
||||||
Expected format:
|
Expected format:
|
||||||
- Two consecutive bullet lists (with "- " prefix)
|
- Two consecutive bullet lists
|
||||||
- First list = left column items (rows)
|
- First list = left column items (rows)
|
||||||
- Second list = top row items (columns)
|
- Second list = top row items (columns)
|
||||||
- Answer format: "LeftItem: TopItem" pairs
|
- Answer format: "LeftItem: TopItem" pairs
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(is_matching, question_data) where question_data contains:
|
(is_matching, question_data)
|
||||||
- text: question text
|
|
||||||
- left_items: list of left column items
|
|
||||||
- top_items: list of top row items
|
|
||||||
- correct_pairs: list of [left_idx, top_idx] pairs (0-indexed)
|
|
||||||
- has_answer: whether it has an answer (not TODO)
|
|
||||||
- question_type: 'matching'
|
|
||||||
"""
|
"""
|
||||||
lines = content.split('\n')
|
# Extract question text
|
||||||
|
|
||||||
# Extract question text (first non-empty line after frontmatter)
|
|
||||||
question_text = None
|
question_text = None
|
||||||
in_frontmatter = False
|
for node in nodes:
|
||||||
frontmatter_done = False
|
if node.type == "paragraph":
|
||||||
|
text = node.text.strip()
|
||||||
for line in lines:
|
# Remove inline images
|
||||||
if line.strip() == '---':
|
text = re.sub(r'!\[\[.*?\]\]', '', text).strip()
|
||||||
if not in_frontmatter:
|
# Skip if empty after removing images
|
||||||
in_frontmatter = True
|
if not text:
|
||||||
else:
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = True
|
|
||||||
continue
|
continue
|
||||||
|
question_text = text.replace('**', '')
|
||||||
if frontmatter_done and line.strip() and not line.startswith('![['):
|
|
||||||
if not line.startswith('-') and not line.startswith('```'):
|
|
||||||
question_text = line.strip().replace('**', '')
|
|
||||||
break
|
break
|
||||||
|
|
||||||
if not question_text:
|
if not question_text:
|
||||||
@@ -117,94 +266,45 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
|
|||||||
'top_items': [],
|
'top_items': [],
|
||||||
'correct_pairs': [],
|
'correct_pairs': [],
|
||||||
'has_answer': False,
|
'has_answer': False,
|
||||||
'question_type': 'matching'
|
'question_type': 'matching',
|
||||||
|
'tags': tags
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract two consecutive bullet lists
|
# Extract two consecutive lists
|
||||||
left_items = []
|
left_items = []
|
||||||
top_items = []
|
top_items = []
|
||||||
in_first_list = False
|
list_nodes = [node for node in nodes if node.type == "list"]
|
||||||
in_second_list = False
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = False
|
|
||||||
found_question_text = False
|
|
||||||
|
|
||||||
for line in lines:
|
if len(list_nodes) >= 2:
|
||||||
# Track frontmatter
|
# First list = left items
|
||||||
if line.strip() == '---':
|
for item in list_nodes[0].children:
|
||||||
if not in_frontmatter:
|
if item.type == "list_item":
|
||||||
in_frontmatter = True
|
left_items.append(item.text.strip())
|
||||||
else:
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = True
|
|
||||||
continue
|
|
||||||
|
|
||||||
if in_frontmatter or not frontmatter_done:
|
# Second list = top items
|
||||||
continue
|
for item in list_nodes[1].children:
|
||||||
|
if item.type == "list_item":
|
||||||
# Skip spoiler blocks
|
top_items.append(item.text.strip())
|
||||||
if line.strip().startswith('```'):
|
|
||||||
break
|
|
||||||
|
|
||||||
# Found question text
|
|
||||||
if not found_question_text and question_text in line:
|
|
||||||
found_question_text = True
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not found_question_text:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Look for bullet lists
|
|
||||||
if line.strip().startswith('- '):
|
|
||||||
item = line.strip()[2:].strip()
|
|
||||||
if not item: # Empty bullet
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not in_first_list and not in_second_list:
|
|
||||||
in_first_list = True
|
|
||||||
left_items.append(item)
|
|
||||||
elif in_first_list:
|
|
||||||
left_items.append(item)
|
|
||||||
elif in_second_list:
|
|
||||||
top_items.append(item)
|
|
||||||
elif line.strip() == '':
|
|
||||||
# Empty line - transition from first list to second
|
|
||||||
if in_first_list and left_items:
|
|
||||||
in_first_list = False
|
|
||||||
in_second_list = True
|
|
||||||
elif not line.strip().startswith('-') and (in_first_list or in_second_list):
|
|
||||||
# Non-bullet line after starting lists - end of lists
|
|
||||||
break
|
|
||||||
|
|
||||||
# Parse answer from spoiler block
|
# Parse answer from spoiler block
|
||||||
correct_pairs = []
|
correct_pairs = []
|
||||||
has_answer = False
|
has_answer = False
|
||||||
in_spoiler = False
|
|
||||||
answer_lines = []
|
|
||||||
|
|
||||||
for line in lines:
|
for node in nodes:
|
||||||
if line.strip().startswith('```spoiler-block'):
|
if node.type == "block_code" and node.attrs.get("info") == "spoiler-block:":
|
||||||
in_spoiler = True
|
answer_text = node.raw.strip()
|
||||||
continue
|
|
||||||
if in_spoiler:
|
|
||||||
if line.strip() == '```':
|
|
||||||
break
|
|
||||||
stripped = line.strip()
|
|
||||||
if stripped:
|
|
||||||
answer_lines.append(stripped)
|
|
||||||
|
|
||||||
if answer_lines:
|
|
||||||
full_answer = ' '.join(answer_lines)
|
|
||||||
|
|
||||||
# Check for TODO
|
# Check for TODO
|
||||||
if 'TODO' in full_answer.upper():
|
if 'TODO' in answer_text.upper():
|
||||||
has_answer = False
|
has_answer = False
|
||||||
else:
|
break
|
||||||
has_answer = True
|
has_answer = True
|
||||||
# Parse "Item: Match" format
|
# Parse "Item: Match" format
|
||||||
# Example: "Smak: Lobus Insularis"
|
answer_lines = answer_text.split('\n')
|
||||||
for line in answer_lines:
|
for line in answer_lines:
|
||||||
if ':' in line:
|
line = line.strip()
|
||||||
|
if ':' not in line:
|
||||||
|
continue
|
||||||
left_part, top_part = line.split(':', 1)
|
left_part, top_part = line.split(':', 1)
|
||||||
left_part = left_part.strip()
|
left_part = left_part.strip()
|
||||||
top_part = top_part.strip()
|
top_part = top_part.strip()
|
||||||
@@ -225,6 +325,7 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
|
|||||||
|
|
||||||
if left_idx is not None and top_idx is not None:
|
if left_idx is not None and top_idx is not None:
|
||||||
correct_pairs.append([left_idx, top_idx])
|
correct_pairs.append([left_idx, top_idx])
|
||||||
|
break
|
||||||
|
|
||||||
return True, {
|
return True, {
|
||||||
'text': question_text,
|
'text': question_text,
|
||||||
@@ -232,215 +333,8 @@ def parse_matching_question(content: str) -> Tuple[bool, dict]:
|
|||||||
'top_items': top_items,
|
'top_items': top_items,
|
||||||
'correct_pairs': correct_pairs,
|
'correct_pairs': correct_pairs,
|
||||||
'has_answer': has_answer,
|
'has_answer': has_answer,
|
||||||
'question_type': 'matching'
|
'question_type': 'matching',
|
||||||
}
|
'tags': tags
|
||||||
|
|
||||||
|
|
||||||
def parse_markdown_question(file_path: Path, content: str) -> Tuple[bool, dict]:
|
|
||||||
"""
|
|
||||||
Parse a markdown file and extract question data.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(is_mcq, question_data) where question_data contains:
|
|
||||||
- text: question text
|
|
||||||
- options: list of (letter, text) tuples
|
|
||||||
- correct_answer: the correct answer letter(s)
|
|
||||||
- has_answer: whether it has an answer (not TODO)
|
|
||||||
- tags: list of tag strings
|
|
||||||
"""
|
|
||||||
lines = content.split('\n')
|
|
||||||
|
|
||||||
# Check for question tags in frontmatter
|
|
||||||
# Accept: frågetyp/mcq, frågetyp/scq, frågetyp/textalternativ, frågetyp/textfält
|
|
||||||
is_question = False
|
|
||||||
question_type = None
|
|
||||||
in_frontmatter = False
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if line.strip() == '---':
|
|
||||||
if in_frontmatter:
|
|
||||||
# End of frontmatter
|
|
||||||
in_frontmatter = False
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
in_frontmatter = True
|
|
||||||
continue
|
|
||||||
|
|
||||||
if in_frontmatter:
|
|
||||||
if 'frågetyp/' in line:
|
|
||||||
is_question = True
|
|
||||||
# Extract question type
|
|
||||||
if 'frågetyp/mcq' in line:
|
|
||||||
question_type = 'mcq'
|
|
||||||
elif 'frågetyp/scq' in line:
|
|
||||||
question_type = 'scq'
|
|
||||||
elif 'frågetyp/matching' in line:
|
|
||||||
question_type = 'matching'
|
|
||||||
elif 'frågetyp/textalternativ' in line:
|
|
||||||
question_type = 'textalternativ'
|
|
||||||
elif 'frågetyp/textfält' in line:
|
|
||||||
question_type = 'textfält'
|
|
||||||
|
|
||||||
if line.strip().lower().startswith('tags:'):
|
|
||||||
# Extract tags
|
|
||||||
# Handle: tags: [tag1, tag2] or tags: tag1, tag2
|
|
||||||
tag_content = line.split(':', 1)[1].strip()
|
|
||||||
# Remove brackets if present
|
|
||||||
tag_content = tag_content.strip('[]')
|
|
||||||
# Split by comma
|
|
||||||
tags = [t.strip() for t in tag_content.split(',') if t.strip()]
|
|
||||||
|
|
||||||
# If it's a matching question, use the matching parser
|
|
||||||
if question_type == 'matching':
|
|
||||||
is_matching, matching_data = parse_matching_question(content)
|
|
||||||
if is_matching:
|
|
||||||
# Add tags to the data
|
|
||||||
matching_data['tags'] = tags if 'tags' in locals() else []
|
|
||||||
return True, matching_data
|
|
||||||
|
|
||||||
|
|
||||||
if not is_question:
|
|
||||||
return False, {}
|
|
||||||
|
|
||||||
# Extract question text (first non-empty line after frontmatter)
|
|
||||||
question_text = None
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = False
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if line.strip() == '---':
|
|
||||||
if not in_frontmatter:
|
|
||||||
in_frontmatter = True
|
|
||||||
else:
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = True
|
|
||||||
continue
|
|
||||||
|
|
||||||
if frontmatter_done and line.strip() and not line.startswith('![['):
|
|
||||||
# Skip "Välj ett/två alternativ:" lines
|
|
||||||
if 'Välj' in line and 'alternativ' in line:
|
|
||||||
continue
|
|
||||||
if not line.startswith('-') and not line.startswith('```'):
|
|
||||||
question_text = line.strip().replace('**', '')
|
|
||||||
break
|
|
||||||
|
|
||||||
# Return early if no question text found, but include has_answer field
|
|
||||||
if not question_text:
|
|
||||||
return True, {
|
|
||||||
'text': None,
|
|
||||||
'options': [],
|
|
||||||
'correct_answer': '',
|
|
||||||
'has_answer': False,
|
|
||||||
'question_type': question_type,
|
|
||||||
'tags': tags if 'tags' in locals() else []
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Extract options (pattern: "- A:" or "- A" for MCQ, or text for textalternativ)
|
|
||||||
options_data = []
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = False
|
|
||||||
in_spoiler = False
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
# Track frontmatter to skip it
|
|
||||||
if line.strip() == '---':
|
|
||||||
if not in_frontmatter:
|
|
||||||
in_frontmatter = True
|
|
||||||
else:
|
|
||||||
in_frontmatter = False
|
|
||||||
frontmatter_done = True
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Skip frontmatter and spoiler blocks
|
|
||||||
if in_frontmatter or not frontmatter_done:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if line.strip().startswith('```spoiler-block:'):
|
|
||||||
in_spoiler = True
|
|
||||||
continue
|
|
||||||
if in_spoiler:
|
|
||||||
if line.strip() == '```':
|
|
||||||
in_spoiler = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Match "- A: text" or "- A: " or just "- A"
|
|
||||||
match = re.match(r'^-\s*([A-Z]):\s*(.*)$', line.strip())
|
|
||||||
if not match:
|
|
||||||
# Also try "- A" without colon
|
|
||||||
match = re.match(r'^-\s*([A-Z])$', line.strip())
|
|
||||||
|
|
||||||
if match:
|
|
||||||
letter = match.group(1)
|
|
||||||
text = match.group(2) if len(match.groups()) > 1 else ""
|
|
||||||
options_data.append((letter, text.strip()))
|
|
||||||
else:
|
|
||||||
# For textalternativ, options might be plain text items
|
|
||||||
if question_type in ['textalternativ', 'textfält'] and line.strip().startswith('-') and not line.strip().startswith('--'):
|
|
||||||
# Extract text after dash
|
|
||||||
option_text = line.strip()[1:].strip()
|
|
||||||
# Skip if it's a sub-question marker like "a)" or "b)"
|
|
||||||
if option_text and not re.match(r'^[a-z]\)', option_text):
|
|
||||||
# Use incrementing letters for text options
|
|
||||||
letter = chr(ord('A') + len(options_data))
|
|
||||||
options_data.append((letter, option_text))
|
|
||||||
|
|
||||||
# For text-based questions, options are optional
|
|
||||||
if not options_data:
|
|
||||||
# At least return something for single-option questions
|
|
||||||
options_data = [('A', '')]
|
|
||||||
elif len(options_data) < 2 and question_type in ['mcq', 'scq']:
|
|
||||||
return True, {
|
|
||||||
'text': question_text,
|
|
||||||
'options': options_data,
|
|
||||||
'correct_answer': '',
|
|
||||||
'has_answer': False,
|
|
||||||
'question_type': question_type
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract answer from spoiler block
|
|
||||||
correct_answer = None
|
|
||||||
has_answer = False
|
|
||||||
in_spoiler = False
|
|
||||||
answer_lines = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if line.strip().startswith('```spoiler-block:'):
|
|
||||||
in_spoiler = True
|
|
||||||
continue
|
|
||||||
if in_spoiler:
|
|
||||||
if line.strip() == '```':
|
|
||||||
break
|
|
||||||
stripped = line.strip()
|
|
||||||
if stripped:
|
|
||||||
answer_lines.append(stripped)
|
|
||||||
|
|
||||||
# Process collected answer lines
|
|
||||||
if answer_lines:
|
|
||||||
full_answer = ' '.join(answer_lines)
|
|
||||||
|
|
||||||
# Check for TODO
|
|
||||||
if 'TODO' in full_answer.upper():
|
|
||||||
has_answer = False
|
|
||||||
else:
|
|
||||||
has_answer = True
|
|
||||||
|
|
||||||
# For MCQ/SCQ: Extract capital letters
|
|
||||||
if question_type in ['mcq', 'scq']:
|
|
||||||
letters = re.findall(r'\b([A-Z])\b', full_answer)
|
|
||||||
if letters:
|
|
||||||
correct_answer = ','.join(sorted(set(letters)))
|
|
||||||
else:
|
|
||||||
# For text-based questions: Store the full answer text
|
|
||||||
correct_answer = full_answer[:200] # Limit to 200 chars for database field
|
|
||||||
|
|
||||||
return True, {
|
|
||||||
'text': question_text,
|
|
||||||
'options': options_data,
|
|
||||||
'correct_answer': correct_answer,
|
|
||||||
'has_answer': has_answer,
|
|
||||||
'question_type': question_type,
|
|
||||||
'tags': tags if 'tags' in locals() else []
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -460,7 +354,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
|
|||||||
file_mtime = file_path.stat().st_mtime
|
file_mtime = file_path.stat().st_mtime
|
||||||
|
|
||||||
# Calculate path relative to project root
|
# Calculate path relative to project root
|
||||||
from django.conf import settings
|
|
||||||
project_root = settings.BASE_DIR.parent
|
project_root = settings.BASE_DIR.parent
|
||||||
try:
|
try:
|
||||||
file_path_str = str(file_path.relative_to(project_root))
|
file_path_str = str(file_path.relative_to(project_root))
|
||||||
@@ -518,9 +411,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
|
|||||||
# Try to parse as date
|
# Try to parse as date
|
||||||
if exam_folder and '-' in exam_folder:
|
if exam_folder and '-' in exam_folder:
|
||||||
try:
|
try:
|
||||||
from datetime import datetime
|
|
||||||
from quiz.models import Course, Exam
|
|
||||||
|
|
||||||
exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()
|
exam_date = datetime.strptime(exam_folder, '%Y-%m-%d').date()
|
||||||
|
|
||||||
# Get or create course (default to "Anatomi & Histologi 2")
|
# Get or create course (default to "Anatomi & Histologi 2")
|
||||||
@@ -610,17 +500,6 @@ def import_question_file(file_path: Path, base_path: Path, stats: ImportStats, f
|
|||||||
|
|
||||||
|
|
||||||
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
|
def import_questions(folder_path: Path, base_path: Path = None, force: bool = False) -> ImportStats:
|
||||||
"""
|
|
||||||
Import all questions from a folder.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
folder_path: Path to the folder containing question markdown files
|
|
||||||
base_path: Base path for relative path calculations (defaults to folder_path)
|
|
||||||
force: If True, import all files regardless of mtime (for initial import)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ImportStats object with import statistics
|
|
||||||
"""
|
|
||||||
if base_path is None:
|
if base_path is None:
|
||||||
base_path = folder_path
|
base_path = folder_path
|
||||||
|
|
||||||
@@ -634,9 +513,7 @@ def import_questions(folder_path: Path, base_path: Path = None, force: bool = Fa
|
|||||||
|
|
||||||
|
|
||||||
def delete_question_by_path(file_path: Path):
|
def delete_question_by_path(file_path: Path):
|
||||||
"""Delete a question from the database by file path"""
|
|
||||||
try:
|
try:
|
||||||
from django.conf import settings
|
|
||||||
project_root = settings.BASE_DIR.parent
|
project_root = settings.BASE_DIR.parent
|
||||||
file_path_str = str(file_path.relative_to(project_root))
|
file_path_str = str(file_path.relative_to(project_root))
|
||||||
deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()
|
deleted_count, _ = Question.objects.filter(file_path=file_path_str).delete()
|
||||||
|
|||||||
38
quiz/quiz/utils/obsidian_embed_plugin.py
Normal file
38
quiz/quiz/utils/obsidian_embed_plugin.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
__all__ = ["obsidian_embed"]
|
||||||
|
|
||||||
|
# https://help.obsidian.md/embeds
|
||||||
|
|
||||||
|
# Supported:
|
||||||
|
# ![[image-4.png|292x316]]
|
||||||
|
def _parse_embed_size(size):
    """Return ``width``/``height`` attrs parsed from an embed size suffix.

    Accepts ``"292"`` (width only), ``"292x316"``, ``"292x"`` and ``"x316"``.
    Anything that is not an integer (for example an alias such as
    ``"my caption"``) yields an empty dict instead of raising.
    """
    if not size:
        return {}
    width, sep, height = size.partition("x")
    dims = {}
    try:
        if width:
            dims["width"] = int(width)
        if sep and height:
            dims["height"] = int(height)
    except ValueError:
        # Not a size specification; a bad suffix must not abort the
        # parse of the whole document.
        return {}
    return dims


def parse_embed(inline, match, state):
    """Turn an Obsidian embed (``![[file|size]]``) into an ``embed`` token.

    Args:
        inline: The inline parser invoking this rule (unused here).
        match: Regex match exposing a ``filename`` named group holding
            everything between ``![[`` and ``]]``.
        state: Inline parse state; the token is handed to
            ``state.append_token``.

    Returns:
        ``match.end()``, i.e. the offset at which parsing should resume.

    The appended token has type ``"embed"`` and ``attrs`` containing
    ``filename`` plus ``width`` and/or ``height`` when the part after the
    first ``|`` is a valid integer size. A non-numeric suffix is ignored
    rather than raising ``ValueError``.
    """
    filename, _, size = match.group("filename").partition("|")
    attrs = {"filename": filename}
    attrs.update(_parse_embed_size(size))
    state.append_token({"type": "embed", "attrs": attrs})
    return match.end()
|
||||||
|
|
||||||
|
|
||||||
|
# Regex for an Obsidian embed such as ``![[image-4.png|292x316]]``.
# The ``filename`` group captures everything between ``![[`` and ``]]``,
# including an optional ``|size`` suffix.
# NOTE(review): the second ``(?!\s)`` is a lookahead placed directly before
# ``\]\]``, so it can never fail; a lookbehind ``(?<!\s)`` may have been
# intended to reject trailing whitespace - confirm before changing.
INLINE_EMBED_PATTERN = (
    r'!\[\['                        # begins with ![[
    r'(?!\s)'                       # first character is not whitespace
    r'(?P<filename>.+?)'            # content between `![[xx]]`
    r'(?!\s)'                       # next character is not whitespace
    r'\]\]'                         # closing ]]
)
|
||||||
|
|
||||||
|
|
||||||
|
def obsidian_embed(md: "Markdown") -> None:
    """Register the ``embed`` inline rule on the given Markdown instance.

    The rule is registered ahead of the ``link`` rule, using
    ``INLINE_EMBED_PATTERN`` as its pattern and ``parse_embed`` as its
    handler.
    """
    md.inline.register(
        'embed',
        INLINE_EMBED_PATTERN,
        parse_embed,
        before="link",
    )
|
||||||
89
quiz/quiz/utils/question_parser.py
Normal file
89
quiz/quiz/utils/question_parser.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
import dataclasses
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import frontmatter
|
||||||
|
import mistune
|
||||||
|
|
||||||
|
from quiz.utils.obsidian_embed_plugin import obsidian_embed
|
||||||
|
|
||||||
|
markdown = mistune.create_markdown(renderer="ast", plugins=[obsidian_embed])
|
||||||
|
|
||||||
|
|
||||||
|
class Node:
    """Lightweight wrapper around one token dict of the parsed markdown AST.

    Attributes are copied from the token: ``type`` (required), ``raw``
    (default ``""``), ``attrs`` (default ``{}``) and ``children``, which are
    wrapped recursively as ``Node`` instances.
    """

    # Leaf tokens whose ``raw`` value is visible text.
    _RAW_TEXT_TYPES = frozenset({"text", "codespan"})
    # Line-break tokens carry no ``raw``; without special handling the
    # words on either side of the break would be glued together.
    _BREAK_TYPES = frozenset({"softbreak", "linebreak"})

    def __init__(self, token):
        self.type = token["type"]
        self.raw = token.get("raw", "")
        self.attrs = token.get("attrs", {})
        self.children = [Node(token=child) for child in token.get("children", [])]

    def __repr__(self) -> str:
        """Return e.g. ``BlockText(children=[Text(raw='x')])``; empty fields are omitted."""
        parts = []
        if self.raw:
            parts.append(f"raw={self.raw!r}")
        if self.attrs:
            parts.append(f"attrs={self.attrs!r}")
        if self.children:
            parts.append(f"children={self.children!r}")
        # block_text -> BlockText
        pretty = self.type.replace("_", " ").title().replace(" ", "")
        return f"{pretty}(" + ", ".join(parts) + ")"

    @property
    def text(self) -> str:
        """Plain text of this node and all of its descendants.

        ``text`` and ``codespan`` tokens contribute their ``raw`` value,
        ``softbreak``/``linebreak`` tokens contribute a newline, and every
        other node contributes the concatenated text of its children (so a
        childless node such as a code block yields ``""``).
        """
        if self.type in self._RAW_TEXT_TYPES:
            return self.raw
        if self.type in self._BREAK_TYPES:
            return "\n"
        return "".join(child.text for child in self.children)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class ParsedQuestion:
    """Result of parsing one question file: front matter, body and AST nodes."""

    # Front-matter key/value pairs.
    metadata: dict = dataclasses.field(default_factory=dict)
    # Markdown body with the front matter removed.
    raw_content: str = ""
    # Top-level AST nodes of the markdown body.
    nodes: list[Node] = dataclasses.field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_question(path: pathlib.Path):
    """Read the UTF-8 file at *path* and parse it via ``parse_question_from_content``."""
    return parse_question_from_content(path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_question_from_content(content_str: str):
    """Parse question from a content string instead of a file.

    The front matter is split off with ``frontmatter.parse``; the remaining
    body is run through the module-level ``markdown`` parser and every
    resulting token is wrapped in a ``Node``.

    Returns:
        A ``ParsedQuestion`` holding the metadata, the body text and the
        top-level nodes.
    """
    metadata, body = frontmatter.parse(content_str)
    nodes = [Node(token=tok) for tok in markdown(body)]
    return ParsedQuestion(metadata=metadata, raw_content=body, nodes=nodes)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
root = pathlib.Path(__file__).parent.parent.parent.parent
|
||||||
|
print(root)
|
||||||
|
exams = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
|
||||||
|
for file in sorted(exams.glob("*/*.md")):
|
||||||
|
if len(file.stem) > 2:
|
||||||
|
continue
|
||||||
|
question = parse_question(file)
|
||||||
|
print(question.metadata, repr(question.raw_content))
|
||||||
|
continue
|
||||||
|
for node in question.nodes:
|
||||||
|
match node.type:
|
||||||
|
case "heading":
|
||||||
|
print("Heading:", repr(node.text))
|
||||||
|
case "paragraph":
|
||||||
|
print("Paragraph:", repr(node.text))
|
||||||
|
case "list":
|
||||||
|
print("List:")
|
||||||
|
for child in node.children:
|
||||||
|
print(" - List item:", repr(child.text))
|
||||||
|
case "block_code" if node.attrs["info"] == "spoiler-block:":
|
||||||
|
print("Spoiler:", repr(node.raw.rstrip()))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user