1
0
Files
medical-notes/stroma/quiz/utils/tests/test_question_parser.py
Johan Dahlin 50366b9b9c
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m29s
vault backup: 2025-12-26 02:09:22
2025-12-26 02:09:22 +01:00

538 lines
16 KiB
Python

"""
Comprehensive test suite for the question_parser module.
This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:
1. Node class:
- Initialization with different token types
- Attribute handling
- Children node processing
- String representation (__repr__)
- Text extraction from nested structures
2. parse_question function:
- Metadata parsing (tags, dates, etc.)
- Raw content extraction
- Different question types (MCQ, SCQ, text field, matching)
- Questions with images
- Edge cases (empty content, missing frontmatter)
- Document structure preservation
3. ParsedQuestion dataclass:
- Default values
- Initialization with custom values
4. Real exam questions:
- Parsing actual exam questions from the content directory
- Validation of all short-named question files
Test execution:
pytest tests/test_question_parser.py -v # Verbose output
pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests
pytest tests/test_question_parser.py --collect-only # List all tests
"""
import pathlib
import tempfile
import pytest
from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
@pytest.fixture
def temp_dir():
    """Yield a scratch directory as a ``pathlib.Path`` for test files.

    The directory is created with ``tempfile.TemporaryDirectory`` and is
    cleaned up when that context manager exits, i.e. after the test that
    requested the fixture has finished.
    """
    with tempfile.TemporaryDirectory() as scratch:
        scratch_path = pathlib.Path(scratch)
        yield scratch_path
@pytest.fixture
def create_question_file(temp_dir):
    """Provide a factory that writes question files into ``temp_dir``.

    The returned callable takes a filename and the file content, writes the
    content UTF-8 encoded to ``temp_dir / filename`` and returns that path.
    """

    def _write_question(filename: str, content: str) -> pathlib.Path:
        target = temp_dir / filename
        target.write_text(content, encoding="utf-8")
        return target

    return _write_question
class TestNode:
    """Unit tests for building ``Node`` objects from token dictionaries."""

    @pytest.mark.parametrize(
        "token,expected_type,expected_raw",
        [
            ({"type": "paragraph"}, "paragraph", ""),
            ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
            ({"type": "text", "raw": "Some text"}, "text", "Some text"),
            ({"type": "list"}, "list", ""),
        ],
    )
    def test_node_initialization(self, token, expected_type, expected_raw):
        """``type`` and ``raw`` follow the token; a missing ``raw`` is expected to be ``""``."""
        built = Node(token)
        assert (built.type, built.raw) == (expected_type, expected_raw)

    @pytest.mark.parametrize(
        "token,expected_attrs",
        [
            (
                {"type": "block_code", "attrs": {"info": "spoiler-block:"}},
                {"info": "spoiler-block:"},
            ),
            ({"type": "paragraph"}, {}),
            ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
        ],
    )
    def test_node_attributes(self, token, expected_attrs):
        """``attrs`` equals the token's ``attrs``; a missing key is expected to give ``{}``."""
        built = Node(token)
        assert built.attrs == expected_attrs

    def test_node_children(self):
        """Child tokens become child nodes, in order, with their own type and raw text."""
        parent = Node(
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Hello "},
                    {"type": "text", "raw": "World"},
                ],
            }
        )
        assert len(parent.children) == 2
        first, second = parent.children[0], parent.children[1]
        assert first.type == "text"
        assert first.raw == "Hello "
        assert second.type == "text"
        assert second.raw == "World"

    @pytest.mark.parametrize(
        "token,expected_repr",
        [
            ({"type": "text", "raw": "test"}, "Text(raw='test')"),
            ({"type": "paragraph"}, "Paragraph()"),
            (
                {"type": "block_code", "attrs": {"info": "python"}},
                "BlockCode(attrs={'info': 'python'})",
            ),
        ],
    )
    def test_node_repr(self, token, expected_repr):
        """``repr(node)`` must match the expected string exactly."""
        built = Node(token)
        assert repr(built) == expected_repr

    @pytest.mark.parametrize(
        "token,expected_text",
        [
            ({"type": "text", "raw": "Simple text"}, "Simple text"),
            (
                {
                    "type": "paragraph",
                    "children": [
                        {"type": "text", "raw": "Hello "},
                        {"type": "text", "raw": "World"},
                    ],
                },
                "Hello World",
            ),
            (
                {
                    "type": "paragraph",
                    "children": [
                        {"type": "text", "raw": "Nested "},
                        {
                            "type": "strong",
                            "children": [{"type": "text", "raw": "bold"}],
                        },
                        {"type": "text", "raw": " text"},
                    ],
                },
                "Nested bold text",
            ),
        ],
    )
    def test_node_text_property(self, token, expected_text):
        """``text`` concatenates the raw text found in the node and its descendants."""
        built = Node(token)
        assert built.text == expected_text
class TestParseQuestion:
    """Tests for ``parse_question`` run against markdown files written to disk."""

    @staticmethod
    def _nodes_of_type(question, node_type):
        """Return the top-level nodes of ``question`` whose ``type`` is ``node_type``."""
        return [node for node in question.nodes if node.type == node_type]

    @pytest.mark.parametrize(
        "content,expected_tags",
        [
            (
                """---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
                ["ah2", "provfråga", "frågetyp/mcq"],
            ),
            (
                """---
tags:
- ah2
- provfråga
- frågetyp/scq
date: 2023-05-31
---
Question content""",
                ["ah2", "provfråga", "frågetyp/scq"],
            ),
        ],
    )
    def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
        """Frontmatter tags are read from both the inline and the dash-list form."""
        parsed = parse_question(create_question_file("test.md", content))
        assert parsed.metadata["tags"] == expected_tags

    @pytest.mark.parametrize(
        "content,expected_date",
        [
            (
                """---
tags: [ah2]
date: 2022-01-15
---
Content""",
                "2022-01-15",
            ),
            (
                """---
tags: [ah2]
date: 2023-05-31
---
Content""",
                "2023-05-31",
            ),
        ],
    )
    def test_parse_metadata_date(self, create_question_file, content, expected_date):
        """The frontmatter date, converted with ``str``, equals the written date."""
        parsed = parse_question(create_question_file("test.md", content))
        date_value = parsed.metadata["date"]
        assert str(date_value) == expected_date

    @pytest.mark.parametrize(
        "content,expected_raw",
        [
            (
                """---
tags: [ah2]
---
Simple question""",
                "Simple question",
            ),
            (
                """---
tags: [ah2]
---
Question with **bold** text""",
                "Question with **bold** text",
            ),
        ],
    )
    def test_parse_raw_content(self, create_question_file, content, expected_raw):
        """``raw_content`` (stripped) is the body text that follows the frontmatter."""
        parsed = parse_question(create_question_file("test.md", content))
        assert parsed.raw_content.strip() == expected_raw

    def test_parse_mcq_question(self, create_question_file):
        """A full multiple-choice question yields paragraph, list and spoiler nodes."""
        markdown = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?
**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus
```spoiler-block:
A och D
```
"""
        parsed = parse_question(create_question_file("mcq.md", markdown))

        assert parsed.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
        assert len(parsed.nodes) > 0

        # The question text is expected as at least one paragraph node.
        assert self._nodes_of_type(parsed, "paragraph")

        # The answer alternatives are expected as at least one list node.
        assert self._nodes_of_type(parsed, "list")

        # The answer lives in the first fenced block, tagged "spoiler-block:".
        fenced = self._nodes_of_type(parsed, "block_code")
        assert fenced
        answer_block = fenced[0]
        assert answer_block.attrs.get("info") == "spoiler-block:"
        assert "A och D" in answer_block.raw

    def test_parse_scq_question(self, create_question_file):
        """A single-choice question keeps its type tag and has a list node."""
        markdown = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?
- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons
```spoiler-block:
A
```
"""
        parsed = parse_question(create_question_file("scq.md", markdown))

        assert "frågetyp/scq" in parsed.metadata["tags"]
        assert self._nodes_of_type(parsed, "list")

    def test_parse_text_field_question(self, create_question_file):
        """A text-field question keeps its type tag and produces at least one node."""
        markdown = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]
**Fyll i rätt siffra!**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?
```spoiler-block:
a) 7
b) 6
```
"""
        parsed = parse_question(create_question_file("textfield.md", markdown))

        assert "frågetyp/textfält" in parsed.metadata["tags"]
        assert len(parsed.nodes) > 0

    def test_parse_matching_question(self, create_question_file):
        """A matching question keeps its type tag and has a list node."""
        markdown = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)
- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller
- JA, finn i CNS
- NEJ, finns inte i CNS
```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
        parsed = parse_question(create_question_file("matching.md", markdown))

        assert "frågetyp/matching" in parsed.metadata["tags"]
        assert self._nodes_of_type(parsed, "list")

    def test_parse_question_with_image(self, create_question_file):
        """An ``![[file|WxH]]`` embed becomes an ``embed`` node with filename and size."""
        markdown = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]
**Fyll i rätt siffra !**
(0.5p per rätt svar, inga avdrag för fel svar):
a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)
```spoiler-block:
a) 7
b) 18
```
"""
        parsed = parse_question(create_question_file("image_q.md", markdown))

        assert "bild" in parsed.metadata["tags"]
        assert "![[image-4.png" in parsed.raw_content

        # The embed is expected as the first child of the first top-level node.
        image_node = parsed.nodes[0].children[0]
        assert image_node.type == "embed"
        assert image_node.attrs == {
            "filename": "image-4.png",
            "width": 292,
            "height": 316,
        }

    @pytest.mark.parametrize(
        "invalid_content",
        [
            "",  # Empty content
            "No frontmatter",  # No frontmatter
            "---\n---\n",  # Empty frontmatter
        ],
    )
    def test_parse_edge_cases(self, create_question_file, invalid_content):
        """Degenerate inputs must still come back as a ``ParsedQuestion``."""
        parsed = parse_question(create_question_file("edge.md", invalid_content))
        assert isinstance(parsed, ParsedQuestion)

    def test_parse_question_preserves_structure(self, create_question_file):
        """Heading, paragraph, list and code block all appear among the top-level nodes."""
        markdown = """---
tags: [ah2]
---
# Heading
Paragraph text
- List item 1
- List item 2
```spoiler-block:
Answer
```
"""
        parsed = parse_question(create_question_file("structure.md", markdown))

        seen_types = [node.type for node in parsed.nodes]
        for wanted in ("heading", "paragraph", "list", "block_code"):
            assert wanted in seen_types
class TestParsedQuestionDataclass:
    """Tests for constructing ``ParsedQuestion`` directly."""

    def test_parsed_question_defaults(self):
        """Without arguments, the fields are an empty dict, empty string and empty list."""
        blank = ParsedQuestion()
        assert blank.metadata == {}
        assert blank.raw_content == ""
        assert blank.nodes == []

    def test_parsed_question_initialization(self):
        """Keyword arguments are stored on the matching attributes."""
        given_metadata = {"tags": ["test"], "date": "2022-01-15"}
        given_content = "Test content"
        given_nodes = [Node({"type": "paragraph"})]

        built = ParsedQuestion(
            metadata=given_metadata,
            raw_content=given_content,
            nodes=given_nodes,
        )

        assert built.metadata == given_metadata
        assert built.raw_content == given_content
        assert built.nodes == given_nodes
class TestRealQuestions:
    """Tests that run ``parse_question`` on exam files from the content tree.

    Every test here is skipped when the exam directory is absent.
    """

    @pytest.fixture
    def exam_dir(self):
        """Return the old-exams directory, or skip the test if it does not exist."""
        # NOTE(review): the root is three directories above this file; confirm
        # that this is where ``content/`` lives, otherwise these tests always skip.
        this_dir = pathlib.Path(__file__).parent
        root = this_dir.parent.parent
        exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
        if not exam_path.exists():
            pytest.skip("Exam directory not found")
        return exam_path

    @pytest.mark.parametrize(
        "exam_date,question_num",
        [
            ("2022-01-15", "1"),
            ("2022-01-15", "2"),
            ("2022-01-15", "3"),
            ("2022-01-15", "4"),
            ("2022-06-01", "8"),
        ],
    )
    def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
        """Selected real questions parse with the required tags and non-empty content."""
        file_path = exam_dir / exam_date / f"{question_num}.md"
        if not file_path.exists():
            pytest.skip(f"Question file {file_path} not found")

        parsed = parse_question(file_path)

        # Metadata must carry a tag list that includes the course and exam tags.
        assert "tags" in parsed.metadata
        tags = parsed.metadata["tags"]
        assert isinstance(tags, list)
        for required in ("ah2", "provfråga"):
            assert required in tags

        # Both the raw text and the parsed node list must be non-empty.
        assert len(parsed.raw_content) > 0
        assert len(parsed.nodes) > 0

    def test_parse_all_short_named_questions(self, exam_dir):
        """Every ``<exam>/<N>.md`` file with a 1-2 digit name parses and has tags."""
        candidates = [
            path
            for path in sorted(exam_dir.glob("*/*.md"))
            if len(path.stem) <= 2 and path.stem.isdigit()
        ]

        for path in candidates:
            parsed = parse_question(path)
            assert isinstance(parsed, ParsedQuestion)
            assert "tags" in parsed.metadata

        # Guard against a vacuous pass when the directory holds no question files.
        assert len(candidates) > 0, "No exam questions found to test"
class TestNodeTextExtraction:
    """Tests for ``Node.text`` on flat, nested and empty token trees."""

    # (token, expected text) pairs consumed by the parametrized test below.
    _TEXT_CASES = [
        # Simple text
        ({"type": "text", "raw": "Hello"}, "Hello"),
        # Paragraph with multiple text children
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ],
            },
            "A B C",
        ),
        # Nested formatting
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}],
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                ],
            },
            "Normal italic bold",
        ),
        # Empty node
        ({"type": "paragraph", "children": []}, ""),
    ]

    @pytest.mark.parametrize("token,expected_text", _TEXT_CASES)
    def test_complex_text_extraction(self, token, expected_text):
        """``text`` joins the raw text of all descendants in document order."""
        built = Node(token)
        assert built.text == expected_text