"""
Comprehensive test suite for the question_parser module.

This test suite uses pytest's parametrize decorator to test multiple scenarios
with minimal code duplication. It covers:

1. Node class:
   - Initialization with different token types
   - Attribute handling
   - Children node processing
   - String representation (__repr__)
   - Text extraction from nested structures

2. parse_question function:
   - Metadata parsing (tags, dates, etc.)
   - Raw content extraction
   - Different question types (MCQ, SCQ, text field, matching)
   - Questions with images
   - Edge cases (empty content, missing frontmatter)
   - Document structure preservation

3. ParsedQuestion dataclass:
   - Default values
   - Initialization with custom values

4. Real exam questions:
   - Parsing actual exam questions from the content directory
   - Validation of all short-named question files

Test execution:
    pytest tests/test_question_parser.py -v              # Verbose output
    pytest tests/test_question_parser.py -k "mcq"        # Run only MCQ tests
    pytest tests/test_question_parser.py --collect-only  # List all tests
"""
|
|
|
|
import pathlib
import tempfile

import pytest

from quiz.utils.question_parser import Node, ParsedQuestion, parse_question
|
|
|
|
|
|
@pytest.fixture
def temp_dir():
    """Yield a throwaway directory that is removed once the test finishes."""
    with tempfile.TemporaryDirectory() as tmp:
        yield pathlib.Path(tmp)
|
|
|
|
|
|
@pytest.fixture
def create_question_file(temp_dir):
    """Return a factory that writes a named UTF-8 markdown file into temp_dir."""

    def _write(filename: str, content: str) -> pathlib.Path:
        # Build the file under the per-test temporary directory.
        path = temp_dir / filename
        path.write_text(content, encoding="utf-8")
        return path

    return _write
|
|
|
|
|
|
class TestNode:
    """Unit tests for the Node wrapper class."""

    @pytest.mark.parametrize("token,expected_type,expected_raw", [
        ({"type": "paragraph"}, "paragraph", ""),
        ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"),
        ({"type": "text", "raw": "Some text"}, "text", "Some text"),
        ({"type": "list"}, "list", ""),
    ])
    def test_node_initialization(self, token, expected_type, expected_raw):
        """A Node exposes the token's type and raw text (raw defaults to '')."""
        built = Node(token)
        assert (built.type, built.raw) == (expected_type, expected_raw)

    @pytest.mark.parametrize("token,expected_attrs", [
        (
            {"type": "block_code", "attrs": {"info": "spoiler-block:"}},
            {"info": "spoiler-block:"},
        ),
        ({"type": "paragraph"}, {}),
        ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}),
    ])
    def test_node_attributes(self, token, expected_attrs):
        """Node.attrs mirrors the token's attrs dict, defaulting to empty."""
        assert Node(token).attrs == expected_attrs

    def test_node_children(self):
        """Child tokens are wrapped recursively as Node instances."""
        parent = Node({
            "type": "paragraph",
            "children": [
                {"type": "text", "raw": "Hello "},
                {"type": "text", "raw": "World"},
            ],
        })
        observed = [(child.type, child.raw) for child in parent.children]
        assert observed == [("text", "Hello "), ("text", "World")]

    @pytest.mark.parametrize("token,expected_repr_contains", [
        ({"type": "text", "raw": "test"}, "Text(raw='test')"),
        ({"type": "paragraph"}, "Paragraph()"),
        (
            {"type": "block_code", "attrs": {"info": "python"}},
            "BlockCode(attrs={'info': 'python'})",
        ),
    ])
    def test_node_repr(self, token, expected_repr_contains):
        """repr() renders the CamelCase type name plus any populated fields."""
        assert repr(Node(token)) == expected_repr_contains

    @pytest.mark.parametrize("token,expected_text", [
        ({"type": "text", "raw": "Simple text"}, "Simple text"),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Hello "},
                    {"type": "text", "raw": "World"},
                ],
            },
            "Hello World",
        ),
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Nested "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                    {"type": "text", "raw": " text"},
                ],
            },
            "Nested bold text",
        ),
    ])
    def test_node_text_property(self, token, expected_text):
        """The text property concatenates raw text from the whole subtree."""
        assert Node(token).text == expected_text
|
|
|
|
|
|
class TestParseQuestion:
    """Behavioural tests for the parse_question() entry point."""

    @pytest.mark.parametrize("content,expected_tags", [
        (
            """---
tags: [ah2, provfråga, frågetyp/mcq]
date: 2022-01-15
---
Question content""",
            ["ah2", "provfråga", "frågetyp/mcq"],
        ),
        (
            """---
tags:
- ah2
- provfråga
- frågetyp/scq
date: 2023-05-31
---
Question content""",
            ["ah2", "provfråga", "frågetyp/scq"],
        ),
    ])
    def test_parse_metadata_tags(self, create_question_file, content, expected_tags):
        """Both inline-list and block-list YAML tag syntaxes are accepted."""
        parsed = parse_question(create_question_file("test.md", content))
        assert parsed.metadata["tags"] == expected_tags

    @pytest.mark.parametrize("content,expected_date", [
        (
            """---
tags: [ah2]
date: 2022-01-15
---
Content""",
            "2022-01-15",
        ),
        (
            """---
tags: [ah2]
date: 2023-05-31
---
Content""",
            "2023-05-31",
        ),
    ])
    def test_parse_metadata_date(self, create_question_file, content, expected_date):
        """The frontmatter date round-trips through str() unchanged."""
        parsed = parse_question(create_question_file("test.md", content))
        assert str(parsed.metadata["date"]) == expected_date

    @pytest.mark.parametrize("content,expected_raw", [
        (
            """---
tags: [ah2]
---
Simple question""",
            "Simple question",
        ),
        (
            """---
tags: [ah2]
---
Question with **bold** text""",
            "Question with **bold** text",
        ),
    ])
    def test_parse_raw_content(self, create_question_file, content, expected_raw):
        """raw_content keeps the markdown body (markup included) verbatim."""
        parsed = parse_question(create_question_file("test.md", content))
        assert parsed.raw_content.strip() == expected_raw

    def test_parse_mcq_question(self, create_question_file):
        """An MCQ file yields paragraphs, an option list and a spoiler answer."""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        parsed = parse_question(create_question_file("mcq.md", content))

        assert parsed.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"]
        assert parsed.nodes

        # Group the parsed nodes by their mistune token type.
        by_type = {}
        for node in parsed.nodes:
            by_type.setdefault(node.type, []).append(node)

        assert by_type.get("paragraph")
        assert by_type.get("list")

        # The fenced spoiler block carries the answer key.
        spoilers = by_type.get("block_code")
        assert spoilers
        answer = spoilers[0]
        assert answer.attrs.get("info") == "spoiler-block:"
        assert "A och D" in answer.raw

    def test_parse_scq_question(self, create_question_file):
        """A single-choice question keeps its type tag and its option list."""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, histologi]
date: 2022-06-01
---
Vilken del av CNS syns i bild?
- A: Cerebellum
- B: Diencephalon
- C: Medulla spinalis
- D: Cerebrum
- E: Pons

```spoiler-block:
A
```
"""
        parsed = parse_question(create_question_file("scq.md", content))

        assert "frågetyp/scq" in parsed.metadata["tags"]
        assert any(node.type == "list" for node in parsed.nodes)

    def test_parse_text_field_question(self, create_question_file):
        """A text-field question with an image embed parses into nodes."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi]
date: 2022-01-15
---
![[image-2.png|301x248]]
**Fyll i rätt siffra!**

(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        parsed = parse_question(create_question_file("textfield.md", content))

        assert "frågetyp/textfält" in parsed.metadata["tags"]
        assert parsed.nodes

    def test_parse_matching_question(self, create_question_file):
        """A matching (JA/NEJ) question keeps its tag and option lists."""
        content = """---
tags: [ah2, provfråga, frågetyp/matching, histologi]
date: 2023-05-31
---
Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp:
(1p för alla rätt, inga delpoäng)

- a) oligodendrocyter
- b) Astrocyter
- c) satellitceller
- d) ependymceller
- e) mikroglia
- f) Schwannceller

- JA, finn i CNS
- NEJ, finns inte i CNS

```spoiler-block:
a) JA, finn i CNS
b) JA, finn i CNS
c) NEJ, finns inte i CNS
d) JA, finn i CNS
e) JA, finn i CNS
f) NEJ, finns inte i CNS
```
"""
        parsed = parse_question(create_question_file("matching.md", content))

        assert "frågetyp/matching" in parsed.metadata["tags"]
        assert any(node.type == "list" for node in parsed.nodes)

    def test_parse_question_with_image(self, create_question_file):
        """An Obsidian-style ![[...]] embed becomes an embed node with attrs."""
        content = """---
tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild]
date: 2022-01-15
---
![[image-4.png|292x316]]
**Fyll i rätt siffra !**

(0.5p per rätt svar, inga avdrag för fel svar):

a) Vilken siffra pekar på incus? (1..19)
b) Vilken siffra pekar på tuba auditiva? (1..19)

```spoiler-block:
a) 7
b) 18
```
"""
        parsed = parse_question(create_question_file("image_q.md", content))

        assert "bild" in parsed.metadata["tags"]
        assert "![[image-4.png" in parsed.raw_content

        # The embed is the first child of the first (paragraph) node.
        embed = parsed.nodes[0].children[0]
        assert embed.type == "embed"
        assert embed.attrs == {
            "filename": "image-4.png",
            "width": 292,
            "height": 316,
        }

    @pytest.mark.parametrize("invalid_content", [
        "",  # completely empty file
        "No frontmatter",  # body without a YAML header
        "---\n---\n",  # YAML header with no keys
    ])
    def test_parse_edge_cases(self, create_question_file, invalid_content):
        """Degenerate inputs still come back as a ParsedQuestion instance."""
        parsed = parse_question(create_question_file("edge.md", invalid_content))
        assert isinstance(parsed, ParsedQuestion)

    def test_parse_question_preserves_structure(self, create_question_file):
        """Heading, paragraph, list and code block all survive parsing."""
        content = """---
tags: [ah2]
---
# Heading

Paragraph text

- List item 1
- List item 2

```spoiler-block:
Answer
```
"""
        parsed = parse_question(create_question_file("structure.md", content))

        seen_types = {node.type for node in parsed.nodes}
        assert {"heading", "paragraph", "list", "block_code"} <= seen_types
|
|
|
|
|
|
class TestParsedQuestionDataclass:
    """Tests for the ParsedQuestion dataclass itself."""

    def test_parsed_question_defaults(self):
        """A bare ParsedQuestion starts with empty metadata, content and nodes."""
        empty = ParsedQuestion()
        assert (empty.metadata, empty.raw_content, empty.nodes) == ({}, "", [])

    def test_parsed_question_initialization(self):
        """Explicit constructor arguments are stored untouched."""
        meta = {"tags": ["test"], "date": "2022-01-15"}
        body = "Test content"
        node_list = [Node({"type": "paragraph"})]

        built = ParsedQuestion(
            metadata=meta,
            raw_content=body,
            nodes=node_list,
        )

        assert built.metadata == meta
        assert built.raw_content == body
        assert built.nodes == node_list
|
|
|
|
|
|
class TestRealQuestions:
    """Smoke tests against the real exam files in the content tree."""

    @pytest.fixture
    def exam_dir(self):
        """Locate the real exam directory, skipping the tests when absent."""
        repo_root = pathlib.Path(__file__).parent.parent.parent
        candidate = repo_root / "content" / "Anatomi & Histologi 2" / "Gamla tentor"
        if not candidate.exists():
            pytest.skip("Exam directory not found")
        return candidate

    @pytest.mark.parametrize("exam_date,question_num", [
        ("2022-01-15", "1"),
        ("2022-01-15", "2"),
        ("2022-01-15", "3"),
        ("2022-01-15", "4"),
        ("2022-06-01", "8"),
    ])
    def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num):
        """Selected real questions parse with sane metadata and content."""
        md_file = exam_dir / exam_date / f"{question_num}.md"
        if not md_file.exists():
            pytest.skip(f"Question file {md_file} not found")

        parsed = parse_question(md_file)

        # Metadata must exist and carry the expected course/question tags.
        assert "tags" in parsed.metadata
        tags = parsed.metadata["tags"]
        assert isinstance(tags, list)
        assert "ah2" in tags
        assert "provfråga" in tags

        # Both raw markdown and parsed nodes must be non-empty.
        assert len(parsed.raw_content) > 0
        assert len(parsed.nodes) > 0

    def test_parse_all_short_named_questions(self, exam_dir):
        """Every numerically named question file (1-2 digits) parses cleanly."""
        parsed_count = 0

        for md_file in sorted(exam_dir.glob("*/*.md")):
            stem = md_file.stem
            if stem.isdigit() and len(stem) <= 2:
                result = parse_question(md_file)
                assert isinstance(result, ParsedQuestion)
                assert "tags" in result.metadata
                parsed_count += 1

        # Guard against silently testing nothing.
        assert parsed_count > 0, "No exam questions found to test"
|
|
|
|
|
|
class TestNodeTextExtraction:
    """Text extraction from more involved node trees."""

    @pytest.mark.parametrize("token,expected_text", [
        # Plain leaf token.
        ({"type": "text", "raw": "Hello"}, "Hello"),

        # Paragraph with several text children.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ],
            },
            "A B C",
        ),

        # Inline emphasis / strong wrappers around leaf text.
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}],
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}],
                    },
                ],
            },
            "Normal italic bold",
        ),

        # Node with no children at all.
        ({"type": "paragraph", "children": []}, ""),
    ])
    def test_complex_text_extraction(self, token, expected_text):
        """text flattens arbitrarily nested children into one string."""
        assert Node(token).text == expected_text
|
|
|