""" Comprehensive test suite for the question_parser module. This test suite uses pytest's parametrize decorator to test multiple scenarios with minimal code duplication. It covers: 1. Node class: - Initialization with different token types - Attribute handling - Children node processing - String representation (__repr__) - Text extraction from nested structures 2. parse_question function: - Metadata parsing (tags, dates, etc.) - Raw content extraction - Different question types (MCQ, SCQ, text field, matching) - Questions with images - Edge cases (empty content, missing frontmatter) - Document structure preservation 3. ParsedQuestion dataclass: - Default values - Initialization with custom values 4. Real exam questions: - Parsing actual exam questions from the content directory - Validation of all short-named question files Test execution: pytest tests/test_question_parser.py -v # Verbose output pytest tests/test_question_parser.py -k "mcq" # Run only MCQ tests pytest tests/test_question_parser.py --collect-only # List all tests """ import pathlib import tempfile import pytest from quiz.utils.question_parser import Node, ParsedQuestion, parse_question @pytest.fixture def temp_dir(): """Create a temporary directory for test files""" with tempfile.TemporaryDirectory() as tmpdir: yield pathlib.Path(tmpdir) @pytest.fixture def create_question_file(temp_dir): """Factory fixture to create question files""" def _create_file(filename: str, content: str) -> pathlib.Path: file_path = temp_dir / filename file_path.write_text(content, encoding="utf-8") return file_path return _create_file class TestNode: """Test the Node class""" @pytest.mark.parametrize("token,expected_type,expected_raw", [ ({"type": "paragraph"}, "paragraph", ""), ({"type": "heading", "raw": "Test Heading"}, "heading", "Test Heading"), ({"type": "text", "raw": "Some text"}, "text", "Some text"), ({"type": "list"}, "list", ""), ]) def test_node_initialization(self, token, expected_type, expected_raw): """Test Node initialization with different token types""" node = Node(token) assert node.type == expected_type assert node.raw == expected_raw @pytest.mark.parametrize("token,expected_attrs", [ ({"type": "block_code", "attrs": {"info": "spoiler-block:"}}, {"info": "spoiler-block:"}), ({"type": "paragraph"}, {}), ({"type": "heading", "attrs": {"level": 2}}, {"level": 2}), ]) def test_node_attributes(self, token, expected_attrs): """Test Node attributes handling""" node = Node(token) assert node.attrs == expected_attrs def test_node_children(self): """Test Node children handling""" token = { "type": "paragraph", "children": [ {"type": "text", "raw": "Hello "}, {"type": "text", "raw": "World"}, ] } node = Node(token) assert len(node.children) == 2 assert node.children[0].type == "text" assert node.children[0].raw == "Hello " assert node.children[1].type == "text" assert node.children[1].raw == "World" @pytest.mark.parametrize("token,expected_repr_contains", [ ({"type": "text", "raw": "test"}, "Text(raw='test')"), ({"type": "paragraph"}, "Paragraph()"), ({"type": "block_code", "attrs": {"info": "python"}}, "BlockCode(attrs={'info': 'python'})"), ]) def test_node_repr(self, token, expected_repr_contains): """Test Node __repr__ method""" node = Node(token) assert repr(node) == expected_repr_contains @pytest.mark.parametrize("token,expected_text", [ ({"type": "text", "raw": "Simple text"}, "Simple text"), ( { "type": "paragraph", "children": [ {"type": "text", "raw": "Hello "}, {"type": "text", "raw": "World"}, ] }, "Hello World" ), ( { 
"type": "paragraph", "children": [ {"type": "text", "raw": "Nested "}, { "type": "strong", "children": [{"type": "text", "raw": "bold"}] }, {"type": "text", "raw": " text"}, ] }, "Nested bold text" ), ]) def test_node_text_property(self, token, expected_text): """Test Node text property extraction""" node = Node(token) assert node.text == expected_text class TestParseQuestion: """Test the parse_question function""" @pytest.mark.parametrize("content,expected_tags", [ ( """--- tags: [ah2, provfråga, frågetyp/mcq] date: 2022-01-15 --- Question content""", ["ah2", "provfråga", "frågetyp/mcq"] ), ( """--- tags: - ah2 - provfråga - frågetyp/scq date: 2023-05-31 --- Question content""", ["ah2", "provfråga", "frågetyp/scq"] ), ]) def test_parse_metadata_tags(self, create_question_file, content, expected_tags): """Test parsing of metadata tags in different formats""" file_path = create_question_file("test.md", content) question = parse_question(file_path) assert question.metadata["tags"] == expected_tags @pytest.mark.parametrize("content,expected_date", [ ( """--- tags: [ah2] date: 2022-01-15 --- Content""", "2022-01-15" ), ( """--- tags: [ah2] date: 2023-05-31 --- Content""", "2023-05-31" ), ]) def test_parse_metadata_date(self, create_question_file, content, expected_date): """Test parsing of metadata date""" file_path = create_question_file("test.md", content) question = parse_question(file_path) assert str(question.metadata["date"]) == expected_date @pytest.mark.parametrize("content,expected_raw", [ ( """--- tags: [ah2] --- Simple question""", "Simple question" ), ( """--- tags: [ah2] --- Question with **bold** text""", "Question with **bold** text" ), ]) def test_parse_raw_content(self, create_question_file, content, expected_raw): """Test parsing of raw content""" file_path = create_question_file("test.md", content) question = parse_question(file_path) assert question.raw_content.strip() == expected_raw def test_parse_mcq_question(self, create_question_file): """Test parsing a complete MCQ question""" content = """--- tags: [ah2, provfråga, frågetyp/mcq, cerebrum] date: 2022-01-15 --- Vilka av följande räknas till storhjärnans basala kärnor? **Välj två alternativ** - A: Putamen - B: Nucleus Ruber - C: Substantia nigra - D: Nucleus caudatus ```spoiler-block: A och D ``` """ file_path = create_question_file("mcq.md", content) question = parse_question(file_path) assert question.metadata["tags"] == ["ah2", "provfråga", "frågetyp/mcq", "cerebrum"] assert len(question.nodes) > 0 # Find paragraph nodes paragraphs = [n for n in question.nodes if n.type == "paragraph"] assert len(paragraphs) > 0 # Find list nodes lists = [n for n in question.nodes if n.type == "list"] assert len(lists) > 0 # Find spoiler block code_blocks = [n for n in question.nodes if n.type == "block_code"] assert len(code_blocks) > 0 spoiler = code_blocks[0] assert spoiler.attrs.get("info") == "spoiler-block:" assert "A och D" in spoiler.raw def test_parse_scq_question(self, create_question_file): """Test parsing a single choice question""" content = """--- tags: [ah2, provfråga, frågetyp/scq, histologi] date: 2022-06-01 --- Vilken del av CNS syns i bild? 
- A: Cerebellum - B: Diencephalon - C: Medulla spinalis - D: Cerebrum - E: Pons ```spoiler-block: A ``` """ file_path = create_question_file("scq.md", content) question = parse_question(file_path) assert "frågetyp/scq" in question.metadata["tags"] lists = [n for n in question.nodes if n.type == "list"] assert len(lists) > 0 def test_parse_text_field_question(self, create_question_file): """Test parsing a text field question""" content = """--- tags: [ah2, provfråga, frågetyp/textfält, öga, anatomi] date: 2022-01-15 --- ![[image-2.png|301x248]] **Fyll i rätt siffra!** (0.5p per rätt svar, inga avdrag för fel svar): a) Vilken siffra pekar på gula fläcken? b) Vilken siffra pekar på choroidea? ```spoiler-block: a) 7 b) 6 ``` """ file_path = create_question_file("textfield.md", content) question = parse_question(file_path) assert "frågetyp/textfält" in question.metadata["tags"] assert len(question.nodes) > 0 def test_parse_matching_question(self, create_question_file): """Test parsing a matching question""" content = """--- tags: [ah2, provfråga, frågetyp/matching, histologi] date: 2023-05-31 --- Vilka av följande stödjeceller finns i CNS? Markera JA eller NEJ för varje angiven celltyp: (1p för alla rätt, inga delpoäng) - a) oligodendrocyter - b) Astrocyter - c) satellitceller - d) ependymceller - e) mikroglia - f) Schwannceller - JA, finn i CNS - NEJ, finns inte i CNS ```spoiler-block: a) JA, finn i CNS b) JA, finn i CNS c) NEJ, finns inte i CNS d) JA, finn i CNS e) JA, finn i CNS f) NEJ, finns inte i CNS ``` """ file_path = create_question_file("matching.md", content) question = parse_question(file_path) assert "frågetyp/matching" in question.metadata["tags"] lists = [n for n in question.nodes if n.type == "list"] assert len(lists) > 0 def test_parse_question_with_image(self, create_question_file): """Test parsing a question with embedded images""" content = """--- tags: [ah2, provfråga, frågetyp/textfält, öra, anatomi, bild] date: 2022-01-15 --- ![[image-4.png|292x316]] **Fyll i rätt siffra !** (0.5p per rätt svar, inga avdrag för fel svar): a) Vilken siffra pekar på incus? (1..19) b) Vilken siffra pekar på tuba auditiva? 
(1..19) ```spoiler-block: a) 7 b) 18 ``` """ file_path = create_question_file("image_q.md", content) question = parse_question(file_path) assert "bild" in question.metadata["tags"] assert "![[image-4.png" in question.raw_content embed = question.nodes[0].children[0] assert embed.type == "embed" assert embed.attrs == { "filename": "image-4.png", "width": 292, "height": 316 } @pytest.mark.parametrize("invalid_content", [ "", # Empty content "No frontmatter", # No frontmatter "---\n---\n", # Empty frontmatter ]) def test_parse_edge_cases(self, create_question_file, invalid_content): """Test parsing edge cases""" file_path = create_question_file("edge.md", invalid_content) question = parse_question(file_path) assert isinstance(question, ParsedQuestion) def test_parse_question_preserves_structure(self, create_question_file): """Test that parsing preserves the document structure""" content = """--- tags: [ah2] --- # Heading Paragraph text - List item 1 - List item 2 ```spoiler-block: Answer ``` """ file_path = create_question_file("structure.md", content) question = parse_question(file_path) node_types = [n.type for n in question.nodes] assert "heading" in node_types assert "paragraph" in node_types assert "list" in node_types assert "block_code" in node_types class TestParsedQuestionDataclass: """Test the ParsedQuestion dataclass""" def test_parsed_question_defaults(self): """Test ParsedQuestion default values""" question = ParsedQuestion() assert question.metadata == {} assert question.raw_content == "" assert question.nodes == [] def test_parsed_question_initialization(self): """Test ParsedQuestion initialization with values""" metadata = {"tags": ["test"], "date": "2022-01-15"} content = "Test content" nodes = [Node({"type": "paragraph"})] question = ParsedQuestion( metadata=metadata, raw_content=content, nodes=nodes ) assert question.metadata == metadata assert question.raw_content == content assert question.nodes == nodes class TestRealQuestions: """Test parsing real questions from the exam files""" @pytest.fixture def exam_dir(self): """Get the real exam directory""" root = pathlib.Path(__file__).parent.parent.parent exam_path = root / "content" / "Anatomi & Histologi 2" / "Gamla tentor" if exam_path.exists(): return exam_path pytest.skip("Exam directory not found") @pytest.mark.parametrize("exam_date,question_num", [ ("2022-01-15", "1"), ("2022-01-15", "2"), ("2022-01-15", "3"), ("2022-01-15", "4"), ("2022-06-01", "8"), ]) def test_parse_real_exam_questions(self, exam_dir, exam_date, question_num): """Test parsing real exam questions""" file_path = exam_dir / exam_date / f"{question_num}.md" if not file_path.exists(): pytest.skip(f"Question file {file_path} not found") question = parse_question(file_path) # Verify metadata exists and has required fields assert "tags" in question.metadata assert isinstance(question.metadata["tags"], list) assert "ah2" in question.metadata["tags"] assert "provfråga" in question.metadata["tags"] # Verify content was parsed assert len(question.raw_content) > 0 assert len(question.nodes) > 0 def test_parse_all_short_named_questions(self, exam_dir): """Test parsing all questions with short filenames (1-2 chars)""" questions_found = 0 for file in sorted(exam_dir.glob("*/*.md")): if len(file.stem) <= 2 and file.stem.isdigit(): question = parse_question(file) assert isinstance(question, ParsedQuestion) assert "tags" in question.metadata questions_found += 1 # Ensure we found at least some questions assert questions_found > 0, "No exam questions found to test" 
class TestNodeTextExtraction:
    """Test text extraction from complex node structures"""

    @pytest.mark.parametrize("token,expected_text", [
        # Simple text
        ({"type": "text", "raw": "Hello"}, "Hello"),
        # Paragraph with multiple text children
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "A "},
                    {"type": "text", "raw": "B "},
                    {"type": "text", "raw": "C"},
                ]
            },
            "A B C"
        ),
        # Nested formatting
        (
            {
                "type": "paragraph",
                "children": [
                    {"type": "text", "raw": "Normal "},
                    {
                        "type": "emphasis",
                        "children": [{"type": "text", "raw": "italic"}]
                    },
                    {"type": "text", "raw": " "},
                    {
                        "type": "strong",
                        "children": [{"type": "text", "raw": "bold"}]
                    },
                ]
            },
            "Normal italic bold"
        ),
        # Empty node
        ({"type": "paragraph", "children": []}, ""),
    ])
    def test_complex_text_extraction(self, token, expected_text):
        """Test text extraction from complex nested structures"""
        node = Node(token)
        assert node.text == expected_text