# medical-notes/quiz/tests/test_import.py

import pytest
from pathlib import Path
from quiz.utils.importer import parse_markdown_question, import_question_file, ImportStats
from quiz.models import Question, Option


@pytest.mark.django_db
@pytest.mark.import_tests
class TestMarkdownParsing:
    """Test parsing of various Obsidian markdown question formats.

    Every test feeds an in-memory markdown document to
    ``parse_markdown_question`` and checks the returned
    ``(is_question, data)`` pair. Tests that inspect ``data`` first assert
    ``is_question`` so that a parser rejection is reported as a clear
    assertion failure instead of an indexing error on ``data``.
    """

    def test_parse_single_choice_question(self):
        """Test parsing standard single choice question (SCQ)."""
        content = """---
tags: [ah2, provfråga, frågetyp/scq, anatomi]
date: 2022-01-15
---
What is the correct answer?

**Välj ett alternativ:**
- A: Wrong answer
- B: Correct answer
- C: Another wrong

```spoiler-block:
B
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['text'] == 'What is the correct answer?'
        assert data['correct_answer'] == 'B'
        assert data['has_answer'] is True
        assert data['question_type'] == 'scq'
        assert len(data['options']) == 3
        assert data['options'][0] == ('A', 'Wrong answer')
        assert data['options'][1] == ('B', 'Correct answer')

    def test_parse_multiple_choice_question(self):
        """Test parsing multiple choice question (MCQ) with 'och' separator."""
        content = """---
tags: [ah2, provfråga, frågetyp/mcq, cerebrum]
date: 2022-01-15
---
Vilka av följande räknas till storhjärnans basala kärnor?

**Välj två alternativ**
- A: Putamen
- B: Nucleus Ruber
- C: Substantia nigra
- D: Nucleus caudatus

```spoiler-block:
A och D
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert 'Vilka av följande' in data['text']
        assert data['correct_answer'] == 'A,D'  # Normalized to comma-separated
        assert data['has_answer'] is True
        assert data['question_type'] == 'mcq'
        assert len(data['options']) == 4

    def test_parse_multiple_choice_comma_separated(self):
        """Test MCQ whose answer is written as 'B, C' and normalized to 'B,C'."""
        content = """---
tags: [frågetyp/mcq]
---
Select two options:

- A: Option A
- B: Option B
- C: Option C
- D: Option D

```spoiler-block:
B, C
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        # Guard first: fail with a readable assertion if the file is rejected.
        assert is_question is True
        assert data['correct_answer'] == 'B,C'
        assert data['has_answer'] is True

    def test_parse_options_without_colon(self):
        """Test parsing options in format '- A' without text."""
        content = """---
tags: [frågetyp/scq]
---
Which letter?

**Välj ett alternativ:**
- A
- B
- C
- D

```spoiler-block:
C
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        # Guard first: fail with a readable assertion if the file is rejected.
        assert is_question is True
        assert len(data['options']) == 4
        # Letter-only options are expected to carry an empty text part.
        assert all(text == '' for _, text in data['options'])
        assert data['correct_answer'] == 'C'

    def test_parse_textalternativ_question(self):
        """Test text alternative question type."""
        content = """---
tags: [frågetyp/textalternativ, öga, anatomi]
---
Svara på följande frågor:

a) Bokstaven B sitter i en lob, vilken?
- Lobus temporalis
- Lobus frontalis
- Lobus parietalis

b) Vilket funktionellt centra återfinns där?
- Syncentrum
- Motorcentrum
- Somatosensoriskt centrum

```spoiler-block:
a) Lobus parietalis
b) Somatosensoriskt centrum
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['question_type'] == 'textalternativ'
        assert data['has_answer'] is True
        assert 'Lobus parietalis' in data['correct_answer']
        assert 'Somatosensoriskt centrum' in data['correct_answer']

    def test_parse_textfalt_question(self):
        """Test text field (fill-in) question type."""
        content = """---
tags: [frågetyp/textfält, öga]
---
**Fyll i rätt siffra!**

a) Vilken siffra pekar på gula fläcken?
b) Vilken siffra pekar på choroidea?

```spoiler-block:
a) 7
b) 6
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['question_type'] == 'textfält'
        assert data['has_answer'] is True
        assert '7' in data['correct_answer']
        assert '6' in data['correct_answer']

    def test_skip_todo_answers(self):
        """Test that a TODO answer is still a question but has no answer.

        The parser must recognise the file as a question while reporting
        ``has_answer`` as False for the placeholder answer.
        """
        content = """---
tags: [frågetyp/mcq]
---
What is this?

- A: Option A
- B: Option B

```spoiler-block:
TODO
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['has_answer'] is False

    def test_skip_non_question_files(self):
        """Test that files without question tags are not treated as questions."""
        content = """---
tags: [ah2, notes, general]
---
This is just a note, not a question.
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is False

    def test_parse_with_images(self):
        """Test parsing questions with embedded images."""
        content = """---
tags: [frågetyp/scq, bild]
---
![[image.png|338x258]]
Vilken bokstav på denna bild sitter på Mesencephalon?

**Välj ett alternativ:**
- A
- B
- C
- D
- E
- F

```spoiler-block:
F
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert 'Vilken bokstav' in data['text']
        assert data['correct_answer'] == 'F'
        assert len(data['options']) == 6

    def test_parse_yaml_list_format_tags(self):
        """Test parsing tags in YAML block-list format."""
        content = """---
tags:
  - ah2
  - provfråga
  - frågetyp/scq
  - anatomi
date: 2022-01-15
---
Question text?

- A: Answer A
- B: Answer B

```spoiler-block:
A
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['question_type'] == 'scq'

    def test_parse_mixed_option_formats(self):
        """Test parsing with inconsistent spacing after the option colon."""
        content = """---
tags: [frågetyp/mcq]
---
Select correct options:

**Välj två alternativ:**
- A: First option with text
- B:Second option no space
- C:  Third option extra spaces
- D:Fourth with trailing

```spoiler-block:
A och C
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        # Guard first: fail with a readable assertion if the file is rejected.
        assert is_question is True
        assert len(data['options']) == 4
        assert data['options'][0] == ('A', 'First option with text')
        assert data['options'][1] == ('B', 'Second option no space')
        assert data['correct_answer'] == 'A,C'

    def test_parse_question_with_multiple_paragraphs(self):
        """Test question text extraction when the question spans several lines."""
        content = """---
tags: [frågetyp/scq]
---
This is a longer question that spans multiple lines
and has additional context.

**Välj ett alternativ:**
- A: Answer
- B: Another

```spoiler-block:
A
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert 'This is a longer question' in data['text']


@pytest.mark.django_db
@pytest.mark.import_tests
class TestQuestionImport:
    """Test actual import of questions to database.

    Each test writes a markdown fixture into pytest's ``tmp_path``, runs
    ``import_question_file`` on it and inspects the returned status, the
    ``ImportStats`` counters and the resulting ``Question`` rows.

    Fixtures are written with an explicit UTF-8 encoding because they contain
    non-ASCII tags (``frågetyp``); relying on the locale default would make
    the bytes on disk platform dependent.
    NOTE(review): this assumes the importer reads files as UTF-8 — confirm.
    """

    def test_import_single_question(self, tmp_path):
        """Test importing a single question file."""
        question_file = tmp_path / "question1.md"
        question_file.write_text("""---
tags: [frågetyp/scq]
---
Test question?

- A: Correct
- B: Wrong

```spoiler-block:
A
```
""", encoding="utf-8")

        stats = ImportStats()
        result = import_question_file(question_file, tmp_path, stats, force=True)

        assert result in ['imported', 'updated']
        assert stats.questions_with_answers == 1
        assert stats.mcq_questions == 1

        # Verify in database
        question = Question.objects.get(text='Test question?')
        assert question.correct_answer == 'A'
        assert question.options.count() == 2

    def test_import_multi_select_question(self, tmp_path):
        """Test importing multi-select question."""
        question_file = tmp_path / "question2.md"
        question_file.write_text("""---
tags: [frågetyp/mcq]
---
Multi-select question?

- A: First correct
- B: Wrong
- C: Second correct

```spoiler-block:
A och C
```
""", encoding="utf-8")

        stats = ImportStats()
        import_question_file(question_file, tmp_path, stats, force=True)

        question = Question.objects.get(text='Multi-select question?')
        assert question.correct_answer == 'A,C'
        assert question.options.count() == 3

    def test_skip_question_without_answer(self, tmp_path):
        """Test that questions with TODO are not imported."""
        question_file = tmp_path / "question3.md"
        question_file.write_text("""---
tags: [frågetyp/scq]
---
Incomplete question?

- A: Option A
- B: Option B

```spoiler-block:
TODO
```
""", encoding="utf-8")

        stats = ImportStats()
        result = import_question_file(question_file, tmp_path, stats, force=True)

        assert result == 'skipped_todo'
        assert stats.questions_with_todo == 1
        assert Question.objects.filter(text='Incomplete question?').count() == 0

    def test_mtime_tracking(self, tmp_path):
        """Test that file modification time is tracked."""
        question_file = tmp_path / "question4.md"
        question_file.write_text("""---
tags: [frågetyp/scq]
---
What is the correct answer?

**Välj ett alternativ:**
- A: Answer A
- B: Answer B

```spoiler-block:
A
```
""", encoding="utf-8")

        stats = ImportStats()
        result = import_question_file(question_file, tmp_path, stats, force=True)

        # Verify import succeeded
        assert result in ['imported', 'updated'], f"Import failed with status: {result}"
        assert stats.created == 1, f"Expected 1 created, got {stats.created}"

        question = Question.objects.get(text='What is the correct answer?')
        assert question.file_mtime is not None
        assert question.file_mtime == question_file.stat().st_mtime

    def test_update_existing_question(self, tmp_path):
        """Test updating an existing question.

        The file is imported once, rewritten with an extra option and a new
        answer, and imported again with ``force=False``. The second import
        must report ``'updated'`` and the stored question must reflect the
        new content.
        """
        import os

        question_file = tmp_path / "question5.md"

        # Initial import
        question_file.write_text("""---
tags: [frågetyp/scq]
---
What is the original question here?

**Välj ett alternativ:**
- A: First answer
- B: Second answer

```spoiler-block:
A
```
""", encoding="utf-8")

        stats1 = ImportStats()
        result1 = import_question_file(question_file, tmp_path, stats1, force=True)
        assert result1 in ['imported', 'updated'], f"Initial import failed: {result1}"
        assert stats1.created == 1

        # Remember the mtime the first import saw so it can be bumped below.
        imported_mtime = question_file.stat().st_mtime

        # Update the file
        question_file.write_text("""---
tags: [frågetyp/scq]
---
What is the original question here?

**Välj ett alternativ:**
- A: First answer
- B: Second answer
- C: Third option

```spoiler-block:
C
```
""", encoding="utf-8")

        # Ensure mtime changes: set it explicitly instead of sleeping, because
        # a short sleep is not enough on filesystems with coarse timestamp
        # resolution and would make this test flaky.
        bumped_mtime = imported_mtime + 10
        os.utime(question_file, (bumped_mtime, bumped_mtime))

        stats2 = ImportStats()
        result = import_question_file(question_file, tmp_path, stats2, force=False)

        assert result == 'updated'
        assert stats2.updated == 1

        # Verify update
        question = Question.objects.get(text='What is the original question here?')
        assert question.correct_answer == 'C'
        assert question.options.count() == 3


@pytest.mark.django_db
@pytest.mark.import_tests
class TestImportStatistics:
    """Test import statistics tracking."""

    def test_statistics_aggregation(self, tmp_path):
        """Test that statistics are correctly aggregated.

        Three files are created across two folders: one answered question,
        one question whose answer is TODO, and one plain note. The counters
        returned by ``import_questions`` must reflect exactly that mix.

        Fixtures are written as UTF-8 explicitly because the tags contain
        non-ASCII characters and the locale default encoding is platform
        dependent.
        NOTE(review): this assumes the importer reads files as UTF-8 — confirm.
        """
        # Create multiple question files
        (tmp_path / "folder1").mkdir()
        (tmp_path / "folder2").mkdir()

        (tmp_path / "folder1" / "q1.md").write_text("""---
tags: [frågetyp/mcq]
---
Question number one?

**Välj två alternativ:**
- A: Answer A
- B: Answer B
```spoiler-block:
A
```
""", encoding="utf-8")

        (tmp_path / "folder1" / "q2.md").write_text("""---
tags: [frågetyp/scq]
---
Question number two?

**Välj ett alternativ:**
- A: Answer A
```spoiler-block:
TODO
```
""", encoding="utf-8")

        (tmp_path / "folder2" / "q3.md").write_text("""---
tags: [notes]
---
Not a question, just notes
""", encoding="utf-8")

        from quiz.utils.importer import import_questions
        stats = import_questions(tmp_path, tmp_path, force=True)

        assert stats.total_files == 3
        assert stats.mcq_questions == 2
        assert stats.questions_with_answers == 1
        assert stats.questions_with_todo == 1
        assert stats.non_mcq_skipped == 1


@pytest.mark.django_db
class TestEdgeCases:
    """Test edge cases and error handling of ``parse_markdown_question``."""

    def test_malformed_frontmatter(self):
        """Test that an unusable ``date`` value does not hide the question.

        The frontmatter carries ``date: broken``; the file must still be
        recognised as a question because its tags are intact.
        """
        content = """---
tags: [frågetyp/scq]
date: broken
---
Question?
- A: Answer
```spoiler-block:
A
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)
        # Should still parse as question if tags are recognizable
        assert is_question is True

    def test_missing_spoiler_block(self):
        """Test question without spoiler block."""
        content = """---
tags: [frågetyp/scq]
---
Question without answer?

- A: Option A
- B: Option B
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        assert data['has_answer'] is False

    def test_empty_spoiler_block(self):
        """Test question with empty spoiler block."""
        content = """---
tags: [frågetyp/scq]
---
Question with empty answer block?

**Välj ett alternativ:**
- A: Option A

```spoiler-block:
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        assert is_question is True
        # Same key access as test_missing_spoiler_block; a missing key fails
        # the test either way.
        assert data['has_answer'] is False

    def test_special_characters_in_text(self):
        """Test that quotes, markup-like text and non-ASCII characters survive."""
        content = """---
tags: [frågetyp/scq]
---
What about "quotes" & <html> tags?

- A: Option with åäö
- B: Option with émojis 🎉

```spoiler-block:
A
```
"""
        is_question, data = parse_markdown_question(Path("test.md"), content)

        # Guard first: fail with a readable assertion if the file is rejected.
        assert is_question is True
        assert '"quotes"' in data['text']
        assert 'åäö' in data['options'][0][1]