Initial commit
content/scripts/fix_index_and_cleanup.py (new file, 123 lines)
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
import os
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

def compute_title_for_index(path: Path) -> str:
    parent = path.parent.name
    # Title-case first letter, keep diacritics
    if not parent:
        parent = 'Index'
    title = parent[:1].upper() + parent[1:]
    return f"{title} – översikt"

def ensure_frontmatter_title(text: str, title: str) -> str:
    if text.startswith('---'):
        # update existing title or add
        lines = text.splitlines()
        end = 1
        has_title = False
        while end < len(lines) and lines[end].strip() != '---':
            if lines[end].startswith('title:'):
                lines[end] = f"title: {title}"
                has_title = True
            end += 1
        if end < len(lines) and lines[end].strip() == '---':
            if not has_title:
                lines.insert(1, f"title: {title}")
            return '\n'.join(lines)
        # malformed frontmatter, prepend new
    # no frontmatter -> add
    return f"---\ntitle: {title}\n---\n\n" + text.lstrip('\n')
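# Quick illustration of ensure_frontmatter_title (the sample strings below are
# assumptions, not content from this repo): text without frontmatter gets a
# fresh block prepended,
#   ensure_frontmatter_title('Body text.\n', 'Kärl – översikt')
#   -> '---\ntitle: Kärl – översikt\n---\n\nBody text.\n'
# while an existing block has its 'title:' line rewritten in place.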
def ensure_h1(text: str, title: str) -> str:
    lines = text.splitlines()
    # Detect frontmatter
    if lines[:1] == ['---']:
        end = 1
        while end < len(lines) and lines[end].strip() != '---':
            end += 1
        if end < len(lines):
            # position after frontmatter block
            insert_at = end + 1
            # skip blank lines
            while insert_at < len(lines) and lines[insert_at].strip() == '':
                insert_at += 1
            if insert_at < len(lines) and lines[insert_at].startswith('# '):
                lines[insert_at] = f"# {title}"
            else:
                lines[insert_at:insert_at] = [f"# {title}", ""]
            return '\n'.join(lines) + ('\n' if text.endswith('\n') else '')
    # No frontmatter: ensure H1 at top
    i = 0
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    if i < len(lines) and lines[i].startswith('# '):
        lines[i] = f"# {title}"
        return '\n'.join(lines) + ('\n' if text.endswith('\n') else '')
    return f"# {title}\n\n" + text

def fix_index_pages():
    for p in ROOT.rglob('_Index_.md'):
        text = p.read_text(encoding='utf-8')
        title = compute_title_for_index(p)
        # add/update frontmatter title
        t1 = ensure_frontmatter_title(text, title)
        # ensure first H1 present and correct
        t2 = ensure_h1(t1, title)
        # remove any duplicate frontmatter blocks beyond the first
        t3 = dedupe_extra_frontmatter(t2)
        if t3 != text:
            p.write_text(t3, encoding='utf-8')

def dedupe_extra_frontmatter(text: str) -> str:
    lines = text.splitlines()
    result = []
    i = 0
    # keep first frontmatter (if at top)
    if i < len(lines) and lines[i].strip() == '---':
        result.append(lines[i]); i += 1
        while i < len(lines):
            result.append(lines[i])
            if lines[i].strip() == '---':
                i += 1
                break
            i += 1
    # now copy the rest but strip any further '---' blocks
    in_block = False
    while i < len(lines):
        if lines[i].strip() == '---':
            in_block = not in_block
            i += 1
            continue
        if not in_block:
            result.append(lines[i])
        i += 1
    return '\n'.join(result) + ('\n' if text.endswith('\n') else '')
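# Illustration with an assumed sample: only the first frontmatter block
# survives; any later '---'-delimited block is stripped wholesale,
#   dedupe_extra_frontmatter('---\ntitle: A\n---\nText\n---\nold: B\n---\nMore\n')
#   -> '---\ntitle: A\n---\nText\nMore\n'
# Note that a bare horizontal rule ('---') would also toggle the block state
# and swallow the text that follows it.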
def remove_empty_dirs(root: Path):
    # remove empty directories bottom-up
    removed = True
    while removed:
        removed = False
        for d in sorted([p for p in root.rglob('*') if p.is_dir()], key=lambda x: len(str(x)), reverse=True):
            try:
                if not any(d.iterdir()):
                    d.rmdir()
                    removed = True
            except Exception:
                pass

def main():
    fix_index_pages()
    # cleanup likely old ascii dirs if empty
    for path in ['anatomi/karl', 'histologi/vavnad', 'histologi/kortlar']:
        d = ROOT / path
        if d.exists():
            remove_empty_dirs(d)
    # general sweep to remove empty leaf dirs
    remove_empty_dirs(ROOT)

if __name__ == '__main__':
    main()
content/scripts/format_goal_docs.py (new file, 56 lines)
@@ -0,0 +1,56 @@
#!/usr/bin/env python3
import re
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

def fmt_file(p: Path, rules):
    text = p.read_text(encoding='utf-8')
    orig = text
    for pat, repl in rules:
        text = re.sub(pat, repl, text, flags=re.MULTILINE)
    if text != orig:
        p.write_text(text, encoding='utf-8')

def main():
    f11 = ROOT / 'Målbeskrivning/1.1 Rörelseapparaten.md'
    f12 = ROOT / 'Målbeskrivning/1.2 Hjärta och cirkulation.md'

    if f11.exists():
        rules11 = [
            (r'^Lärandemål\s*$', '## Lärandemål\n'),
            (r'^Detaljerad målbeskrivning\s*$', '## Detaljerad målbeskrivning\n'),
            (r'^Anatomisk terminologi\s*$', '### Anatomisk terminologi\n'),
            (r'^Rörelseapparatens principer\s*$', '### Rörelseapparatens principer\n'),
            (r'^Rörelseapparatens ben, leder och muskler\s*$', '### Rörelseapparatens ben, leder och muskler\n'),
        ]
        fmt_file(f11, rules11)
        # Ensure file starts with a title
        text11 = f11.read_text(encoding='utf-8')
        if not text11.lstrip().startswith('# '):
            f11.write_text('# Rörelseapparatens anatomi (LPG001, block 1)\n\n' + text11, encoding='utf-8')

    if f12.exists():
        rules12 = [
            (r'^Lärandemål kursdel C.*Anatomi\s*$', '## Lärandemål (kursdel C/F) – Anatomi\n'),
            (r'^Hjärta\s*$', '## Hjärta\n'),
            (r'^Blodkärl\s*$', '## Blodkärl\n'),
        ]
        fmt_file(f12, rules12)
        # Bulletize linked lines under Hjärta and Blodkärl sections
        t12 = f12.read_text(encoding='utf-8')
        def bulletize_section(text, header):
            pattern = re.compile(rf'(^## {header}\s*$)(.*?)(^## |\Z)', re.M | re.S)
            def repl(m):
                head, body, tail = m.group(1), m.group(2), m.group(3)
                # add '- ' before lines starting with '[[' or alphabetic word followed by link
                body2 = re.sub(r'^(\[\[)', r'- \1', body, flags=re.M)
                body2 = re.sub(r'^(\*?\s*\w.*?\[\[)', r'- \1', body2, flags=re.M)
                return head + body2 + (tail or '')
            return pattern.sub(repl, text)
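        # Sketch of the intended effect on an assumed sample (not repo content):
        #   ## Hjärta
        #   [[Kammare och förmak]]
        #   Kranskärlen, se [[Koronarkärl]]
        # becomes
        #   ## Hjärta
        #   - [[Kammare och förmak]]
        #   - Kranskärlen, se [[Koronarkärl]]
        # Lines already starting with '-' match neither pattern and are left alone.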
        t12 = bulletize_section(t12, 'Hjärta')
        t12 = bulletize_section(t12, 'Blodkärl')
        f12.write_text(t12, encoding='utf-8')

if __name__ == '__main__':
    main()
content/scripts/normalize_notes.py (new file, 144 lines)
@@ -0,0 +1,144 @@
#!/usr/bin/env python3
import os
import re
import shutil
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

# Target only markdown content
def iter_md_files(root: Path):
    for p in root.rglob('*.md'):
        # Skip hidden dirs (e.g., .git) and virtualenvs
        if any(part.startswith('.') for part in p.parts):
            continue
        yield p

# Map ASCII segments -> Swedish diacritics (only for known names we used)
REPLACEMENTS = {
    'vavnad': 'vävnad',
    'bindvav': 'bindväv',
    'kortlar': 'körtlar',
    'karl': 'kärl',
    'overarm': 'överarm',
    'overgangsepitel-urotel': 'övergångsepitel-urotel',
    'respirationsvagsepitel': 'respirationsvägsepitel',
    'tradbrosk': 'trådbrosk',
    'retikular': 'retikulär',
    'forhornat': 'förhornat',
    'oforhornat': 'oförhornat',
    'tat-oregelbunden': 'tät-oregelbunden',
    'tat-regelbunden': 'tät-regelbunden',
    'mukos': 'mukös',
    'seros': 'serös',
    'seromukos': 'seromukös',
    'endokrin-follikular-typ': 'endokrin-follikulär-typ',
    'endokrin-strangtyp': 'endokrin-strängtyp',
    'bagarcell': 'bägarcell',
    'vit-fettvav': 'vit-fettväv',
    'brun-fettvav': 'brun-fettväv',
}

def apply_diacritics(seg: str) -> str:
    s = seg
    for k, v in REPLACEMENTS.items():
        if k in s:
            s = s.replace(k, v)
    # Second-pass quick fixes for prior Title-Cased names
    s = s.replace('Tat ', 'Tät ')
    s = s.replace('Forhornat', 'Förhornat')
    return s

def to_title_with_spaces(filename_base: str) -> str:
    # Preserve special index pages verbatim
    if filename_base == '_Index_':
        return filename_base
    # Replace hyphens with spaces
    s = filename_base.replace('-', ' ')
    # Collapse whitespace
    s = re.sub(r'\s+', ' ', s).strip()
    # Title Case each word
    s = ' '.join(w.capitalize() for w in s.split(' '))
    return s
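# Worked example (assumed filename, not from the repo):
#   apply_diacritics('bindvav-tat-regelbunden')    -> 'bindväv-tät-regelbunden'
#   to_title_with_spaces('bindväv-tät-regelbunden') -> 'Bindväv Tät Regelbunden'
# Note str.capitalize() lowercases the rest of each word, so an acronym in a
# filename would come out as e.g. 'Dna'.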
def compute_new_path(old_path: Path) -> Path:
    rel = old_path.relative_to(ROOT)
    parts = list(rel.parts)
    new_parts = []
    for i, seg in enumerate(parts):
        # File segment with extension
        if i == len(parts) - 1 and seg.endswith('.md'):
            base = seg[:-3]
            base = apply_diacritics(base)
            base = to_title_with_spaces(base)
            new_parts.append(base + '.md')
        else:
            seg2 = apply_diacritics(seg)
            # Keep folder names as-is except diacritics; avoid title-case for common folders
            new_parts.append(seg2)
    return ROOT.joinpath(*new_parts)
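# End to end (assumed path): 'histologi/vavnad/bindvav-tat-regelbunden.md'
# maps to 'histologi/vävnad/Bindväv Tät Regelbunden.md'; folders only gain
# diacritics, while the file basename is also title-cased with spaces.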
def build_rename_map():
    mapping = {}
    for p in iter_md_files(ROOT):
        newp = compute_new_path(p)
        if newp != p:
            mapping[p] = newp
    return mapping

WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")
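# WIKILINK_RE captures the link target in group 1 and an optional '|alias'
# (pipe included) in group 2; for a sample link '[[histologi/vävnad|Vävnad]]'
# it yields ('histologi/vävnad', '|Vävnad').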
def strip_leading_h1(text: str) -> str:
    lines = text.splitlines()
    i = 0
    # Skip leading blank lines
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    if i < len(lines) and lines[i].lstrip().startswith('# '):
        # Drop this H1 line
        i += 1
        # If the next is blank, drop a single blank too
        if i < len(lines) and lines[i].strip() == '':
            i += 1
        return '\n'.join(lines[i:]) + ('\n' if text.endswith('\n') else '')
    return text

def rel_without_ext(p: Path) -> str:
    return str(p.relative_to(ROOT).with_suffix('')).replace('\\', '/')

def update_wikilinks(content: str, path_map_rel: dict) -> str:
    def repl(m):
        target = m.group(1).strip()
        alias = m.group(2) or ''  # includes leading '|'
        # Normalize target (strip potential .md)
        t = target[:-3] if target.endswith('.md') else target
        new = path_map_rel.get(t)
        if new:
            return f"[[{new}{alias}]]"
        return m.group(0)
    return WIKILINK_RE.sub(repl, content)
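# For instance (assumed mapping, not from the repo): with
#   path_map_rel = {'histologi/vavnad/bindvav': 'histologi/vävnad/Bindväv'}
# update_wikilinks('se [[histologi/vavnad/bindvav|bindväv]]', path_map_rel)
# returns 'se [[histologi/vävnad/Bindväv|bindväv]]'; unmapped links pass
# through untouched.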
def main():
    # 1) Build rename map
    rename_map = build_rename_map()

    # Map for link updates: old_rel_noext -> new_rel_noext
    link_map = {}
    for old, new in rename_map.items():
        link_map[rel_without_ext(old)] = rel_without_ext(new)

    # 2) Rename paths (deepest first)
    for old, new in sorted(rename_map.items(), key=lambda kv: len(str(kv[0]).split(os.sep)), reverse=True):
        new.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(old), str(new))

    # 3) Rewrite content in all md files: strip leading H1 and update wikilinks
    for p in iter_md_files(ROOT):
        text = p.read_text(encoding='utf-8')
        new_text = strip_leading_h1(text)
        new_text = update_wikilinks(new_text, link_map)
        if new_text != text:
            p.write_text(new_text, encoding='utf-8')

if __name__ == '__main__':
    main()