Initial commit
content/scripts/fix_index_and_cleanup.py (new file, 123 lines)
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
import os
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

def compute_title_for_index(path: Path) -> str:
    parent = path.parent.name
    # Title-case first letter, keep diacritics
    if not parent:
        parent = 'Index'
    title = parent[:1].upper() + parent[1:]
    return f"{title} – översikt"

def ensure_frontmatter_title(text: str, title: str) -> str:
    if text.startswith('---'):
        # update existing title or add
        lines = text.splitlines()
        end = 1
        has_title = False
        while end < len(lines) and lines[end].strip() != '---':
            if lines[end].startswith('title:'):
                lines[end] = f"title: {title}"
                has_title = True
            end += 1
        if end < len(lines) and lines[end].strip() == '---':
            if not has_title:
                lines.insert(1, f"title: {title}")
            return '\n'.join(lines)
        # malformed frontmatter, prepend new
    # no frontmatter -> add
    return f"---\ntitle: {title}\n---\n\n" + text.lstrip('\n')
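# Quick illustration of ensure_frontmatter_title (the sample strings below are
# assumptions, not content from this repo): text without frontmatter gets a
# fresh block prepended,
#   ensure_frontmatter_title('Body text.\n', 'Kärl – översikt')
#   -> '---\ntitle: Kärl – översikt\n---\n\nBody text.\n'
# while an existing block has its 'title:' line rewritten in place.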
def ensure_h1(text: str, title: str) -> str:
    lines = text.splitlines()
    # Detect frontmatter
    if lines[:1] == ['---']:
        end = 1
        while end < len(lines) and lines[end].strip() != '---':
            end += 1
        if end < len(lines):
            # position after frontmatter block
            insert_at = end + 1
            # skip blank lines
            while insert_at < len(lines) and lines[insert_at].strip() == '':
                insert_at += 1
            if insert_at < len(lines) and lines[insert_at].startswith('# '):
                lines[insert_at] = f"# {title}"
            else:
                lines[insert_at:insert_at] = [f"# {title}", ""]
            return '\n'.join(lines) + ('\n' if text.endswith('\n') else '')
    # No frontmatter: ensure H1 at top
    i = 0
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    if i < len(lines) and lines[i].startswith('# '):
        lines[i] = f"# {title}"
        return '\n'.join(lines) + ('\n' if text.endswith('\n') else '')
    return f"# {title}\n\n" + text

def fix_index_pages():
    for p in ROOT.rglob('_Index_.md'):
        text = p.read_text(encoding='utf-8')
        title = compute_title_for_index(p)
        # add/update frontmatter title
        t1 = ensure_frontmatter_title(text, title)
        # ensure first H1 present and correct
        t2 = ensure_h1(t1, title)
        # remove any duplicate frontmatter blocks beyond the first
        t3 = dedupe_extra_frontmatter(t2)
        if t3 != text:
            p.write_text(t3, encoding='utf-8')

def dedupe_extra_frontmatter(text: str) -> str:
    lines = text.splitlines()
    result = []
    i = 0
    # keep first frontmatter (if at top)
    if i < len(lines) and lines[i].strip() == '---':
        result.append(lines[i]); i += 1
        while i < len(lines):
            result.append(lines[i])
            if lines[i].strip() == '---':
                i += 1
                break
            i += 1
    # now copy the rest but strip any further '---' blocks
    in_block = False
    while i < len(lines):
        if lines[i].strip() == '---':
            in_block = not in_block
            i += 1
            continue
        if not in_block:
            result.append(lines[i])
        i += 1
    return '\n'.join(result) + ('\n' if text.endswith('\n') else '')
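# Illustration with an assumed sample: only the first frontmatter block
# survives; any later '---'-delimited block is stripped wholesale,
#   dedupe_extra_frontmatter('---\ntitle: A\n---\nText\n---\nold: B\n---\nMore\n')
#   -> '---\ntitle: A\n---\nText\nMore\n'
# Note that a bare horizontal rule ('---') would also toggle the block state
# and swallow the text that follows it.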
def remove_empty_dirs(root: Path):
    # remove empty directories bottom-up
    removed = True
    while removed:
        removed = False
        for d in sorted([p for p in root.rglob('*') if p.is_dir()], key=lambda x: len(str(x)), reverse=True):
            try:
                if not any(d.iterdir()):
                    d.rmdir()
                    removed = True
            except Exception:
                pass

def main():
    fix_index_pages()
    # cleanup likely old ascii dirs if empty
    for path in ['anatomi/karl', 'histologi/vavnad', 'histologi/kortlar']:
        d = ROOT / path
        if d.exists():
            remove_empty_dirs(d)
    # general sweep to remove empty leaf dirs
    remove_empty_dirs(ROOT)

if __name__ == '__main__':
    main()
content/scripts/format_goal_docs.py (new file, 56 lines)
@@ -0,0 +1,56 @@
#!/usr/bin/env python3
import re
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

def fmt_file(p: Path, rules):
    text = p.read_text(encoding='utf-8')
    orig = text
    for pat, repl in rules:
        text = re.sub(pat, repl, text, flags=re.MULTILINE)
    if text != orig:
        p.write_text(text, encoding='utf-8')

def main():
    f11 = ROOT / 'Målbeskrivning/1.1 Rörelseapparaten.md'
    f12 = ROOT / 'Målbeskrivning/1.2 Hjärta och cirkulation.md'

    if f11.exists():
        rules11 = [
            (r'^Lärandemål\s*$', '## Lärandemål\n'),
            (r'^Detaljerad målbeskrivning\s*$', '## Detaljerad målbeskrivning\n'),
            (r'^Anatomisk terminologi\s*$', '### Anatomisk terminologi\n'),
            (r'^Rörelseapparatens principer\s*$', '### Rörelseapparatens principer\n'),
            (r'^Rörelseapparatens ben, leder och muskler\s*$', '### Rörelseapparatens ben, leder och muskler\n'),
        ]
        fmt_file(f11, rules11)
        # Ensure file starts with a title
        text11 = f11.read_text(encoding='utf-8')
        if not text11.lstrip().startswith('# '):
            f11.write_text('# Rörelseapparatens anatomi (LPG001, block 1)\n\n' + text11, encoding='utf-8')

    if f12.exists():
        rules12 = [
            (r'^Lärandemål kursdel C.*Anatomi\s*$', '## Lärandemål (kursdel C/F) – Anatomi\n'),
            (r'^Hjärta\s*$', '## Hjärta\n'),
            (r'^Blodkärl\s*$', '## Blodkärl\n'),
        ]
        fmt_file(f12, rules12)
        # Bulletize linked lines under Hjärta and Blodkärl sections
        t12 = f12.read_text(encoding='utf-8')
        def bulletize_section(text, header):
            pattern = re.compile(rf'(^## {header}\s*$)(.*?)(^## |\Z)', re.M | re.S)
            def repl(m):
                head, body, tail = m.group(1), m.group(2), m.group(3)
                # add '- ' before lines starting with '[[' or alphabetic word followed by link
                body2 = re.sub(r'^(\[\[)', r'- \1', body, flags=re.M)
                body2 = re.sub(r'^(\*?\s*\w.*?\[\[)', r'- \1', body2, flags=re.M)
                return head + body2 + (tail or '')
            return pattern.sub(repl, text)
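        # Sketch of the intended effect on an assumed sample (not repo content):
        #   ## Hjärta
        #   [[Kammare och förmak]]
        #   Kranskärlen, se [[Koronarkärl]]
        # becomes
        #   ## Hjärta
        #   - [[Kammare och förmak]]
        #   - Kranskärlen, se [[Koronarkärl]]
        # Lines already starting with '-' match neither pattern and are left alone.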
        t12 = bulletize_section(t12, 'Hjärta')
        t12 = bulletize_section(t12, 'Blodkärl')
        f12.write_text(t12, encoding='utf-8')

if __name__ == '__main__':
    main()
content/scripts/normalize_notes.py (new file, 144 lines)
@@ -0,0 +1,144 @@
#!/usr/bin/env python3
import os
import re
import shutil
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

# Target only markdown content
def iter_md_files(root: Path):
    for p in root.rglob('*.md'):
        # Skip hidden dirs (e.g., .git) and virtualenvs
        if any(part.startswith('.') for part in p.parts):
            continue
        yield p

# Map ASCII segments -> Swedish diacritics (only for known names we used)
REPLACEMENTS = {
    'vavnad': 'vävnad',
    'bindvav': 'bindväv',
    'kortlar': 'körtlar',
    'karl': 'kärl',
    'overarm': 'överarm',
    'overgangsepitel-urotel': 'övergångsepitel-urotel',
    'respirationsvagsepitel': 'respirationsvägsepitel',
    'tradbrosk': 'trådbrosk',
    'retikular': 'retikulär',
    'forhornat': 'förhornat',
    'oforhornat': 'oförhornat',
    'tat-oregelbunden': 'tät-oregelbunden',
    'tat-regelbunden': 'tät-regelbunden',
    'mukos': 'mukös',
    'seros': 'serös',
    'seromukos': 'seromukös',
    'endokrin-follikular-typ': 'endokrin-follikulär-typ',
    'endokrin-strangtyp': 'endokrin-strängtyp',
    'bagarcell': 'bägarcell',
    'vit-fettvav': 'vit-fettväv',
    'brun-fettvav': 'brun-fettväv',
}

def apply_diacritics(seg: str) -> str:
    s = seg
    for k, v in REPLACEMENTS.items():
        if k in s:
            s = s.replace(k, v)
    # Second-pass quick fixes for prior Title-Cased names
    s = s.replace('Tat ', 'Tät ')
    s = s.replace('Forhornat', 'Förhornat')
    return s

def to_title_with_spaces(filename_base: str) -> str:
    # Preserve special index pages verbatim
    if filename_base == '_Index_':
        return filename_base
    # Replace hyphens with spaces
    s = filename_base.replace('-', ' ')
    # Collapse whitespace
    s = re.sub(r'\s+', ' ', s).strip()
    # Title Case each word
    s = ' '.join(w.capitalize() for w in s.split(' '))
    return s
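# Worked example (assumed filename, not from the repo):
#   apply_diacritics('bindvav-tat-regelbunden')    -> 'bindväv-tät-regelbunden'
#   to_title_with_spaces('bindväv-tät-regelbunden') -> 'Bindväv Tät Regelbunden'
# Note str.capitalize() lowercases the rest of each word, so an acronym in a
# filename would come out as e.g. 'Dna'.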
def compute_new_path(old_path: Path) -> Path:
    rel = old_path.relative_to(ROOT)
    parts = list(rel.parts)
    new_parts = []
    for i, seg in enumerate(parts):
        # File segment with extension
        if i == len(parts) - 1 and seg.endswith('.md'):
            base = seg[:-3]
            base = apply_diacritics(base)
            base = to_title_with_spaces(base)
            new_parts.append(base + '.md')
        else:
            seg2 = apply_diacritics(seg)
            # Keep folder names as-is except diacritics; avoid title-case for common folders
            new_parts.append(seg2)
    return ROOT.joinpath(*new_parts)
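# End to end (assumed path): 'histologi/vavnad/bindvav-tat-regelbunden.md'
# maps to 'histologi/vävnad/Bindväv Tät Regelbunden.md'; folders only gain
# diacritics, while the file basename is also title-cased with spaces.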
def build_rename_map():
    mapping = {}
    for p in iter_md_files(ROOT):
        newp = compute_new_path(p)
        if newp != p:
            mapping[p] = newp
    return mapping

WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")
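# WIKILINK_RE captures the link target in group 1 and an optional '|alias'
# (pipe included) in group 2; for a sample link '[[histologi/vävnad|Vävnad]]'
# it yields ('histologi/vävnad', '|Vävnad').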
def strip_leading_h1(text: str) -> str:
    lines = text.splitlines()
    i = 0
    # Skip leading blank lines
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    if i < len(lines) and lines[i].lstrip().startswith('# '):
        # Drop this H1 line
        i += 1
        # If the next is blank, drop a single blank too
        if i < len(lines) and lines[i].strip() == '':
            i += 1
        return '\n'.join(lines[i:]) + ('\n' if text.endswith('\n') else '')
    return text

def rel_without_ext(p: Path) -> str:
    return str(p.relative_to(ROOT).with_suffix('')).replace('\\', '/')

def update_wikilinks(content: str, path_map_rel: dict) -> str:
    def repl(m):
        target = m.group(1).strip()
        alias = m.group(2) or ''  # includes leading '|'
        # Normalize target (strip potential .md)
        t = target[:-3] if target.endswith('.md') else target
        new = path_map_rel.get(t)
        if new:
            return f"[[{new}{alias}]]"
        return m.group(0)
    return WIKILINK_RE.sub(repl, content)
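# For instance (assumed mapping, not from the repo): with
#   path_map_rel = {'histologi/vavnad/bindvav': 'histologi/vävnad/Bindväv'}
# update_wikilinks('se [[histologi/vavnad/bindvav|bindväv]]', path_map_rel)
# returns 'se [[histologi/vävnad/Bindväv|bindväv]]'; unmapped links pass
# through untouched.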
def main():
    # 1) Build rename map
    rename_map = build_rename_map()

    # Map for link updates: old_rel_noext -> new_rel_noext
    link_map = {}
    for old, new in rename_map.items():
        link_map[rel_without_ext(old)] = rel_without_ext(new)

    # 2) Rename paths (deepest first)
    for old, new in sorted(rename_map.items(), key=lambda kv: len(str(kv[0]).split(os.sep)), reverse=True):
        new.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(old), str(new))

    # 3) Rewrite content in all md files: strip leading H1 and update wikilinks
    for p in iter_md_files(ROOT):
        text = p.read_text(encoding='utf-8')
        new_text = strip_leading_h1(text)
        new_text = update_wikilinks(new_text, link_map)
        if new_text != text:
            p.write_text(new_text, encoding='utf-8')

if __name__ == '__main__':
    main()