145 lines
4.6 KiB
Python
145 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
import re
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
# Base directory for all scanning and renaming: the parent of the directory
# that contains this script.
ROOT = Path(__file__).resolve().parents[1]
|
|
|
|
# Target only markdown content
|
|
def iter_md_files(root: Path):
    """Yield every Markdown file under *root*, skipping hidden locations.

    A path is skipped when any component below ``root`` starts with a dot
    (for example ``.git``, or a dot-file itself). Only the part of the path
    relative to ``root`` is inspected, so a ``root`` that itself lives inside
    a hidden directory is still scanned.

    Args:
        root: Directory to search recursively.

    Yields:
        Path objects for regular ``*.md`` files, in ``Path.rglob`` order.
    """
    for path in root.rglob('*.md'):
        # Judge hiddenness relative to root; the absolute prefix is irrelevant.
        if any(part.startswith('.') for part in path.relative_to(root).parts):
            continue
        # A directory can also be named "*.md"; only real files can be read.
        if not path.is_file():
            continue
        yield path
|
|
|
|
# Map ASCII segments -> Swedish diacritics (only for known names we used).
# apply_diacritics() applies these as plain substring replacements, iterating
# the entries in the order listed here.
REPLACEMENTS = {
    'vavnad': 'vävnad',
    'bindvav': 'bindväv',
    'kortlar': 'körtlar',
    'karl': 'kärl',
    'overarm': 'överarm',
    'overgangsepitel-urotel': 'övergångsepitel-urotel',
    'respirationsvagsepitel': 'respirationsvägsepitel',
    'tradbrosk': 'trådbrosk',
    'retikular': 'retikulär',
    'forhornat': 'förhornat',
    'oforhornat': 'oförhornat',
    'tat-oregelbunden': 'tät-oregelbunden',
    'tat-regelbunden': 'tät-regelbunden',
    'mukos': 'mukös',
    'seros': 'serös',
    'seromukos': 'seromukös',
    'endokrin-follikular-typ': 'endokrin-follikulär-typ',
    'endokrin-strangtyp': 'endokrin-strängtyp',
    'bagarcell': 'bägarcell',
    'vit-fettvav': 'vit-fettväv',
    'brun-fettvav': 'brun-fettväv',
}
|
|
|
|
def apply_diacritics(seg: str) -> str:
    """Return *seg* with known ASCII spellings replaced by Swedish diacritics.

    Every key of ``REPLACEMENTS`` is substituted wherever it occurs as a
    substring, in table order. Two extra substitutions then repair names that
    were already Title-Cased by an earlier run.
    """
    result = seg
    for ascii_form, swedish_form in REPLACEMENTS.items():
        # str.replace is a no-op when the key is absent, so no guard is needed.
        result = result.replace(ascii_form, swedish_form)

    # Second-pass quick fixes for prior Title-Cased names
    for stale, fixed in (('Tat ', 'Tät '), ('Forhornat', 'Förhornat')):
        result = result.replace(stale, fixed)
    return result
|
|
|
|
def to_title_with_spaces(filename_base: str) -> str:
    """Turn a hyphenated file stem into a space-separated, capitalized title.

    The special stem ``_Index_`` is returned untouched. Otherwise hyphens
    become spaces, whitespace runs collapse to a single space, and each word
    is passed through ``str.capitalize`` (first letter upper, rest lower).
    """
    # Preserve special index pages verbatim
    if filename_base == '_Index_':
        return filename_base

    spaced = filename_base.replace('-', ' ')
    words = re.sub(r'\s+', ' ', spaced).strip().split(' ')
    return ' '.join(word.capitalize() for word in words)
|
|
|
|
def compute_new_path(old_path: Path) -> Path:
    """Return the destination path for *old_path* (which must be under ROOT).

    Every path segment gets its diacritics restored. The final segment, when
    it ends in ``.md``, additionally has its stem converted to a spaced title;
    folder segments are otherwise kept as they are.
    """
    segments = old_path.relative_to(ROOT).parts
    last_index = len(segments) - 1
    renamed = []
    for index, segment in enumerate(segments):
        is_markdown_leaf = index == last_index and segment.endswith('.md')
        if not is_markdown_leaf:
            # Folder names: diacritics only, no title-casing.
            renamed.append(apply_diacritics(segment))
            continue
        # File name: drop ".md", fix diacritics, then title-case the stem.
        stem = to_title_with_spaces(apply_diacritics(segment[:-3]))
        renamed.append(stem + '.md')
    return ROOT.joinpath(*renamed)
|
|
|
|
def build_rename_map():
    """Return ``{old_path: new_path}`` for every Markdown file that must move.

    Files whose computed destination equals their current path are omitted.
    """
    candidates = ((path, compute_new_path(path)) for path in iter_md_files(ROOT))
    return {source: target for source, target in candidates if target != source}
|
|
|
|
# Matches [[target]] and [[target|alias]]. Group 1 is the target (any run of
# characters other than ']' and '|'); group 2 is the optional alias, including
# its leading '|'.
WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")
|
|
|
|
def strip_leading_h1(text: str) -> str:
    """Remove a leading Markdown H1 (``# Title``) from *text*.

    Blank lines before the heading are dropped together with it, as is one
    blank line directly after it. Text whose first non-blank line is not an
    H1 is returned unchanged.

    The remaining text is returned exactly as written: lines are split with
    their terminators kept, so CRLF endings and in-line separators that
    ``str.splitlines`` treats as line boundaries (form feed, U+2028, ...) are
    not rewritten to a plain newline.

    Args:
        text: Full file content.

    Returns:
        The content without its leading H1, or *text* itself if there is none.
    """
    lines = text.splitlines(keepends=True)
    i = 0
    # Skip leading blank lines
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    if i >= len(lines) or not lines[i].lstrip().startswith('# '):
        return text

    # Drop the H1 line, and a single blank line right after it if present.
    i += 1
    if i < len(lines) and lines[i].strip() == '':
        i += 1

    remainder = ''.join(lines[i:])
    # A heading-only file that ended with a newline stays newline-terminated.
    if not remainder and text.endswith('\n'):
        return '\n'
    return remainder
|
|
|
|
def rel_without_ext(p: Path) -> str:
    """Return *p* relative to ROOT, without its suffix, using '/' separators."""
    relative = p.relative_to(ROOT)
    extensionless = relative.with_suffix('')
    return str(extensionless).replace('\\', '/')
|
|
|
|
def update_wikilinks(content: str, path_map_rel: dict) -> str:
    """Rewrite ``[[wikilink]]`` targets in *content* using *path_map_rel*.

    Args:
        content: Markdown text that may contain ``[[target]]`` or
            ``[[target|alias]]`` links.
        path_map_rel: Mapping of old link target -> new link target, both
            without the ``.md`` extension.

    Returns:
        The text with every link whose target is a key of *path_map_rel*
        pointed at the mapped value. Aliases are preserved. A target carrying
        a ``#fragment`` (heading or block reference) is matched on the part
        before the first ``#`` and keeps its fragment. Links with unknown
        targets are left untouched.
    """
    def lookup(target_path):
        # Links may spell the target with or without the .md extension.
        key = target_path[:-3] if target_path.endswith('.md') else target_path
        return path_map_rel.get(key)

    def repl(m):
        target = m.group(1).strip()
        alias = m.group(2) or ''  # includes leading '|'

        # Exact match first, so a mapped name that itself contains '#' wins.
        new = lookup(target)
        if new:
            return f"[[{new}{alias}]]"

        # Otherwise retry without the "#heading" / "#^block" fragment.
        base, marker, fragment = target.partition('#')
        if marker:
            new = lookup(base.strip())
            if new:
                return f"[[{new}#{fragment}{alias}]]"
        return m.group(0)

    return WIKILINK_RE.sub(repl, content)
|
|
|
|
def main():
    """Rename Markdown files under ROOT and rewrite their content to match.

    Steps:
      1. Compute the old -> new path mapping.
      2. Move each file, deepest paths first. A rename whose destination
         already exists as a different file is skipped and reported on
         stderr instead of silently overwriting that file.
      3. In every Markdown file, strip the leading H1 and update wikilinks
         that point at files which were actually renamed.
    """
    import sys  # local import: only needed for the skip report below

    # 1) Build rename map
    rename_map = build_rename_map()

    # 2) Rename paths (deepest first), recording only the renames performed
    # so that links are never rewritten to a target that was not created.
    link_map = {}  # old_rel_noext -> new_rel_noext
    ordered = sorted(rename_map.items(), key=lambda kv: len(kv[0].parts), reverse=True)
    for old, new in ordered:
        # shutil.move would replace an existing destination file. samefile()
        # lets a case-only rename through on case-insensitive filesystems.
        if new.exists() and not new.samefile(old):
            print(f"skipping rename, target already exists: {old} -> {new}", file=sys.stderr)
            continue
        new.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(old), str(new))
        link_map[rel_without_ext(old)] = rel_without_ext(new)

    # 3) Rewrite content in all md files: strip leading H1 and update wikilinks
    for p in iter_md_files(ROOT):
        text = p.read_text(encoding='utf-8')
        new_text = strip_leading_h1(text)
        new_text = update_wikilinks(new_text, link_map)
        if new_text != text:
            p.write_text(new_text, encoding='utf-8')
|
|
|
|
# Run the rename/rewrite pass only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|