1
0
Files
medical-notes/wip/download-subs.py
Johan Dahlin 1a84c48e07
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m47s
vault backup: 2025-12-22 14:32:30
2025-12-22 14:32:30 +01:00

162 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Download Swedish auto-generated subtitles from YouTube videos using yt-dlp.
"""
import subprocess
import sys
import platform
def get_url_from_dialog():
    """
    Prompt for a YouTube URL through a native macOS text-input dialog.

    The dialog is rendered by passing an AppleScript snippet to ``osascript``.

    Returns:
        str: The text returned by the dialog with surrounding whitespace
        removed, or None when not running on macOS or when ``osascript``
        exits with a non-zero status (e.g. the user pressed Cancel).
    """
    on_macos = platform.system() == 'Darwin'
    if not on_macos:
        print("Error: Dialog is only supported on macOS", file=sys.stderr)
        return None

    # AppleScript that asks for one line of text and returns it on stdout
    applescript = '''
display dialog "Enter YouTube URL:" default answer "" with title "Download Swedish Subtitles" buttons {"Cancel", "OK"} default button "OK"
set userInput to text returned of result
return userInput
'''
    osascript_cmd = ['osascript', '-e', applescript]

    try:
        completed = subprocess.run(
            osascript_cmd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        # Non-zero exit: the dialog was cancelled or osascript failed
        return None

    return completed.stdout.strip()
def clean_subtitles(srt_content):
    """
    Clean SRT subtitle content into a single line of plain text.

    Removes cue sequence numbers and timestamp lines, drops lines that have
    already been emitted (auto-generated subtitles often repeat lines across
    consecutive cues), and joins the remaining lines with single spaces.

    A digit-only line is treated as a cue sequence number only when it is
    immediately followed by a timestamp line. Digit-only lines anywhere else
    are spoken content (e.g. "112" or a dosage) and are kept.

    Args:
        srt_content: Raw SRT subtitle content

    Returns:
        Cleaned text content with timestamps removed and duplicates filtered
    """
    # A UTF-8 BOM is not whitespace, so strip() alone would leave it glued to
    # the first sequence number and make that number look like text.
    content = srt_content.lstrip('\ufeff').strip()
    lines = [raw_line.strip() for raw_line in content.split('\n')]

    text_lines = []
    seen_lines = set()
    for index, line in enumerate(lines):
        # Skip empty lines and timestamp lines
        if not line or '-->' in line:
            continue

        # Skip sequence numbers: digit-only lines directly above a timestamp
        if line.isdigit():
            next_line = lines[index + 1] if index + 1 < len(lines) else ''
            if '-->' in next_line:
                continue

        # Skip duplicate lines (auto-generated subtitles often repeat)
        if line in seen_lines:
            continue
        seen_lines.add(line)
        text_lines.append(line)

    # Join all lines with spaces to unwrap
    return ' '.join(text_lines)
def download_subtitles(url):
    """
    Download auto-generated Swedish subtitles from a YouTube video or playlist.

    yt-dlp is run inside a temporary directory so that only subtitle files
    produced by this invocation are read, and unrelated ``*.sv.srt`` files in
    the caller's working directory are never picked up or deleted. Every
    generated subtitle file is cleaned with ``clean_subtitles`` and printed
    to stdout; progress and error messages go to stderr. The temporary
    directory (and the subtitle files in it) is removed automatically, also
    when an error occurs.

    Exits the process with status 1 when yt-dlp fails, is not installed, no
    Swedish subtitle file was produced, or an unexpected error occurs.

    Args:
        url: YouTube video or playlist URL
    """
    import os
    import tempfile

    # The output template is relative, so files land in the subprocess cwd
    cmd = [
        'yt-dlp',
        '--write-auto-sub',
        '--sub-lang', 'sv',
        '--skip-download',
        '--convert-subs', 'srt',
        '-o', '%(id)s.%(ext)s',
        url
    ]

    try:
        with tempfile.TemporaryDirectory(prefix='download-subs-') as workdir:
            print("Downloading subtitles...", file=sys.stderr)
            subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=True,
                cwd=workdir,
            )

            # Collect every generated .srt file (a playlist yields several)
            srt_paths = [
                os.path.join(workdir, name)
                for name in os.listdir(workdir)
                if name.endswith('.sv.srt')
            ]
            if not srt_paths:
                print("Error: No Swedish subtitles found for this video", file=sys.stderr)
                print("Available subtitle languages might not include Swedish auto-generated", file=sys.stderr)
                sys.exit(1)

            # Oldest first, which should roughly follow download order;
            # the name breaks ties between equal timestamps.
            srt_paths.sort(key=lambda path: (os.path.getmtime(path), path))

            for srt_path in srt_paths:
                subtitle_file = os.path.basename(srt_path)
                print(f"Reading subtitles from: {subtitle_file}", file=sys.stderr)

                # Read, clean and print the subtitle file
                with open(srt_path, 'r', encoding='utf-8') as f:
                    raw_content = f.read()
                print(clean_subtitles(raw_content))
    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e}", file=sys.stderr)
        if e.stderr:
            print(e.stderr, file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("Error: yt-dlp not found. Please install it first:", file=sys.stderr)
        print("  pip install yt-dlp", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report with a traceback, then fail the process
        print(f"Unexpected error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)
if __name__ == '__main__':
    # Accept either exactly one URL argument or no arguments at all
    argc = len(sys.argv)
    if argc not in (1, 2):
        print("Usage: python download-subs.py [youtube_url]", file=sys.stderr)
        print("  If no URL is provided on macOS, a dialog will appear.", file=sys.stderr)
        sys.exit(1)

    if argc == 2:
        # URL given on the command line
        url = sys.argv[1]
    elif platform.system() == 'Darwin':
        # No argument: ask for the URL through the macOS dialog
        url = get_url_from_dialog()
        if not url:
            print("No URL provided. Exiting.", file=sys.stderr)
            sys.exit(1)
    else:
        # No argument and no dialog support on this platform
        print("Usage: python download-subs.py <youtube_url>", file=sys.stderr)
        sys.exit(1)

    download_subtitles(url)