All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m47s
162 lines
4.8 KiB
Python
162 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Download Swedish auto-generated subtitles from YouTube videos using yt-dlp.
|
|
"""
|
|
import subprocess
|
|
import sys
|
|
import platform
|
|
|
|
def get_url_from_dialog():
    """
    Ask the user for a YouTube URL through a native macOS input dialog.

    The dialog is produced by running a small AppleScript via ``osascript``.

    Returns:
        str: The text typed into the dialog, stripped of surrounding
            whitespace. None when not running on macOS or when
            ``osascript`` exits with a non-zero status (for example the
            user pressed Cancel).
    """
    running_on_mac = platform.system() == 'Darwin'
    if not running_on_mac:
        print("Error: Dialog is only supported on macOS", file=sys.stderr)
        return None

    # AppleScript: prompt for text and hand the answer back on stdout
    dialog_script = '''
    display dialog "Enter YouTube URL:" default answer "" with title "Download Swedish Subtitles" buttons {"Cancel", "OK"} default button "OK"
    set userInput to text returned of result
    return userInput
    '''

    completed = subprocess.run(
        ['osascript', '-e', dialog_script],
        capture_output=True,
        text=True,
    )

    # A non-zero exit status means the user cancelled or osascript failed
    if completed.returncode != 0:
        return None

    return completed.stdout.strip()
|
|
|
|
def clean_subtitles(srt_content):
    """
    Clean SRT subtitle content into a single line of plain text.

    Cue index lines, timestamp lines and blank lines are dropped, repeated
    text lines are removed (only the first occurrence anywhere in the file
    is kept), and the remaining lines are joined with single spaces.

    A digits-only line is treated as a cue index only when the line right
    after it is a timestamp line; digits-only subtitle text (e.g. a year)
    is kept.

    Args:
        srt_content: Raw SRT subtitle content

    Returns:
        Cleaned text content with timestamps removed and duplicates filtered
    """
    # A UTF-8 BOM is not whitespace, so strip() would leave it glued to the
    # first cue index and that index would then not look like a number.
    raw_lines = [
        line.strip()
        for line in srt_content.lstrip('\ufeff').strip().split('\n')
    ]

    text_lines = []
    seen_lines = set()

    for index, line in enumerate(raw_lines):
        # Skip empty lines and timestamp lines
        if not line or '-->' in line:
            continue

        # Skip cue sequence numbers: digits directly followed by a timestamp
        if line.isdigit():
            following = raw_lines[index + 1] if index + 1 < len(raw_lines) else ''
            if '-->' in following:
                continue

        # Skip duplicate lines (auto-generated subtitles often repeat)
        if line in seen_lines:
            continue

        seen_lines.add(line)
        text_lines.append(line)

    # Join all lines with spaces to unwrap
    return ' '.join(text_lines)
|
|
|
|
def download_subtitles(url):
    """
    Download auto-generated Swedish subtitles from a YouTube video or playlist.

    Runs yt-dlp with ``--skip-download`` so that only the subtitle track is
    fetched and converted to SRT, cleans each resulting file with
    ``clean_subtitles`` and prints the cleaned text to stdout. Status and
    error messages are written to stderr.

    Subtitle files are written to a private temporary directory that is
    removed afterwards, so files already present in the current directory
    are never read or deleted. When several subtitle files are produced
    (e.g. for a playlist) each one is printed, in filename order.

    Args:
        url: YouTube video or playlist URL

    Raises:
        SystemExit: With status 1 when yt-dlp is not installed, yt-dlp
            exits with an error, no Swedish subtitle file was produced, or
            any other unexpected error occurs.
    """
    import glob
    import os
    import tempfile
    import traceback

    try:
        with tempfile.TemporaryDirectory(prefix='download-subs-') as work_dir:
            # '%' is special in yt-dlp output templates; double it so the
            # directory part of the template is taken literally.
            output_template = os.path.join(
                work_dir.replace('%', '%%'), '%(id)s.%(ext)s'
            )

            cmd = [
                'yt-dlp',
                '--write-auto-sub',
                '--sub-lang', 'sv',
                '--skip-download',
                '--convert-subs', 'srt',
                '-o', output_template,
                # End of options: a URL/ID starting with '-' must not be
                # parsed as a yt-dlp flag.
                '--',
                url,
            ]

            print("Downloading subtitles...", file=sys.stderr)
            subprocess.run(cmd, capture_output=True, text=True, check=True)

            # Find the generated .srt file(s); sorted for a stable order
            srt_files = sorted(
                glob.glob(os.path.join(glob.escape(work_dir), '*.sv.srt'))
            )

            if not srt_files:
                print("Error: No Swedish subtitles found for this video", file=sys.stderr)
                print("Available subtitle languages might not include Swedish auto-generated", file=sys.stderr)
                sys.exit(1)

            for subtitle_file in srt_files:
                print(
                    f"Reading subtitles from: {os.path.basename(subtitle_file)}",
                    file=sys.stderr,
                )

                # Read, clean and print the subtitle file
                with open(subtitle_file, 'r', encoding='utf-8') as f:
                    raw_content = f.read()
                print(clean_subtitles(raw_content))
            # Leaving the with-block deletes the temporary directory and
            # every subtitle file inside it.

    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e}", file=sys.stderr)
        if e.stderr:
            print(e.stderr, file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("Error: yt-dlp not found. Please install it first:", file=sys.stderr)
        print(" pip install yt-dlp", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unforeseen and exit non-zero
        print(f"Unexpected error: {e}", file=sys.stderr)
        traceback.print_exc()
        sys.exit(1)
|
|
|
|
if __name__ == '__main__':
    # Only "no argument" or "exactly one argument" are valid invocations
    if len(sys.argv) not in (1, 2):
        print("Usage: python download-subs.py [youtube_url]", file=sys.stderr)
        print(" If no URL is provided on macOS, a dialog will appear.", file=sys.stderr)
        sys.exit(1)

    if len(sys.argv) == 2:
        # URL given on the command line
        url = sys.argv[1]
    elif platform.system() != 'Darwin':
        # No argument and no dialog support on this platform
        print("Usage: python download-subs.py <youtube_url>", file=sys.stderr)
        sys.exit(1)
    else:
        # No argument on macOS: ask through the dialog
        url = get_url_from_dialog()
        if not url:
            print("No URL provided. Exiting.", file=sys.stderr)
            sys.exit(1)

    download_subtitles(url)
|
|
|
|
|
|
|