vault backup: 2025-12-22 14:32:30
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m47s
All checks were successful
Deploy Quartz site to GitHub Pages / build (push) Successful in 2m47s
This commit is contained in:
@@ -1,80 +1,161 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Download Swedish auto-generated subtitles from YouTube videos using yt-dlp.
|
||||
"""
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
import glob
|
||||
|
||||
def get_clipboard():
    """
    Return the current clipboard text.

    Runs the `pbpaste` command and captures its standard output.

    Returns:
        str: The clipboard contents with leading/trailing whitespace stripped
    """
    result = subprocess.run(['pbpaste'], capture_output=True, text=True)
    return result.stdout.strip()
|
||||
|
||||
def set_clipboard(text):
    """
    Replace the clipboard contents with the given text.

    Pipes the text to the `pbcopy` command on standard input.

    Args:
        text: The string to place on the clipboard
    """
    subprocess.run(['pbcopy'], input=text, text=True)
|
||||
import platform
|
||||
|
||||
def get_url_from_dialog():
    """
    Show a macOS dialog to get the YouTube URL.
    Uses osascript (AppleScript) - similar to zenity on Linux or prompt() in HTML.

    Returns:
        str: The URL entered by the user (whitespace-stripped), or None if the
        dialog was cancelled, osascript failed, or the platform is not macOS
    """
    if platform.system() != 'Darwin':
        print("Error: Dialog is only supported on macOS", file=sys.stderr)
        return None

    # AppleScript to show a text input dialog
    applescript = '''
    display dialog "Enter YouTube URL:" default answer "" with title "Download Swedish Subtitles" buttons {"Cancel", "OK"} default button "OK"
    set userInput to text returned of result
    return userInput
    '''

    try:
        # Invoke osascript exactly once; check=True turns a non-zero exit
        # status into CalledProcessError, handled below.
        result = subprocess.run(
            ['osascript', '-e', applescript],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        # User cancelled or error occurred
        return None

    return result.stdout.strip()
|
||||
|
||||
def clean_subtitles(srt_content):
    """
    Clean SRT subtitle content by removing timestamps, unwrapping lines, and removing duplicates.

    Args:
        srt_content: Raw SRT subtitle content

    Returns:
        Cleaned text content with timestamps removed and duplicates filtered,
        joined into a single space-separated string (empty string if nothing
        remains)
    """
    text_lines = []
    seen_lines = set()

    for raw_line in srt_content.strip().split('\n'):
        line = raw_line.strip()

        # Skip empty lines, sequence numbers, and timestamp lines.
        # Note: a caption line consisting only of digits is also dropped here,
        # because it is indistinguishable from a sequence number.
        if not line or line.isdigit() or '-->' in line:
            continue

        # Skip duplicate lines (auto-generated subtitles often repeat).
        # De-duplication is global: only the first occurrence of a line is kept.
        if line in seen_lines:
            continue

        seen_lines.add(line)
        text_lines.append(line)

    # Join all lines with spaces to unwrap
    return ' '.join(text_lines)
|
||||
|
||||
def download_subtitles(url):
    """
    Download auto-generated Swedish subtitles from a YouTube video or playlist.

    Runs yt-dlp to write the subtitles as an SRT file in the current
    directory, prints the cleaned text to stdout, and deletes the file.
    Progress and error messages go to stderr; the process exits with status 1
    if yt-dlp fails, is not installed, or no Swedish subtitle file is produced.

    Args:
        url: YouTube video or playlist URL

    Returns:
        str: The cleaned subtitle text that was printed
    """
    # Download subtitles to current directory, then read and delete
    cmd = [
        'yt-dlp',
        '--write-auto-sub',
        '--sub-lang', 'sv',
        '--skip-download',
        '--convert-subs', 'srt',
        '-o', '%(id)s.%(ext)s',
        url,
    ]

    try:
        print("Downloading subtitles...", file=sys.stderr)
        subprocess.run(cmd, capture_output=True, text=True, check=True)

        # Find the generated .srt file
        srt_files = glob.glob('*.sv.srt')

        if not srt_files:
            print("Error: No Swedish subtitles found for this video", file=sys.stderr)
            print("Available subtitle languages might not include Swedish auto-generated", file=sys.stderr)
            sys.exit(1)

        # NOTE(review): only the first match is processed; any other
        # '*.sv.srt' files in the directory are left untouched - confirm this
        # is intended for playlist URLs.
        subtitle_file = srt_files[0]
        print(f"Reading subtitles from: {subtitle_file}", file=sys.stderr)

        # Read, clean and print the subtitle file
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            raw_content = f.read()
        cleaned_content = clean_subtitles(raw_content)
        print(cleaned_content)

        # Clean up the subtitle file
        os.remove(subtitle_file)
        print(f"Cleaned up: {subtitle_file}", file=sys.stderr)

        return cleaned_content

    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e}", file=sys.stderr)
        if e.stderr:
            print(e.stderr, file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("Error: yt-dlp not found. Please install it first:", file=sys.stderr)
        print("  pip install yt-dlp", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary: report the failure with a traceback and exit.
        # sys.exit() above raises SystemExit, which is not caught here.
        print(f"Unexpected error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: take the URL from the single optional command-line
    # argument, or fall back to a dialog on macOS when no argument is given.
    if len(sys.argv) > 2:
        print("Usage: python download-subs.py [youtube_url]", file=sys.stderr)
        print("  If no URL is provided on macOS, a dialog will appear.", file=sys.stderr)
        sys.exit(1)

    url = None

    # Get URL from command line argument or dialog
    if len(sys.argv) == 2:
        url = sys.argv[1]
    elif platform.system() == 'Darwin':
        # No argument provided - show dialog (macOS only)
        url = get_url_from_dialog()
        if not url:
            print("No URL provided. Exiting.", file=sys.stderr)
            sys.exit(1)
    else:
        print("Usage: python download-subs.py <youtube_url>", file=sys.stderr)
        sys.exit(1)

    download_subtitles(url)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user