diff --git a/backend/Dockerfile b/backend/Dockerfile index 60c882e..de8da2b 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.12-slim WORKDIR /app -RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg aria2 && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir -U yt-dlp diff --git a/backend/app/services/downloader.py b/backend/app/services/downloader.py index a34ee29..aa28a9f 100644 --- a/backend/app/services/downloader.py +++ b/backend/app/services/downloader.py @@ -6,6 +6,7 @@ import json import asyncio import logging import threading +import subprocess import urllib.request from pathlib import Path from typing import Optional @@ -82,12 +83,14 @@ X_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "x_videos") YOUTUBE_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "youtube_videos") PH_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "ph_videos") HLS_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "hls_videos") +TORRENT_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "torrent_videos") # Ensure directories exist os.makedirs(X_VIDEOS_PATH, exist_ok=True) os.makedirs(YOUTUBE_VIDEOS_PATH, exist_ok=True) os.makedirs(PH_VIDEOS_PATH, exist_ok=True) os.makedirs(HLS_VIDEOS_PATH, exist_ok=True) +os.makedirs(TORRENT_VIDEOS_PATH, exist_ok=True) # Pattern to match YouTube URLs YOUTUBE_URL_RE = re.compile( @@ -111,6 +114,12 @@ HLS_URL_RE = re.compile( re.IGNORECASE, ) +# Pattern to match Magnet links +MAGNET_RE = re.compile(r'^magnet:\?xt=urn:[a-z0-9]+:[a-zA-Z0-9]+', re.IGNORECASE) + +# Pattern to match .torrent file URLs +TORRENT_URL_RE = re.compile(r'https?://[^\s]+\.torrent(?:[?#][^\s]*)?', re.IGNORECASE) + def get_video_path(filename: str, platform: str = "twitter") -> str: if platform == "youtube": @@ -119,6 +128,8 @@ def get_video_path(filename: str, 
def _is_torrent(url: str) -> bool:
    """Return True when *url* matches MAGNET_RE or TORRENT_URL_RE."""
    return bool(MAGNET_RE.match(url) or TORRENT_URL_RE.match(url))


def _parse_torrent(url: str) -> dict:
    """Return placeholder info for a magnet link or a .torrent URL.

    Nothing is fetched here: real metadata only becomes available once the
    download has started, so every field except the title is a stub.

    The title is chosen as follows:
      * magnet link  -> its ``dn=`` display name when present,
      * .torrent URL -> the decoded file name from the URL path (query string
        and fragment are ignored),
      * otherwise    -> the first 80 characters of *url*.
    """
    from urllib.parse import parse_qs, unquote, urlsplit

    title = ""
    # MAGNET_RE is case-insensitive, so the scheme check has to be as well.
    if url.lower().startswith("magnet:"):
        display_names = parse_qs(url.partition("?")[2]).get("dn")
        if display_names:
            title = display_names[0].strip()
    else:
        try:
            path = urlsplit(url).path
        except ValueError:
            # Malformed authority (e.g. unbalanced IPv6 brackets): fall back
            # to a plain textual split.
            path = url.split("?")[0].split("#")[0]
        title = unquote(os.path.basename(path))
    if not title:
        title = url[:80]

    return {
        "title": title,
        "thumbnail": "",
        "duration": 0,
        "formats": [{"format_id": "best", "quality": "original", "ext": "*", "filesize": 0, "note": "原始文件(不转码)"}],
        "url": url,
        "platform": "torrent",
    }


# aria2c summary lines look like: [#abcd 100MiB/500MiB(20%) CN:4 DL:2.0MiB]
_ARIA2_PROGRESS_RE = re.compile(r'\((\d+)%\)')


def _aria2_progress(line: str) -> Optional[int]:
    """Return the percentage found in an aria2c output line, or None."""
    m = _ARIA2_PROGRESS_RE.search(line)
    return int(m.group(1)) if m else None


def _stop_process(proc) -> None:
    """Make sure *proc* is no longer running, reap it and close its stdout pipe."""
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long; force it.
            proc.kill()
            proc.wait()
    if proc.stdout is not None:
        proc.stdout.close()


def _pick_main_file(out_dir: str) -> Optional[Path]:
    """Return the most likely payload file under *out_dir*, or None.

    The largest file with a known video extension wins; if there is none,
    the largest remaining file is used. aria2 control files (``.aria2``) and
    ``.torrent`` metadata files are never candidates.
    """
    video_exts = {".mp4", ".mkv", ".avi", ".mov", ".wmv", ".flv", ".webm", ".ts", ".m2ts"}
    bookkeeping_exts = {".aria2", ".torrent"}

    candidates = []
    for path in Path(out_dir).rglob("*"):
        try:
            if path.is_file() and path.suffix.lower() not in bookkeeping_exts:
                candidates.append((path.stat().st_size, path))
        except OSError:
            # File vanished or is unreadable; it cannot be the result.
            continue
    if not candidates:
        return None

    videos = [c for c in candidates if c[1].suffix.lower() in video_exts]
    return max(videos or candidates, key=lambda c: c[0])[1]


def _download_torrent(url: str, format_id: str = "best", progress_callback=None, task_id: Optional[str] = None) -> dict:
    """Download a magnet link or .torrent URL with aria2c.

    Files are written to ``TORRENT_VIDEOS_PATH/<task_id>`` (a random 8-char id
    is used when *task_id* is not given). While aria2c runs, the percentage
    parsed from its output is stored in ``_download_progress[task_id]``,
    clamped to 1..99.

    Args:
        url: Magnet URI or http(s) URL of a .torrent file.
        format_id: Accepted for signature parity; not used by this function.
        progress_callback: Accepted for signature parity; not used by this
            function.
        task_id: Key into ``_cancel_flags`` / ``_download_progress``.

    Returns:
        Dict with title, thumbnail, duration, filename, file_path, file_size
        and platform of the selected file.

    Raises:
        yt_dlp.utils.DownloadCancelled: the task's cancel flag was set.
        RuntimeError: aria2c exited non-zero or produced no usable file.
    """
    out_dir = os.path.join(TORRENT_VIDEOS_PATH, task_id or str(uuid.uuid4())[:8])
    os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "aria2c",
        "--dir", out_dir,
        "--seed-time=0",              # stop as soon as the download completes; do not seed
        "--max-connection-per-server=4",
        "--split=4",
        "--bt-stop-timeout=300",      # fail if there has been no download speed for 5 minutes
        "--summary-interval=5",
        "--console-log-level=warn",
        "--file-allocation=none",
        url,
    ]

    flag = _cancel_flags.get(task_id) if task_id else None

    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",  # torrent file names are not guaranteed to be valid UTF-8
        bufsize=1,
    )

    try:
        # NOTE: the cancel flag is only checked when aria2c emits a line.
        for line in proc.stdout:
            if flag and flag.is_set():
                raise yt_dlp.utils.DownloadCancelled("Cancelled by user")
            pct = _aria2_progress(line)
            if pct is not None and task_id:
                _download_progress[task_id] = max(1, min(pct, 99))
        returncode = proc.wait()
    finally:
        # Runs on success, cancellation and unexpected errors alike, so
        # aria2c is never left running or un-reaped and the pipe is closed.
        _stop_process(proc)

    if returncode != 0:
        raise RuntimeError(f"aria2c exited with code {returncode}")

    if task_id:
        _download_progress[task_id] = 99

    target = _pick_main_file(out_dir)
    if target is None:
        raise RuntimeError("aria2c finished but no file found")

    return {
        "title": target.stem,
        "thumbnail": "",
        "duration": 0,
        "filename": target.name,
        "file_path": str(target),
        "file_size": target.stat().st_size,
        "platform": "torrent",
    }
task_id=task_id) + # Magnet / torrent + if _is_torrent(url): + logger.info(f"Downloading torrent/magnet: {url}") + return _download_torrent(url, format_id, progress_callback, task_id=task_id) + task_id = str(uuid.uuid4())[:8] output_template = os.path.join(X_VIDEOS_PATH, f"%(id)s_{task_id}.%(ext)s")