diff --git a/backend/app/database.py b/backend/app/database.py index 38e75b3..f2e8d91 100644 --- a/backend/app/database.py +++ b/backend/app/database.py @@ -21,3 +21,37 @@ async def get_db(): async def init_db(): async with engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) + # Ensure indexes exist on already-created tables (idempotent) + from sqlalchemy import text + await conn.execute(text( + "CREATE INDEX IF NOT EXISTS ix_video_url_format_id ON videos (url, format_id)" + )) + await conn.execute(text( + "CREATE INDEX IF NOT EXISTS ix_download_logs_video_id ON download_logs (video_id)" + )) + await conn.execute(text( + "CREATE INDEX IF NOT EXISTS ix_download_logs_downloaded_at ON download_logs (downloaded_at)" + )) + # Migrate: add geo columns to existing download_logs table (idempotent) + for col_def in [ + "ALTER TABLE download_logs ADD COLUMN country_code VARCHAR(8) DEFAULT ''", + "ALTER TABLE download_logs ADD COLUMN country VARCHAR(128) DEFAULT ''", + "ALTER TABLE download_logs ADD COLUMN city VARCHAR(128) DEFAULT ''", + ]: + try: + await conn.execute(text(col_def)) + except Exception: + pass # Column already exists + + # Seed default cleanup settings (only if not already set) + defaults = { + "cleanup_enabled": "true", + "cleanup_retention_minutes": "10080", # 7 days + "cleanup_storage_limit_pct": "80", + "cleanup_last_run": "", + "cleanup_last_result": "", + } + for k, v in defaults.items(): + await conn.execute(text( + "INSERT OR IGNORE INTO app_settings (key, value, updated_at) VALUES (:k, :v, datetime('now'))" + ), {"k": k, "v": v}) diff --git a/backend/app/main.py b/backend/app/main.py index 4dd892e..54ff2be 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,4 +1,5 @@ """XDL - Twitter/X Video Downloader API.""" +import asyncio import os from contextlib import asynccontextmanager from dotenv import load_dotenv @@ -9,12 +10,19 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.database import init_db from app.routes import auth, parse, download, admin +from app.services.cleanup import cleanup_loop @asynccontextmanager async def lifespan(app: FastAPI): await init_db() + task = asyncio.create_task(cleanup_loop()) yield + task.cancel() + try: + await task + except asyncio.CancelledError: + pass app = FastAPI(title="XDL - Video Downloader", version="1.0.0", lifespan=lifespan) diff --git a/backend/app/models.py b/backend/app/models.py index e655bee..886d7f7 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -1,6 +1,7 @@ """SQLAlchemy models.""" from datetime import datetime -from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text +from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text, Index, ForeignKey +from sqlalchemy.orm import relationship from app.database import Base @@ -9,7 +10,7 @@ class Video(Base): id = Column(Integer, primary_key=True, autoincrement=True) task_id = Column(String(64), unique=True, index=True, nullable=False) - url = Column(String(512), nullable=False) + url = Column(String(512), nullable=False, index=True) title = Column(String(512), default="") platform = Column(String(32), default="twitter") thumbnail = Column(String(1024), default="") @@ -24,3 +25,34 @@ class Video(Base): progress = Column(Integer, default=0) # 0-100 created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + __table_args__ = ( + Index("ix_video_url_format_id", "url", "format_id"), + ) + + logs = relationship("DownloadLog", back_populates="video", lazy="select") + + +class DownloadLog(Base): + __tablename__ = "download_logs" + + id = Column(Integer, primary_key=True, autoincrement=True) + video_id = Column(Integer, ForeignKey("videos.id", ondelete="CASCADE"), nullable=False, index=True) + ip = Column(String(64), default="") + user_agent = Column(Text, default="") + browser = Column(String(64), default="") # Chrome / Firefox / Safari / Edge / … + device = Column(String(32), default="") # desktop / mobile / tablet / bot + country_code = Column(String(8), default="") # e.g. CN + country = Column(String(128), default="") # e.g. China + city = Column(String(128), default="") # e.g. Shanghai + downloaded_at = Column(DateTime, default=datetime.utcnow, index=True) + + video = relationship("Video", back_populates="logs") + + +class AppSetting(Base): + __tablename__ = "app_settings" + + key = Column(String(64), primary_key=True) + value = Column(Text, default="") + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/backend/app/routes/admin.py b/backend/app/routes/admin.py index f9d2a73..7afdbef 100644 --- a/backend/app/routes/admin.py +++ b/backend/app/routes/admin.py @@ -1,12 +1,18 @@ """Admin management routes.""" +import json import os from fastapi import APIRouter, HTTPException, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select, func, or_ from app.database import get_db -from app.models import Video -from app.schemas import VideoInfo, VideoListResponse, StorageStats +from app.models import Video, DownloadLog, AppSetting +from app.schemas import ( + VideoInfo, VideoListResponse, StorageStats, + DownloadLogInfo, DownloadLogListResponse, + CleanupConfig, CleanupStatus, DiskStats, +) from app.auth import get_current_user +from app.services.cleanup import get_setting, set_setting, disk_stats, run_cleanup router = APIRouter(prefix="/api/admin", tags=["admin"]) @@ -68,3 +74,69 @@ async def storage_stats(user: dict = Depends(get_current_user), db: AsyncSession total = (await db.execute(select(func.count(Video.id)).where(Video.status == "done"))).scalar() or 0 total_size = (await db.execute(select(func.sum(Video.file_size)).where(Video.status == "done"))).scalar() or 0 return StorageStats(total_videos=total, total_size=total_size, total_size_human=human_size(total_size)) + + +@router.get("/download-logs", response_model=DownloadLogListResponse) +async def download_logs( + page: int = Query(1, ge=1), + page_size: int = Query(50, ge=1, le=200), + video_id: int = Query(None), + user: dict = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + from sqlalchemy.orm import joinedload + query = ( + select(DownloadLog) + .options(joinedload(DownloadLog.video)) + .order_by(DownloadLog.downloaded_at.desc()) + ) + count_query = select(func.count(DownloadLog.id)) + if video_id is not None: + query = query.where(DownloadLog.video_id == video_id) + count_query = count_query.where(DownloadLog.video_id == video_id) + total = (await db.execute(count_query)).scalar() or 0 + logs = (await db.execute(query.offset((page - 1) * page_size).limit(page_size))).scalars().all() + + items = [] + for l in logs: + d = DownloadLogInfo.model_validate(l) + if l.video: + d.video_title = l.video.title or "" + d.video_platform = l.video.platform or "" + items.append(d) + + return DownloadLogListResponse(logs=items, total=total, page=page, page_size=page_size) + + +@router.get("/settings/cleanup", response_model=CleanupStatus) +async def get_cleanup_settings(user: dict = Depends(get_current_user), db: AsyncSession = Depends(get_db)): + video_base = os.getenv("VIDEO_BASE_PATH", "/home/xdl/xdl_videos") + last_result_raw = await get_setting(db, "cleanup_last_result", "{}") + try: + last_result = json.loads(last_result_raw) if last_result_raw else {} + except Exception: + last_result = {} + return CleanupStatus( + config=CleanupConfig( + enabled=(await get_setting(db, "cleanup_enabled", "true")) == "true", + retention_minutes=int(await get_setting(db, "cleanup_retention_minutes", "10080")), + storage_limit_pct=int(await get_setting(db, "cleanup_storage_limit_pct", "80")), + ), + disk=DiskStats(**disk_stats(video_base)), + last_run=await get_setting(db, "cleanup_last_run", ""), + last_result=last_result, + ) + + +@router.put("/settings/cleanup", response_model=CleanupStatus) +async def update_cleanup_settings(cfg: CleanupConfig, user: dict = Depends(get_current_user), db: AsyncSession = Depends(get_db)): + await set_setting(db, "cleanup_enabled", "true" if cfg.enabled else "false") + await set_setting(db, "cleanup_retention_minutes", str(cfg.retention_minutes)) + await set_setting(db, "cleanup_storage_limit_pct", str(cfg.storage_limit_pct)) + return await get_cleanup_settings(user=user, db=db) + + +@router.post("/cleanup/run") +async def trigger_cleanup(user: dict = Depends(get_current_user)): + result = await run_cleanup() + return result diff --git a/backend/app/routes/download.py b/backend/app/routes/download.py index 45200d9..37a89a1 100644 --- a/backend/app/routes/download.py +++ b/backend/app/routes/download.py @@ -1,14 +1,16 @@ """Download task routes.""" import uuid import os +import re import logging -from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks +from datetime import datetime +from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks, Request from fastapi.responses import FileResponse, StreamingResponse from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from app.schemas import DownloadRequest, DownloadResponse, TaskStatus -from app.database import get_db -from app.models import Video +from app.database import get_db, async_session +from app.models import Video, DownloadLog from app.auth import get_current_user, optional_auth from app.services.downloader import download_video, get_video_path @@ -16,6 +18,94 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/api", tags=["download"]) +# ── UA parsing ────────────────────────────────────────────────────────────── + +def _parse_ua(ua: str) -> tuple[str, str]: + """Return (browser, device) from User-Agent string.""" + ua_lower = ua.lower() + + # Device + if any(k in ua_lower for k in ("bot", "crawler", "spider", "slurp", "curl", "wget", "python", "axios")): + device = "bot" + elif "tablet" in ua_lower or "ipad" in ua_lower: + device = "tablet" + elif any(k in ua_lower for k in ("mobile", "android", "iphone", "ipod", "windows phone")): + device = "mobile" + else: + device = "desktop" + + # Browser + if "edg/" in ua_lower or "edghtml" in ua_lower: + browser = "Edge" + elif "opr/" in ua_lower or "opera" in ua_lower: + browser = "Opera" + elif "samsungbrowser" in ua_lower: + browser = "Samsung" + elif "chrome/" in ua_lower: + browser = "Chrome" + elif "firefox/" in ua_lower: + browser = "Firefox" + elif "safari/" in ua_lower: + browser = "Safari" + else: + m = re.search(r"(\w+)/[\d.]+$", ua) + browser = m.group(1).capitalize() if m else "Unknown" + + return browser, device + + +def _client_ip(request: Request) -> str: + forwarded = request.headers.get("x-forwarded-for") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + return "" + + +async def _geo_lookup(ip: str) -> tuple[str, str, str]: + """Return (country_code, country, city) via ip-api.com. Falls back to empty strings.""" + if not ip or ip in ("127.0.0.1", "::1"): + return "", "", "" + try: + import httpx + async with httpx.AsyncClient(timeout=5) as client: + res = await client.get( + f"http://ip-api.com/json/{ip}", + params={"fields": "status,countryCode,country,city"}, + ) + data = res.json() + if data.get("status") == "success": + return data.get("countryCode", ""), data.get("country", ""), data.get("city", "") + except Exception as e: + logger.debug(f"Geo lookup failed for {ip}: {e}") + return "", "", "" + + +async def _log_download(video_id: int, request: Request): + """Write a DownloadLog entry with geo info (fire-and-forget).""" + try: + ua = request.headers.get("user-agent", "") + browser, device = _parse_ua(ua) + ip = _client_ip(request) + country_code, country, city = await _geo_lookup(ip) + async with async_session() as db: + db.add(DownloadLog( + video_id=video_id, + ip=ip, + user_agent=ua[:512], + browser=browser, + device=device, + country_code=country_code, + country=country, + city=city, + downloaded_at=datetime.utcnow(), + )) + await db.commit() + except Exception as e: + logger.warning(f"Failed to log download: {e}") + + async def _do_download(task_id: str, url: str, format_id: str): """Background download task.""" from app.database import async_session @@ -50,6 +140,19 @@ async def _do_download(task_id: str, url: str, format_id: str): @router.post("/download", response_model=DownloadResponse) async def start_download(req: DownloadRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)): + # Dedup: reuse existing completed download if file still on disk + existing = (await db.execute( + select(Video).where( + Video.url == req.url, + Video.format_id == req.format_id, + Video.status == "done", + ).order_by(Video.created_at.desc()).limit(1) + )).scalar_one_or_none() + + if existing and os.path.exists(existing.file_path): + logger.info(f"Reusing existing download task_id={existing.task_id} for url={req.url} format={req.format_id}") + return DownloadResponse(task_id=existing.task_id, status="done") + task_id = str(uuid.uuid4())[:8] video = Video(task_id=task_id, url=req.url, quality=req.quality, format_id=req.format_id, status="pending") db.add(video) @@ -74,17 +177,18 @@ async def get_download_status(task_id: str, db: AsyncSession = Depends(get_db)): @router.get("/file/{video_id}") -async def download_file(video_id: int, user: dict = Depends(get_current_user), db: AsyncSession = Depends(get_db)): +async def download_file(video_id: int, request: Request, background_tasks: BackgroundTasks, user: dict = Depends(get_current_user), db: AsyncSession = Depends(get_db)): video = (await db.execute(select(Video).where(Video.id == video_id))).scalar_one_or_none() if not video or video.status != "done": raise HTTPException(status_code=404, detail="Video not found") if not os.path.exists(video.file_path): raise HTTPException(status_code=404, detail="File not found on disk") + background_tasks.add_task(_log_download, video.id, request) return FileResponse(video.file_path, filename=video.filename, media_type="video/mp4") @router.get("/stream/{video_id}") -async def stream_video(video_id: int, token: str = None, user: dict = Depends(optional_auth), db: AsyncSession = Depends(get_db)): +async def stream_video(video_id: int, request: Request, background_tasks: BackgroundTasks, token: str = None, user: dict = Depends(optional_auth), db: AsyncSession = Depends(get_db)): # Allow token via query param for video player if not user and token: from app.auth import verify_token @@ -97,6 +201,7 @@ async def stream_video(video_id: int, token: str = None, user: dict = Depends(op raise HTTPException(status_code=404, detail="Video not found") if not os.path.exists(video.file_path): raise HTTPException(status_code=404, detail="File not found on disk") + background_tasks.add_task(_log_download, video.id, request) def iter_file(): with open(video.file_path, "rb") as f: @@ -110,11 +215,12 @@ async def stream_video(video_id: int, token: str = None, user: dict = Depends(op @router.get("/file/task/{task_id}") -async def download_file_by_task(task_id: str, db: AsyncSession = Depends(get_db)): +async def download_file_by_task(task_id: str, request: Request, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)): """Download file by task_id - no auth required (public download).""" video = (await db.execute(select(Video).where(Video.task_id == task_id))).scalar_one_or_none() if not video or video.status != "done": raise HTTPException(status_code=404, detail="Video not found") if not os.path.exists(video.file_path): raise HTTPException(status_code=404, detail="File not found on disk") + background_tasks.add_task(_log_download, video.id, request) return FileResponse(video.file_path, filename=video.filename, media_type="video/mp4") diff --git a/backend/app/routes/parse.py b/backend/app/routes/parse.py index aeabd60..730e53e 100644 --- a/backend/app/routes/parse.py +++ b/backend/app/routes/parse.py @@ -16,6 +16,7 @@ async def parse_url(req: ParseRequest): duration=info["duration"], formats=[FormatInfo(**f) for f in info["formats"]], url=info["url"], + platform=info.get("platform", "twitter"), ) except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to parse URL: {str(e)}") diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 1b991e0..c06e017 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -22,6 +22,7 @@ class ParseResponse(BaseModel): duration: int formats: list[FormatInfo] url: str + platform: str = "" class DownloadRequest(BaseModel): @@ -83,3 +84,48 @@ class LoginRequest(BaseModel): class TokenResponse(BaseModel): access_token: str token_type: str = "bearer" + + +class DownloadLogInfo(BaseModel): + id: int + video_id: int + video_title: str = "" + video_platform: str = "" + ip: str + user_agent: str + browser: str + device: str + country_code: str = "" + country: str = "" + city: str = "" + downloaded_at: datetime + + class Config: + from_attributes = True + + +class DownloadLogListResponse(BaseModel): + logs: list[DownloadLogInfo] + total: int + page: int + page_size: int + + +class CleanupConfig(BaseModel): + enabled: bool = True + retention_minutes: int = 10080 # 7 days + storage_limit_pct: int = 80 + + +class DiskStats(BaseModel): + total: int + used: int + free: int + used_pct: float + + +class CleanupStatus(BaseModel): + config: CleanupConfig + disk: DiskStats + last_run: str = "" + last_result: dict = {} diff --git a/backend/app/services/cleanup.py b/backend/app/services/cleanup.py new file mode 100644 index 0000000..afc4f8b --- /dev/null +++ b/backend/app/services/cleanup.py @@ -0,0 +1,146 @@ +"""Scheduled video cleanup service.""" +import asyncio +import json +import logging +import os +import shutil +from datetime import datetime, timedelta + +from sqlalchemy import select + +from app.database import async_session +from app.models import AppSetting, Video + +logger = logging.getLogger(__name__) + +CHECK_INTERVAL_SECONDS = 60 * 10 # Run check every 10 minutes + + +# ── Setting helpers ────────────────────────────────────────────────────────── + +async def get_setting(db, key: str, default: str = "") -> str: + row = (await db.execute(select(AppSetting).where(AppSetting.key == key))).scalar_one_or_none() + return row.value if row else default + + +async def set_setting(db, key: str, value: str): + row = (await db.execute(select(AppSetting).where(AppSetting.key == key))).scalar_one_or_none() + if row: + row.value = value + row.updated_at = datetime.utcnow() + else: + db.add(AppSetting(key=key, value=value)) + await db.commit() + + +# ── Disk helpers ───────────────────────────────────────────────────────────── + +def disk_stats(path: str) -> dict: + """Return disk usage stats for the given path.""" + try: + usage = shutil.disk_usage(path) + used_pct = round(usage.used / usage.total * 100, 1) + return { + "total": usage.total, + "used": usage.used, + "free": usage.free, + "used_pct": used_pct, + } + except Exception: + return {"total": 0, "used": 0, "free": 0, "used_pct": 0} + + +def _delete_video_file(video: Video) -> int: + """Delete file, return bytes freed (0 if file missing).""" + if video.file_path and os.path.exists(video.file_path): + size = video.file_size or 0 + try: + os.remove(video.file_path) + except OSError: + pass + return size + return 0 + + +# ── Main cleanup logic ─────────────────────────────────────────────────────── + +async def run_cleanup() -> dict: + """Execute cleanup. Returns a stats dict.""" + async with async_session() as db: + enabled = await get_setting(db, "cleanup_enabled", "true") + if enabled != "true": + return {"skipped": True, "reason": "disabled", "ran_at": datetime.utcnow().isoformat()} + + retention_min = int(await get_setting(db, "cleanup_retention_minutes", "10080")) + storage_limit_pct = int(await get_setting(db, "cleanup_storage_limit_pct", "80")) + video_base = os.getenv("VIDEO_BASE_PATH", "/home/xdl/xdl_videos") + + cutoff = datetime.utcnow() - timedelta(minutes=retention_min) + time_deleted = 0 + storage_deleted = 0 + freed_bytes = 0 + + # ── Phase 1: time-based cleanup ────────────────────────────────────── + old_videos = (await db.execute( + select(Video) + .where(Video.status == "done", Video.created_at < cutoff) + .order_by(Video.created_at.asc()) + )).scalars().all() + + for v in old_videos: + freed_bytes += _delete_video_file(v) + v.status = "deleted" + v.file_path = "" + time_deleted += 1 + + if time_deleted: + await db.commit() + logger.info(f"Cleanup: deleted {time_deleted} expired videos, freed {freed_bytes // 1024 // 1024} MB") + + # ── Phase 2: storage limit enforcement ─────────────────────────────── + stats = disk_stats(video_base) + if stats["used_pct"] > storage_limit_pct: + remaining = (await db.execute( + select(Video) + .where(Video.status == "done") + .order_by(Video.created_at.asc()) + )).scalars().all() + + for v in remaining: + stats = disk_stats(video_base) + if stats["used_pct"] <= storage_limit_pct: + break + freed_bytes += _delete_video_file(v) + v.status = "deleted" + v.file_path = "" + storage_deleted += 1 + + if storage_deleted: + await db.commit() + logger.info(f"Cleanup: storage limit reached, deleted {storage_deleted} extra videos") + + ran_at = datetime.utcnow().isoformat() + result = { + "time_deleted": time_deleted, + "storage_deleted": storage_deleted, + "freed_mb": round(freed_bytes / 1024 / 1024, 1), + "disk_used_pct": disk_stats(video_base)["used_pct"], + "ran_at": ran_at, + } + await set_setting(db, "cleanup_last_run", ran_at) + await set_setting(db, "cleanup_last_result", json.dumps(result)) + return result + + +# ── Background loop ────────────────────────────────────────────────────────── + +async def cleanup_loop(): + """Long-running background task. Starts after 60s, then every 10 min.""" + await asyncio.sleep(60) + while True: + try: + result = await run_cleanup() + logger.info(f"Cleanup finished: {result}") + except Exception as e: + logger.error(f"Cleanup loop error: {e}", exc_info=True) + await asyncio.sleep(CHECK_INTERVAL_SECONDS) diff --git a/backend/app/services/downloader.py b/backend/app/services/downloader.py index 72a9ea0..a291426 100644 --- a/backend/app/services/downloader.py +++ b/backend/app/services/downloader.py @@ -14,9 +14,16 @@ logger = logging.getLogger(__name__) VIDEO_BASE_PATH = os.getenv("VIDEO_BASE_PATH", "/home/xdl/xdl_videos") X_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "x_videos") +YOUTUBE_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "youtube_videos") # Ensure directories exist os.makedirs(X_VIDEOS_PATH, exist_ok=True) +os.makedirs(YOUTUBE_VIDEOS_PATH, exist_ok=True) + +# Pattern to match YouTube URLs +YOUTUBE_URL_RE = re.compile( + r'https?://(?:(?:www\.|m\.)?youtube\.com/(?:watch\?.*v=|shorts/|embed/|v/)|youtu\.be/)[\w-]+' +) # Pattern to match Twitter/X URLs and extract tweet ID TWITTER_URL_RE = re.compile( @@ -24,10 +31,16 @@ TWITTER_URL_RE = re.compile( ) -def get_video_path(filename: str) -> str: +def get_video_path(filename: str, platform: str = "twitter") -> str: + if platform == "youtube": + return os.path.join(YOUTUBE_VIDEOS_PATH, filename) return os.path.join(X_VIDEOS_PATH, filename) +def _is_youtube_url(url: str) -> bool: + return bool(YOUTUBE_URL_RE.match(url)) + + def _is_twitter_url(url: str) -> bool: return bool(TWITTER_URL_RE.match(url)) @@ -184,19 +197,126 @@ def _download_twitter_video(url: str, format_id: str = "best", progress_callback } -def parse_video_url(url: str) -> dict: - """Extract video info without downloading.""" - # Use syndication API for Twitter/X URLs - if _is_twitter_url(url): - logger.info(f"Using Twitter syndication API for: {url}") - try: - result = _parse_twitter_video(url) - # Remove internal keys before returning - result.pop('_formats_full', None) - return result - except Exception as e: - logger.warning(f"Twitter syndication failed, falling back to yt-dlp: {e}") - +def _parse_youtube_video(url: str) -> dict: + """Parse YouTube video info using yt-dlp.""" + ydl_opts = { + "quiet": True, + "no_warnings": True, + "extract_flat": False, + "skip_download": True, + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=False) + + formats = [] + seen = set() + for f in info.get("formats", []): + if f.get("vcodec", "none") == "none": + continue + height = f.get("height", 0) + if not height: + continue + ext = f.get("ext", "mp4") + fmt_id = f.get("format_id", "") + quality = f"{height}p" + key = f"{quality}" + if key in seen: + continue + seen.add(key) + formats.append({ + "format_id": fmt_id, + "quality": quality, + "ext": ext, + "filesize": f.get("filesize") or f.get("filesize_approx") or 0, + "note": f.get("format_note", ""), + }) + + formats.sort(key=lambda x: int(x["quality"].replace("p", "")), reverse=True) + + formats.insert(0, { + "format_id": "best", + "quality": "best", + "ext": "mp4", + "filesize": 0, + "note": "Best available quality", + }) + + return { + "title": info.get("title", "Untitled"), + "thumbnail": info.get("thumbnail", ""), + "duration": info.get("duration", 0) or 0, + "formats": formats, + "url": url, + "platform": "youtube", + } + + +def _download_youtube_video(url: str, format_id: str = "best", progress_callback=None) -> dict: + """Download YouTube video using yt-dlp.""" + task_id = str(uuid.uuid4())[:8] + output_template = os.path.join(YOUTUBE_VIDEOS_PATH, f"%(id)s_{task_id}.%(ext)s") + + if format_id == "best": + format_spec = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" + else: + format_spec = f"{format_id}+bestaudio/best" + + def hook(d): + if d["status"] == "downloading" and progress_callback: + total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0 + downloaded = d.get("downloaded_bytes", 0) + pct = int(downloaded * 100 / total) if total > 0 else 0 + progress_callback(pct) + elif d["status"] == "finished" and progress_callback: + progress_callback(100) + + ydl_opts = { + "format": format_spec, + "outtmpl": output_template, + "merge_output_format": "mp4", + "quiet": True, + "no_warnings": True, + "progress_hooks": [hook], + } + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=True) + filename = ydl.prepare_filename(info) + if not os.path.exists(filename): + base = os.path.splitext(filename)[0] + filename = base + ".mp4" + + file_size = os.path.getsize(filename) if os.path.exists(filename) else 0 + + return { + "title": info.get("title", "Untitled"), + "thumbnail": info.get("thumbnail", ""), + "duration": info.get("duration", 0) or 0, + "filename": os.path.basename(filename), + "file_path": filename, + "file_size": file_size, + "platform": "youtube", + } + + +def parse_video_url(url: str) -> dict: + """Extract video info without downloading.""" + # Use syndication API for Twitter/X URLs + if _is_twitter_url(url): + logger.info(f"Using Twitter syndication API for: {url}") + try: + result = _parse_twitter_video(url) + result.pop('_formats_full', None) + return result + except Exception as e: + logger.warning(f"Twitter syndication failed, falling back to yt-dlp: {e}") + + # YouTube URLs + if _is_youtube_url(url): + logger.info(f"Parsing YouTube video: {url}") + return _parse_youtube_video(url) + + # Fallback to generic yt-dlp ydl_opts = { "quiet": True, "no_warnings": True, @@ -209,7 +329,6 @@ def parse_video_url(url: str) -> dict: formats = [] seen = set() for f in info.get("formats", []): - # Only video formats with both video and audio, or video-only if f.get("vcodec", "none") == "none": continue height = f.get("height", 0) @@ -228,10 +347,8 @@ def parse_video_url(url: str) -> dict: "note": f.get("format_note", ""), }) - # Sort by resolution descending formats.sort(key=lambda x: int(x["quality"].replace("p", "")) if x["quality"].endswith("p") else 0, reverse=True) - # Add a "best" option formats.insert(0, { "format_id": "best", "quality": "best", @@ -259,6 +376,11 @@ def download_video(url: str, format_id: str = "best", progress_callback=None) -> except Exception as e: logger.warning(f"Twitter syndication download failed, falling back to yt-dlp: {e}") + # YouTube URLs + if _is_youtube_url(url): + logger.info(f"Downloading YouTube video: {url}") + return _download_youtube_video(url, format_id, progress_callback) + task_id = str(uuid.uuid4())[:8] output_template = os.path.join(X_VIDEOS_PATH, f"%(id)s_{task_id}.%(ext)s") diff --git a/backend/requirements.txt b/backend/requirements.txt index 8047b62..82d72d2 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -8,3 +8,4 @@ python-dotenv==1.0.1 python-multipart==0.0.12 yt-dlp>=2024.1.0 pydantic>=2.0.0 +httpx>=0.27.0 diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 4053944..de12954 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -30,5 +30,5 @@ const auth = useAuthStore() .nav-links { display: flex; gap: 1.5rem; } .nav-links a { color: #aaa; text-decoration: none; transition: color 0.2s; } .nav-links a:hover, .nav-links a.router-link-active { color: #1da1f2; } -.container { max-width: 800px; margin: 0 auto; padding: 2rem 1rem; } +.container { max-width: 1200px; margin: 0 auto; padding: 2rem 1.5rem; } diff --git a/frontend/src/views/Admin.vue b/frontend/src/views/Admin.vue index 677cd55..fefa0c9 100644 --- a/frontend/src/views/Admin.vue +++ b/frontend/src/views/Admin.vue @@ -1,52 +1,181 @@