feat: dedup downloads by (url, format_id) index, reuse existing files

This commit is contained in:
mini
2026-02-18 23:00:49 +08:00
parent 25c4973f57
commit 0bab021e21
4 changed files with 36 additions and 4 deletions

View File

@@ -21,3 +21,9 @@ async def get_db():
async def init_db():
    """Create the tables declared on ``Base`` and ensure the dedup index exists.

    Runs ``Base.metadata.create_all`` inside a single ``engine.begin()``
    transaction, then issues an explicit ``CREATE INDEX IF NOT EXISTS`` so that
    the composite ``(url, format_id)`` index is also present on tables that were
    created before the index was added to the model. The statement is
    idempotent, so calling this on every startup is safe.
    """
    # Function-scope import instead of ``__import__("sqlalchemy").text``: same
    # object, but readable and visible to linters and static analysis.
    from sqlalchemy import text

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
        # Ensure composite index exists on already-created tables (idempotent).
        # NOTE(review): assumes the model's table is named ``videos`` -- confirm
        # against ``Video.__tablename__``.
        await conn.execute(
            text(
                "CREATE INDEX IF NOT EXISTS ix_video_url_format_id ON videos (url, format_id)"
            )
        )

View File

@@ -1,6 +1,6 @@
"""SQLAlchemy models."""
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text
from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text, Index
from app.database import Base
@@ -9,7 +9,7 @@ class Video(Base):
id = Column(Integer, primary_key=True, autoincrement=True)
task_id = Column(String(64), unique=True, index=True, nullable=False)
url = Column(String(512), nullable=False)
url = Column(String(512), nullable=False, index=True)
title = Column(String(512), default="")
platform = Column(String(32), default="twitter")
thumbnail = Column(String(1024), default="")
@@ -24,3 +24,7 @@ class Video(Base):
progress = Column(Integer, default=0) # 0-100
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index("ix_video_url_format_id", "url", "format_id"),
)

View File

@@ -50,6 +50,19 @@ async def _do_download(task_id: str, url: str, format_id: str):
@router.post("/download", response_model=DownloadResponse)
async def start_download(req: DownloadRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
# Dedup: reuse existing completed download if file still on disk
existing = (await db.execute(
select(Video).where(
Video.url == req.url,
Video.format_id == req.format_id,
Video.status == "done",
).order_by(Video.created_at.desc()).limit(1)
)).scalar_one_or_none()
if existing and os.path.exists(existing.file_path):
logger.info(f"Reusing existing download task_id={existing.task_id} for url={req.url} format={req.format_id}")
return DownloadResponse(task_id=existing.task_id, status="done")
task_id = str(uuid.uuid4())[:8]
video = Video(task_id=task_id, url=req.url, quality=req.quality, format_id=req.format_id, status="pending")
db.add(video)