feat: dedup downloads by (url, format_id) index, reuse existing files

This commit is contained in:
mini
2026-02-18 23:00:49 +08:00
parent 25c4973f57
commit 0bab021e21
4 changed files with 36 additions and 4 deletions

View File

@@ -21,3 +21,9 @@ async def get_db():
async def init_db():
    """Create the database tables and ensure the dedup index exists.

    Runs ``Base.metadata.create_all`` inside a single ``engine.begin()``
    transaction, then issues a ``CREATE INDEX IF NOT EXISTS`` statement for
    the composite ``(url, format_id)`` index on the ``videos`` table so that
    tables created before the index was introduced also receive it.
    """
    # Imported at function scope so the dependency is explicit (rather than
    # the previous inline ``__import__("sqlalchemy")`` lookup) without
    # relying on the module's top-of-file import list.
    from sqlalchemy import text

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
        # Ensure composite index exists on already-created tables (idempotent)
        # NOTE(review): ``IF NOT EXISTS`` on CREATE INDEX is not accepted by
        # every SQL dialect -- confirm against the configured database.
        await conn.execute(
            text(
                "CREATE INDEX IF NOT EXISTS ix_video_url_format_id ON videos (url, format_id)"
            )
        )

View File

@@ -1,6 +1,6 @@
"""SQLAlchemy models.""" """SQLAlchemy models."""
from datetime import datetime from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text from sqlalchemy import Column, Integer, String, DateTime, BigInteger, Text, Index
from app.database import Base from app.database import Base
@@ -9,7 +9,7 @@ class Video(Base):
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
task_id = Column(String(64), unique=True, index=True, nullable=False) task_id = Column(String(64), unique=True, index=True, nullable=False)
url = Column(String(512), nullable=False) url = Column(String(512), nullable=False, index=True)
title = Column(String(512), default="") title = Column(String(512), default="")
platform = Column(String(32), default="twitter") platform = Column(String(32), default="twitter")
thumbnail = Column(String(1024), default="") thumbnail = Column(String(1024), default="")
@@ -24,3 +24,7 @@ class Video(Base):
progress = Column(Integer, default=0) # 0-100 progress = Column(Integer, default=0) # 0-100
created_at = Column(DateTime, default=datetime.utcnow) created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index("ix_video_url_format_id", "url", "format_id"),
)

View File

@@ -50,6 +50,19 @@ async def _do_download(task_id: str, url: str, format_id: str):
@router.post("/download", response_model=DownloadResponse) @router.post("/download", response_model=DownloadResponse)
async def start_download(req: DownloadRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)): async def start_download(req: DownloadRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
# Dedup: reuse existing completed download if file still on disk
existing = (await db.execute(
select(Video).where(
Video.url == req.url,
Video.format_id == req.format_id,
Video.status == "done",
).order_by(Video.created_at.desc()).limit(1)
)).scalar_one_or_none()
if existing and os.path.exists(existing.file_path):
logger.info(f"Reusing existing download task_id={existing.task_id} for url={req.url} format={req.format_id}")
return DownloadResponse(task_id=existing.task_id, status="done")
task_id = str(uuid.uuid4())[:8] task_id = str(uuid.uuid4())[:8]
video = Video(task_id=task_id, url=req.url, quality=req.quality, format_id=req.format_id, status="pending") video = Video(task_id=task_id, url=req.url, quality=req.quality, format_id=req.format_id, status="pending")
db.add(video) db.add(video)

View File

@@ -94,8 +94,17 @@ async function startDownload() {
url: url.value, format_id: selectedFormat.value, quality: selectedFormat.value url: url.value, format_id: selectedFormat.value, quality: selectedFormat.value
}) })
taskId.value = res.data.task_id taskId.value = res.data.task_id
statusText.value = 'Starting download...' if (res.data.status === 'done') {
pollStatus() // Already downloaded — skip polling, show save button immediately
progress.value = 100
statusText.value = '✅ Already downloaded'
downloadReady.value = true
downloadUrl.value = `/api/file/task/${res.data.task_id}`
downloading.value = false
} else {
statusText.value = 'Starting download...'
pollStatus()
}
} catch (e) { } catch (e) {
error.value = e.response?.data?.detail || 'Failed to start download' error.value = e.response?.data?.detail || 'Failed to start download'
downloading.value = false downloading.value = false