feat: support HLS/m3u8 stream download
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -77,11 +77,13 @@ VIDEO_BASE_PATH = os.getenv("VIDEO_BASE_PATH", "/home/xdl/xdl_videos")
|
|||||||
# One download directory per supported platform, all rooted at VIDEO_BASE_PATH.
X_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "x_videos")
YOUTUBE_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "youtube_videos")
PH_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "ph_videos")
HLS_VIDEOS_PATH = os.path.join(VIDEO_BASE_PATH, "hls_videos")

# Create every download directory at import time; exist_ok makes this a no-op
# when a directory is already present.
for _video_dir in (X_VIDEOS_PATH, YOUTUBE_VIDEOS_PATH, PH_VIDEOS_PATH, HLS_VIDEOS_PATH):
    os.makedirs(_video_dir, exist_ok=True)
|
||||||
|
|
||||||
# Pattern to match YouTube URLs
|
# Pattern to match YouTube URLs
|
||||||
YOUTUBE_URL_RE = re.compile(
|
YOUTUBE_URL_RE = re.compile(
|
||||||
@@ -99,12 +101,20 @@ PORNHUB_URL_RE = re.compile(
|
|||||||
r'|https?://phub\.to/[\w-]+'
|
r'|https?://phub\.to/[\w-]+'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Direct HLS playlist links: an http(s) URL whose non-whitespace run contains
# ".m3u8", optionally followed by a "?query" or "#fragment" tail.
# Matching is case-insensitive.
HLS_URL_RE = re.compile(r'https?://[^\s]+\.m3u8(?:[?#][^\s]*)?', re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def get_video_path(filename: str, platform: str = "twitter") -> str:
    """Return the storage path for *filename* inside the directory of *platform*.

    "youtube", "pornhub" and "hls" map to their dedicated directories; any
    other platform value (including the default "twitter") uses X_VIDEOS_PATH.
    """
    if platform == "youtube":
        target_dir = YOUTUBE_VIDEOS_PATH
    elif platform == "pornhub":
        target_dir = PH_VIDEOS_PATH
    elif platform == "hls":
        target_dir = HLS_VIDEOS_PATH
    else:
        target_dir = X_VIDEOS_PATH
    return os.path.join(target_dir, filename)
|
||||||
|
|
||||||
|
|
||||||
@@ -124,6 +134,8 @@ def detect_platform(url: str) -> str:
|
|||||||
return "youtube"
|
return "youtube"
|
||||||
if _is_pornhub_url(url):
|
if _is_pornhub_url(url):
|
||||||
return "pornhub"
|
return "pornhub"
|
||||||
|
if _is_hls_url(url):
|
||||||
|
return "hls"
|
||||||
return "unknown"
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
@@ -131,6 +143,10 @@ def _is_twitter_url(url: str) -> bool:
|
|||||||
return bool(TWITTER_URL_RE.match(url))
|
return bool(TWITTER_URL_RE.match(url))
|
||||||
|
|
||||||
|
|
||||||
|
def _is_hls_url(url: str) -> bool:
    """Tell whether *url* matches HLS_URL_RE from its first character."""
    return HLS_URL_RE.match(url) is not None
|
||||||
|
|
||||||
|
|
||||||
def _extract_tweet_id(url: str) -> Optional[str]:
    """Return capture group 1 of TWITTER_URL_RE for *url*, or None when it does not match."""
    found = TWITTER_URL_RE.match(url)
    if found is None:
        return None
    return found.group(1)
|
||||||
@@ -496,6 +512,106 @@ def _download_pornhub_video(url: str, format_id: str = "best", progress_callback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_hls_video(url: str) -> dict:
|
||||||
|
"""Parse HLS/m3u8 stream info using yt-dlp."""
|
||||||
|
ydl_opts = {
|
||||||
|
"quiet": True,
|
||||||
|
"no_warnings": True,
|
||||||
|
"skip_download": True,
|
||||||
|
"allowed_extractors": ["generic"],
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
info = ydl.extract_info(url, download=False)
|
||||||
|
except Exception:
|
||||||
|
# If yt-dlp can't parse, return minimal info to allow direct download
|
||||||
|
return {
|
||||||
|
"title": "HLS Stream",
|
||||||
|
"thumbnail": "",
|
||||||
|
"duration": 0,
|
||||||
|
"formats": [{"format_id": "best", "quality": "best", "ext": "mp4", "filesize": 0, "note": "HLS stream (auto-merge)"}],
|
||||||
|
"url": url,
|
||||||
|
"platform": "hls",
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
seen = set()
|
||||||
|
for f in (info.get("formats") or []):
|
||||||
|
if f.get("vcodec", "none") == "none":
|
||||||
|
continue
|
||||||
|
height = f.get("height", 0)
|
||||||
|
fmt_id = f.get("format_id", "")
|
||||||
|
quality = f"{height}p" if height else f.get("format_note", "HLS")
|
||||||
|
key = quality
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
formats.append({
|
||||||
|
"format_id": fmt_id,
|
||||||
|
"quality": quality,
|
||||||
|
"ext": "mp4",
|
||||||
|
"filesize": f.get("filesize") or f.get("filesize_approx") or 0,
|
||||||
|
"note": f.get("format_note", "HLS"),
|
||||||
|
})
|
||||||
|
|
||||||
|
formats.sort(key=lambda x: int(x["quality"].replace("p", "")) if x["quality"].endswith("p") else 0, reverse=True)
|
||||||
|
formats.insert(0, {"format_id": "best", "quality": "best", "ext": "mp4", "filesize": 0, "note": "Best available quality"})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"title": info.get("title") or "HLS Stream",
|
||||||
|
"thumbnail": info.get("thumbnail", ""),
|
||||||
|
"duration": info.get("duration", 0) or 0,
|
||||||
|
"formats": formats,
|
||||||
|
"url": url,
|
||||||
|
"platform": "hls",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _download_hls_video(url: str, format_id: str = "best", progress_callback=None, task_id: str = None) -> dict:
    """Fetch an HLS/m3u8 stream into HLS_VIDEOS_PATH with yt-dlp and describe the result.

    Args:
        url: Stream URL handed to yt-dlp.
        format_id: "best" for the default selector chain, otherwise a yt-dlp
            format id that is paired with the best audio track.
        progress_callback: Accepted for signature parity; not used here.
        task_id: When truthy, a progress hook built by ``_make_hook`` is attached.

    Returns:
        A dict with ``title``, ``thumbnail``, ``duration``, ``filename``,
        ``file_path``, ``file_size`` and ``platform`` ("hls").
    """
    # "best" prefers mp4 video + m4a audio and degrades step by step; any other
    # id is tried with best audio, then on its own, then falls back to "best".
    format_spec = (
        "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio/best[ext=mp4]/best"
        if format_id == "best"
        else f"{format_id}+bestaudio/{format_id}/best"
    )

    short_id = str(uuid.uuid4())[:8]
    progress_hooks = [_make_hook(task_id)] if task_id else []

    ydl_opts = {
        "format": format_spec,
        "outtmpl": os.path.join(HLS_VIDEOS_PATH, f"hls_{short_id}.%(ext)s"),
        "merge_output_format": "mp4",
        "quiet": True,
        "no_warnings": True,
        "progress_hooks": progress_hooks,
        "allowed_extractors": ["generic", "m3u8"],
        # HLS-specific: concurrent fragment download for speed
        "concurrent_fragment_downloads": 5,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        file_path = ydl.prepare_filename(info)
        if not os.path.exists(file_path):
            # The prepared name may not exist on disk; look for the same
            # stem with the requested merge container (.mp4) instead.
            stem, _ext = os.path.splitext(file_path)
            file_path = stem + ".mp4"

    if os.path.exists(file_path):
        size_bytes = os.path.getsize(file_path)
    else:
        size_bytes = 0

    return {
        "title": info.get("title") or "HLS Stream",
        "thumbnail": info.get("thumbnail", ""),
        "duration": info.get("duration", 0) or 0,
        "filename": os.path.basename(file_path),
        "file_path": file_path,
        "file_size": size_bytes,
        "platform": "hls",
    }
|
||||||
|
|
||||||
|
|
||||||
def parse_video_url(url: str) -> dict:
|
def parse_video_url(url: str) -> dict:
|
||||||
"""Extract video info without downloading."""
|
"""Extract video info without downloading."""
|
||||||
# Use syndication API for Twitter/X URLs
|
# Use syndication API for Twitter/X URLs
|
||||||
@@ -526,6 +642,11 @@ def parse_video_url(url: str) -> dict:
|
|||||||
logger.info(f"Parsing Pornhub video: {url}")
|
logger.info(f"Parsing Pornhub video: {url}")
|
||||||
return _parse_pornhub_video(url)
|
return _parse_pornhub_video(url)
|
||||||
|
|
||||||
|
# HLS / m3u8 direct stream URLs
|
||||||
|
if _is_hls_url(url):
|
||||||
|
logger.info(f"Parsing HLS stream: {url}")
|
||||||
|
return _parse_hls_video(url)
|
||||||
|
|
||||||
# Fallback to generic yt-dlp
|
# Fallback to generic yt-dlp
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
"quiet": True,
|
"quiet": True,
|
||||||
@@ -604,6 +725,11 @@ def download_video(url: str, format_id: str = "best", progress_callback=None, ta
|
|||||||
logger.info(f"Downloading Pornhub video: {url}")
|
logger.info(f"Downloading Pornhub video: {url}")
|
||||||
return _download_pornhub_video(url, format_id, progress_callback, task_id=task_id)
|
return _download_pornhub_video(url, format_id, progress_callback, task_id=task_id)
|
||||||
|
|
||||||
|
# HLS / m3u8 direct stream URLs
|
||||||
|
if _is_hls_url(url):
|
||||||
|
logger.info(f"Downloading HLS stream: {url}")
|
||||||
|
return _download_hls_video(url, format_id, progress_callback, task_id=task_id)
|
||||||
|
|
||||||
task_id = str(uuid.uuid4())[:8]
|
task_id = str(uuid.uuid4())[:8]
|
||||||
output_template = os.path.join(X_VIDEOS_PATH, f"%(id)s_{task_id}.%(ext)s")
|
output_template = os.path.join(X_VIDEOS_PATH, f"%(id)s_{task_id}.%(ext)s")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user