refactor: use yt-dlp instead of wrapper

This commit is contained in:
Peifan Li
2025-12-09 22:49:35 -05:00
parent 6343978c5f
commit 9dffd2b72b
4 changed files with 1196 additions and 1077 deletions

View File

@@ -19,7 +19,6 @@
"axios": "^1.8.1",
"bcryptjs": "^3.0.3",
"better-sqlite3": "^12.4.6",
"bilibili-save-nodejs": "^1.0.0",
"cheerio": "^1.1.2",
"cors": "^2.8.5",
"dotenv": "^16.4.7",

View File

@@ -1,14 +1,13 @@
import axios from "axios";
import fs from "fs-extra";
import path from "path";
// @ts-ignore
import { downloadByVedioPath } from "bilibili-save-nodejs";
import { IMAGES_DIR, SUBTITLES_DIR, VIDEOS_DIR } from "../../config/paths";
import { bccToVtt } from "../../utils/bccToVtt";
import {
extractBilibiliVideoId,
formatVideoFilename
formatVideoFilename,
} from "../../utils/helpers";
import { executeYtDlpJson, executeYtDlpSpawn } from "../../utils/ytDlpUtils";
import * as storageService from "../storageService";
import { Collection, Video } from "../storageService";
@@ -30,7 +29,7 @@ export interface BilibiliPartsCheckResult {
export interface BilibiliCollectionCheckResult {
success: boolean;
type: 'collection' | 'series' | 'none';
type: "collection" | "series" | "none";
id?: number;
title?: string;
count?: number;
@@ -63,7 +62,30 @@ export interface CollectionDownloadResult {
export class BilibiliDownloader {
// Get video info without downloading
static async getVideoInfo(videoId: string): Promise<{ title: string; author: string; date: string; thumbnailUrl: string }> {
static async getVideoInfo(videoId: string): Promise<{
title: string;
author: string;
date: string;
thumbnailUrl: string;
}> {
try {
const videoUrl = `https://www.bilibili.com/video/${videoId}`;
const info = await executeYtDlpJson(videoUrl, {
noWarnings: true,
});
return {
title: info.title || "Bilibili Video",
author: info.uploader || info.channel || "Bilibili User",
date:
info.upload_date ||
info.release_date ||
new Date().toISOString().slice(0, 10).replace(/-/g, ""),
thumbnailUrl: info.thumbnail || "",
};
} catch (error) {
console.error("Error fetching Bilibili video info with yt-dlp:", error);
// Fallback to API
try {
const apiUrl = `https://api.bilibili.com/x/web-interface/view?bvid=${videoId}`;
const response = await axios.get(apiUrl);
@@ -73,13 +95,16 @@ export class BilibiliDownloader {
return {
title: videoInfo.title || "Bilibili Video",
author: videoInfo.owner?.name || "Bilibili User",
date: new Date(videoInfo.pubdate * 1000).toISOString().slice(0, 10).replace(/-/g, ""),
date: new Date(videoInfo.pubdate * 1000)
.toISOString()
.slice(0, 10)
.replace(/-/g, ""),
thumbnailUrl: videoInfo.pic,
};
}
throw new Error("No data found");
} catch (error) {
console.error("Error fetching Bilibili video info:", error);
} catch (apiError) {
console.error("Error fetching Bilibili video info from API:", apiError);
}
return {
title: "Bilibili Video",
author: "Bilibili User",
@@ -105,82 +130,141 @@ export class BilibiliDownloader {
thumbnailPath: string,
downloadId?: string
): Promise<BilibiliVideoInfo> {
const tempDir = path.join(VIDEOS_DIR, `temp_${Date.now()}_${Math.floor(Math.random() * 10000)}`);
const tempDir = path.join(
VIDEOS_DIR,
`temp_${Date.now()}_${Math.floor(Math.random() * 10000)}`
);
try {
// Create a unique temporary directory for the download
fs.ensureDirSync(tempDir);
console.log("Downloading Bilibili video to temp directory:", tempDir);
console.log("Downloading Bilibili video using yt-dlp to:", tempDir);
// Start monitoring progress
let progressInterval: NodeJS.Timeout | undefined;
// Get video info first
const info = await executeYtDlpJson(url, {
noWarnings: true,
});
const videoTitle = info.title || "Bilibili Video";
const videoAuthor = info.uploader || info.channel || "Bilibili User";
const videoDate =
info.upload_date ||
info.release_date ||
new Date().toISOString().slice(0, 10).replace(/-/g, "");
const thumbnailUrl = info.thumbnail || null;
const description = info.description || "";
// Prepare output path with a safe filename to avoid issues with special characters
// Use a simple template that yt-dlp will fill in
const outputTemplate = path.join(tempDir, "video.%(ext)s");
// Prepare flags for yt-dlp
const flags: Record<string, any> = {
output: outputTemplate,
format: "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
mergeOutputFormat: "mp4",
writeSubs: true,
writeAutoSubs: true,
convertSubs: "vtt",
ignoreErrors: true, // Continue even if subtitle download fails
noWarnings: false, // Show warnings for debugging
};
// Use spawn to capture stdout for progress
const subprocess = executeYtDlpSpawn(url, flags);
// Track progress from stdout
if (downloadId) {
let lastSize = 0;
let lastUpdateTime = Date.now();
subprocess.stdout?.on("data", (data: Buffer) => {
const output = data.toString();
// Parse progress: [download] 23.5% of 10.00MiB at 2.00MiB/s ETA 00:05
const progressMatch = output.match(
/(\d+\.?\d*)%\s+of\s+([~\d\w.]+)\s+at\s+([~\d\w.\/]+)/
);
progressInterval = setInterval(() => {
try {
const files = fs.readdirSync(tempDir);
const videoFile = files.find((file: string) => file.endsWith(".mp4"));
if (progressMatch) {
const percentage = parseFloat(progressMatch[1]);
const totalSize = progressMatch[2];
const speed = progressMatch[3];
if (videoFile) {
const filePath = path.join(tempDir, videoFile);
if (fs.existsSync(filePath)) {
const stats = fs.statSync(filePath);
const currentSize = stats.size;
const currentTime = Date.now();
const timeDiff = (currentTime - lastUpdateTime) / 1000; // seconds
if (timeDiff > 0 && currentSize > lastSize) {
// Calculate speed (bytes per second)
const bytesPerSecond = (currentSize - lastSize) / timeDiff;
// Format speed
const speedStr = this.formatBytes(bytesPerSecond) + "/s";
// Format downloaded size
const downloadedSizeStr = this.formatBytes(currentSize);
// Update progress
storageService.updateActiveDownload(downloadId, {
downloadedSize: downloadedSizeStr,
speed: speedStr,
progress: percentage,
totalSize: totalSize,
speed: speed,
});
}
});
}
lastSize = currentSize;
lastUpdateTime = currentTime;
// Capture stderr for better error reporting
let stderrOutput = "";
subprocess.stderr?.on("data", (data: Buffer) => {
stderrOutput += data.toString();
// Log stderr in real-time for debugging
const lines = data
.toString()
.split("\n")
.filter((line) => line.trim());
for (const line of lines) {
if (
!line.includes("[download]") &&
!line.includes("[info]") &&
!line.includes("[ExtractAudio]") &&
!line.includes("[Merger]")
) {
console.warn("yt-dlp stderr:", line);
}
}
} catch (error) {
// Ignore errors during monitoring
}
}, 500);
}
// Download the video using the package
await downloadByVedioPath({
url: url,
type: "mp4",
folder: tempDir,
});
// Stop progress monitoring
if (progressInterval) {
clearInterval(progressInterval);
// Wait for download to complete
let downloadError: any = null;
try {
await subprocess;
} catch (error: any) {
downloadError = error;
console.error("yt-dlp download failed:", error.message);
if (error.stderr) {
console.error("yt-dlp stderr:", error.stderr);
}
}
console.log("Download completed, checking for video file");
// Find the downloaded file
// Find the downloaded file (try multiple extensions)
const files = fs.readdirSync(tempDir);
console.log("Files in temp directory:", files);
const videoFile = files.find((file: string) => file.endsWith(".mp4"));
const videoFile =
files.find((file: string) => file.endsWith(".mp4")) ||
files.find((file: string) => file.endsWith(".mkv")) ||
files.find((file: string) => file.endsWith(".webm")) ||
files.find((file: string) => file.endsWith(".flv"));
if (!videoFile) {
throw new Error("Downloaded video file not found");
// List all files for debugging
console.error("No video file found. All files:", files);
const errorMsg = downloadError
? `Downloaded video file not found. yt-dlp error: ${
downloadError.message
}. stderr: ${(downloadError.stderr || stderrOutput).substring(
0,
500
)}`
: `Downloaded video file not found. yt-dlp stderr: ${stderrOutput.substring(
0,
500
)}`;
throw new Error(errorMsg);
}
// If there was an error but we found the file, log a warning but continue
if (downloadError) {
console.warn(
"yt-dlp reported an error but file was downloaded successfully:",
videoFile
);
}
console.log("Found video file:", videoFile);
@@ -206,37 +290,10 @@ export class BilibiliDownloader {
// Clean up temp directory
fs.removeSync(tempDir);
// Extract video title from filename (remove extension)
const videoTitle = videoFile.replace(".mp4", "") || "Bilibili Video";
// Try to get thumbnail from Bilibili
// Download thumbnail if available
let thumbnailSaved = false;
let thumbnailUrl: string | null = null;
const videoId = extractBilibiliVideoId(url);
console.log("Extracted video ID:", videoId);
if (videoId) {
try {
// Try to get video info from Bilibili API
const apiUrl = `https://api.bilibili.com/x/web-interface/view?bvid=${videoId}`;
console.log("Fetching video info from API:", apiUrl);
const response = await axios.get(apiUrl);
if (response.data && response.data.data) {
const videoInfo = response.data.data;
thumbnailUrl = videoInfo.pic;
const description = videoInfo.desc || "";
console.log("Got video info from API:", {
title: videoInfo.title,
author: videoInfo.owner?.name,
thumbnailUrl: thumbnailUrl,
});
if (thumbnailUrl) {
// Download thumbnail
try {
console.log("Downloading thumbnail from:", thumbnailUrl);
const thumbnailResponse = await axios({
@@ -257,32 +314,22 @@ export class BilibiliDownloader {
});
console.log("Thumbnail saved to:", thumbnailPath);
} catch (thumbnailError) {
console.error(
"Error downloading Bilibili thumbnail:",
thumbnailError
);
}
}
return {
title: videoInfo.title || videoTitle,
author: videoInfo.owner?.name || "Bilibili User",
date: new Date().toISOString().slice(0, 10).replace(/-/g, ""),
title: videoTitle,
author: videoAuthor,
date: videoDate,
thumbnailUrl: thumbnailUrl,
thumbnailSaved,
description,
};
}
}
} catch (thumbnailError) {
console.error("Error downloading Bilibili thumbnail:", thumbnailError);
}
}
console.log("Using basic video info");
// Return basic info if we couldn't get detailed info
return {
title: videoTitle,
author: "Bilibili User",
date: new Date().toISOString().slice(0, 10).replace(/-/g, ""),
thumbnailUrl: null,
thumbnailSaved: false,
};
} catch (error: any) {
console.error("Error in downloadBilibiliVideo:", error);
@@ -304,7 +351,9 @@ export class BilibiliDownloader {
}
// Helper function to check if a Bilibili video has multiple parts
static async checkVideoParts(videoId: string): Promise<BilibiliPartsCheckResult> {
static async checkVideoParts(
videoId: string
): Promise<BilibiliPartsCheckResult> {
try {
// Try to get video info from Bilibili API
const apiUrl = `https://api.bilibili.com/x/web-interface/view?bvid=${videoId}`;
@@ -333,16 +382,19 @@ export class BilibiliDownloader {
}
// Helper function to check if a Bilibili video belongs to a collection or series
static async checkCollectionOrSeries(videoId: string): Promise<BilibiliCollectionCheckResult> {
static async checkCollectionOrSeries(
videoId: string
): Promise<BilibiliCollectionCheckResult> {
try {
const apiUrl = `https://api.bilibili.com/x/web-interface/view?bvid=${videoId}`;
console.log("Checking if video belongs to collection/series:", apiUrl);
const response = await axios.get(apiUrl, {
headers: {
'Referer': 'https://www.bilibili.com',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
}
Referer: "https://www.bilibili.com",
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
},
});
if (response.data && response.data.data) {
@@ -355,34 +407,39 @@ export class BilibiliDownloader {
console.log(`Video belongs to collection: ${season.title}`);
return {
success: true,
type: 'collection',
type: "collection",
id: season.id,
title: season.title,
count: season.ep_count || 0,
mid: mid
mid: mid,
};
}
// If no collection found, return none
return { success: true, type: 'none' };
return { success: true, type: "none" };
}
return { success: false, type: 'none' };
return { success: false, type: "none" };
} catch (error) {
console.error("Error checking collection/series:", error);
return { success: false, type: 'none' };
return { success: false, type: "none" };
}
}
// Helper function to get all videos from a Bilibili collection
static async getCollectionVideos(mid: number, seasonId: number): Promise<BilibiliVideosResult> {
static async getCollectionVideos(
mid: number,
seasonId: number
): Promise<BilibiliVideosResult> {
try {
const allVideos: BilibiliVideoItem[] = [];
let pageNum = 1;
const pageSize = 30;
let hasMore = true;
console.log(`Fetching collection videos for mid=${mid}, season_id=${seasonId}`);
console.log(
`Fetching collection videos for mid=${mid}, season_id=${seasonId}`
);
while (hasMore) {
const apiUrl = `https://api.bilibili.com/x/polymer/web-space/seasons_archives_list`;
@@ -391,7 +448,7 @@ export class BilibiliDownloader {
season_id: seasonId,
page_num: pageNum,
page_size: pageSize,
sort_reverse: false
sort_reverse: false,
};
console.log(`Fetching page ${pageNum} of collection...`);
@@ -399,9 +456,10 @@ export class BilibiliDownloader {
const response = await axios.get(apiUrl, {
params,
headers: {
'Referer': 'https://www.bilibili.com',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
}
Referer: "https://www.bilibili.com",
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
},
});
if (response.data && response.data.data) {
@@ -414,7 +472,7 @@ export class BilibiliDownloader {
allVideos.push({
bvid: video.bvid,
title: video.title,
aid: video.aid
aid: video.aid,
});
});
@@ -436,14 +494,19 @@ export class BilibiliDownloader {
}
// Helper function to get all videos from a Bilibili series
static async getSeriesVideos(mid: number, seriesId: number): Promise<BilibiliVideosResult> {
static async getSeriesVideos(
mid: number,
seriesId: number
): Promise<BilibiliVideosResult> {
try {
const allVideos: BilibiliVideoItem[] = [];
let pageNum = 1;
const pageSize = 30;
let hasMore = true;
console.log(`Fetching series videos for mid=${mid}, series_id=${seriesId}`);
console.log(
`Fetching series videos for mid=${mid}, series_id=${seriesId}`
);
while (hasMore) {
const apiUrl = `https://api.bilibili.com/x/series/archives`;
@@ -451,7 +514,7 @@ export class BilibiliDownloader {
mid: mid,
series_id: seriesId,
pn: pageNum,
ps: pageSize
ps: pageSize,
};
console.log(`Fetching page ${pageNum} of series...`);
@@ -459,9 +522,10 @@ export class BilibiliDownloader {
const response = await axios.get(apiUrl, {
params,
headers: {
'Referer': 'https://www.bilibili.com',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
}
Referer: "https://www.bilibili.com",
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
},
});
if (response.data && response.data.data) {
@@ -474,13 +538,15 @@ export class BilibiliDownloader {
allVideos.push({
bvid: video.bvid,
title: video.title,
aid: video.aid
aid: video.aid,
});
});
// Check if there are more pages
const page = data.page || {};
hasMore = archives.length === pageSize && allVideos.length < (page.total || 0);
hasMore =
archives.length === pageSize &&
allVideos.length < (page.total || 0);
pageNum++;
} else {
hasMore = false;
@@ -520,7 +586,12 @@ export class BilibiliDownloader {
const videoPath = path.join(VIDEOS_DIR, videoFilename);
const thumbnailPath = path.join(IMAGES_DIR, thumbnailFilename);
let videoTitle, videoAuthor, videoDate, videoDescription, thumbnailUrl, thumbnailSaved;
let videoTitle,
videoAuthor,
videoDate,
videoDescription,
thumbnailUrl,
thumbnailSaved;
let finalVideoFilename = videoFilename;
let finalThumbnailFilename = thumbnailFilename;
@@ -554,7 +625,11 @@ export class BilibiliDownloader {
// Update the safe base filename with the actual title
// Update the safe base filename with the new format
const newSafeBaseFilename = formatVideoFilename(videoTitle, videoAuthor, videoDate);
const newSafeBaseFilename = formatVideoFilename(
videoTitle,
videoAuthor,
videoDate
);
const newVideoFilename = `${newSafeBaseFilename}.mp4`;
const newThumbnailFilename = `${newSafeBaseFilename}.jpg`;
@@ -579,7 +654,9 @@ export class BilibiliDownloader {
// Get video duration
let duration: string | undefined;
try {
const { getVideoDuration } = await import("../../services/metadataService");
const { getVideoDuration } = await import(
"../../services/metadataService"
);
const durationSec = await getVideoDuration(newVideoPath);
if (durationSec) {
duration = durationSec.toString();
@@ -600,10 +677,17 @@ export class BilibiliDownloader {
}
// Download subtitles
let subtitles: Array<{ language: string; filename: string; path: string }> = [];
let subtitles: Array<{
language: string;
filename: string;
path: string;
}> = [];
try {
console.log("Attempting to download subtitles...");
subtitles = await BilibiliDownloader.downloadSubtitles(url, newSafeBaseFilename);
subtitles = await BilibiliDownloader.downloadSubtitles(
url,
newSafeBaseFilename
);
console.log(`Downloaded ${subtitles.length} subtitles`);
} catch (e) {
console.error("Error downloading subtitles:", e);
@@ -671,9 +755,9 @@ export class BilibiliDownloader {
// Fetch all videos from the collection/series
let videosResult: BilibiliVideosResult;
if (type === 'collection' && mid && id) {
if (type === "collection" && mid && id) {
videosResult = await BilibiliDownloader.getCollectionVideos(mid, id);
} else if (type === 'series' && mid && id) {
} else if (type === "series" && mid && id) {
videosResult = await BilibiliDownloader.getSeriesVideos(mid, id);
} else {
throw new Error(`Unknown type: ${type}`);
@@ -712,7 +796,9 @@ export class BilibiliDownloader {
);
}
console.log(`Downloading video ${videoNumber}/${videos.length}: ${video.title}`);
console.log(
`Downloading video ${videoNumber}/${videos.length}: ${video.title}`
);
// Construct video URL
const videoUrl = `https://www.bilibili.com/video/${video.bvid}`;
@@ -729,17 +815,27 @@ export class BilibiliDownloader {
// If download was successful, add to collection
if (result.success && result.videoData) {
storageService.atomicUpdateCollection(mytubeCollectionId, (collection) => {
storageService.atomicUpdateCollection(
mytubeCollectionId,
(collection) => {
collection.videos.push(result.videoData!.id);
return collection;
});
}
);
console.log(`Added video ${videoNumber}/${videos.length} to collection`);
console.log(
`Added video ${videoNumber}/${videos.length} to collection`
);
} else {
console.error(`Failed to download video ${videoNumber}/${videos.length}: ${video.title}`);
console.error(
`Failed to download video ${videoNumber}/${videos.length}: ${video.title}`
);
}
} catch (videoError) {
console.error(`Error downloading video ${videoNumber}/${videos.length}:`, videoError);
console.error(
`Error downloading video ${videoNumber}/${videos.length}:`,
videoError
);
// Continue with next video even if one fails
}
@@ -757,7 +853,7 @@ export class BilibiliDownloader {
return {
success: true,
collectionId: mytubeCollectionId,
videosDownloaded: videos.length
videosDownloaded: videos.length,
};
} catch (error: any) {
console.error(`Error downloading ${collectionInfo.type}:`, error);
@@ -766,7 +862,7 @@ export class BilibiliDownloader {
}
return {
success: false,
error: error.message
error: error.message,
};
}
}
@@ -783,7 +879,10 @@ export class BilibiliDownloader {
try {
// Add to active downloads if ID is provided
if (downloadId) {
storageService.addActiveDownload(downloadId, `Downloading ${seriesTitle}`);
storageService.addActiveDownload(
downloadId,
`Downloading ${seriesTitle}`
);
}
for (let part = startPart; part <= totalParts; part++) {
@@ -810,10 +909,13 @@ export class BilibiliDownloader {
// If download was successful and we have a collection ID, add to collection
if (result.success && collectionId && result.videoData) {
try {
storageService.atomicUpdateCollection(collectionId, (collection) => {
storageService.atomicUpdateCollection(
collectionId,
(collection) => {
collection.videos.push(result.videoData!.id);
return collection;
});
}
);
console.log(
`Added part ${part}/${totalParts} to collection ${collectionId}`
@@ -872,14 +974,19 @@ export class BilibiliDownloader {
}
// Helper function to download subtitles
static async downloadSubtitles(videoUrl: string, baseFilename: string): Promise<Array<{ language: string; filename: string; path: string }>> {
static async downloadSubtitles(
videoUrl: string,
baseFilename: string
): Promise<Array<{ language: string; filename: string; path: string }>> {
try {
const videoId = extractBilibiliVideoId(videoUrl);
if (!videoId) return [];
const cookieHeader = BilibiliDownloader.getCookieHeader();
if (!cookieHeader) {
console.warn("WARNING: No cookies found in cookies.txt. Bilibili subtitles usually require login.");
console.warn(
"WARNING: No cookies found in cookies.txt. Bilibili subtitles usually require login."
);
} else {
console.log(`Cookie header length: ${cookieHeader.length}`);
// Log first few chars to verify it's not empty/malformed
@@ -887,9 +994,10 @@ export class BilibiliDownloader {
}
const headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Referer': 'https://www.bilibili.com',
...(cookieHeader ? { 'Cookie': cookieHeader } : {})
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Referer: "https://www.bilibili.com",
...(cookieHeader ? { Cookie: cookieHeader } : {}),
};
// Get CID first
@@ -908,14 +1016,18 @@ export class BilibiliDownloader {
const playerResponse = await axios.get(playerApiUrl, { headers });
if (cookieHeader && !cookieHeader.includes("SESSDATA")) {
console.warn("WARNING: SESSDATA cookie not found! This is required for Bilibili authentication.");
console.warn(
"WARNING: SESSDATA cookie not found! This is required for Bilibili authentication."
);
}
let subtitlesData = playerResponse.data?.data?.subtitle?.subtitles;
// Fallback: Check if subtitles are in the view response (sometimes they are)
if (!subtitlesData || subtitlesData.length === 0) {
console.log("No subtitles in player API, checking view API response...");
console.log(
"No subtitles in player API, checking view API response..."
);
// We already fetched viewResponse earlier to get CID
const viewSubtitles = viewResponse.data?.data?.subtitle?.list;
if (viewSubtitles && viewSubtitles.length > 0) {
@@ -948,18 +1060,22 @@ export class BilibiliDownloader {
if (!subUrl) continue;
// Ensure URL is absolute (sometimes it starts with //)
const absoluteSubUrl = subUrl.startsWith('//') ? `https:${subUrl}` : subUrl;
const absoluteSubUrl = subUrl.startsWith("//")
? `https:${subUrl}`
: subUrl;
console.log(`Downloading subtitle (${lang}): ${absoluteSubUrl}`);
// Do NOT send cookies to the subtitle CDN (hdslb.com) as it can cause 400 Bad Request (Header too large)
// and they are not needed for the CDN file itself.
const cdnHeaders = {
'User-Agent': headers['User-Agent'],
'Referer': headers['Referer']
"User-Agent": headers["User-Agent"],
Referer: headers["Referer"],
};
const subResponse = await axios.get(absoluteSubUrl, { headers: cdnHeaders });
const subResponse = await axios.get(absoluteSubUrl, {
headers: cdnHeaders,
});
const vttContent = bccToVtt(subResponse.data);
if (vttContent) {
@@ -971,13 +1087,12 @@ export class BilibiliDownloader {
savedSubtitles.push({
language: lang,
filename: subFilename,
path: `/subtitles/${subFilename}`
path: `/subtitles/${subFilename}`,
});
}
}
return savedSubtitles;
} catch (error) {
console.error("Error in downloadSubtitles:", error);
return [];

View File

@@ -1,13 +1,12 @@
import axios from "axios";
import { spawn } from "child_process";
import fs from "fs-extra";
import path from "path";
import { IMAGES_DIR, SUBTITLES_DIR, VIDEOS_DIR } from "../../config/paths";
import { formatVideoFilename } from "../../utils/helpers";
import { executeYtDlpJson, executeYtDlpSpawn } from "../../utils/ytDlpUtils";
import * as storageService from "../storageService";
import { Video } from "../storageService";
const YT_DLP_PATH = process.env.YT_DLP_PATH || "yt-dlp";
const PROVIDER_SCRIPT =
process.env.BGUTIL_SCRIPT_PATH ||
path.join(
@@ -15,201 +14,6 @@ const PROVIDER_SCRIPT =
"bgutil-ytdlp-pot-provider/server/build/generate_once.js"
);
/**
 * Build the long-form CLI switch for a camelCase option key.
 *
 * Each ASCII capital letter gets a hyphen placed in front of it, then the
 * whole result (including the leading `--`) is lower-cased, so
 * `mergeOutputFormat` yields `--merge-output-format`.
 */
function convertFlagToArg(flag: string): string {
  // "$&" re-inserts the matched capital right after the new hyphen.
  const withHyphens = flag.replace(/[A-Z]/g, "-$&");
  return `--${withHyphens}`.toLowerCase();
}
/**
 * Translate an options object into the argument vector handed to yt-dlp.
 *
 * Entries are processed in the object's own key order:
 * - `undefined` / `null` values contribute nothing.
 * - `extractorArgs`: a string containing `;` is split, and every non-blank
 *   trimmed piece becomes its own `--extractor-args <piece>` pair; any other
 *   value is passed through as one pair.
 * - `addHeader`: each array element (or the single value) becomes its own
 *   `--add-header <header>` pair.
 * - Everything else is named via `convertFlagToArg`: `true` emits the bare
 *   switch, `false` emits nothing, strings and numbers emit `switch value`,
 *   arrays emit `switch a,b,c` (one comma-joined value). Other value types
 *   are silently ignored.
 */
function flagsToArgs(flags: Record<string, any>): string[] {
  const cliArgs: string[] = [];

  Object.entries(flags).forEach(([name, setting]) => {
    if (setting === undefined || setting === null) {
      return;
    }

    if (name === "extractorArgs") {
      const isMultiPart = typeof setting === "string" && setting.includes(";");
      if (!isMultiPart) {
        cliArgs.push("--extractor-args", setting);
        return;
      }
      // e.g. "youtube:key=value;other:key=value" -> one pair per piece
      for (const piece of setting.split(";")) {
        const trimmedPiece = piece.trim();
        if (trimmedPiece) {
          cliArgs.push("--extractor-args", trimmedPiece);
        }
      }
      return;
    }

    if (name === "addHeader") {
      // Headers are "key:value" strings; a lone value is treated as a list of one.
      const headerList = Array.isArray(setting) ? setting : [setting];
      for (const headerLine of headerList) {
        cliArgs.push("--add-header", headerLine);
      }
      return;
    }

    const cliName = convertFlagToArg(name);
    switch (typeof setting) {
      case "boolean":
        if (setting) {
          cliArgs.push(cliName);
        }
        break;
      case "string":
      case "number":
        cliArgs.push(cliName, String(setting));
        break;
      default:
        if (Array.isArray(setting)) {
          cliArgs.push(cliName, setting.join(","));
        }
    }
  });

  return cliArgs;
}
/**
 * Run yt-dlp in `--dump-single-json` mode and hand back the parsed document.
 *
 * `--no-warnings` is always passed, followed by the converted `flags` and
 * finally the URL. The promise rejects when the process cannot be spawned,
 * when it exits with a non-zero code (the error carries the collected
 * `stderr`), or when stdout is not valid JSON.
 */
async function executeYtDlpJson(
  url: string,
  flags: Record<string, any> = {}
): Promise<any> {
  const args = ["--dump-single-json", "--no-warnings"].concat(
    flagsToArgs(flags),
    url
  );

  console.log(`Executing: ${YT_DLP_PATH} ${args.join(" ")}`);

  return new Promise<any>((resolve, reject) => {
    const child = spawn(YT_DLP_PATH, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Text accumulated from the child's two output pipes.
    const captured = { out: "", err: "" };

    child.stdout?.on("data", (chunk: Buffer) => {
      captured.out += chunk.toString();
    });

    child.stderr?.on("data", (chunk: Buffer) => {
      captured.err += chunk.toString();
    });

    child.on("close", (exitCode) => {
      if (exitCode !== 0) {
        const failure = new Error(
          `yt-dlp process exited with code ${exitCode}`
        );
        (failure as any).stderr = captured.err;
        reject(failure);
        return;
      }

      // Only surface stderr that does not carry the usual progress/info tags.
      const hasRoutineTag =
        captured.err.includes("[download]") || captured.err.includes("[info]");
      if (captured.err && !hasRoutineTag) {
        console.warn("yt-dlp stderr:", captured.err);
      }

      let parsed: any;
      try {
        parsed = JSON.parse(captured.out);
      } catch (parseError) {
        console.error("Failed to parse yt-dlp JSON output:", parseError);
        console.error("Output:", captured.out);
        reject(new Error("Failed to parse yt-dlp output as JSON"));
        return;
      }
      resolve(parsed);
    });

    child.on("error", (spawnError) => {
      reject(spawnError);
    });
  });
}
/**
 * Spawn yt-dlp for a download so the caller can follow progress on the pipes.
 *
 * The returned object exposes the child's `stdout`/`stderr` streams, a
 * `kill()` helper, and a `then` method so it can be awaited directly:
 * awaiting resolves when the process closes with exit code 0 and rejects on
 * any other exit code or when the process cannot be spawned.
 *
 * @param url   Target passed to yt-dlp as the last argument.
 * @param flags Options converted to CLI arguments via `flagsToArgs`.
 */
function executeYtDlpSpawn(
  url: string,
  flags: Record<string, any> = {}
): {
  stdout: NodeJS.ReadableStream | null;
  stderr: NodeJS.ReadableStream | null;
  kill: (signal?: NodeJS.Signals) => boolean;
  then: (
    onFulfilled?: (value: void) => void | Promise<void>,
    onRejected?: (reason: any) => void | Promise<void>
  ) => Promise<void>;
} {
  const args = [...flagsToArgs(flags), url];

  console.log(`Spawning: ${YT_DLP_PATH} ${args.join(" ")}`);

  const subprocess = spawn(YT_DLP_PATH, args, {
    stdio: ["ignore", "pipe", "pipe"],
  });

  // A spawn failure can emit both "error" and "close"; only the first
  // event is allowed to settle the promise.
  let settled = false;

  const promise = new Promise<void>((resolve, reject) => {
    subprocess.on("close", (code) => {
      if (settled) {
        return;
      }
      settled = true;
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`yt-dlp process exited with code ${code}`));
      }
    });

    subprocess.on("error", (error) => {
      if (settled) {
        return;
      }
      settled = true;
      reject(error);
    });
  });

  return {
    stdout: subprocess.stdout,
    stderr: subprocess.stderr,
    kill: (signal?: NodeJS.Signals) => {
      // Do not signal again once a kill has already been delivered.
      if (!subprocess.killed) {
        return subprocess.kill(signal);
      }
      return false;
    },
    then: promise.then.bind(promise),
  };
}
// Helper function to extract author from XiaoHongShu page when yt-dlp doesn't provide it
async function extractXiaoHongShuAuthor(url: string): Promise<string | null> {
try {
@@ -284,9 +88,7 @@ export class YtDlpDownloader {
}
// Get video info without downloading
static async getVideoInfo(
url: string
): Promise<{
static async getVideoInfo(url: string): Promise<{
title: string;
author: string;
date: string;

View File

@@ -0,0 +1,203 @@
import { spawn } from "child_process";
const YT_DLP_PATH = process.env.YT_DLP_PATH || "yt-dlp";
/**
 * Turn a camelCase option key into the matching long CLI switch.
 *
 * A hyphen is inserted in front of every ASCII capital letter, the string is
 * lower-cased, and `--` is prepended
 * (e.g. `mergeOutputFormat` becomes `--merge-output-format`).
 */
export function convertFlagToArg(flag: string): string {
  const hyphenated = flag.replace(/[A-Z]/g, (capital) => `-${capital}`);
  return "--" + hyphenated.toLowerCase();
}
/**
 * Convert a flags object into an array of yt-dlp CLI arguments.
 *
 * Entries are processed in the object's own key order:
 * - `undefined` / `null` values are skipped.
 * - `extractorArgs`: a string containing `;` is split and each non-blank,
 *   trimmed part is emitted as its own `--extractor-args <part>` pair
 *   (e.g. "youtube:key=value;other:key=value"); any other value is emitted
 *   as a single pair.
 * - `addHeader`: every array element (or the lone value) is emitted as its
 *   own `--add-header <header>` pair; headers are "key:value" strings.
 * - Any other key is named via `convertFlagToArg`: `true` emits the bare
 *   switch, `false` emits nothing, strings and numbers emit `switch value`,
 *   and arrays emit `switch a,b,c` (comma-joined into ONE value; the switch
 *   is not repeated). Values of any other type are ignored.
 *
 * Every emitted element is coerced to a string so the result really is a
 * `string[]`; `child_process.spawn` rejects non-string arguments.
 *
 * @param flags Option names (camelCase) mapped to their values.
 * @returns The argument list, without the executable name or the URL.
 */
export function flagsToArgs(flags: Record<string, any>): string[] {
  const args: string[] = [];

  for (const [key, value] of Object.entries(flags)) {
    if (value === undefined || value === null) {
      continue;
    }

    if (key === "extractorArgs") {
      if (typeof value === "string" && value.includes(";")) {
        for (const part of value.split(";")) {
          const trimmed = part.trim();
          if (trimmed) {
            args.push("--extractor-args", trimmed);
          }
        }
      } else {
        args.push("--extractor-args", String(value));
      }
      continue;
    }

    if (key === "addHeader") {
      const headers: unknown[] = Array.isArray(value) ? value : [value];
      for (const header of headers) {
        args.push("--add-header", String(header));
      }
      continue;
    }

    const argName = convertFlagToArg(key);

    if (typeof value === "boolean") {
      // Only `true` produces output; `false` simply omits the switch.
      if (value) {
        args.push(argName);
      }
    } else if (typeof value === "string" || typeof value === "number") {
      args.push(argName, String(value));
    } else if (Array.isArray(value)) {
      args.push(argName, value.join(","));
    }
  }

  return args;
}
/**
 * Execute yt-dlp with `--dump-single-json` and return the parsed result.
 *
 * `--no-warnings` is always passed, followed by the converted `flags`, an
 * end-of-options marker (`--`) and the URL. The marker guarantees that a URL
 * beginning with `-` is treated as a positional argument and can never be
 * interpreted as a yt-dlp option.
 *
 * Output is collected as raw buffers and decoded once the process closes, so
 * a multi-byte UTF-8 character split across two pipe chunks is not corrupted.
 *
 * @param url   Target to inspect.
 * @param flags Extra options, converted via `flagsToArgs`.
 * @returns The JSON document printed by yt-dlp.
 * @throws Rejects when the process cannot be spawned, exits with a non-zero
 *         code (the error carries a `stderr` property), or prints output
 *         that is not valid JSON.
 */
export async function executeYtDlpJson(
  url: string,
  flags: Record<string, any> = {}
): Promise<any> {
  const args = [
    "--dump-single-json",
    "--no-warnings",
    ...flagsToArgs(flags),
    "--", // end of options: everything after this is a URL
    url,
  ];

  console.log(`Executing: ${YT_DLP_PATH} ${args.join(" ")}`);

  return new Promise<any>((resolve, reject) => {
    const subprocess = spawn(YT_DLP_PATH, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];

    subprocess.stdout?.on("data", (data: Buffer) => {
      stdoutChunks.push(data);
    });

    subprocess.stderr?.on("data", (data: Buffer) => {
      stderrChunks.push(data);
    });

    subprocess.on("close", (code) => {
      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
      const stderr = Buffer.concat(stderrChunks).toString("utf8");

      if (code !== 0) {
        const error = new Error(`yt-dlp process exited with code ${code}`);
        (error as any).stderr = stderr;
        reject(error);
        return;
      }

      // Surface stderr only when it does not carry routine progress/info tags.
      if (
        stderr &&
        !stderr.includes("[download]") &&
        !stderr.includes("[info]")
      ) {
        console.warn("yt-dlp stderr:", stderr);
      }

      try {
        resolve(JSON.parse(stdout));
      } catch (parseError) {
        console.error("Failed to parse yt-dlp JSON output:", parseError);
        console.error("Output:", stdout);
        reject(new Error("Failed to parse yt-dlp output as JSON"));
      }
    });

    // Emitted when the executable cannot be started (e.g. not found).
    subprocess.on("error", (error) => {
      reject(error);
    });
  });
}
/**
 * Execute yt-dlp with spawn for progress tracking.
 *
 * The returned object exposes the child's `stdout`/`stderr` streams, a
 * `kill()` helper, and a `then` method so it can be awaited directly.
 * Awaiting resolves when the process closes with exit code 0. On any other
 * exit code it rejects with an error carrying `stderr` (everything the
 * process wrote to stderr) and `code`; if the process cannot be spawned it
 * rejects with the spawn error.
 *
 * The URL is placed after an end-of-options marker (`--`) so that a URL
 * beginning with `-` can never be interpreted as a yt-dlp option.
 *
 * @param url   Target passed to yt-dlp.
 * @param flags Options converted to CLI arguments via `flagsToArgs`.
 */
export function executeYtDlpSpawn(
  url: string,
  flags: Record<string, any> = {}
): {
  stdout: NodeJS.ReadableStream | null;
  stderr: NodeJS.ReadableStream | null;
  kill: (signal?: NodeJS.Signals) => boolean;
  then: (
    onFulfilled?: (value: void) => void | Promise<void>,
    onRejected?: (reason: any) => void | Promise<void>
  ) => Promise<void>;
} {
  const args = [...flagsToArgs(flags), "--", url];

  console.log(`Spawning: ${YT_DLP_PATH} ${args.join(" ")}`);

  const subprocess = spawn(YT_DLP_PATH, args, {
    stdio: ["ignore", "pipe", "pipe"],
  });

  // Capture stderr for error reporting. Chunks are kept as bytes and decoded
  // once, so a multi-byte character split across chunks stays intact. The
  // stream is handed to the caller, who may switch it to string mode, so
  // string chunks are accepted as well.
  const stderrChunks: Buffer[] = [];
  subprocess.stderr?.on("data", (data: Buffer | string) => {
    stderrChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
  });

  // A spawn failure can emit both "error" and "close"; only the first
  // event is allowed to settle the promise (and log).
  let settled = false;

  const promise = new Promise<void>((resolve, reject) => {
    subprocess.on("close", (code) => {
      if (settled) {
        return;
      }
      settled = true;

      if (code === 0) {
        resolve();
        return;
      }

      const stderr = Buffer.concat(stderrChunks).toString("utf8");
      const error = new Error(`yt-dlp process exited with code ${code}`);
      (error as any).stderr = stderr;
      (error as any).code = code;
      console.error("yt-dlp error output:", stderr);
      reject(error);
    });

    subprocess.on("error", (error) => {
      if (settled) {
        return;
      }
      settled = true;
      reject(error);
    });
  });

  return {
    stdout: subprocess.stdout,
    stderr: subprocess.stderr,
    kill: (signal?: NodeJS.Signals) => {
      // Do not signal again once a kill has already been delivered.
      if (!subprocess.killed) {
        return subprocess.kill(signal);
      }
      return false;
    },
    then: promise.then.bind(promise),
  };
}