refactor: use yt-dlp instead of wrapper

This commit is contained in:
Peifan Li
2025-12-09 22:49:35 -05:00
parent 6343978c5f
commit 9dffd2b72b
4 changed files with 1196 additions and 1077 deletions

View File

@@ -19,7 +19,6 @@
"axios": "^1.8.1",
"bcryptjs": "^3.0.3",
"better-sqlite3": "^12.4.6",
"bilibili-save-nodejs": "^1.0.0",
"cheerio": "^1.1.2",
"cors": "^2.8.5",
"dotenv": "^16.4.7",

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,12 @@
import axios from "axios";
import { spawn } from "child_process";
import fs from "fs-extra";
import path from "path";
import { IMAGES_DIR, SUBTITLES_DIR, VIDEOS_DIR } from "../../config/paths";
import { formatVideoFilename } from "../../utils/helpers";
import { executeYtDlpJson, executeYtDlpSpawn } from "../../utils/ytDlpUtils";
import * as storageService from "../storageService";
import { Video } from "../storageService";
const YT_DLP_PATH = process.env.YT_DLP_PATH || "yt-dlp";
const PROVIDER_SCRIPT =
process.env.BGUTIL_SCRIPT_PATH ||
path.join(
@@ -15,201 +14,6 @@ const PROVIDER_SCRIPT =
"bgutil-ytdlp-pot-provider/server/build/generate_once.js"
);
/**
 * Build the long-form CLI option name for a camelCase flag key.
 *
 * A hyphen is inserted in front of every ASCII capital letter, the whole
 * string is lower-cased, and `--` is prepended
 * (for example `noPlaylist` becomes `--no-playlist`).
 */
function convertFlagToArg(flag: string): string {
  const dashed = flag.replace(/[A-Z]/g, (capital) => "-" + capital);
  return "--" + dashed.toLowerCase();
}
/**
 * Convert a flags object into a yt-dlp CLI argument array.
 *
 * Entries are processed in insertion order:
 * - `null` / `undefined` values are skipped.
 * - `extractorArgs` (string or array of strings) emits one `--extractor-args`
 *   per extractor. Inside a string, `;` separates pieces; a piece that starts
 *   with an `extractor:` prefix opens a new group, while a piece without a
 *   prefix is a further argument of the preceding extractor and stays joined
 *   to it with `;` (yt-dlp's own syntax, e.g.
 *   `youtube:player_client=android;skip=dash`).
 * - `addHeader` (string or array of strings) emits one `--add-header` per entry.
 * - Any other key is converted with `convertFlagToArg`: `true` emits the bare
 *   option, `false` emits nothing, strings and numbers emit option + value,
 *   arrays emit option + comma-joined value. Other value types are ignored.
 *
 * @param flags - Option names (camelCase) mapped to their values.
 * @returns Arguments ready to be passed to `spawn`.
 */
function flagsToArgs(flags: Record<string, any>): string[] {
  // Recognises the "extractor:" prefix that starts a new --extractor-args group.
  const extractorPrefix = /^[\w.-]+:/;

  // Split one extractor-args string into per-extractor groups.
  const splitExtractorArgs = (raw: string): string[] => {
    if (!raw.includes(";")) {
      return [raw];
    }
    const groups: string[] = [];
    for (const piece of raw.split(";")) {
      const part = piece.trim();
      if (!part) {
        continue;
      }
      if (groups.length === 0 || extractorPrefix.test(part)) {
        groups.push(part);
      } else {
        // No extractor prefix: this is another ARG=VAL of the previous
        // extractor, so glue it back instead of emitting an invalid option.
        groups.push(`${groups.pop()};${part}`);
      }
    }
    return groups;
  };

  const args: string[] = [];

  for (const [key, value] of Object.entries(flags)) {
    if (value === undefined || value === null) {
      continue;
    }

    if (key === "extractorArgs") {
      const entries: unknown[] = Array.isArray(value) ? value : [value];
      for (const entry of entries) {
        if (entry === undefined || entry === null) {
          continue;
        }
        for (const group of splitExtractorArgs(String(entry))) {
          args.push("--extractor-args", group);
        }
      }
      continue;
    }

    if (key === "addHeader") {
      // Each header is a "key:value" string and needs its own option.
      const headers: unknown[] = Array.isArray(value) ? value : [value];
      for (const header of headers) {
        if (header === undefined || header === null) {
          continue;
        }
        args.push("--add-header", String(header));
      }
      continue;
    }

    const argName = convertFlagToArg(key);

    if (typeof value === "boolean") {
      // Only a true boolean produces output; false is dropped.
      if (value) {
        args.push(argName);
      }
    } else if (typeof value === "string" || typeof value === "number") {
      args.push(argName, String(value));
    } else if (Array.isArray(value)) {
      args.push(argName, value.join(","));
    }
  }

  return args;
}
/**
 * Run yt-dlp in metadata mode and resolve with its parsed JSON output.
 *
 * The process is started with `--dump-single-json --no-warnings`, followed by
 * the arguments derived from `flags`, a `--` separator and the URL.
 *
 * @param url - Target URL. It is placed after `--` so a value that starts
 *   with `-` can never be interpreted by yt-dlp as an option.
 * @param flags - Extra yt-dlp options, converted with `flagsToArgs`.
 * @returns The value produced by `JSON.parse` on yt-dlp's stdout.
 * @throws Rejects with an Error carrying a `stderr` property when yt-dlp exits
 *   with a non-zero code, with the spawn error when the process cannot be
 *   started, or with an Error when stdout is not valid JSON.
 */
async function executeYtDlpJson(
  url: string,
  flags: Record<string, any> = {}
): Promise<any> {
  const args = [
    "--dump-single-json",
    "--no-warnings",
    ...flagsToArgs(flags),
    "--",
    url,
  ];

  console.log(`Executing: ${YT_DLP_PATH} ${args.join(" ")}`);

  return new Promise<any>((resolve, reject) => {
    const subprocess = spawn(YT_DLP_PATH, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Keep raw chunks and decode once at the end: decoding chunk by chunk
    // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];

    subprocess.stdout?.on("data", (chunk: Buffer) => {
      stdoutChunks.push(chunk);
    });

    subprocess.stderr?.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk);
    });

    subprocess.on("close", (code) => {
      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
      const stderr = Buffer.concat(stderrChunks).toString("utf8");

      if (code !== 0) {
        const error = new Error(`yt-dlp process exited with code ${code}`);
        (error as any).stderr = stderr;
        reject(error);
        return;
      }

      // Surface unexpected stderr output, ignoring routine progress/info lines.
      if (
        stderr &&
        !stderr.includes("[download]") &&
        !stderr.includes("[info]")
      ) {
        console.warn("yt-dlp stderr:", stderr);
      }

      try {
        resolve(JSON.parse(stdout));
      } catch (parseError) {
        console.error("Failed to parse yt-dlp JSON output:", parseError);
        console.error("Output:", stdout);
        reject(new Error("Failed to parse yt-dlp output as JSON"));
      }
    });

    // Emitted when the process could not be spawned at all.
    subprocess.on("error", (error) => {
      reject(error);
    });
  });
}
/**
 * Start yt-dlp for a download and return a thenable handle to it.
 *
 * The process is spawned with the arguments derived from `flags`, a `--`
 * separator and the URL. The returned object exposes the child's `stdout`
 * and `stderr` streams (for progress tracking), a `kill()` method and a
 * `then()` bound to a promise that settles when the process ends.
 *
 * @param url - Target URL. It is placed after `--` so a value that starts
 *   with `-` can never be interpreted by yt-dlp as an option.
 * @param flags - yt-dlp options, converted with `flagsToArgs`.
 * @returns A subprocess-like object. Its promise resolves on exit code 0 and
 *   rejects otherwise with an Error carrying `stderr` and `code` properties,
 *   or with the spawn error when the process cannot be started.
 */
function executeYtDlpSpawn(
  url: string,
  flags: Record<string, any> = {}
): {
  stdout: NodeJS.ReadableStream | null;
  stderr: NodeJS.ReadableStream | null;
  kill: (signal?: NodeJS.Signals) => boolean;
  then: (
    onFulfilled?: (value: void) => void | Promise<void>,
    onRejected?: (reason: any) => void | Promise<void>
  ) => Promise<void>;
} {
  const args = [...flagsToArgs(flags), "--", url];

  console.log(`Spawning: ${YT_DLP_PATH} ${args.join(" ")}`);

  const subprocess = spawn(YT_DLP_PATH, args, {
    stdio: ["ignore", "pipe", "pipe"],
  });

  // Collect stderr so failures carry diagnostics. Chunks are normalised to
  // Buffers (a caller may have switched the stream to string mode) and
  // decoded once at the end to keep multi-byte characters intact.
  const stderrChunks: Buffer[] = [];
  subprocess.stderr?.on("data", (chunk: Buffer | string) => {
    stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
  });

  const promise = new Promise<void>((resolve, reject) => {
    // "error" and "close" can both fire; only the first one settles.
    let settled = false;

    subprocess.on("close", (code) => {
      if (settled) {
        return;
      }
      settled = true;

      if (code === 0) {
        resolve();
        return;
      }

      const error = new Error(`yt-dlp process exited with code ${code}`);
      (error as any).stderr = Buffer.concat(stderrChunks).toString("utf8");
      (error as any).code = code;
      reject(error);
    });

    subprocess.on("error", (error) => {
      if (settled) {
        return;
      }
      settled = true;
      reject(error);
    });
  });

  return {
    stdout: subprocess.stdout,
    stderr: subprocess.stderr,
    kill: (signal?: NodeJS.Signals) => {
      // `subprocess.killed` only says a signal was sent, not that the process
      // ended; check the exit state so a follow-up signal can still be sent.
      if (subprocess.exitCode !== null || subprocess.signalCode !== null) {
        return false;
      }
      return subprocess.kill(signal);
    },
    then: promise.then.bind(promise),
  };
}
// Helper function to extract author from XiaoHongShu page when yt-dlp doesn't provide it
async function extractXiaoHongShuAuthor(url: string): Promise<string | null> {
try {
@@ -284,9 +88,7 @@ export class YtDlpDownloader {
}
// Get video info without downloading
static async getVideoInfo(
url: string
): Promise<{
static async getVideoInfo(url: string): Promise<{
title: string;
author: string;
date: string;

View File

@@ -0,0 +1,203 @@
import { spawn } from "child_process";
const YT_DLP_PATH = process.env.YT_DLP_PATH || "yt-dlp";
/**
 * Translate a camelCase flag key into its long CLI option form.
 *
 * Each ASCII upper-case letter gets a leading hyphen, the string is then
 * lower-cased and prefixed with `--` (`dumpSingleJson` -> `--dump-single-json`).
 */
export function convertFlagToArg(flag: string): string {
  const kebab = flag.replace(/[A-Z]/g, "-$&").toLowerCase();
  return `--${kebab}`;
}
/**
 * Convert a flags object into a yt-dlp CLI argument array.
 *
 * Entries are processed in insertion order:
 * - `null` / `undefined` values are skipped.
 * - `extractorArgs` (string or array of strings) emits one `--extractor-args`
 *   per extractor. Inside a string, `;` separates pieces; a piece that starts
 *   with an `extractor:` prefix opens a new group, while a piece without a
 *   prefix is a further argument of the preceding extractor and stays joined
 *   to it with `;` (yt-dlp's own syntax, e.g.
 *   `youtube:player_client=android;skip=dash`).
 * - `addHeader` (string or array of strings) emits one `--add-header` per entry.
 * - Any other key is converted with `convertFlagToArg`: `true` emits the bare
 *   option, `false` emits nothing, strings and numbers emit option + value,
 *   arrays emit option + comma-joined value. Other value types are ignored.
 *
 * @param flags - Option names (camelCase) mapped to their values.
 * @returns Arguments ready to be passed to `spawn`.
 */
export function flagsToArgs(flags: Record<string, any>): string[] {
  // Recognises the "extractor:" prefix that starts a new --extractor-args group.
  const extractorPrefix = /^[\w.-]+:/;

  // Split one extractor-args string into per-extractor groups.
  const splitExtractorArgs = (raw: string): string[] => {
    if (!raw.includes(";")) {
      return [raw];
    }
    const groups: string[] = [];
    for (const piece of raw.split(";")) {
      const part = piece.trim();
      if (!part) {
        continue;
      }
      if (groups.length === 0 || extractorPrefix.test(part)) {
        groups.push(part);
      } else {
        // No extractor prefix: this is another ARG=VAL of the previous
        // extractor, so glue it back instead of emitting an invalid option.
        groups.push(`${groups.pop()};${part}`);
      }
    }
    return groups;
  };

  const args: string[] = [];

  for (const [key, value] of Object.entries(flags)) {
    if (value === undefined || value === null) {
      continue;
    }

    if (key === "extractorArgs") {
      const entries: unknown[] = Array.isArray(value) ? value : [value];
      for (const entry of entries) {
        if (entry === undefined || entry === null) {
          continue;
        }
        for (const group of splitExtractorArgs(String(entry))) {
          args.push("--extractor-args", group);
        }
      }
      continue;
    }

    if (key === "addHeader") {
      // Each header is a "key:value" string and needs its own option.
      const headers: unknown[] = Array.isArray(value) ? value : [value];
      for (const header of headers) {
        if (header === undefined || header === null) {
          continue;
        }
        args.push("--add-header", String(header));
      }
      continue;
    }

    const argName = convertFlagToArg(key);

    if (typeof value === "boolean") {
      // Only a true boolean produces output; false is dropped.
      if (value) {
        args.push(argName);
      }
    } else if (typeof value === "string" || typeof value === "number") {
      args.push(argName, String(value));
    } else if (Array.isArray(value)) {
      args.push(argName, value.join(","));
    }
  }

  return args;
}
/**
 * Run yt-dlp in metadata mode and resolve with its parsed JSON output.
 *
 * The process is started with `--dump-single-json --no-warnings`, followed by
 * the arguments derived from `flags`, a `--` separator and the URL.
 *
 * @param url - Target URL. It is placed after `--` so a value that starts
 *   with `-` can never be interpreted by yt-dlp as an option.
 * @param flags - Extra yt-dlp options, converted with `flagsToArgs`.
 * @returns The value produced by `JSON.parse` on yt-dlp's stdout.
 * @throws Rejects with an Error carrying a `stderr` property when yt-dlp exits
 *   with a non-zero code, with the spawn error when the process cannot be
 *   started, or with an Error when stdout is not valid JSON.
 */
export async function executeYtDlpJson(
  url: string,
  flags: Record<string, any> = {}
): Promise<any> {
  const args = [
    "--dump-single-json",
    "--no-warnings",
    ...flagsToArgs(flags),
    "--",
    url,
  ];

  console.log(`Executing: ${YT_DLP_PATH} ${args.join(" ")}`);

  return new Promise<any>((resolve, reject) => {
    const subprocess = spawn(YT_DLP_PATH, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Keep raw chunks and decode once at the end: decoding chunk by chunk
    // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];

    subprocess.stdout?.on("data", (chunk: Buffer) => {
      stdoutChunks.push(chunk);
    });

    subprocess.stderr?.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk);
    });

    subprocess.on("close", (code) => {
      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
      const stderr = Buffer.concat(stderrChunks).toString("utf8");

      if (code !== 0) {
        const error = new Error(`yt-dlp process exited with code ${code}`);
        (error as any).stderr = stderr;
        reject(error);
        return;
      }

      // Surface unexpected stderr output, ignoring routine progress/info lines.
      if (
        stderr &&
        !stderr.includes("[download]") &&
        !stderr.includes("[info]")
      ) {
        console.warn("yt-dlp stderr:", stderr);
      }

      try {
        resolve(JSON.parse(stdout));
      } catch (parseError) {
        console.error("Failed to parse yt-dlp JSON output:", parseError);
        console.error("Output:", stdout);
        reject(new Error("Failed to parse yt-dlp output as JSON"));
      }
    });

    // Emitted when the process could not be spawned at all.
    subprocess.on("error", (error) => {
      reject(error);
    });
  });
}
/**
 * Start yt-dlp for a download and return a thenable handle to it.
 *
 * The process is spawned with the arguments derived from `flags`, a `--`
 * separator and the URL. The returned object exposes the child's `stdout`
 * and `stderr` streams (for progress tracking), a `kill()` method and a
 * `then()` bound to a promise that settles when the process ends.
 *
 * @param url - Target URL. It is placed after `--` so a value that starts
 *   with `-` can never be interpreted by yt-dlp as an option.
 * @param flags - yt-dlp options, converted with `flagsToArgs`.
 * @returns A subprocess-like object. Its promise resolves on exit code 0 and
 *   rejects otherwise with an Error carrying `stderr` and `code` properties
 *   (the captured stderr is also logged), or with the spawn error when the
 *   process cannot be started.
 */
export function executeYtDlpSpawn(
  url: string,
  flags: Record<string, any> = {}
): {
  stdout: NodeJS.ReadableStream | null;
  stderr: NodeJS.ReadableStream | null;
  kill: (signal?: NodeJS.Signals) => boolean;
  then: (
    onFulfilled?: (value: void) => void | Promise<void>,
    onRejected?: (reason: any) => void | Promise<void>
  ) => Promise<void>;
} {
  const args = [...flagsToArgs(flags), "--", url];

  console.log(`Spawning: ${YT_DLP_PATH} ${args.join(" ")}`);

  const subprocess = spawn(YT_DLP_PATH, args, {
    stdio: ["ignore", "pipe", "pipe"],
  });

  // Capture stderr for error reporting. Chunks are normalised to Buffers
  // (a caller may have switched the stream to string mode) and decoded once
  // at the end so multi-byte characters split across chunks stay intact.
  const stderrChunks: Buffer[] = [];
  subprocess.stderr?.on("data", (chunk: Buffer | string) => {
    stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
  });

  const promise = new Promise<void>((resolve, reject) => {
    // "error" and "close" can both fire; only the first one settles.
    let settled = false;

    subprocess.on("close", (code) => {
      if (settled) {
        return;
      }
      settled = true;

      if (code === 0) {
        resolve();
        return;
      }

      const stderr = Buffer.concat(stderrChunks).toString("utf8");
      const error = new Error(`yt-dlp process exited with code ${code}`);
      (error as any).stderr = stderr;
      (error as any).code = code;
      console.error("yt-dlp error output:", stderr);
      reject(error);
    });

    subprocess.on("error", (error) => {
      if (settled) {
        return;
      }
      settled = true;
      reject(error);
    });
  });

  return {
    stdout: subprocess.stdout,
    stderr: subprocess.stderr,
    kill: (signal?: NodeJS.Signals) => {
      // `subprocess.killed` only says a signal was sent, not that the process
      // ended; check the exit state so a follow-up signal can still be sent.
      if (subprocess.exitCode !== null || subprocess.signalCode !== null) {
        return false;
      }
      return subprocess.kill(signal);
    },
    then: promise.then.bind(promise),
  };
}