feat: Add BilibiliDownloader methods for author info & video

This commit is contained in:
Peifan Li
2025-12-10 22:36:16 -05:00
parent 1e2af75c99
commit e1bc7c464e
4 changed files with 421 additions and 175 deletions

View File

@@ -73,6 +73,137 @@ export interface CollectionDownloadResult {
}
export class BilibiliDownloader {
/**
 * Look up the display name of a Bilibili uploader by member id (mid).
 *
 * Uses the `x/web-interface/card` endpoint, which doesn't require WBI signing.
 * This method never rejects: any request failure is logged and the placeholder
 * name "Bilibili User" is returned instead.
 *
 * @param mid - Bilibili member id (for example, extracted from a space URL).
 * @returns The uploader's name (or the placeholder) together with the `mid`
 *          that was passed in.
 */
static async getAuthorInfo(mid: string): Promise<{
  name: string;
  mid: string;
}> {
  const fallback = { name: "Bilibili User", mid };
  try {
    // Callers may pass a mid taken from a user-supplied URL, so encode it
    // before splicing it into the query string. Numeric ids are unchanged.
    const apiUrl = `https://api.bilibili.com/x/web-interface/card?mid=${encodeURIComponent(
      mid
    )}`;
    console.log("Fetching Bilibili author info from:", apiUrl);
    const response = await axios.get(apiUrl, {
      headers: {
        Referer: "https://www.bilibili.com",
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      },
      // Without a timeout a stalled connection would block the caller forever;
      // on timeout axios rejects and the catch below returns the placeholder.
      timeout: 10000,
    });
    const card = response.data?.data?.card;
    if (card) {
      return {
        name: card.name || "Bilibili User",
        mid,
      };
    }
    return fallback;
  } catch (error) {
    console.error("Error fetching Bilibili author info:", error);
    return fallback;
  }
}
/**
 * Resolve the URL of the newest video published on a Bilibili author's space.
 *
 * Strategy:
 *  1. Ask yt-dlp for the first entry of `https://space.bilibili.com/{mid}/video`.
 *  2. If yt-dlp throws OR returns no usable entry, query the non-WBI
 *     `x/space/arc/search` endpoint as a fallback.
 *
 * This method never rejects; every failure is logged and reported as `null`.
 *
 * @param spaceUrl - Bilibili space URL from which the mid can be extracted.
 * @returns The latest video URL, or `null` when the mid cannot be extracted,
 *          no video is found, or both lookups fail.
 */
static async getLatestVideoUrl(spaceUrl: string): Promise<string | null> {
  try {
    console.log("Fetching latest video for Bilibili space:", spaceUrl);
    // Extract mid from the space URL
    const { extractBilibiliMid } = await import("../../utils/helpers");
    const mid = extractBilibiliMid(spaceUrl);
    if (!mid) {
      console.error(
        "Could not extract mid from Bilibili space URL:",
        spaceUrl
      );
      return null;
    }
    console.log("Extracted mid:", mid);
    // The mid may come from a user-supplied query parameter; encode it before
    // building URLs from it. Numeric ids are unchanged.
    const encodedMid = encodeURIComponent(mid);
    // Get user config for network options (cookies, proxy, etc.)
    const userConfig = getUserYtDlpConfig();
    const networkConfig = getNetworkConfigFromUserConfig(userConfig);
    // Use yt-dlp to get the latest video from the user's space
    // Bilibili space URL format: https://space.bilibili.com/{mid}/video
    const videosUrl = `https://space.bilibili.com/${encodedMid}/video`;
    try {
      const result = await executeYtDlpJson(videosUrl, {
        ...networkConfig,
        playlistEnd: 1, // Only get the first (latest) video
        flatPlaylist: true, // Don't download, just get info
        noWarnings: true,
      });
      // If it's a playlist/channel, 'entries' will contain the videos
      const latestEntry = result?.entries?.[0];
      if (latestEntry?.id) {
        const videoUrl = `https://www.bilibili.com/video/${latestEntry.id}`;
        console.log("Found latest Bilibili video:", videoUrl);
        return videoUrl;
      }
      // Fallback to url if id is not available
      if (latestEntry?.url) {
        console.log("Found latest Bilibili video:", latestEntry.url);
        return latestEntry.url;
      }
      // yt-dlp succeeded but produced nothing usable: fall through to the API
      // instead of reporting "no videos" straight away.
      console.log("yt-dlp returned no usable entries, trying API fallback");
    } catch (ytdlpError) {
      console.error("yt-dlp failed, trying API fallback:", ytdlpError);
    }
    // Fallback: Try the non-WBI API endpoint
    const apiUrl = `https://api.bilibili.com/x/space/arc/search?mid=${encodedMid}&pn=1&ps=1&order=pubdate`;
    const response = await axios.get(apiUrl, {
      headers: {
        Referer: "https://www.bilibili.com",
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      },
      // Bound the request; a timeout rejects and is handled by the outer catch.
      timeout: 10000,
    });
    const bvid = response.data?.data?.list?.vlist?.[0]?.bvid;
    if (bvid) {
      const videoUrl = `https://www.bilibili.com/video/${bvid}`;
      console.log("Found latest Bilibili video (API fallback):", videoUrl);
      return videoUrl;
    }
    console.log("No videos found for Bilibili space:", spaceUrl);
    return null;
  } catch (error) {
    console.error("Error fetching latest Bilibili video:", error);
    return null;
  }
}
// Get video info without downloading
static async getVideoInfo(videoId: string): Promise<{
title: string;
@@ -323,20 +454,25 @@ export class BilibiliDownloader {
let stderrOutput = "";
subprocess.stderr?.on("data", (data: Buffer) => {
stderrOutput += data.toString();
// Log stderr in real-time for debugging
// Log stderr in real-time for debugging (filter out expected warnings)
const lines = data
.toString()
.split("\n")
.filter((line) => line.trim());
for (const line of lines) {
// Skip expected/informational messages
if (
!line.includes("[download]") &&
!line.includes("[info]") &&
!line.includes("[ExtractAudio]") &&
!line.includes("[Merger]")
line.includes("[download]") ||
line.includes("[info]") ||
line.includes("[ExtractAudio]") ||
line.includes("[Merger]") ||
line.includes("[BiliBili]") ||
line.includes("Subtitles are only available when logged in") ||
line.includes("Invalid data found when processing input")
) {
console.warn("yt-dlp stderr:", line);
continue;
}
console.warn("yt-dlp stderr:", line);
}
});
@@ -353,9 +489,16 @@ export class BilibiliDownloader {
}
throw new Error("Download cancelled by user");
}
console.error("yt-dlp download failed:", error.message);
if (error.stderr) {
console.error("yt-dlp stderr:", error.stderr);
// Only log as error if it's not an expected subtitle-related issue
const stderrMsg = error.stderr || "";
const isExpectedError =
stderrMsg.includes("Subtitles are only available when logged in") ||
stderrMsg.includes("Invalid data found when processing input");
if (!isExpectedError) {
console.error("yt-dlp download failed:", error.message);
if (error.stderr) {
console.error("yt-dlp error output:", error.stderr);
}
}
}

View File

@@ -1,169 +1,252 @@
import { eq } from 'drizzle-orm';
import cron, { ScheduledTask } from 'node-cron';
import { v4 as uuidv4 } from 'uuid';
import { db } from '../db';
import { subscriptions } from '../db/schema';
import { downloadYouTubeVideo } from './downloadService';
import { YtDlpDownloader } from './downloaders/YtDlpDownloader';
import { eq } from "drizzle-orm";
import cron, { ScheduledTask } from "node-cron";
import { v4 as uuidv4 } from "uuid";
import { db } from "../db";
import { subscriptions } from "../db/schema";
import { extractBilibiliMid, isBilibiliSpaceUrl } from "../utils/helpers";
import {
downloadSingleBilibiliPart,
downloadYouTubeVideo,
} from "./downloadService";
import { BilibiliDownloader } from "./downloaders/BilibiliDownloader";
import { YtDlpDownloader } from "./downloaders/YtDlpDownloader";
import * as storageService from "./storageService";
export interface Subscription {
id: string;
author: string;
authorUrl: string;
interval: number;
lastVideoLink?: string;
lastCheck?: number;
downloadCount: number;
createdAt: number;
platform: string;
id: string;
author: string;
authorUrl: string;
interval: number;
lastVideoLink?: string;
lastCheck?: number;
downloadCount: number;
createdAt: number;
platform: string;
}
export class SubscriptionService {
private static instance: SubscriptionService;
private checkTask: ScheduledTask | null = null;
private static instance: SubscriptionService;
private checkTask: ScheduledTask | null = null;
private constructor() { }
private constructor() {}
public static getInstance(): SubscriptionService {
if (!SubscriptionService.instance) {
SubscriptionService.instance = new SubscriptionService();
public static getInstance(): SubscriptionService {
if (!SubscriptionService.instance) {
SubscriptionService.instance = new SubscriptionService();
}
return SubscriptionService.instance;
}
async subscribe(authorUrl: string, interval: number): Promise<Subscription> {
// Detect platform and validate URL
let platform: string;
let authorName = "Unknown Author";
if (isBilibiliSpaceUrl(authorUrl)) {
platform = "Bilibili";
// Extract mid from the space URL
const mid = extractBilibiliMid(authorUrl);
if (!mid) {
throw new Error("Invalid Bilibili space URL");
}
// Try to get author name from Bilibili API
try {
const authorInfo = await BilibiliDownloader.getAuthorInfo(mid);
authorName = authorInfo.name;
} catch (error) {
console.error("Error fetching Bilibili author info:", error);
// Use mid as fallback author name
authorName = `Bilibili User ${mid}`;
}
} else if (authorUrl.includes("youtube.com")) {
platform = "YouTube";
// Extract author from YouTube URL if possible
const match = authorUrl.match(/youtube\.com\/(@[^\/]+)/);
if (match && match[1]) {
authorName = match[1];
} else {
// Fallback: try to extract from other URL formats
const parts = authorUrl.split("/");
if (parts.length > 0) {
const lastPart = parts[parts.length - 1];
if (lastPart) authorName = lastPart;
}
return SubscriptionService.instance;
}
} else {
throw new Error(
"Invalid URL. Only YouTube channel URLs and Bilibili space URLs are supported."
);
}
async subscribe(authorUrl: string, interval: number): Promise<Subscription> {
// Validate URL (basic check)
if (!authorUrl.includes('youtube.com')) {
throw new Error('Invalid YouTube URL');
}
// Check if already subscribed
const existing = await db
.select()
.from(subscriptions)
.where(eq(subscriptions.authorUrl, authorUrl));
if (existing.length > 0) {
throw new Error("Subscription already exists");
}
// Check if already subscribed
const existing = await db.select().from(subscriptions).where(eq(subscriptions.authorUrl, authorUrl));
if (existing.length > 0) {
throw new Error('Subscription already exists');
}
// We skip heavy getVideoInfo here to ensure fast response.
// The scheduler will eventually fetch new videos and we can update author name then if needed.
// Extract author from URL if possible
let authorName = 'Unknown Author';
const match = authorUrl.match(/youtube\.com\/(@[^\/]+)/);
if (match && match[1]) {
authorName = match[1];
} else {
// Fallback: try to extract from other URL formats
const parts = authorUrl.split('/');
if (parts.length > 0) {
const lastPart = parts[parts.length - 1];
if (lastPart) authorName = lastPart;
let lastVideoLink = "";
const newSubscription: Subscription = {
id: uuidv4(),
author: authorName,
authorUrl,
interval,
lastVideoLink,
lastCheck: Date.now(),
downloadCount: 0,
createdAt: Date.now(),
platform,
};
await db.insert(subscriptions).values(newSubscription);
return newSubscription;
}
/**
 * Delete the subscription row whose id equals `id`.
 *
 * @param id - Primary key of the subscription to remove.
 */
async unsubscribe(id: string): Promise<void> {
  const matchesId = eq(subscriptions.id, id);
  await db.delete(subscriptions).where(matchesId);
}
/**
 * Return every row of the `subscriptions` table.
 *
 * @returns All stored subscriptions, typed as `Subscription[]`.
 */
async listSubscriptions(): Promise<Subscription[]> {
// @ts-ignore - Drizzle's inferred row type may not line up exactly with `Subscription`.
// NOTE(review): db.select().from(subscriptions) returns the inferred type, so this suppression may be unnecessary - confirm and remove if so.
return await db.select().from(subscriptions);
}
/**
 * Poll every stored subscription whose interval (in minutes) has elapsed.
 *
 * For each due subscription the latest video URL is resolved for its platform.
 * When it differs from `lastVideoLink`, the video is downloaded, a download
 * history item is recorded (success or failure), and the subscription row is
 * updated. A failed download only bumps `lastCheck`, so the same video is
 * retried on a later run. Errors for one subscription are logged and do not
 * stop the loop.
 */
async checkSubscriptions(): Promise<void> {
  // console.log('Checking subscriptions...'); // Too verbose
  const subscriptionRows = await this.listSubscriptions();

  for (const sub of subscriptionRows) {
    const now = Date.now();
    const elapsedMs = now - (sub.lastCheck || 0);
    const intervalMs = sub.interval * 60 * 1000;

    // Not due yet. Written as a negated ">=" so a NaN comparison also skips.
    if (!(elapsedMs >= intervalMs)) {
      continue;
    }

    try {
      console.log(
        `Checking subscription for ${sub.author} (${sub.platform})...`
      );

      // 1. Fetch latest video link based on platform
      const latestVideoUrl = await this.getLatestVideoUrl(
        sub.authorUrl,
        sub.platform
      );

      if (!latestVideoUrl || latestVideoUrl === sub.lastVideoLink) {
        // Nothing new: just update lastCheck
        await db
          .update(subscriptions)
          .set({ lastCheck: now })
          .where(eq(subscriptions.id, sub.id));
        continue;
      }

      console.log(`New video found for ${sub.author}: ${latestVideoUrl}`);

      // 2. Download the video based on platform
      try {
        const downloadResult: any =
          sub.platform === "Bilibili"
            ? await downloadSingleBilibiliPart(latestVideoUrl, 1, 1, "")
            : await downloadYouTubeVideo(latestVideoUrl);

        // Add to download history on success
        const videoData = downloadResult?.videoData || downloadResult || {};
        storageService.addDownloadHistoryItem({
          id: uuidv4(),
          title: videoData.title || `New video from ${sub.author}`,
          author: videoData.author || sub.author,
          sourceUrl: latestVideoUrl,
          finishedAt: Date.now(),
          status: "success",
          videoPath: videoData.videoPath,
          thumbnailPath: videoData.thumbnailPath,
          videoId: videoData.id,
        });
      } catch (downloadError: any) {
        console.error(
          `Error downloading subscription video for ${sub.author}:`,
          downloadError
        );

        // Add to download history on failure
        storageService.addDownloadHistoryItem({
          id: uuidv4(),
          title: `Video from ${sub.author}`,
          author: sub.author,
          sourceUrl: latestVideoUrl,
          finishedAt: Date.now(),
          status: "failed",
          error: downloadError.message || "Download failed",
        });

        // Don't update lastVideoLink on failure so we retry next time
        await db
          .update(subscriptions)
          .set({ lastCheck: now })
          .where(eq(subscriptions.id, sub.id));
        continue;
      }

      // 3. Update subscription record
      const updatedFields = {
        lastVideoLink: latestVideoUrl,
        lastCheck: now,
        downloadCount: (sub.downloadCount || 0) + 1,
      };
      await db
        .update(subscriptions)
        .set(updatedFields)
        .where(eq(subscriptions.id, sub.id));
    } catch (error) {
      console.error(`Error checking subscription for ${sub.author}:`, error);
    }
  }
}
// We skip heavy getVideoInfo here to ensure fast response.
// The scheduler will eventually fetch new videos and we can update author name then if needed.
let lastVideoLink = '';
/**
 * Start (or restart) the cron task that polls subscriptions once per minute.
 *
 * Any previously scheduled task is stopped first. Each tick triggers
 * `checkSubscriptions()`; a tick is skipped while the run started by an
 * earlier tick is still in flight, and a rejected run is logged rather than
 * left as an unhandled promise rejection.
 */
startScheduler() {
  if (this.checkTask) {
    this.checkTask.stop();
  }

  // checkSubscriptions() awaits downloads and can outlive the one-minute tick.
  // Overlapping runs would see the same stale lastCheck/lastVideoLink and
  // download the same video again, so only one run is allowed at a time.
  let checkInProgress = false;

  // Run every minute
  this.checkTask = cron.schedule("* * * * *", () => {
    if (checkInProgress) {
      return;
    }
    checkInProgress = true;
    void this.checkSubscriptions()
      .catch((error) => {
        console.error("Error running subscription check:", error);
      })
      .finally(() => {
        checkInProgress = false;
      });
  });
  console.log("Subscription scheduler started (node-cron).");
}
const newSubscription: Subscription = {
id: uuidv4(),
author: authorName,
authorUrl,
interval,
lastVideoLink,
lastCheck: Date.now(),
downloadCount: 0,
createdAt: Date.now(),
platform: 'YouTube'
};
await db.insert(subscriptions).values(newSubscription);
return newSubscription;
// Helper to get latest video URL based on platform
private async getLatestVideoUrl(
channelUrl: string,
platform?: string
): Promise<string | null> {
if (platform === "Bilibili" || isBilibiliSpaceUrl(channelUrl)) {
return await BilibiliDownloader.getLatestVideoUrl(channelUrl);
}
async unsubscribe(id: string): Promise<void> {
await db.delete(subscriptions).where(eq(subscriptions.id, id));
}
async listSubscriptions(): Promise<Subscription[]> {
// @ts-ignore - Drizzle type inference might be tricky with raw select sometimes, but this should be fine.
// Actually, db.select().from(subscriptions) returns the inferred type.
return await db.select().from(subscriptions);
}
async checkSubscriptions(): Promise<void> {
// console.log('Checking subscriptions...'); // Too verbose
const allSubs = await this.listSubscriptions();
for (const sub of allSubs) {
const now = Date.now();
const lastCheck = sub.lastCheck || 0;
const intervalMs = sub.interval * 60 * 1000;
if (now - lastCheck >= intervalMs) {
try {
console.log(`Checking subscription for ${sub.author}...`);
// 1. Fetch latest video link
// We need a robust way to get the latest video.
// We can use `yt-dlp --print webpage_url --playlist-end 1 "channel_url"`
// We'll need to expose a method in `downloadService` or `YtDlpDownloader` for this.
// For now, let's assume `getLatestVideoUrl` exists.
const latestVideoUrl = await this.getLatestVideoUrl(sub.authorUrl);
if (latestVideoUrl && latestVideoUrl !== sub.lastVideoLink) {
console.log(`New video found for ${sub.author}: ${latestVideoUrl}`);
// 2. Download the video
// We use `downloadYouTubeVideo` from downloadService`.
// We might want to associate this download with the subscription for tracking?
// The requirement says "update last_video_link value".
await downloadYouTubeVideo(latestVideoUrl);
// 3. Update subscription record
await db.update(subscriptions)
.set({
lastVideoLink: latestVideoUrl,
lastCheck: now,
downloadCount: (sub.downloadCount || 0) + 1
})
.where(eq(subscriptions.id, sub.id));
} else {
// Just update lastCheck
await db.update(subscriptions)
.set({ lastCheck: now })
.where(eq(subscriptions.id, sub.id));
}
} catch (error) {
console.error(`Error checking subscription for ${sub.author}:`, error);
}
}
}
}
startScheduler() {
if (this.checkTask) {
this.checkTask.stop();
}
// Run every minute
this.checkTask = cron.schedule('* * * * *', () => {
this.checkSubscriptions();
});
console.log('Subscription scheduler started (node-cron).');
}
// Helper to get latest video URL.
// This should probably be in YtDlpDownloader, but for now we can implement it here using a similar approach.
// We need to import `exec` or similar to run yt-dlp.
// Since `YtDlpDownloader` is in `services/downloaders`, we should probably add a method there.
// But to keep it self-contained for now, I'll assume we can add it to `YtDlpDownloader` later or mock it.
// Let's try to use `YtDlpDownloader.getLatestVideoUrl` if we can add it.
// For now, I will implement a placeholder that uses `YtDlpDownloader`'s internal logic if possible,
// or just calls `getVideoInfo` and hopes it works for channels (it might not give the *latest* video URL directly).
// BETTER APPROACH: Add `getLatestVideoUrl` to `YtDlpDownloader` class.
// I will do that in a separate step. For now, I'll define the interface.
private async getLatestVideoUrl(channelUrl: string): Promise<string | null> {
return await YtDlpDownloader.getLatestVideoUrl(channelUrl);
}
// Default to YouTube/yt-dlp
return await YtDlpDownloader.getLatestVideoUrl(channelUrl);
}
}
export const subscriptionService = SubscriptionService.getInstance();

View File

@@ -15,6 +15,11 @@ export function isBilibiliUrl(url: string): boolean {
return url.includes("bilibili.com") || url.includes("b23.tv");
}
// Helper function that reports whether a URL string contains the Bilibili
// space/author host ("space.bilibili.com"). This is a plain substring match.
export function isBilibiliSpaceUrl(url: string): boolean {
  const spaceHost = "space.bilibili.com";
  return url.indexOf(spaceHost) !== -1;
}
// Helper function to extract URL from text that might contain a title and URL
export function extractUrlFromText(text: string): string {
// Regular expression to find URLs in text
@@ -142,15 +147,18 @@ export function extractMissAVVideoId(url: string): string | null {
}
// Helper function to extract source video ID from any supported URL
export function extractSourceVideoId(url: string): { id: string | null; platform: string } {
export function extractSourceVideoId(url: string): {
id: string | null;
platform: string;
} {
if (isBilibiliUrl(url)) {
return { id: extractBilibiliVideoId(url), platform: "bilibili" };
}
if (url.includes("youtube.com") || url.includes("youtu.be")) {
return { id: extractYouTubeVideoId(url), platform: "youtube" };
}
if (url.includes("missav") || url.includes("123av")) {
return { id: extractMissAVVideoId(url), platform: "missav" };
}
@@ -182,14 +190,14 @@ export function extractBilibiliMid(url: string): string | null {
if (spaceMatch && spaceMatch[1]) {
return spaceMatch[1];
}
// Try to extract from URL parameters
const urlObj = new URL(url);
const midParam = urlObj.searchParams.get('mid');
const midParam = urlObj.searchParams.get("mid");
if (midParam) {
return midParam;
}
return null;
}
@@ -197,7 +205,7 @@ export function extractBilibiliMid(url: string): string | null {
export function extractBilibiliSeasonId(url: string): string | null {
try {
const urlObj = new URL(url);
const seasonId = urlObj.searchParams.get('season_id');
const seasonId = urlObj.searchParams.get("season_id");
return seasonId;
} catch (error) {
return null;
@@ -208,7 +216,7 @@ export function extractBilibiliSeasonId(url: string): string | null {
export function extractBilibiliSeriesId(url: string): string | null {
try {
const urlObj = new URL(url);
const seriesId = urlObj.searchParams.get('series_id');
const seriesId = urlObj.searchParams.get("series_id");
return seriesId;
} catch (error) {
return null;
@@ -217,7 +225,11 @@ export function extractBilibiliSeriesId(url: string): string | null {
// Helper function to format video filename according to: Title-Author-YYYY
// Symbols are removed, spaces replaced by dots.
export function formatVideoFilename(title: string, author: string, dateString: string): string {
export function formatVideoFilename(
title: string,
author: string,
dateString: string
): string {
// Helper to clean segments: remove symbols (keep letters/numbers/spaces), replace spaces with dots
const cleanSegment = (str: string) => {
if (!str) return "Unknown";
@@ -229,19 +241,19 @@ export function formatVideoFilename(title: string, author: string, dateString: s
let cleanTitle = cleanSegment(title) || "Video";
let cleanAuthor = cleanSegment(author) || "Unknown";
// Extract year
let year = new Date().getFullYear().toString();
if (dateString) {
const match = dateString.match(/(\d{4})/);
if (match) {
year = match[1];
}
const match = dateString.match(/(\d{4})/);
if (match) {
year = match[1];
}
}
// Truncate author if it's too long (e.g. > 50 chars) to prioritize title visibility
if (cleanAuthor.length > 50) {
cleanAuthor = cleanAuthor.substring(0, 50);
cleanAuthor = cleanAuthor.substring(0, 50);
}
// Construct the suffix parts
@@ -251,13 +263,13 @@ export function formatVideoFilename(title: string, author: string, dateString: s
// Max length for the filename (leaving room for extension)
const MAX_FILENAME_LENGTH = 200;
// Calculate available space for title
const availableTitleLength = MAX_FILENAME_LENGTH - fullSuffix.length;
if (cleanTitle.length > availableTitleLength) {
// Truncate title
cleanTitle = cleanTitle.substring(0, Math.max(0, availableTitleLength));
// Truncate title
cleanTitle = cleanTitle.substring(0, Math.max(0, availableTitleLength));
}
return `${cleanTitle}${fullSuffix}`;

View File

@@ -165,6 +165,14 @@ export const DownloadProvider: React.FC<{ children: React.ReactNode }> = ({ chil
return { success: true };
}
// Check for Bilibili space/author URL (e.g., https://space.bilibili.com/4652742)
const bilibiliSpaceRegex = /space\.bilibili\.com\/\d+/;
if (bilibiliSpaceRegex.test(videoUrl)) {
setSubscribeUrl(videoUrl);
setShowSubscribeModal(true);
return { success: true };
}
// Check if it's a Bilibili URL
if (videoUrl.includes('bilibili.com') || videoUrl.includes('b23.tv')) {
setIsCheckingParts(true);