feat: improve handling of search results

This commit is contained in:
mrbunker
2025-01-05 22:41:36 +08:00
parent 2ff919c8ca
commit 2ab10ea667
7 changed files with 89 additions and 53 deletions

View File

@@ -9,6 +9,7 @@
"build": "tsc && vite build"
},
"dependencies": {
"@types/node": "^22.10.5",
"preact": "10.25.4"
},
"devDependencies": {

15
pnpm-lock.yaml generated
View File

@@ -8,6 +8,9 @@ importers:
.:
dependencies:
'@types/node':
specifier: ^22.10.5
version: 22.10.5
preact:
specifier: 10.25.4
version: 10.25.4
@@ -413,6 +416,9 @@ packages:
'@types/estree@1.0.6':
resolution: {integrity: sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==}
'@types/node@22.10.5':
resolution: {integrity: sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==}
acorn-walk@8.3.4:
resolution: {integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==}
engines: {node: '>=0.4.0'}
@@ -659,6 +665,9 @@ packages:
engines: {node: '>=14.17'}
hasBin: true
undici-types@6.20.0:
resolution: {integrity: sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==}
update-browserslist-db@1.1.1:
resolution: {integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==}
hasBin: true
@@ -1051,6 +1060,10 @@ snapshots:
'@types/estree@1.0.6': {}
'@types/node@22.10.5':
dependencies:
undici-types: 6.20.0
acorn-walk@8.3.4:
dependencies:
acorn: 8.14.0
@@ -1294,6 +1307,8 @@ snapshots:
typescript@5.7.2: {}
undici-types@6.20.0: {}
update-browserslist-db@1.1.1(browserslist@4.24.3):
dependencies:
browserslist: 4.24.3

View File

@@ -8,6 +8,9 @@ const cleanUrl = (url: string): string => {
};
const getLibMirror = async () => {
if (process.env.NODE_ENV !== "production") {
return "";
}
try {
const user = "javlibcom";
const res = await fetch(`https://api.github.com/users/${user}`);

View File

@@ -28,7 +28,7 @@ const SiteBtn = ({ siteItem, CODE, multipleNavi, hiddenError }: Props) => {
const multipleFlag = multipleNavi && fetchRes?.multipleRes;
const tag = multipleFlag ? "多结果" : fetchRes?.tag;
const resultLink = (multipleFlag ? fetchRes.multipResLink : fetchRes?.targetLink) ?? originLink;
const resultLink = multipleFlag ? originLink : fetchRes?.resultLink;
const colorClass = fetchRes?.isSuccess ? "jop-button_green " : "jop-button_red ";
if (hiddenError && !fetchRes?.isSuccess) {
@@ -38,7 +38,7 @@ const SiteBtn = ({ siteItem, CODE, multipleNavi, hiddenError }: Props) => {
<a
className={"jop-button " + (loading ? " " : colorClass)}
target="_blank"
href={resultLink === "" ? originLink : resultLink}
href={!resultLink ? originLink : resultLink}
>
{tag && <div className="jop-button_label">{tag}</div>}

View File

@@ -42,7 +42,8 @@ export const gmPost = ({
});
};
export const isCaseInsensitiveEqual = (str1: string, str2: string) => {
/**
 * Compares two values as strings, ignoring letter case.
 *
 * @param str1 - First value to compare.
 * @param str2 - Second value to compare.
 * @returns `true` only when both arguments are non-empty strings that are equal
 *   after lower-casing; `false` when either one is missing, empty, or not a string.
 */
export const isCaseInsensitiveEqual = (str1?: unknown, str2?: unknown): boolean => {
  // Guard on the runtime type: calling `.toLowerCase()` on a truthy non-string
  // (number, array, object, ...) would throw a TypeError.
  if (typeof str1 !== "string" || typeof str2 !== "string") return false;
  // Empty strings are treated as "no value" and never compare equal.
  if (!str1 || !str2) return false;
  return str1.toLowerCase() === str2.toLowerCase();
};
@@ -68,7 +69,7 @@ export const getCode = (libItem: LibItem): string => {
export const regEnum = {
subtitle: /(中文|字幕|subtitle)/,
leakage: /(无码|無碼|泄漏|Uncensored)/,
leakage: /(无码|無碼|泄漏|泄露|Uncensored)/,
};
export const tagsQuery = ({

View File

@@ -46,15 +46,17 @@ export interface SiteItem_get extends SiteItemBase {
export interface SiteItem_parser extends SiteItemBase {
fetchType: "parser";
/** Strict matching: checks the code of the search results. */
strictParser?: true;
domQuery: DomQuery_parser;
}
export interface SiteItem_post extends SiteItemBase {
fetchType: "post";
postParams: Record<string, any>;
domQuery: DomQuery_parser;
}
// export interface SiteItem_post extends SiteItemBase {
// fetchType: "post";
// postParams: Record<string, any>;
// domQuery: DomQuery_parser;
// }
export type SiteItem = SiteItem_get | SiteItem_parser | SiteItem_post;
export type SiteItem = SiteItem_get | SiteItem_parser;
/** 在线网站列表 */
export const siteList: SiteItem[] = [
@@ -66,7 +68,6 @@ export const siteList: SiteItem[] = [
domQuery: {
subQuery: ".info-header",
leakQuery: ".info-header",
videoQuery: ".plyr__controls",
},
},
{
@@ -96,9 +97,13 @@ export const siteList: SiteItem[] = [
{
name: "123av",
hostname: "123av.com",
url: "https://123av.com/zh/v/{{code}}",
fetchType: "get",
domQuery: {},
url: "https://123av.com/zh/search?keyword={{code}}",
fetchType: "parser",
strictParser: true,
domQuery: {
linkQuery: `.detail>a[href*='v/']`,
titleQuery: `.detail>a[href*='v/']`,
},
},
{
// May return three results: leakage, subtitle, and 4k
@@ -117,8 +122,8 @@ export const siteList: SiteItem[] = [
url: "https://netflav5.com/search?type=title&keyword={{code}}",
fetchType: "parser",
domQuery: {
linkQuery: ".video_grid_container a",
titleQuery: ".video_grid_container",
linkQuery: ".grid_0_cell>a[href^='/video?']",
titleQuery: ".grid_0_cell>a[href^='/video?'] .grid_0_title",
},
},
{
@@ -233,7 +238,10 @@ export const siteList: SiteItem[] = [
hostname: "www.av01.tv",
url: "https://www.av01.tv/search/videos?search_query={{code}}",
fetchType: "parser",
domQuery: { linkQuery: "div[id].well-sm>a", titleQuery: ".video-views>.pull-left" },
domQuery: {
linkQuery: "div.well>a[href^='/video/']",
titleQuery: "div.well>a[href^='/video/']",
},
},
{
name: "18sex",
@@ -250,6 +258,7 @@ export const siteList: SiteItem[] = [
domQuery: { linkQuery: ".well>a[href]", titleQuery: ".well>a[href]>span.video-title" },
},
{
// Protected by a cf_clearance cookie, which is hard to work around
name: "evojav",
hostname: "evojav.pro",
url: "https://evojav.pro/video/{{code}}/",

View File

@@ -1,9 +1,9 @@
import { gmGet, isCaseInsensitiveEqual, isErrorCode, regEnum, tagsQuery } from "./";
import type { DomQuery_get, DomQuery_parser, SiteItem } from "./siteList";
import { gmGet, isCaseInsensitiveEqual, isErrorCode, tagsQuery } from "./";
import type { DomQuery_get, SiteItem, SiteItem_parser } from "./siteList";
export type FetchResult = {
isSuccess: boolean;
targetLink?: string;
resultLink?: string;
tag?: string;
multipResLink?: string;
multipleRes?: boolean;
@@ -16,54 +16,68 @@ function videoPageParser(responseText: string, { subQuery, leakQuery, videoQuery
const subNode = subQuery ? doc.querySelector<HTMLElement>(subQuery) : "";
const subNodeText = subNode ? subNode.innerHTML : "";
const leakNode = leakQuery ? doc.querySelector<HTMLElement>(leakQuery) : null;
const linkNodeText = leakNode ? leakNode.innerHTML : "";
const leakNodeText = leakNode ? leakNode.innerHTML : "";
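/** Some sites list a video without providing a playable source, so videoQuery is used to further check whether online playback exists.
* When videoQuery is undefined, there is no need to look for a video node.
*/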
const videoNode = videoQuery ? doc.querySelector<HTMLElement>(videoQuery) : true;
return {
isSuccess: !!videoNode,
tag: tagsQuery({ leakageText: linkNodeText, subtitleText: subNodeText }),
tag: tagsQuery({ leakageText: leakNodeText, subtitleText: subNodeText }),
};
}
/**
 * Checks whether the title nodes found on a search results page refer to CODE.
 *
 * With `siteItem.strictParser` set, every title node is inspected and only the
 * nodes whose first code-like token equals CODE (case-insensitively) are kept.
 * Otherwise only the node at `siteItem.domQuery.listIndex` (default 0) is inspected.
 *
 * @param titleNodes - Nodes matched by the site's title query (may be empty).
 * @param siteItem - Parser-type site definition.
 * @param CODE - The code being searched for.
 * @returns `isSuccess`, the HTML text of the inspected/matching node(s) as
 *   `titleNodeText`, and, when there is at least one title node, `multipleRes`.
 */
function searchPageCodeCheck(
  titleNodes: NodeListOf<Element> | never[],
  siteItem: SiteItem_parser,
  CODE: string,
) {
  const nothingFound = !titleNodes || titleNodes.length === 0;
  if (nothingFound) {
    return { isSuccess: false, titleNodeText: "" };
  }

  const codePattern = /[a-zA-Z]{3,5}-\d{3,5}/;
  // True when the first code-like token inside `html` equals CODE, ignoring case.
  const carriesCode = (html: string) => isCaseInsensitiveEqual(html.match(codePattern)?.[0], CODE);

  if (!siteItem.strictParser) {
    // Loose mode: only the configured list entry is checked.
    const chosen = titleNodes[siteItem.domQuery.listIndex ?? 0];
    const titleNodeText = chosen ? chosen.outerHTML : "";
    return {
      titleNodeText,
      isSuccess: carriesCode(titleNodeText),
      multipleRes: titleNodes.length > 1,
    };
  }

  // Strict mode: collect the markup of every node that carries the code.
  const matchedHtml: string[] = [];
  for (const candidate of Array.from(titleNodes)) {
    if (carriesCode(candidate.outerHTML)) {
      matchedHtml.push(candidate.outerHTML);
    }
  }
  return {
    titleNodeText: matchedHtml.join(" "),
    isSuccess: matchedHtml.length > 0,
    multipleRes: matchedHtml.length > 1,
  };
}
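/** Parses the search results page when fetchType === "parser" to determine whether a video resource exists.
* Both linkQuery and titleQuery are required:
* isSuccess is returned when linkQuery yields a result and the titleQuery result contains the code.
* The title is then also checked for subtitle information and similar tags.
*/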
function serachPageParser(
responseText: string,
{ linkQuery, titleQuery, listIndex = 0 }: DomQuery_parser,
siteHostName: string,
CODE: string,
searchPageLink: string,
) {
function serachPageParser(responseText: string, siteItem: SiteItem_parser, CODE: string) {
const { linkQuery, titleQuery } = siteItem.domQuery;
const doc = new DOMParser().parseFromString(responseText, "text/html");
const titleNodes = titleQuery ? doc.querySelectorAll(titleQuery) : [];
const { isSuccess, titleNodeText, multipleRes } = searchPageCodeCheck(titleNodes, siteItem, CODE);
const linkNodes = linkQuery ? doc.querySelectorAll<HTMLAnchorElement>(linkQuery) : [];
const titleNode = titleNodes[listIndex];
const linkNode = linkNodes[listIndex];
const titleNodeText = titleNode ? titleNode?.outerHTML : "";
const codeRegex = /[a-zA-Z]{3,5}-\d{3,5}/;
const matchCode = titleNodeText.match(codeRegex);
const isSuccess =
linkNode && titleNode && matchCode && isCaseInsensitiveEqual(matchCode[0], CODE);
const linkNode = linkNodes[siteItem.domQuery.listIndex ?? 0];
if (!isSuccess) {
return { isSuccess: false };
}
const targetLinkText = linkNode.href.replace(linkNode.hostname, siteHostName);
const resultLinkText = linkNode.href.replace(linkNode.hostname, siteItem.hostname);
return {
isSuccess: true,
targetLink: targetLinkText,
multipResLink: searchPageLink,
multipleRes: titleNodes.length > 1,
resultLink: resultLinkText,
multipleRes,
tag: tagsQuery({ leakageText: titleNodeText, subtitleText: titleNodeText }),
};
}
@@ -84,24 +98,17 @@ export const baseFetcher = async ({ siteItem, targetLink, CODE }: Args): Promise
if (siteItem.fetchType === "get") {
// Direct GET of the page succeeded; the video page still needs further parsing to get subtitle info, etc.
return {
resultLink: targetLink,
...videoPageParser(response.responseText, siteItem.domQuery),
targetLink,
};
} else {
return {
...serachPageParser(
response.responseText,
siteItem.domQuery,
siteItem.hostname,
CODE,
targetLink,
),
...serachPageParser(response.responseText, siteItem, CODE),
};
}
} catch (error) {
return {
isSuccess: false,
targetLink: targetLink,
};
}
};