From 2ab10ea6670627b4b2ec6fd9fc7f6abc1842a30a Mon Sep 17 00:00:00 2001 From: mrbunker Date: Sun, 5 Jan 2025 22:41:36 +0800 Subject: [PATCH] feat: improve handling of search results --- package.json | 1 + pnpm-lock.yaml | 15 ++++++++ script/urlConfig.ts | 3 ++ src/components/SiteBtn.tsx | 4 +- src/utils/index.ts | 5 ++- src/utils/siteList.ts | 35 ++++++++++------- src/utils/xhr.ts | 79 +++++++++++++++++++++----------------- 7 files changed, 89 insertions(+), 53 deletions(-) diff --git a/package.json b/package.json index 627fd2e..0a697c9 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "build": "tsc && vite build" }, "dependencies": { + "@types/node": "^22.10.5", "preact": "10.25.4" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1ed204b..2e790e8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: dependencies: + '@types/node': + specifier: ^22.10.5 + version: 22.10.5 preact: specifier: 10.25.4 version: 10.25.4 @@ -413,6 +416,9 @@ packages: '@types/estree@1.0.6': resolution: {integrity: sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==} + '@types/node@22.10.5': + resolution: {integrity: sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==} + acorn-walk@8.3.4: resolution: {integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==} engines: {node: '>=0.4.0'} @@ -659,6 +665,9 @@ packages: engines: {node: '>=14.17'} hasBin: true + undici-types@6.20.0: + resolution: {integrity: sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==} + update-browserslist-db@1.1.1: resolution: {integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==} hasBin: true @@ -1051,6 +1060,10 @@ snapshots: '@types/estree@1.0.6': {} + '@types/node@22.10.5': + dependencies: + undici-types: 6.20.0 + acorn-walk@8.3.4: dependencies: acorn: 8.14.0 @@ -1294,6 +1307,8 @@ snapshots: typescript@5.7.2: {} + undici-types@6.20.0: {} + update-browserslist-db@1.1.1(browserslist@4.24.3): dependencies: browserslist: 4.24.3 diff --git a/script/urlConfig.ts b/script/urlConfig.ts index 9274009..a71ab9f 100644 --- a/script/urlConfig.ts +++ b/script/urlConfig.ts @@ -8,6 +8,9 @@ const cleanUrl = (url: string): string => { }; const getLibMirror = async () => { + if (process.env.NODE_ENV !== "production") { + return ""; + } try { const user = "javlibcom"; const res = await fetch(`https://api.github.com/users/${user}`); diff --git a/src/components/SiteBtn.tsx b/src/components/SiteBtn.tsx index 74b8c05..4bf5864 100644 --- a/src/components/SiteBtn.tsx +++ b/src/components/SiteBtn.tsx @@ -28,7 +28,7 @@ const SiteBtn = ({ siteItem, CODE, multipleNavi, hiddenError }: Props) => { const multipleFlag = multipleNavi && fetchRes?.multipleRes; const tag = multipleFlag ? "多结果" : fetchRes?.tag; - const resultLink = (multipleFlag ? fetchRes.multipResLink : fetchRes?.targetLink) ?? originLink; + const resultLink = multipleFlag ? originLink : fetchRes?.resultLink; const colorClass = fetchRes?.isSuccess ? "jop-button_green " : "jop-button_red "; if (hiddenError && !fetchRes?.isSuccess) { @@ -38,7 +38,7 @@ const SiteBtn = ({ siteItem, CODE, multipleNavi, hiddenError }: Props) => { {tag &&
{tag}
} diff --git a/src/utils/index.ts b/src/utils/index.ts index f240b7f..b0b6333 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -42,7 +42,8 @@ export const gmPost = ({ }); }; -export const isCaseInsensitiveEqual = (str1: string, str2: string) => { +export const isCaseInsensitiveEqual = (str1?: any, str2?: any) => { + if (!str1 || !str2) return false; return str1.toLowerCase() === str2.toLowerCase(); }; @@ -68,7 +69,7 @@ export const getCode = (libItem: LibItem): string => { export const regEnum = { subtitle: /(中文|字幕|subtitle)/, - leakage: /(无码|無碼|泄漏|Uncensored)/, + leakage: /(无码|無碼|泄漏|泄露|Uncensored)/, }; export const tagsQuery = ({ diff --git a/src/utils/siteList.ts b/src/utils/siteList.ts index 2f431a0..38d4cbf 100644 --- a/src/utils/siteList.ts +++ b/src/utils/siteList.ts @@ -46,15 +46,17 @@ export interface SiteItem_get extends SiteItemBase { export interface SiteItem_parser extends SiteItemBase { fetchType: "parser"; + /** 严格匹配,会检查搜索结果的 code */ + strictParser?: true; domQuery: DomQuery_parser; } -export interface SiteItem_post extends SiteItemBase { - fetchType: "post"; - postParams: Record; - domQuery: DomQuery_parser; -} +// export interface SiteItem_post extends SiteItemBase { +// fetchType: "post"; +// postParams: Record; +// domQuery: DomQuery_parser; +// } -export type SiteItem = SiteItem_get | SiteItem_parser | SiteItem_post; +export type SiteItem = SiteItem_get | SiteItem_parser; /** 在线网站列表 */ export const siteList: SiteItem[] = [ @@ -66,7 +68,6 @@ export const siteList: SiteItem[] = [ domQuery: { subQuery: ".info-header", leakQuery: ".info-header", - videoQuery: ".plyr__controls", }, }, { @@ -96,9 +97,13 @@ export const siteList: SiteItem[] = [ { name: "123av", hostname: "123av.com", - url: "https://123av.com/zh/v/{{code}}", - fetchType: "get", - domQuery: {}, + url: "https://123av.com/zh/search?keyword={{code}}", + fetchType: "parser", + strictParser: true, + domQuery: { + linkQuery: `.detail>a[href*='v/']`, + titleQuery: `.detail>a[href*='v/']`, + }, }, { // 有可能搜出仨:leakage subtitle 4k @@ -117,8 +122,8 @@ export const siteList: SiteItem[] = [ url: "https://netflav5.com/search?type=title&keyword={{code}}", fetchType: "parser", domQuery: { - linkQuery: ".video_grid_container a", - titleQuery: ".video_grid_container", + linkQuery: ".grid_0_cell>a[href^='/video?']", + titleQuery: ".grid_0_cell>a[href^='/video?'] .grid_0_title", }, }, { @@ -233,7 +238,10 @@ export const siteList: SiteItem[] = [ hostname: "www.av01.tv", url: "https://www.av01.tv/search/videos?search_query={{code}}", fetchType: "parser", - domQuery: { linkQuery: "div[id].well-sm>a", titleQuery: ".video-views>.pull-left" }, + domQuery: { + linkQuery: "div.well>a[href^='/video/']", + titleQuery: "div.well>a[href^='/video/']", + }, }, { name: "18sex", @@ -250,6 +258,7 @@ export const siteList: SiteItem[] = [ domQuery: { linkQuery: ".well>a[href]", titleQuery: ".well>a[href]>span.video-title" }, }, { + // 套了个 cf_clearance 的 cookie,不好搞 name: "evojav", hostname: "evojav.pro", url: "https://evojav.pro/video/{{code}}/", diff --git a/src/utils/xhr.ts b/src/utils/xhr.ts index 0ec58f7..f070211 100644 --- a/src/utils/xhr.ts +++ b/src/utils/xhr.ts @@ -1,9 +1,9 @@ -import { gmGet, isCaseInsensitiveEqual, isErrorCode, regEnum, tagsQuery } from "./"; -import type { DomQuery_get, DomQuery_parser, SiteItem } from "./siteList"; +import { gmGet, isCaseInsensitiveEqual, isErrorCode, tagsQuery } from "./"; +import type { DomQuery_get, SiteItem, SiteItem_parser } from "./siteList"; export type FetchResult = { isSuccess: boolean; - targetLink?: string; + resultLink?: string; tag?: string; multipResLink?: string; multipleRes?: boolean; @@ -16,54 +16,68 @@ function videoPageParser(responseText: string, { subQuery, leakQuery, videoQuery const subNode = subQuery ? doc.querySelector(subQuery) : ""; const subNodeText = subNode ? subNode.innerHTML : ""; const leakNode = leakQuery ? doc.querySelector(leakQuery) : null; - const linkNodeText = leakNode ? leakNode.innerHTML : ""; - + const leakNodeText = leakNode ? leakNode.innerHTML : ""; /** 部分网站收录视频,但是未提供播放资源,所以需要使用 videoQuery 进一步检测是否存在在线播放。 * videoQuery 为 undefine 时,不需要查找 video */ const videoNode = videoQuery ? doc.querySelector(videoQuery) : true; return { isSuccess: !!videoNode, - tag: tagsQuery({ leakageText: linkNodeText, subtitleText: subNodeText }), + tag: tagsQuery({ leakageText: leakNodeText, subtitleText: subNodeText }), }; } +function searchPageCodeCheck( + titleNodes: NodeListOf | never[], + siteItem: SiteItem_parser, + CODE: string, +) { + if (!titleNodes || titleNodes.length === 0) return { isSuccess: false, titleNodeText: "" }; + const codeRegex = /[a-zA-Z]{3,5}-\d{3,5}/; + if (siteItem.strictParser) { + const nodes = Array.from(titleNodes); + const passNodes = nodes.filter((node) => { + const nodeCode = node.outerHTML.match(codeRegex); + return isCaseInsensitiveEqual(nodeCode?.[0], CODE); + }); + const titleNodeText = passNodes.map((node) => node.outerHTML).join(" "); + return { + titleNodeText, + isSuccess: passNodes.length > 0, + multipleRes: passNodes.length > 1, + }; + } else { + const titleNode = titleNodes[siteItem.domQuery.listIndex ?? 0]; + const titleNodeText = titleNode ? titleNode?.outerHTML : ""; + const matchCode = titleNodeText.match(codeRegex); + const isSuccess = isCaseInsensitiveEqual(matchCode?.[0], CODE); + return { titleNodeText, isSuccess, multipleRes: titleNodes.length > 1 }; + } +} + /** 针对 fetcher==="parser" 时的搜索结果页进行解析,寻找是否存在视频资源。 * linkQuery & titleQuery 都是必须, * linkQuery 有结果且 titleQuery 有结果包含 code,返回 isSuccess。 * 再检查下 title 中是否含有字幕信息等 */ -function serachPageParser( - responseText: string, - { linkQuery, titleQuery, listIndex = 0 }: DomQuery_parser, - siteHostName: string, - CODE: string, - searchPageLink: string, -) { +function serachPageParser(responseText: string, siteItem: SiteItem_parser, CODE: string) { + const { linkQuery, titleQuery } = siteItem.domQuery; const doc = new DOMParser().parseFromString(responseText, "text/html"); const titleNodes = titleQuery ? doc.querySelectorAll(titleQuery) : []; + const { isSuccess, titleNodeText, multipleRes } = searchPageCodeCheck(titleNodes, siteItem, CODE); + const linkNodes = linkQuery ? doc.querySelectorAll(linkQuery) : []; - - const titleNode = titleNodes[listIndex]; - - const linkNode = linkNodes[listIndex]; - const titleNodeText = titleNode ? titleNode?.outerHTML : ""; - - const codeRegex = /[a-zA-Z]{3,5}-\d{3,5}/; - const matchCode = titleNodeText.match(codeRegex); - const isSuccess = - linkNode && titleNode && matchCode && isCaseInsensitiveEqual(matchCode[0], CODE); + const linkNode = linkNodes[siteItem.domQuery.listIndex ?? 0]; if (!isSuccess) { return { isSuccess: false }; } - const targetLinkText = linkNode.href.replace(linkNode.hostname, siteHostName); + const resultLinkText = linkNode.href.replace(linkNode.hostname, siteItem.hostname); return { isSuccess: true, - targetLink: targetLinkText, - multipResLink: searchPageLink, - multipleRes: titleNodes.length > 1, + resultLink: resultLinkText, + multipleRes, tag: tagsQuery({ leakageText: titleNodeText, subtitleText: titleNodeText }), }; } @@ -84,24 +98,17 @@ export const baseFetcher = async ({ siteItem, targetLink, CODE }: Args): Promise if (siteItem.fetchType === "get") { // 直接 get 网页,成功,需要进一步解析 videoPage,获取字幕等信息 return { + resultLink: targetLink, ...videoPageParser(response.responseText, siteItem.domQuery), - targetLink, }; } else { return { - ...serachPageParser( - response.responseText, - siteItem.domQuery, - siteItem.hostname, - CODE, - targetLink, - ), + ...serachPageParser(response.responseText, siteItem, CODE), }; } } catch (error) { return { isSuccess: false, - targetLink: targetLink, }; } };