您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
The ultimate URL purifier
此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.cn-greasyfork.org/scripts/492078/1499254/pURLfy.js
/**
 * pURLfy - a rule-driven URL purifier.
 *
 * Rules are stored as a tree keyed by host and path parts. `purify()` walks the
 * tree to find the most specific rule for a URL and applies it, repeating up to
 * `maxIterations` times while rules ask to continue. Statistics changes are
 * announced through a "statisticschange" event on the instance.
 */
class Purlfy extends EventTarget {
    // Static properties

    /**
     * Returns the version of the library.
     * @returns {string} The version of the library.
     */
    static get version() {
        return "0.3.11";
    }

    /**
     * A TextDecoder object used internally (by the `base64` act).
     * @type {TextDecoder}
     */
    static #decoder = new TextDecoder();

    /**
     * A DOMParser object used internally; `null` where DOMParser is unavailable.
     * @type {DOMParser | null}
     */
    static #domParser = typeof DOMParser !== "undefined" ? new DOMParser() : null;

    /**
     * The constructor of the AsyncFunction class.
     * @type {Function}
     */
    static #AsyncFunction = async function () { }.constructor;

    /**
     * The initial statistics object. (All values are 0)
     * @type {Object}
     */
    static #zeroStatistics = {
        url: 0,
        param: 0,
        decoded: 0,
        redirected: 0,
        visited: 0,
        char: 0
    };

    /**
     * The default acts for URL purification. Each act receives the current
     * value and the (possibly empty) argument string that follows `name:`.
     * @type {Object}
     */
    static #acts = {
        url: decodeURIComponent,
        base64: s => { // https://developer.mozilla.org/en-US/docs/Web/API/Window/btoa#unicode_strings
            // Accept the URL-safe alphabet by mapping it back to the standard one
            s = s.replaceAll('_', '/').replaceAll('-', '+');
            const bytes = Uint8Array.from(atob(s), (m) => m.codePointAt(0));
            return Purlfy.#decoder.decode(bytes);
        },
        slice: (s, startEnd) => {
            const [start, end] = startEnd.split(":");
            return s.slice(parseInt(start), end ? parseInt(end) : undefined);
        },
        regex: (s, regex) => {
            const r = new RegExp(regex);
            const m = s.match(r);
            return m ? m[0] : "";
        },
        dom: (s) => Purlfy.#domParser.parseFromString(s, "text/html"),
        sel: (s, selector) => s.querySelector(selector),
        attr: (e, attr) => e.getAttribute(attr),
        text: (e) => e.textContent,
    };

    // Instance properties

    /**
     * Whether to enable the fetch mode.
     * @type {boolean}
     */
    fetchEnabled = false;

    /**
     * Whether to enable the lambda mode.
     * @type {boolean}
     */
    lambdaEnabled = false;

    /**
     * The maximum number of iterations for purification.
     * @type {number}
     */
    maxIterations = 5;

    /**
     * The logger function.
     * @type {Function}
     */
    #log = console.log.bind(console, "\x1b[38;2;220;20;60m[pURLfy]\x1b[0m");

    /**
     * The fetch function.
     * @type {Function}
     */
    #fetch = fetch.bind(globalThis);

    /**
     * The statistics object.
     * @type {Object}
     */
    #statistics = { ...Purlfy.#zeroStatistics };

    /**
     * The rules object.
     * @type {Object}
     */
    #rules = {};

    /**
     * Creates a new instance of the Purlfy class.
     * @param {Object} [options] The options for the instance.
     * @param {boolean} [options.fetchEnabled] Whether to enable the fetch mode.
     * @param {boolean} [options.lambdaEnabled] Whether to enable the lambda mode.
     * @param {number} [options.maxIterations] The maximum number of iterations for purification.
     * @param {Object} [options.statistics] The statistics object.
     * @param {Function} [options.log] The logger function.
     * @param {Function} [options.fetch] The fetch function.
     */
    constructor(options) {
        super();
        this.fetchEnabled = options?.fetchEnabled ?? this.fetchEnabled;
        this.lambdaEnabled = options?.lambdaEnabled ?? this.lambdaEnabled;
        this.maxIterations = options?.maxIterations ?? this.maxIterations;
        Object.assign(this.#statistics, options?.statistics);
        this.#log = options?.log ?? this.#log;
        this.#fetch = options?.fetch ?? this.#fetch;
    }

    // Static methods

    /**
     * Checks if the given value is of the given type or undefined.
     * @param {*} value The value to check.
     * @param {string} type The type to check.
     * @returns {boolean} Whether the given value is of the given type or undefined.
     */
    static #udfOrType(value, type) {
        return value === undefined || typeof value === type;
    }

    /**
     * Checks if the given URL object's search string follows the standard format.
     * @param {URL} urlObj The URL object to check.
     * @returns {boolean} Whether the given URL object's search string follows the standard format.
     */
    static #isStandard(urlObj) {
        return urlObj.searchParams.toString() === urlObj.search.slice(1);
    }

    /**
     * Checks if the given item is a non-null, non-array object.
     * @param {*} item The item to check.
     * @returns {boolean} Whether the given item is an object.
     * @see https://stackoverflow.com/questions/27936772
     */
    static #isObject(item) {
        return Boolean(item) && typeof item === 'object' && !Array.isArray(item);
    }

    /**
     * Merges the given objects deeply into `target`.
     *
     * Only own enumerable keys of each source are copied. The `__proto__` key is
     * skipped so that an untrusted (e.g. JSON-parsed) ruleset cannot reach
     * `Object.prototype`, and nested containers are looked up as OWN properties
     * so that keys such as "constructor" are stored instead of being dropped.
     * @param {Object} target The target object to merge into.
     * @param {...Object} sources The source objects to merge.
     * @returns {Object} The merged object.
     * @see https://stackoverflow.com/questions/27936772
     */
    static #mergeDeep(target, ...sources) {
        // TODO: handle rules conflict (e.g. "path" and "path/")
        if (!Purlfy.#isObject(target)) return target;
        for (const source of sources) {
            if (!Purlfy.#isObject(source)) continue;
            for (const key of Object.keys(source)) {
                if (key === "__proto__") continue; // Never touch the prototype chain
                const value = source[key];
                if (Purlfy.#isObject(value)) {
                    if (!Object.hasOwn(target, key) || !target[key]) {
                        target[key] = {};
                    }
                    Purlfy.#mergeDeep(target[key], value);
                } else {
                    target[key] = value;
                }
            }
        }
        return target;
    }

    /**
     * Applies the given acts to the given input, in order.
     * @param {string} input The input to apply the acts to.
     * @param {string[]} acts The acts to apply, each formatted as `name` or `name:arg`.
     * @param {Function} logFunc The logger function.
     * @returns {string | null} The final value as a string, or `null` if an act is
     * unknown, an act throws, or the final value is neither a string nor a URL.
     */
    static #applyActs(input, acts, logFunc) {
        let dest = input;
        for (const cmd of acts) {
            const name = typeof cmd === "string" ? cmd.split(":")[0] : undefined;
            // Own-property lookup: inherited names like "toString" are not acts
            const act = name !== undefined && Object.hasOwn(Purlfy.#acts, name)
                ? Purlfy.#acts[name]
                : undefined;
            if (!act) {
                logFunc("Invalid act:", cmd);
                return null;
            }
            const arg = cmd.slice(name.length + 1);
            try {
                dest = act(dest, arg);
            } catch (e) {
                logFunc(`Error processing input with act "${name}":`, e);
                return null;
            }
        }
        if (typeof dest === "string" || dest instanceof URL) {
            return dest.toString();
        }
        return null;
    }

    // Instance methods

    /**
     * Clears the statistics by applying the negated current values as an increment.
     * @returns {void}
     */
    clearStatistics() {
        const increment = {};
        for (const [key, value] of Object.entries(this.#statistics)) {
            increment[key] = -value;
        }
        this.#incrementStatistics(increment);
    }

    /**
     * Clears the rules.
     * @returns {void}
     */
    clearRules() {
        this.#rules = {};
    }

    /**
     * Gets the statistics.
     * @returns {Object} A shallow copy of the statistics.
     */
    getStatistics() {
        return { ...this.#statistics };
    }

    /**
     * Imports the given rules by deep-merging them into the current rule tree.
     * @param {...Object} rulesets The rulesets to import.
     * @returns {void}
     */
    importRules(...rulesets) {
        Purlfy.#mergeDeep(this.#rules, ...rulesets);
    }

    /**
     * Checks if the given rule is valid (and usable with the current settings).
     * @param {Object} rule The rule to check.
     * @returns {boolean} Whether the given rule is valid.
     */
    #validRule(rule) {
        if (!rule || !rule.mode || !rule.description || !rule.author) return false;
        // Feature detection for DOMParser
        if (Array.isArray(rule.acts) && rule.acts.includes("dom") && !Purlfy.#domParser) return false;
        const actsOk = rule.acts === undefined || Array.isArray(rule.acts);
        const continueOk = Purlfy.#udfOrType(rule.continue, "boolean");
        switch (rule.mode) {
            case "white":
                return Array.isArray(rule.params);
            case "black":
                return Array.isArray(rule.params) && Purlfy.#udfOrType(rule.std, "boolean");
            case "param":
                return Array.isArray(rule.params) && actsOk && continueOk;
            case "regex":
                return Array.isArray(rule.regex) && Array.isArray(rule.replace) && continueOk && rule.regex.length === rule.replace.length;
            case "redirect":
                return this.fetchEnabled && Purlfy.#udfOrType(rule.ua, "string") && Purlfy.#udfOrType(rule.headers, "object") && continueOk;
            case "visit":
                return this.fetchEnabled && Purlfy.#udfOrType(rule.ua, "string") && Purlfy.#udfOrType(rule.headers, "object") && actsOk && continueOk;
            case "lambda":
                return this.lambdaEnabled && (typeof rule.lambda === "string" || rule.lambda instanceof Purlfy.#AsyncFunction) && continueOk;
            default:
                return false;
        }
    }

    /**
     * Iteratively matches the longest rule for the given URL parts.
     *
     * At each level a key `part + "/"` descends into sub-rules, a key equal to
     * `part` is a terminal rule, keys starting with "/" are treated as regular
     * expressions, and the empty key "" is the fallback rule of that level.
     * @param {string[]} parts The URL parts to match.
     * @returns {Object|null} The matched rule.
     */
    #matchRule(parts) {
        let fallbackRule = null; // Most precise fallback rule
        let currentRules = this.#rules;
        for (const part of parts) {
            if (Object.hasOwn(currentRules, "")) {
                fallbackRule = currentRules[""];
            }
            if (Object.hasOwn(currentRules, part + "/")) {
                currentRules = currentRules[part + "/"]; // Exact match - continue to the next level
            } else if (Object.hasOwn(currentRules, part)) {
                const rule = currentRules[part];
                if (this.#validRule(rule)) {
                    return rule; // Exact match found
                }
            } else { // No exact match found, try to match with regex
                let found = false;
                // Iterate through current rules to match RegExp
                for (const [key, val] of Object.entries(currentRules)) {
                    if (!key.startsWith("/")) continue; // Skip non-RegExp keys
                    try {
                        const sub = key.endsWith("/"); // Has sub-rules
                        const regexStr = sub ? key.slice(1, -1) : key.slice(1);
                        if (regexStr === "") continue; // Skip empty regex
                        const regex = new RegExp(regexStr);
                        if (regex.test(part)) { // Regex matches
                            if (!sub && this.#validRule(val)) {
                                return val; // Regex match found
                            } else if (sub) {
                                currentRules = val; // Continue to the next level
                                found = true;
                                break;
                            }
                        }
                    } catch (e) {
                        this.#log("Invalid regex:", key.slice(1));
                    }
                }
                if (!found) break; // No matching rule found
            }
        }
        if (Object.hasOwn(currentRules, "")) { // Fallback rule
            fallbackRule = currentRules[""];
        }
        if (this.#validRule(fallbackRule)) {
            return fallbackRule;
        }
        return null;
    }

    /**
     * Increments the statistics and dispatches a "statisticschange" event.
     * @param {Object} increment The incremental statistics.
     * @returns {void}
     */
    #incrementStatistics(increment) {
        for (const [key, value] of Object.entries(increment)) {
            this.#statistics[key] += value;
        }
        if (typeof CustomEvent === "function") {
            this.dispatchEvent(new CustomEvent("statisticschange", { detail: increment }));
        } else {
            // No CustomEvent available: dispatch a plain event without the detail
            this.dispatchEvent(new Event("statisticschange"));
        }
    }

    /**
     * Applies the given rule to the given URL object.
     * @param {URL} urlObj The URL object to apply the rule to.
     * @param {Object} rule The rule to apply.
     * @param {Function} logFunc The logger function.
     * @returns {Promise<[URL, boolean, Object]>} The new URL object, whether to continue and the mode-specific incremental statistics.
     */
    async #applyRule(urlObj, rule, logFunc) {
        const mode = rule.mode;
        const increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics
        const lengthBefore = urlObj.href.length;
        const paramsCntBefore = urlObj.searchParams.size;
        let shallContinue = false;
        switch (mode) { // Purifies `urlObj` based on the rule
            case "white": { // Whitelist mode
                const newParams = new URLSearchParams();
                for (const param of rule.params) {
                    if (urlObj.searchParams.has(param)) {
                        newParams.set(param, urlObj.searchParams.get(param));
                    }
                }
                urlObj.search = newParams.toString();
                break;
            }
            case "black": { // Blacklist mode
                if (!rule.std && !Purlfy.#isStandard(urlObj)) {
                    logFunc("Non-standard URL search string:", urlObj.search);
                    break;
                }
                for (const param of rule.params) {
                    urlObj.searchParams.delete(param);
                }
                urlObj.search = urlObj.searchParams.toString();
                break;
            }
            case "param": { // Specific param mode
                // Process given parameter to be used as a new URL
                let paramValue = null;
                for (const param of rule.params) { // Find the first available parameter value
                    if (urlObj.searchParams.has(param)) {
                        paramValue = urlObj.searchParams.get(param);
                        break;
                    }
                }
                if (!paramValue) {
                    logFunc("Parameter(s) not found:", rule.params.join(", "));
                    break;
                }
                const dest = Purlfy.#applyActs(paramValue, rule.acts ?? ["url"], logFunc);
                if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
                    urlObj = new URL(dest, urlObj.href);
                } else { // Invalid URL
                    logFunc("Invalid URL:", dest);
                    break;
                }
                shallContinue = rule.continue ?? true;
                increment.decoded++;
                break;
            }
            case "regex": { // Regex mode
                let newUrl = urlObj.href;
                for (let i = 0; i < rule.regex.length; i++) {
                    const regex = new RegExp(rule.regex[i], "g");
                    const replace = rule.replace[i];
                    newUrl = newUrl.replaceAll(regex, replace);
                }
                newUrl = Purlfy.#applyActs(newUrl, rule.acts ?? [], logFunc);
                if (newUrl && URL.canParse(newUrl, urlObj.href)) { // Valid URL
                    urlObj = new URL(newUrl, urlObj.href);
                } else { // Invalid URL
                    logFunc("Invalid URL:", newUrl);
                    break;
                }
                shallContinue = rule.continue ?? true;
                break;
            }
            case "redirect": { // Redirect mode
                if (!this.fetchEnabled) {
                    logFunc("Redirect mode is disabled.");
                    break;
                }
                const options = {
                    method: "HEAD",
                    redirect: "manual",
                    // Copy so that neither we nor the fetch implementation mutate the stored rule
                    headers: { ...rule.headers }
                };
                if (rule.ua) {
                    options.headers["User-Agent"] = rule.ua;
                }
                let dest = null;
                try {
                    const r = await this.#fetch(urlObj.href, options);
                    if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
                        dest = r.headers.get("location");
                    } else if (r.url !== urlObj.href) {
                        dest = r.url; // In case `redirect: manual` doesn't work
                    }
                } catch (e) {
                    logFunc("Error following redirect:", e);
                    break;
                }
                if (dest && URL.canParse(dest, urlObj.href)) {
                    const prevUrl = urlObj.href;
                    urlObj = new URL(dest, urlObj.href);
                    if (urlObj.href === prevUrl) { // No redirection
                        logFunc("No redirection made.");
                        break;
                    }
                    shallContinue = rule.continue ?? true;
                    increment.redirected++;
                } else {
                    logFunc("Invalid redirect destination:", dest);
                }
                break;
            }
            case "visit": { // Visit mode
                if (!this.fetchEnabled) {
                    logFunc("Visit mode is disabled.");
                    break;
                }
                const options = {
                    method: "GET",
                    redirect: "manual",
                    // Copy so that neither we nor the fetch implementation mutate the stored rule
                    headers: { ...rule.headers }
                };
                if (rule.ua) {
                    options.headers["User-Agent"] = rule.ua;
                }
                let r, html = null;
                try {
                    r = await this.#fetch(urlObj.href, options);
                    html = await r.text();
                } catch (e) {
                    logFunc("Error visiting URL:", e);
                    break;
                }
                if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
                    const location = r.headers.get("location");
                    logFunc("Visit mode, but got redirected to:", location);
                    if (!URL.canParse(location, urlObj.href)) { // Malformed Location header
                        logFunc("Invalid redirect destination:", location);
                        break;
                    }
                    urlObj = new URL(location, urlObj.href);
                } else if (r.url !== urlObj.href) { // In case `redirect: manual` doesn't work
                    logFunc("Visit mode, but got redirected to:", r.url);
                    if (!URL.canParse(r.url, urlObj.href)) {
                        logFunc("Invalid redirect destination:", r.url);
                        break;
                    }
                    urlObj = new URL(r.url, urlObj.href);
                } else {
                    // Without explicit acts, take the first thing in the body that looks like an HTTP(S) URL
                    const dest = Purlfy.#applyActs(html, rule.acts?.length ? rule.acts : [String.raw`regex:https?:\/\/.(?:www\.)?[-a-zA-Z0-9@%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?!&\/\/=]*)`], logFunc);
                    if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
                        urlObj = new URL(dest, urlObj.href);
                    } else { // Invalid URL
                        logFunc("Invalid URL:", dest);
                        break;
                    }
                }
                shallContinue = rule.continue ?? true;
                increment.visited++;
                break;
            }
            case "lambda": { // Lambda mode
                if (!this.lambdaEnabled) {
                    logFunc("Lambda mode is disabled.");
                    break;
                }
                try {
                    // NOTE(security): a string lambda is compiled from rule-supplied source code;
                    // only enable `lambdaEnabled` for rules from a trusted origin.
                    const lambda = typeof rule.lambda === "string" ? new Purlfy.#AsyncFunction("url", rule.lambda) : rule.lambda;
                    rule.lambda = lambda; // "Cache" the compiled lambda function
                    const result = await lambda(urlObj);
                    // Duck-typed so URL objects from another realm are accepted as well
                    if (typeof result?.href !== "string" || !result.searchParams) {
                        logFunc("Invalid lambda result:", result);
                        break;
                    }
                    urlObj = result;
                    shallContinue = rule.continue ?? true;
                } catch (e) {
                    logFunc("Error executing lambda:", e);
                }
                break;
            }
            default: {
                logFunc("Invalid mode:", mode);
                break;
            }
        }
        const paramsCntAfter = urlObj.searchParams.size;
        // Removed parameters are only counted for the whitelist/blacklist modes
        increment.param += (["white", "black"].includes(mode)) ? (paramsCntBefore - paramsCntAfter) : 0;
        increment.char += Math.max(lengthBefore - urlObj.href.length, 0); // Prevent negative char count
        return [urlObj, shallContinue, increment];
    }

    /**
     * Purifies the given URL based on the rules.
     * @param {string} originalUrl The original URL to purify.
     * @returns {Promise<Object>} An object `{ url, rule }`: the purified URL and a
     * description of the first rule applied ("N/A" if none).
     */
    async purify(originalUrl) {
        let increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics of a single purification
        let shallContinue = true;
        let firstRule = null;
        let iteration = 0;
        let urlObj;
        this.#log("Purifying URL:", originalUrl);
        // Relative URLs are resolved against the page location when there is one
        const optionalLocation = typeof location !== 'undefined' ? location.href : undefined;
        if (originalUrl && URL.canParse(originalUrl, optionalLocation)) {
            urlObj = new URL(originalUrl, optionalLocation);
        } else {
            this.#log(`Cannot parse URL ${originalUrl}`);
            return {
                url: originalUrl,
                rule: "N/A"
            };
        }
        while (shallContinue && iteration++ < this.maxIterations) {
            const logi = (...args) => this.#log(`[#${iteration}]`, ...args);
            const protocol = urlObj.protocol;
            if (protocol !== "http:" && protocol !== "https:") { // Not a valid HTTP URL
                logi(`Not a HTTP URL: ${urlObj.href}`);
                break;
            }
            const hostAndPath = urlObj.host + urlObj.pathname;
            const parts = hostAndPath.split("/").filter(part => part !== "");
            const rule = this.#matchRule(parts);
            if (!rule) { // No matching rule found
                logi(`No matching rule found for ${urlObj.href}.`);
                break;
            }
            firstRule ??= rule;
            logi(`Matching rule: ${rule.description} by ${rule.author}`);
            let singleIncrement; // Incremental statistics for the current iteration
            [urlObj, shallContinue, singleIncrement] = await this.#applyRule(urlObj, rule, logi);
            for (const [key, value] of Object.entries(singleIncrement)) {
                increment[key] += value;
            }
            logi("Purified URL:", urlObj.href);
        }
        if (firstRule && originalUrl !== urlObj.href) { // Increment statistics only if a rule was applied and URL has been changed
            increment.url++;
            this.#incrementStatistics(increment);
        }
        return {
            url: urlObj.href,
            rule: firstRule ? `${firstRule.description} by ${firstRule.author}` : "N/A"
        };
    }
}

if (typeof module !== "undefined" && module.exports) {
    module.exports = Purlfy; // Export for Node.js
} else {
    // Export for browser. Top-level `this` is `undefined` in ES modules and
    // strict wrappers, so fall back to `globalThis` instead of throwing.
    (this ?? globalThis).Purlfy = Purlfy;
}