pURLfy

The ultimate URL purifier

此腳本不應該直接安裝,它是一個供其他腳本使用的函式庫。欲使用本函式庫,請在腳本 metadata 寫上: // @require https://update.cn-greasyfork.org/scripts/492078/1499254/pURLfy.js

  1. class Purlfy extends EventTarget {
  2. // Static properties
  3. /**
  4. * Returns the version of the library.
  5. * @returns {string} The version of the library.
  6. */
  7. static get version() {
  8. return "0.3.11";
  9. };
  10. /**
  11. * A TextDecoder object used internally.
  12. * @type {TextDecoder}
  13. */
  14. static #decoder = new TextDecoder();
  15. /**
  16. * A DOMParser object used internally.
  17. * @type {DOMParser | null}
  18. */
  19. static #domParser = typeof DOMParser !== "undefined" ? new DOMParser() : null;
  20. /**
  21. * The constructor of the AsyncFunction class.
  22. * @type {Function}
  23. */
  24. static #AsyncFunction = async function () { }.constructor;
  25. /**
  26. * The initial statistics object. (All values are 0)
  27. * @type {Object}
  28. */
  29. static #zeroStatistics = {
  30. url: 0,
  31. param: 0,
  32. decoded: 0,
  33. redirected: 0,
  34. visited: 0,
  35. char: 0
  36. };
  37. /**
  38. * The default acts for URL purification.
  39. * @type {Object}
  40. */
  41. static #acts = {
  42. url: decodeURIComponent,
  43. base64: s => { // https://developer.mozilla.org/en-US/docs/Web/API/Window/btoa#unicode_strings
  44. s = s.replaceAll('_', '/').replaceAll('-', '+');
  45. const bytes = Uint8Array.from(atob(s), (m) => m.codePointAt(0));
  46. return Purlfy.#decoder.decode(bytes);
  47. },
  48. slice: (s, startEnd) => {
  49. const [start, end] = startEnd.split(":");
  50. return s.slice(parseInt(start), end ? parseInt(end) : undefined)
  51. },
  52. regex: (s, regex) => {
  53. const r = new RegExp(regex);
  54. const m = s.match(r);
  55. return m ? m[0] : "";
  56. },
  57. dom: (s) => Purlfy.#domParser.parseFromString(s, "text/html"),
  58. sel: (s, selector) => s.querySelector(selector),
  59. attr: (e, attr) => e.getAttribute(attr),
  60. text: (e) => e.textContent,
  61. };
  62. // Instance properties
  63. /**
  64. * Whether to enable the fetch mode.
  65. * @type {boolean}
  66. */
  67. fetchEnabled = false;
  68. /**
  69. * Whether to enable the lambda mode.
  70. * @type {boolean}
  71. */
  72. lambdaEnabled = false;
  73. /**
  74. * The maximum number of iterations for purification.
  75. * @type {number}
  76. */
  77. maxIterations = 5;
  78. /**
  79. * The logger function.
  80. * @type {Function}
  81. */
  82. #log = console.log.bind(console, "\x1b[38;2;220;20;60m[pURLfy]\x1b[0m");
  83. /**
  84. * The fetch function.
  85. * @type {Function}
  86. */
  87. #fetch = fetch.bind(globalThis);
  88. /**
  89. * The statistics object.
  90. * @type {Object}
  91. */
  92. #statistics = { ...Purlfy.#zeroStatistics };
  93. /**
  94. * The rules object.
  95. * @type {Object}
  96. */
  97. #rules = {};
  98.  
  99. /**
  100. * Creates a new instance of the Purlfy class.
  101. * @param {Object} [options] The options for the instance.
  102. * @param {boolean} [options.fetchEnabled] Whether to enable the fetch mode.
  103. * @param {boolean} [options.lambdaEnabled] Whether to enable the lambda mode.
  104. * @param {number} [options.maxIterations] The maximum number of iterations for purification.
  105. * @param {Object} [options.statistics] The statistics object.
  106. * @param {Function} [options.log] The logger function.
  107. * @param {Function} [options.fetch] The fetch function.
  108. */
  109. constructor(options) {
  110. super();
  111. this.fetchEnabled = options?.fetchEnabled ?? this.fetchEnabled;
  112. this.lambdaEnabled = options?.lambdaEnabled ?? this.lambdaEnabled;
  113. this.maxIterations = options?.maxIterations ?? this.maxIterations;
  114. Object.assign(this.#statistics, options?.statistics);
  115. this.#log = options?.log ?? this.#log;
  116. this.#fetch = options?.fetch ?? this.#fetch;
  117. }
  118.  
  119. // Static methods
  120. /**
  121. * Checks if the given value is of the given type or undefined.
  122. * @param {*} value The value to check.
  123. * @param {string} type The type to check.
  124. * @returns {boolean} Whether the given value is of the given type or undefined.
  125. */
  126. static #udfOrType(value, type) {
  127. return value === undefined || typeof value === type;
  128. }
  129.  
  130. /**
  131. * Checks if the given URL object's search string follows the standard format.
  132. * @param {URL} urlObj The URL object to check.
  133. * @returns {boolean} Whether the given URL object's search string follows the standard format.
  134. */
  135. static #isStandard(urlObj) {
  136. return urlObj.searchParams.toString() === urlObj.search.slice(1);
  137. }
  138.  
  139. /**
  140. * Checks if the given item is an object.
  141. * @param {*} item The item to check.
  142. * @returns {boolean} Whether the given item is an object.
  143. * @see https://stackoverflow.com/questions/27936772
  144. */
  145. static #isObject(item) {
  146. return (item && typeof item === 'object' && !Array.isArray(item));
  147. }
  148.  
  149. /**
  150. * Merges the given objects deeply.
  151. * @param {Object} target The target object to merge into.
  152. * @param {...Object} sources The source objects to merge.
  153. * @returns {Object} The merged object.
  154. * @see https://stackoverflow.com/questions/27936772
  155. */
  156. static #mergeDeep(target, ...sources) { // TODO: handle rules conflict (e.g. "path" and "path/")
  157. if (!sources.length) return target;
  158. const source = sources.shift();
  159. if (Purlfy.#isObject(target) && Purlfy.#isObject(source)) {
  160. for (const key in source) {
  161. if (Purlfy.#isObject(source[key])) {
  162. if (!target[key]) Object.assign(target, { [key]: {} });
  163. Purlfy.#mergeDeep(target[key], source[key]);
  164. } else {
  165. Object.assign(target, { [key]: source[key] });
  166. }
  167. }
  168. }
  169. return Purlfy.#mergeDeep(target, ...sources);
  170. }
  171.  
  172. /**
  173. * Applies the given acts to the given input.
  174. * @param {string} input The input to apply the acts to.
  175. * @param {string[]} acts The acts to apply.
  176. * @param {Function} logFunc The logger function.
  177. * @returns {string | null} The result of applying the given acts to the given input.
  178. */
  179. static #applyActs(input, acts, logFunc) {
  180. let dest = input;
  181. for (const cmd of (acts)) {
  182. const name = cmd.split(":")[0];
  183. const arg = cmd.slice(name.length + 1);
  184. const act = Purlfy.#acts[name];
  185. if (!act) {
  186. logFunc("Invalid act:", cmd);
  187. dest = null;
  188. break;
  189. }
  190. try {
  191. dest = act(dest, arg);
  192. } catch (e) {
  193. logFunc(`Error processing input with act "${name}":`, e);
  194. dest = null;
  195. break;
  196. }
  197. }
  198. if (typeof dest === "string" || dest instanceof URL) {
  199. return dest.toString();
  200. } else {
  201. return null;
  202. }
  203. }
  204.  
  205. // Instance methods
  206. /**
  207. * Clears the statistics.
  208. * @returns {void}
  209. */
  210. clearStatistics() {
  211. const increment = {};
  212. for (const [key, value] of Object.entries(this.#statistics)) {
  213. increment[key] = -value;
  214. }
  215. this.#incrementStatistics(increment);
  216. }
  217.  
  218. /**
  219. * Clears the rules.
  220. * @returns {void}
  221. */
  222. clearRules() {
  223. this.#rules = {};
  224. }
  225.  
  226. /**
  227. * Gets the statistics.
  228. * @returns {Object} The statistics.
  229. */
  230. getStatistics() {
  231. return { ...this.#statistics };
  232. }
  233.  
  234. /**
  235. * Imports the given rules.
  236. * @param {...Object} rulesets The rulesets to import.
  237. * @returns {void}
  238. */
  239. importRules(...rulesets) {
  240. Purlfy.#mergeDeep(this.#rules, ...rulesets);
  241. }
  242.  
  243. /**
  244. * Checks if the given rule is valid.
  245. * @param {Object} rule The rule to check.
  246. * @returns {boolean} Whether the given rule is valid.
  247. */
  248. #validRule(rule) {
  249. if (!rule || !rule.mode || !rule.description || !rule.author) return false;
  250. if ((rule.acts ?? []).includes("dom") && !Purlfy.#domParser) return false; // Feature detection for DOMParser
  251. switch (rule.mode) {
  252. case "white":
  253. return Array.isArray(rule.params);
  254. case "black":
  255. return Array.isArray(rule.params) && Purlfy.#udfOrType(rule.std, "boolean");
  256. case "param":
  257. return Array.isArray(rule.params) && (rule.acts === undefined || Array.isArray(rule.acts)) && Purlfy.#udfOrType(rule.continue, "boolean");
  258. case "regex":
  259. return Array.isArray(rule.regex) && Array.isArray(rule.replace) && Purlfy.#udfOrType(rule.continue, "boolean") && rule.regex.length === rule.replace.length;
  260. case "redirect":
  261. return this.fetchEnabled && Purlfy.#udfOrType(rule.ua, "string") && Purlfy.#udfOrType(rule.headers, "object") && Purlfy.#udfOrType(rule.continue, "boolean");
  262. case "visit":
  263. return this.fetchEnabled && Purlfy.#udfOrType(rule.ua, "string") && Purlfy.#udfOrType(rule.headers, "object") && (rule.acts === undefined || Array.isArray(rule.acts)) && Purlfy.#udfOrType(rule.continue, "boolean");
  264. case "lambda":
  265. return this.lambdaEnabled && (typeof rule.lambda === "string" || rule.lambda instanceof Purlfy.#AsyncFunction) && Purlfy.#udfOrType(rule.continue, "boolean");
  266. default:
  267. return false;
  268. }
  269. }
  270.  
  271. /**
  272. * Iteratively matches the longest rule for the given URL parts.
  273. * @param {string[]} parts The URL parts to match.
  274. * @returns {Object|null} The matched rule.
  275. */
  276. #matchRule(parts) {
  277. let fallbackRule = null; // Most precise fallback rule
  278. let currentRules = this.#rules;
  279. for (const part of parts) {
  280. if (currentRules.hasOwnProperty("")) {
  281. fallbackRule = currentRules[""];
  282. }
  283. if (currentRules.hasOwnProperty(part + "/")) {
  284. currentRules = currentRules[part + "/"]; // Exact match - continue to the next level
  285. } else if (currentRules.hasOwnProperty(part)) {
  286. const rule = currentRules[part];
  287. if (this.#validRule(rule)) {
  288. return rule; // Exact match found
  289. }
  290. } else { // No exact match found, try to match with regex
  291. let found = false;
  292. // Iterate through current rules to match RegExp
  293. for (const [key, val] of Object.entries(currentRules)) {
  294. if (!key.startsWith("/")) continue; // Skip non-RegExp keys
  295. try {
  296. const sub = key.endsWith("/"); // Has sub-rules
  297. const regexStr = sub ? key.slice(1, -1) : key.slice(1);
  298. if (regexStr === "") continue; // Skip empty regex
  299. const regex = new RegExp(regexStr);
  300. if (regex.test(part)) { // Regex matches
  301. if (!sub && this.#validRule(val)) {
  302. return val; // Regex match found
  303. } else if (sub) {
  304. currentRules = val; // Continue to the next level
  305. found = true;
  306. break;
  307. }
  308. }
  309. } catch (e) {
  310. this.#log("Invalid regex:", key.slice(1));
  311. }
  312. }
  313. if (!found) break; // No matching rule found
  314. }
  315. }
  316. if (currentRules.hasOwnProperty("")) { // Fallback rule
  317. fallbackRule = currentRules[""];
  318. }
  319. if (this.#validRule(fallbackRule)) {
  320. return fallbackRule;
  321. }
  322. return null;
  323. }
  324.  
  325. /**
  326. * Increments the statistics.
  327. * @param {Object} increment The incremental statistics.
  328. * @returns {void}
  329. */
  330. #incrementStatistics(increment) {
  331. for (const [key, value] of Object.entries(increment)) {
  332. this.#statistics[key] += value;
  333. }
  334. if (typeof CustomEvent === "function") {
  335. this.dispatchEvent(new CustomEvent("statisticschange", {
  336. detail: increment
  337. }));
  338. } else {
  339. this.dispatchEvent(new Event("statisticschange"));
  340. }
  341. }
  342.  
  343. /**
  344. * Applies the given rule to the given URL object.
  345. * @param {URL} urlObj The URL object to apply the rule to.
  346. * @param {Object} rule The rule to apply.
  347. * @param {Function} logFunc The logger function.
  348. * @returns {Promise<[URL, boolean, Object]>} The new URL object, whether to continue and the mode-specific incremental statistics.
  349. */
  350. async #applyRule(urlObj, rule, logFunc) {
  351. const mode = rule.mode;
  352. const increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics
  353. const lengthBefore = urlObj.href.length;
  354. const paramsCntBefore = urlObj.searchParams.size;
  355. let shallContinue = false;
  356. switch (mode) { // Purifies `urlObj` based on the rule
  357. case "white": { // Whitelist mode
  358. const newParams = new URLSearchParams();
  359. for (const param of rule.params) {
  360. if (urlObj.searchParams.has(param)) {
  361. newParams.set(param, urlObj.searchParams.get(param));
  362. }
  363. }
  364. urlObj.search = newParams.toString();
  365. break;
  366. }
  367. case "black": { // Blacklist mode
  368. if (!rule.std && !Purlfy.#isStandard(urlObj)) {
  369. logFunc("Non-standard URL search string:", urlObj.search);
  370. break;
  371. }
  372. for (const param of rule.params) {
  373. urlObj.searchParams.delete(param);
  374. }
  375. urlObj.search = urlObj.searchParams.toString();
  376. break;
  377. }
  378. case "param": { // Specific param mode
  379. // Process given parameter to be used as a new URL
  380. let paramValue = null;
  381. for (const param of rule.params) { // Find the first available parameter value
  382. if (urlObj.searchParams.has(param)) {
  383. paramValue = urlObj.searchParams.get(param);
  384. break;
  385. }
  386. }
  387. if (!paramValue) {
  388. logFunc("Parameter(s) not found:", rule.params.join(", "));
  389. break;
  390. }
  391. const dest = Purlfy.#applyActs(paramValue, rule.acts ?? ["url"], logFunc);
  392. if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
  393. urlObj = new URL(dest, urlObj.href);
  394. } else { // Invalid URL
  395. logFunc("Invalid URL:", dest);
  396. break;
  397. }
  398. shallContinue = rule.continue ?? true;
  399. increment.decoded++;
  400. break;
  401. }
  402. case "regex": { // Regex mode
  403. let newUrl = urlObj.href;
  404. for (let i = 0; i < rule.regex.length; i++) {
  405. const regex = new RegExp(rule.regex[i], "g");
  406. const replace = rule.replace[i];
  407. newUrl = newUrl.replaceAll(regex, replace);
  408. }
  409. newUrl = Purlfy.#applyActs(newUrl, rule.acts ?? [], logFunc);
  410. if (newUrl && URL.canParse(newUrl, urlObj.href)) { // Valid URL
  411. urlObj = new URL(newUrl, urlObj.href);
  412. } else { // Invalid URL
  413. logFunc("Invalid URL:", newUrl);
  414. break;
  415. }
  416. shallContinue = rule.continue ?? true;
  417. break;
  418. }
  419. case "redirect": { // Redirect mode
  420. if (!this.fetchEnabled) {
  421. logFunc("Redirect mode is disabled.");
  422. break;
  423. }
  424. const options = {
  425. method: "HEAD",
  426. redirect: "manual",
  427. headers: rule.headers ?? {}
  428. };
  429. if (rule.ua) {
  430. options.headers["User-Agent"] = rule.ua;
  431. }
  432. let dest = null;
  433. try {
  434. const r = await this.#fetch(urlObj.href, options);
  435. if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
  436. dest = r.headers.get("location");
  437. } else if (r.url !== urlObj.href) {
  438. dest = r.url; // In case `redirect: manual` doesn't work
  439. }
  440. } catch (e) {
  441. logFunc("Error following redirect:", e);
  442. break;
  443. }
  444. if (dest && URL.canParse(dest, urlObj.href)) {
  445. const prevUrl = urlObj.href;
  446. urlObj = new URL(dest, urlObj.href);
  447. if (urlObj.href === prevUrl) { // No redirection
  448. logFunc("No redirection made.");
  449. break;
  450. }
  451. shallContinue = rule.continue ?? true;
  452. increment.redirected++;
  453. } else {
  454. logFunc("Invalid redirect destination:", dest);
  455. }
  456. break;
  457. }
  458. case "visit": { // Visit mode
  459. if (!this.fetchEnabled) {
  460. logFunc("Visit mode is disabled.");
  461. break;
  462. }
  463. const options = {
  464. method: "GET",
  465. redirect: "manual",
  466. headers: rule.headers ?? {}
  467. };
  468. if (rule.ua) {
  469. options.headers["User-Agent"] = rule.ua;
  470. }
  471. let r, html = null;
  472. try {
  473. r = await this.#fetch(urlObj.href, options);
  474. html = await r.text();
  475. } catch (e) {
  476. logFunc("Error visiting URL:", e);
  477. break;
  478. }
  479. if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
  480. logFunc("Visit mode, but got redirected to:", r.url);
  481. urlObj = new URL(r.headers.get("location"), urlObj.href);
  482. } else if (r.url !== urlObj.href) { // In case `redirect: manual` doesn't work
  483. logFunc("Visit mode, but got redirected to:", r.url);
  484. urlObj = new URL(r.url, urlObj.href);
  485. } else {
  486. const dest = Purlfy.#applyActs(html, rule.acts?.length ? rule.acts : [String.raw`regex:https?:\/\/.(?:www\.)?[-a-zA-Z0-9@%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?!&\/\/=]*)`], logFunc);
  487. if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
  488. urlObj = new URL(dest, urlObj.href);
  489. } else { // Invalid URL
  490. logFunc("Invalid URL:", dest);
  491. break;
  492. }
  493. }
  494. shallContinue = rule.continue ?? true;
  495. increment.visited++;
  496. break;
  497. }
  498. case "lambda": {
  499. if (!this.lambdaEnabled) {
  500. logFunc("Lambda mode is disabled.");
  501. break;
  502. }
  503. try {
  504. const lambda = typeof rule.lambda === "string" ? new Purlfy.#AsyncFunction("url", rule.lambda) : rule.lambda;
  505. rule.lambda = lambda; // "Cache" the compiled lambda function
  506. urlObj = await lambda(urlObj);
  507. shallContinue = rule.continue ?? true;
  508. } catch (e) {
  509. logFunc("Error executing lambda:", e);
  510. }
  511. break;
  512. }
  513. default: {
  514. logFunc("Invalid mode:", mode);
  515. break;
  516. }
  517. }
  518. const paramsCntAfter = urlObj.searchParams.size;
  519. increment.param += (["white", "black"].includes(mode)) ? (paramsCntBefore - paramsCntAfter) : 0;
  520. increment.char += Math.max(lengthBefore - urlObj.href.length, 0); // Prevent negative char count
  521. return [urlObj, shallContinue, increment];
  522. }
  523.  
  524. /**
  525. * Purifies the given URL based on the rules.
  526. * @param {string} originalUrl The original URL to purify.
  527. * @returns {Promise<Object>} The purified URL and the rule applied.
  528. */
  529. async purify(originalUrl) {
  530. let increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics of a single purification
  531. let shallContinue = true;
  532. let firstRule = null;
  533. let iteration = 0;
  534. let urlObj;
  535. this.#log("Purifying URL:", originalUrl);
  536. const optionalLocation = typeof location !== 'undefined' ? location.href : undefined;
  537. if (originalUrl && URL.canParse(originalUrl, optionalLocation)) {
  538. urlObj = new URL(originalUrl, optionalLocation);
  539. } else {
  540. this.#log(`Cannot parse URL ${originalUrl}`);
  541. return {
  542. url: originalUrl,
  543. rule: "N/A"
  544. }
  545. }
  546. while (shallContinue && iteration++ < this.maxIterations) {
  547. const logi = (...args) => this.#log(`[#${iteration}]`, ...args);
  548. const protocol = urlObj.protocol;
  549. if (protocol !== "http:" && protocol !== "https:") { // Not a valid HTTP URL
  550. logi(`Not a HTTP URL: ${urlObj.href}`);
  551. break;
  552. }
  553. const hostAndPath = urlObj.host + urlObj.pathname;
  554. const parts = hostAndPath.split("/").filter(part => part !== "");
  555. const rule = this.#matchRule(parts);
  556. if (!rule) { // No matching rule found
  557. logi(`No matching rule found for ${urlObj.href}.`);
  558. break;
  559. }
  560. firstRule ??= rule;
  561. logi(`Matching rule: ${rule.description} by ${rule.author}`);
  562. let singleIncrement; // Incremental statistics for the current iteration
  563. [urlObj, shallContinue, singleIncrement] = await this.#applyRule(urlObj, rule, logi);
  564. for (const [key, value] of Object.entries(singleIncrement)) {
  565. increment[key] += value;
  566. }
  567. logi("Purified URL:", urlObj.href);
  568. }
  569. if (firstRule && originalUrl !== urlObj.href) { // Increment statistics only if a rule was applied and URL has been changed
  570. increment.url++;
  571. this.#incrementStatistics(increment);
  572. }
  573. return {
  574. url: urlObj.href,
  575. rule: firstRule ? `${firstRule.description} by ${firstRule.author}` : "N/A"
  576. };
  577. }
  578. }
  579.  
  580. if (typeof module !== "undefined" && module.exports) {
  581. module.exports = Purlfy; // Export for Node.js
  582. } else {
  583. this.Purlfy = Purlfy; // Export for browser
  584. }