/**
 * URL purifier driven by a tree of rules.
 *
 * Rules are stored in a nested object keyed by host / path segments. A key
 * ending in "/" holds sub-rules, a key starting with "/" is a regular
 * expression, and the empty key "" is the fallback rule of its level.
 * `purify()` repeatedly matches a rule for the current URL and applies it
 * until a rule asks to stop, no rule matches, or `maxIterations` is reached.
 *
 * A "statisticschange" event is dispatched whenever the statistics change.
 */
class Purlfy extends EventTarget {
    // Static properties
    /**
     * Returns the version of the library.
     * @returns {string} The version of the library.
     */
    static get version() {
        return "0.3.11";
    }
    /**
     * A TextDecoder object used internally.
     * @type {TextDecoder}
     */
    static #decoder = new TextDecoder();
    /**
     * A DOMParser object used internally (null when DOMParser is unavailable).
     * @type {DOMParser | null}
     */
    static #domParser = typeof DOMParser !== "undefined" ? new DOMParser() : null;
    /**
     * The constructor of the AsyncFunction class.
     * @type {Function}
     */
    static #AsyncFunction = async function () { }.constructor;
    /**
     * The initial statistics object. (All values are 0)
     * Frozen because it is only ever used as a template to copy from.
     * @type {Object}
     */
    static #zeroStatistics = Object.freeze({
        url: 0,
        param: 0,
        decoded: 0,
        redirected: 0,
        visited: 0,
        char: 0
    });
    /**
     * The acts used by the "visit" mode when a rule does not provide any:
     * extract the first thing that looks like an HTTP(S) URL from the page.
     * @type {string[]}
     */
    static #defaultVisitActs = Object.freeze([String.raw`regex:https?:\/\/.(?:www\.)?[-a-zA-Z0-9@%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?!&\/\/=]*)`]);
    /**
     * The default acts for URL purification.
     * Each act receives the current value and the (possibly empty) argument string.
     * @type {Object}
     */
    static #acts = {
        url: decodeURIComponent,
        base64: s => { // https://developer.mozilla.org/en-US/docs/Web/API/Window/btoa#unicode_strings
            s = s.replaceAll('_', '/').replaceAll('-', '+'); // Accept the URL-safe alphabet
            const bytes = Uint8Array.from(atob(s), (m) => m.codePointAt(0));
            return Purlfy.#decoder.decode(bytes);
        },
        slice: (s, startEnd) => {
            const [start, end] = startEnd.split(":");
            return s.slice(Number.parseInt(start, 10), end ? Number.parseInt(end, 10) : undefined);
        },
        regex: (s, regex) => {
            const r = new RegExp(regex);
            const m = s.match(r);
            return m ? m[0] : "";
        },
        dom: (s) => Purlfy.#domParser.parseFromString(s, "text/html"),
        sel: (s, selector) => s.querySelector(selector),
        attr: (e, attr) => e.getAttribute(attr),
        text: (e) => e.textContent,
    };
    // Instance properties
    /**
     * Whether to enable the fetch mode.
     * @type {boolean}
     */
    fetchEnabled = false;
    /**
     * Whether to enable the lambda mode.
     * @type {boolean}
     */
    lambdaEnabled = false;
    /**
     * The maximum number of iterations for purification.
     * @type {number}
     */
    maxIterations = 5;
    /**
     * The logger function.
     * @type {Function}
     */
    #log = console.log.bind(console, "\x1b[38;2;220;20;60m[pURLfy]\x1b[0m");
    /**
     * The fetch function. Left undefined when the environment has no global
     * `fetch`, so that constructing an instance never throws because of it.
     * @type {Function | undefined}
     */
    #fetch = typeof fetch === "function" ? fetch.bind(globalThis) : undefined;
    /**
     * The statistics object.
     * @type {Object}
     */
    #statistics = { ...Purlfy.#zeroStatistics };
    /**
     * The rules object.
     * @type {Object}
     */
    #rules = {};

    /**
     * Creates a new instance of the Purlfy class.
     * @param {Object} [options] The options for the instance.
     * @param {boolean} [options.fetchEnabled] Whether to enable the fetch mode.
     * @param {boolean} [options.lambdaEnabled] Whether to enable the lambda mode.
     * @param {number} [options.maxIterations] The maximum number of iterations for purification.
     * @param {Object} [options.statistics] The statistics object.
     * @param {Function} [options.log] The logger function.
     * @param {Function} [options.fetch] The fetch function.
     */
    constructor(options) {
        super();
        this.fetchEnabled = options?.fetchEnabled ?? this.fetchEnabled;
        this.lambdaEnabled = options?.lambdaEnabled ?? this.lambdaEnabled;
        this.maxIterations = options?.maxIterations ?? this.maxIterations;
        Object.assign(this.#statistics, options?.statistics);
        this.#log = options?.log ?? this.#log;
        this.#fetch = options?.fetch ?? this.#fetch;
    }

    // Static methods
    /**
     * Checks if the given value is of the given type or undefined.
     * @param {*} value The value to check.
     * @param {string} type The type to check.
     * @returns {boolean} Whether the given value is of the given type or undefined.
     */
    static #udfOrType(value, type) {
        return value === undefined || typeof value === type;
    }

    /**
     * Checks if the given URL object's search string follows the standard format,
     * i.e. re-serializing its search params yields the same string.
     * @param {URL} urlObj The URL object to check.
     * @returns {boolean} Whether the given URL object's search string follows the standard format.
     */
    static #isStandard(urlObj) {
        return urlObj.searchParams.toString() === urlObj.search.slice(1);
    }

    /**
     * Checks if the given item is a non-null, non-array object.
     * @param {*} item The item to check.
     * @returns {boolean} Whether the given item is an object.
     * @see https://stackoverflow.com/questions/27936772
     */
    static #isObject(item) {
        return item !== null && typeof item === "object" && !Array.isArray(item);
    }

    /**
     * Merges the given objects deeply into `target` (later sources win).
     * Only own enumerable keys are copied and the "__proto__" key is skipped,
     * so a ruleset parsed from untrusted JSON cannot modify Object.prototype.
     * @param {Object} target The target object to merge into.
     * @param {...Object} sources The source objects to merge.
     * @returns {Object} The merged object.
     * @see https://stackoverflow.com/questions/27936772
     */
    static #mergeDeep(target, ...sources) { // TODO: handle rules conflict (e.g. "path" and "path/")
        if (!Purlfy.#isObject(target)) return target;
        for (const source of sources) {
            if (!Purlfy.#isObject(source)) continue;
            for (const key of Object.keys(source)) {
                if (key === "__proto__") continue; // Never walk into the prototype
                const value = source[key];
                if (Purlfy.#isObject(value)) {
                    // An object value replaces anything that is not already an own object
                    if (!Object.hasOwn(target, key) || !Purlfy.#isObject(target[key])) {
                        target[key] = {};
                    }
                    Purlfy.#mergeDeep(target[key], value);
                } else {
                    target[key] = value;
                }
            }
        }
        return target;
    }

    /**
     * Applies the given acts to the given input, in order.
     * Each act is written as "name" or "name:argument".
     * @param {string} input The input to apply the acts to.
     * @param {string[]} acts The acts to apply.
     * @param {Function} logFunc The logger function.
     * @returns {string | null} The resulting string, or null if an act is unknown,
     * an act throws, or the final value is neither a string nor a URL.
     */
    static #applyActs(input, acts, logFunc) {
        let dest = input;
        for (const cmd of acts) {
            const name = typeof cmd === "string" ? cmd.split(":")[0] : null;
            // Own-property lookup only: names such as "toString" must not resolve to inherited functions
            if (name === null || !Object.hasOwn(Purlfy.#acts, name)) {
                logFunc("Invalid act:", cmd);
                return null;
            }
            const arg = cmd.slice(name.length + 1);
            const act = Purlfy.#acts[name];
            try {
                dest = act(dest, arg);
            } catch (e) {
                logFunc(`Error processing input with act "${name}":`, e);
                return null;
            }
        }
        if (typeof dest === "string" || dest instanceof URL) {
            return dest.toString();
        }
        return null;
    }

    // Instance methods
    /**
     * Clears the statistics (dispatching "statisticschange" with the negated values).
     * @returns {void}
     */
    clearStatistics() {
        const increment = {};
        for (const [key, value] of Object.entries(this.#statistics)) {
            increment[key] = -value;
        }
        this.#incrementStatistics(increment);
    }

    /**
     * Clears the rules.
     * @returns {void}
     */
    clearRules() {
        this.#rules = {};
    }

    /**
     * Gets the statistics.
     * @returns {Object} A copy of the statistics.
     */
    getStatistics() {
        return { ...this.#statistics };
    }

    /**
     * Imports the given rules by deep-merging them into the current rules.
     * @param {...Object} rulesets The rulesets to import.
     * @returns {void}
     */
    importRules(...rulesets) {
        Purlfy.#mergeDeep(this.#rules, ...rulesets);
    }

    /**
     * Checks if the given rule is valid (and usable with the current settings).
     * @param {Object} rule The rule to check.
     * @returns {boolean} Whether the given rule is valid.
     */
    #validRule(rule) {
        if (!rule || !rule.mode || !rule.description || !rule.author) return false;
        const acts = rule.acts;
        if (Array.isArray(acts) && acts.includes("dom") && !Purlfy.#domParser) return false; // Feature detection for DOMParser
        const actsOk = acts === undefined || Array.isArray(acts);
        const continueOk = Purlfy.#udfOrType(rule.continue, "boolean");
        const fetchOptionsOk = Purlfy.#udfOrType(rule.ua, "string") && Purlfy.#udfOrType(rule.headers, "object");
        switch (rule.mode) {
            case "white":
                return Array.isArray(rule.params);
            case "black":
                return Array.isArray(rule.params) && Purlfy.#udfOrType(rule.std, "boolean");
            case "param":
                return Array.isArray(rule.params) && actsOk && continueOk;
            case "regex":
                // `acts` may also be null here, which is treated like "no acts"
                return Array.isArray(rule.regex) && Array.isArray(rule.replace) && continueOk
                    && rule.regex.length === rule.replace.length
                    && (acts == null || Array.isArray(acts));
            case "redirect":
                return this.fetchEnabled && fetchOptionsOk && continueOk;
            case "visit":
                return this.fetchEnabled && fetchOptionsOk && actsOk && continueOk;
            case "lambda":
                return this.lambdaEnabled && (typeof rule.lambda === "string" || rule.lambda instanceof Purlfy.#AsyncFunction) && continueOk;
            default:
                return false;
        }
    }

    /**
     * Iteratively matches the longest rule for the given URL parts.
     * @param {string[]} parts The URL parts to match (host first, then path segments).
     * @returns {Object|null} The matched rule, or null if no valid rule matches.
     */
    #matchRule(parts) {
        let fallbackRule = null; // Most precise fallback rule
        let currentRules = this.#rules;
        for (const part of parts) {
            if (Object.hasOwn(currentRules, "")) {
                fallbackRule = currentRules[""];
            }
            if (Object.hasOwn(currentRules, part + "/")) {
                currentRules = currentRules[part + "/"]; // Exact match - continue to the next level
            } else if (Object.hasOwn(currentRules, part)) {
                const rule = currentRules[part];
                if (this.#validRule(rule)) {
                    return rule; // Exact match found
                }
                // The matched leaf is unusable: stop here instead of matching deeper
                // path segments against this (shallower) level of the rule tree.
                break;
            } else { // No exact match found, try to match with regex
                let found = false;
                // Iterate through current rules to match RegExp
                for (const [key, val] of Object.entries(currentRules)) {
                    if (!key.startsWith("/")) continue; // Skip non-RegExp keys
                    try {
                        const sub = key.endsWith("/"); // Has sub-rules
                        const regexStr = sub ? key.slice(1, -1) : key.slice(1);
                        if (regexStr === "") continue; // Skip empty regex
                        const regex = new RegExp(regexStr);
                        if (regex.test(part)) { // Regex matches
                            if (!sub && this.#validRule(val)) {
                                return val; // Regex match found
                            } else if (sub) {
                                currentRules = val; // Continue to the next level
                                found = true;
                                break;
                            }
                        }
                    } catch (e) {
                        this.#log("Invalid regex:", key.slice(1));
                    }
                }
                if (!found) break; // No matching rule found
            }
        }
        if (Object.hasOwn(currentRules, "")) { // Fallback rule
            fallbackRule = currentRules[""];
        }
        if (this.#validRule(fallbackRule)) {
            return fallbackRule;
        }
        return null;
    }

    /**
     * Increments the statistics and dispatches a "statisticschange" event.
     * @param {Object} increment The incremental statistics.
     * @returns {void}
     */
    #incrementStatistics(increment) {
        for (const [key, value] of Object.entries(increment)) {
            this.#statistics[key] += value;
        }
        if (typeof CustomEvent === "function") {
            this.dispatchEvent(new CustomEvent("statisticschange", {
                detail: increment
            }));
        } else { // No CustomEvent available: dispatch without details
            this.dispatchEvent(new Event("statisticschange"));
        }
    }

    /**
     * Builds the fetch options for the "redirect" and "visit" modes.
     * The rule's headers are copied so that the rule itself is never modified.
     * @param {Object} rule The rule being applied.
     * @param {string} method The HTTP method to use.
     * @returns {Object} The options to pass to the fetch function.
     */
    static #fetchOptions(rule, method) {
        const headers = { ...rule.headers };
        if (rule.ua) {
            headers["User-Agent"] = rule.ua;
        }
        return { method, redirect: "manual", headers };
    }

    /**
     * Applies the given rule to the given URL object.
     * @param {URL} urlObj The URL object to apply the rule to.
     * @param {Object} rule The rule to apply.
     * @param {Function} logFunc The logger function.
     * @returns {Promise<[URL, boolean, Object]>} The new URL object, whether to continue and the mode-specific incremental statistics.
     */
    async #applyRule(urlObj, rule, logFunc) {
        const mode = rule.mode;
        const increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics
        const lengthBefore = urlObj.href.length;
        const paramsCntBefore = urlObj.searchParams.size;
        let shallContinue = false;
        switch (mode) { // Purifies `urlObj` based on the rule
            case "white": { // Whitelist mode
                const newParams = new URLSearchParams();
                for (const param of new Set(rule.params)) {
                    // Keep every value of a whitelisted parameter, not just the first one
                    for (const value of urlObj.searchParams.getAll(param)) {
                        newParams.append(param, value);
                    }
                }
                urlObj.search = newParams.toString();
                break;
            }
            case "black": { // Blacklist mode
                if (!rule.std && !Purlfy.#isStandard(urlObj)) {
                    logFunc("Non-standard URL search string:", urlObj.search);
                    break;
                }
                for (const param of rule.params) {
                    urlObj.searchParams.delete(param);
                }
                urlObj.search = urlObj.searchParams.toString();
                break;
            }
            case "param": { // Specific param mode
                // Process given parameter to be used as a new URL
                let paramValue = null;
                for (const param of rule.params) { // Find the first available parameter value
                    if (urlObj.searchParams.has(param)) {
                        paramValue = urlObj.searchParams.get(param);
                        break;
                    }
                }
                if (!paramValue) {
                    logFunc("Parameter(s) not found:", rule.params.join(", "));
                    break;
                }
                const dest = Purlfy.#applyActs(paramValue, rule.acts ?? ["url"], logFunc);
                if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
                    urlObj = new URL(dest, urlObj.href);
                } else { // Invalid URL
                    logFunc("Invalid URL:", dest);
                    break;
                }
                shallContinue = rule.continue ?? true;
                increment.decoded++;
                break;
            }
            case "regex": { // Regex mode
                let newUrl = urlObj.href;
                try {
                    for (let i = 0; i < rule.regex.length; i++) {
                        const regex = new RegExp(rule.regex[i], "g");
                        newUrl = newUrl.replaceAll(regex, rule.replace[i]);
                    }
                } catch (e) { // An invalid pattern must not abort the whole purification
                    logFunc("Invalid regex:", e);
                    break;
                }
                newUrl = Purlfy.#applyActs(newUrl, rule.acts ?? [], logFunc);
                if (newUrl && URL.canParse(newUrl, urlObj.href)) { // Valid URL
                    urlObj = new URL(newUrl, urlObj.href);
                } else { // Invalid URL
                    logFunc("Invalid URL:", newUrl);
                    break;
                }
                shallContinue = rule.continue ?? true;
                break;
            }
            case "redirect": { // Redirect mode
                if (!this.fetchEnabled) {
                    logFunc("Redirect mode is disabled.");
                    break;
                }
                const options = Purlfy.#fetchOptions(rule, "HEAD");
                let dest = null;
                try {
                    const r = await this.#fetch(urlObj.href, options);
                    if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
                        dest = r.headers.get("location");
                    } else if (r.url !== urlObj.href) {
                        dest = r.url; // In case `redirect: manual` doesn't work
                    }
                } catch (e) {
                    logFunc("Error following redirect:", e);
                    break;
                }
                if (dest && URL.canParse(dest, urlObj.href)) {
                    const prevUrl = urlObj.href;
                    urlObj = new URL(dest, urlObj.href);
                    if (urlObj.href === prevUrl) { // No redirection
                        logFunc("No redirection made.");
                        break;
                    }
                    shallContinue = rule.continue ?? true;
                    increment.redirected++;
                } else {
                    logFunc("Invalid redirect destination:", dest);
                }
                break;
            }
            case "visit": { // Visit mode
                if (!this.fetchEnabled) {
                    logFunc("Visit mode is disabled.");
                    break;
                }
                const options = Purlfy.#fetchOptions(rule, "GET");
                let r, html = null;
                try {
                    r = await this.#fetch(urlObj.href, options);
                    html = await r.text();
                } catch (e) {
                    logFunc("Error visiting URL:", e);
                    break;
                }
                if (r.status >= 300 && r.status < 400 && r.headers.has("location")) {
                    const location = r.headers.get("location");
                    if (!URL.canParse(location, urlObj.href)) {
                        logFunc("Invalid redirect destination:", location);
                        break;
                    }
                    logFunc("Visit mode, but got redirected to:", location);
                    urlObj = new URL(location, urlObj.href);
                } else if (r.url !== urlObj.href) { // In case `redirect: manual` doesn't work
                    if (!URL.canParse(r.url, urlObj.href)) {
                        logFunc("Invalid redirect destination:", r.url);
                        break;
                    }
                    logFunc("Visit mode, but got redirected to:", r.url);
                    urlObj = new URL(r.url, urlObj.href);
                } else {
                    const dest = Purlfy.#applyActs(html, rule.acts?.length ? rule.acts : Purlfy.#defaultVisitActs, logFunc);
                    if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
                        urlObj = new URL(dest, urlObj.href);
                    } else { // Invalid URL
                        logFunc("Invalid URL:", dest);
                        break;
                    }
                }
                shallContinue = rule.continue ?? true;
                increment.visited++;
                break;
            }
            case "lambda": {
                if (!this.lambdaEnabled) {
                    logFunc("Lambda mode is disabled.");
                    break;
                }
                try {
                    // NOTE: a string lambda is compiled and executed as code; only enable
                    // lambda mode for rulesets that come from a trusted source.
                    const lambda = typeof rule.lambda === "string" ? new Purlfy.#AsyncFunction("url", rule.lambda) : rule.lambda;
                    rule.lambda = lambda; // "Cache" the compiled lambda function
                    const result = await lambda(urlObj);
                    if (result instanceof URL) {
                        urlObj = result;
                        shallContinue = rule.continue ?? true;
                    } else { // Keep the current URL rather than crashing on a bad return value
                        logFunc("Lambda did not return a URL object:", result);
                    }
                } catch (e) {
                    logFunc("Error executing lambda:", e);
                }
                break;
            }
            default: {
                logFunc("Invalid mode:", mode);
                break;
            }
        }
        const paramsCntAfter = urlObj.searchParams.size;
        increment.param += (["white", "black"].includes(mode)) ? (paramsCntBefore - paramsCntAfter) : 0;
        increment.char += Math.max(lengthBefore - urlObj.href.length, 0); // Prevent negative char count
        return [urlObj, shallContinue, increment];
    }

    /**
     * Purifies the given URL based on the rules.
     * @param {string} originalUrl The original URL to purify.
     * @returns {Promise<Object>} The purified URL (`url`) and a description of the first rule applied (`rule`, "N/A" if none).
     */
    async purify(originalUrl) {
        let increment = { ...Purlfy.#zeroStatistics }; // Incremental statistics of a single purification
        let shallContinue = true;
        let firstRule = null;
        let iteration = 0;
        let urlObj;
        this.#log("Purifying URL:", originalUrl);
        // Relative URLs are resolved against the current page when there is one
        const optionalLocation = typeof location !== 'undefined' ? location.href : undefined;
        if (originalUrl && URL.canParse(originalUrl, optionalLocation)) {
            urlObj = new URL(originalUrl, optionalLocation);
        } else {
            this.#log(`Cannot parse URL ${originalUrl}`);
            return {
                url: originalUrl,
                rule: "N/A"
            };
        }
        while (shallContinue && iteration++ < this.maxIterations) {
            const logi = (...args) => this.#log(`[#${iteration}]`, ...args);
            const protocol = urlObj.protocol;
            if (protocol !== "http:" && protocol !== "https:") { // Not a valid HTTP URL
                logi(`Not a HTTP URL: ${urlObj.href}`);
                break;
            }
            const hostAndPath = urlObj.host + urlObj.pathname;
            const parts = hostAndPath.split("/").filter(part => part !== "");
            const rule = this.#matchRule(parts);
            if (!rule) { // No matching rule found
                logi(`No matching rule found for ${urlObj.href}.`);
                break;
            }
            firstRule ??= rule;
            logi(`Matching rule: ${rule.description} by ${rule.author}`);
            let singleIncrement; // Incremental statistics for the current iteration
            [urlObj, shallContinue, singleIncrement] = await this.#applyRule(urlObj, rule, logi);
            for (const [key, value] of Object.entries(singleIncrement)) {
                increment[key] += value;
            }
            logi("Purified URL:", urlObj.href);
        }
        if (firstRule && originalUrl !== urlObj.href) { // Increment statistics only if a rule was applied and URL has been changed
            increment.url++;
            this.#incrementStatistics(increment);
        }
        return {
            url: urlObj.href,
            rule: firstRule ? `${firstRule.description} by ${firstRule.author}` : "N/A"
        };
    }
}
-
// Expose the class: CommonJS export when `module.exports` exists, otherwise a global.
// `globalThis` is used instead of top-level `this`, which is `undefined` inside
// ES modules and would make the assignment throw there.
if (typeof module !== "undefined" && module.exports) {
    module.exports = Purlfy; // Export for Node.js
} else {
    globalThis.Purlfy = Purlfy; // Export for browser
}