// ==UserScript==
// @name Clean URL Improved
// @namespace i2p.schimon.clean-url
// @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
// @homepageURL https://openuserjs.org/scripts/sjehuda/Clean_URL_Improved
// @supportURL https://openuserjs.org/scripts/sjehuda/Clean_URL_Improved/issues
// @copyright 2023, Schimon Jehudah (http://schimon.i2p)
// @license MIT; https://opensource.org/licenses/MIT
// @grant none
// @run-at document-end
// @include *
// @version 23.05.10
// @icon data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj48dGV4dCB5PSIuOWVtIiBmb250LXNpemU9IjkwIj7wn5qlPC90ZXh0Pjwvc3ZnPgo=
// ==/UserScript==
/*
Simple version of this Userscript
let url = new URL(location.href);
if (url.hash || url.search) {
location.href = url.origin + url.pathname
};
*/
// Check whether HTML; otherwise, exit.
// A document without a doctype node (document.doctype is null) is skipped.
// NOTE(review): the top-level `return` is only valid because userscript
// managers presumably wrap the script body in a function - confirm for the
// managers you target.
//if (!document.contentType == 'text/html')
if (document.doctype == null) return;
//let point = [];
// Used by createButton() as both the tag name and the id of the popup element.
const namespace = 'i2p.schimon.cleanurl';
// List of reserved query parameters.
// hrefDataHandler() keeps only these parameters when it builds the
// "URL with whitelisted parameters" target shown in the hover popup.
const whitelist = [ // reserved
'art', // article
'action', // wiki
'bill', // law
'c', // cdn
'category', // id
'code', // code
'dark', // yorik.uncreated.net
'days', // wiki
'district', // house.mo.gov
'exp_time', // cdn
'ezimgfmt', // cdn image processor
'feedformat', // wiki
'file_host', // cdn
'format', // file type
'guid', // guid
'hidebots', // wiki
'hl', // language
'id', // id
'ip', // ip address
'key', // cdn
'limit', // wiki
'language', // language
'lr', // cdn
'lra', // cdn
'news_id', // post
'order', // bugzilla
'p', // search query / page number
'product', // bugzilla
'q', // search query
'query', // search query
'query_format', // bugzilla
'resolution', // bugzilla
's', // search query
'sign', // cdn
'speed', // cdn
'start_time', // media playback
'state', // cdn
'tag', // id
'type', // file type
'url', // url
'urlversion', // wiki
'v', // video
'year' // year
];
// List of fragment (hash) prefixes.
// A URL whose fragment starts with '#' followed by one of these entries is
// treated as carrying a tracking fragment (see modifyURL and cleanLink).
const hash = [
'back-url',
'intcid',
'niche-',
'src'];
// List of query parameters to remove.
// Used for the address bar (modifyURL), for page links (scanURLs/cleanLink)
// and for the "Purged URL" target of the hover popup (hrefDataHandler).
// Commented-out entries are disabled; some carry a note naming the site they
// were observed to break.
const blacklist = [
//'__cf_chl_rt_tk',
'_encoding',
'___SID',
'_t',
'ad_medium',
'ad_name',
'ad_pvid',
'ad_sub',
//'ad_tags',
'advertising-id',
//'aem_p4p_detail',
'af',
'aff',
'aff_fcid',
'aff_fsk',
'aff_platform',
'aff_trace_key',
'affparams',
'afSmartRedirect',
//'aid',
'algo_exp_id',
'algo_pvid',
//'ascsubtag',
//'asc_contentid',
'asgtbndr',
//'b64e', // breaks yandex
'bizType',
//'block',
'bta',
'businessType',
'campaign',
'campaignId',
'cid',
'ck',
//'clickid',
//'client_id',
//'cm_ven',
'content-id',
'crid',
'cst',
'cts',
'curPageLogUid',
//'data', // breaks yandex
//'dchild',
//'dclid',
'deals-widget',
'dicbo',
//'dt',
'edd',
'edm_click_module',
//'ei',
//'embed',
//'etext', // breaks yandex
'fbclid',
'feature',
'forced_click',
//'fr',
'frs',
//'from', // breaks yandex
'ga_order',
'ga_search_query',
'ga_search_type',
'ga_view_type',
'gatewayAdapt',
//'gclid',
//'gclsrc',
'gps-id',
//'gs_lcp',
'gt',
'guccounter',
'hdtime',
'ICID',
'ico',
'ig_rid',
//'idzone',
//'iflsig',
//'irgwc',
//'irpid',
'itid',
//'itok',
//'katds_labels',
//'keywords',
'keyno',
'l10n',
'linkCode',
'mc',
'mid',
'mp',
'nats',
'nci',
'obOrigUrl',
'optout',
'oq',
'organic_search_click',
'Partner',
'partner',
'partner_id',
'pcampaignid',
'pd_rd_i',
'pd_rd_r',
'pd_rd_w',
'pd_rd_wg',
'pdp_npi',
'pf_rd_i',
'pf_rd_m',
'pf_rd_p',
'pf_rd_r',
'pf_rd_s',
'pf_rd_t',
'pk_campaign',
'pdp_ext_f',
'pkey',
'platform',
'plkey',
'pqr',
'pro',
'prod',
'promo',
'promocode',
'promoid',
'psc',
'psprogram',
'pvid',
'qid',
//'r',
'realDomain',
'redirect',
'ref',
'ref_',
'refcode',
'referrer',
'refinements',
'reftag',
'rowan_id1',
'rowan_msg_id',
//'sCh',
'sclient',
'scm',
'scm_id',
'scm-url',
'shareId',
'showVariations',
'sid',
//'site_id',
'sk',
'smid',
'social_params',
'source',
'sourceId',
'spLa',
'spm',
'spreadType',
//'sprefix',
'sr',
'srcSns',
//'tag',
'tcampaign',
'td',
'terminal_id',
//'text',
'th', // Sometimes restored after page load
//'title',
'tracelog',
'traffic_id',
'traffic_type',
'tt',
'uact',
'ug_edm_item_id',
//'utm1',
//'utm2',
//'utm3',
//'utm4',
//'utm5',
//'utm6',
//'utm7',
//'utm8',
//'utm9',
'utm_campaign',
'utm_content',
'utm_medium',
'utm_source',
'utm_term',
'uuid',
//'utype',
//'ve',
//'ved',
//'zone'
];
// URL Indexers
// NOTE(review): this list is not referenced by any code visible in this
// file; presumably a per-category subset kept for future use - confirm
// before removing.
const paraIDX = [
'algo_exp_id',
'algo_pvid',
'b64e',
'cst',
'cts',
'data',
'ei',
//'etext',
'from',
'iflsig',
'gbv',
'gs_lcp',
'hdtime',
'keyno',
'l10n',
'mc',
'oq',
//'q',
'sei',
'sclient',
'sign',
'source',
'state',
//'text',
'uact',
'uuid',
'ved'];
// Market Places
// NOTE(review): this list is not referenced by any code visible in this
// file; presumably a per-category subset kept for future use - confirm
// before removing.
const paraMKT = [
'___SID',
'_t',
'ad_pvid',
'af',
'aff_fsk',
'aff_platform',
'aff_trace_key',
'afSmartRedirect',
'bizType',
'businessType',
'ck',
'content-id',
'crid',
'curPageLogUid',
'deals-widget',
'edm_click_module',
'gatewayAdapt',
'gps-id',
'keywords',
'pd_rd_i',
'pd_rd_r',
'pd_rd_w',
'pd_rd_wg',
'pdp_npi',
'pf_rd_i',
'pf_rd_m',
'pf_rd_p',
'pf_rd_r',
'pf_rd_s',
'pf_rd_t',
'platform',
'pdp_ext_f',
'ref_',
'refinements',
'rowan_id1',
'rowan_msg_id',
'scm',
'scm_id',
'scm-url',
'shareId',
//'showVariations',
'sk',
'smid',
'social_params',
'spLa',
'spm',
'spreadType',
'sr',
'srcSns',
'terminal_id',
'th', // Sometimes restored after page load
'tracelog',
'tt',
'ug_edm_item_id'];
// IL
// NOTE(review): the meaning of "IL" is not evident from this file, and the
// list is not referenced by any visible code - confirm intent before use.
const paraIL = [
'dicbo',
'obOrigUrl'];
// General
// NOTE(review): this list is not referenced by any code visible in this
// file; presumably a per-category subset kept for future use - confirm
// before removing.
const paraWWW = [
'aff',
'promo',
'promoid',
'ref',
'utm_campaign',
'utm_content',
'utm_medium',
'utm_source',
'utm_term'];
// For URL of the Address bar
// Check and modify page address: strip every blacklisted query parameter and
// any tracking fragment from the current location, then update the address
// bar without reloading the page.
// TODO Add bar and ask to clean address bar
(function modifyURL() {
  const url = new URL(location.href);
  let changed = false;
  // has() rather than a truthy get(): a tracking parameter with an empty
  // value (e.g. "?fbclid=") has to be removed as well.
  for (const name of blacklist) {
    if (url.searchParams.has(name)) {
      url.searchParams.delete(name);
      changed = true;
    }
  }
  // Only a fragment that starts with a listed prefix is dropped; an ordinary
  // in-page anchor such as "#section" stays in the address.
  if (hash.some((prefix) => url.hash.startsWith('#' + prefix))) {
    url.hash = '';
    changed = true;
  }
  if (changed) {
    // Serialize the edited URL object instead of concatenating
    // origin + pathname + search: origin is the string "null" for
    // non-hierarchical schemes, and concatenation silently drops both the
    // fragment and any credentials present in the document URL.
    window.history.pushState(null, null, url.href);
  }
})();
// Clean every hyperlink present in the document when the script runs.
// Each link is checked against every blacklisted parameter and every
// tracking fragment prefix.
(function scanURLs() {
  // TODO callback, Mutation Observer, and Event Listener
  for (const link of Array.from(document.links)) {
    try {
      blacklist.forEach((name) => cleanLink(link, name, 'para'));
      hash.forEach((prefix) => cleanLink(link, prefix, 'hash'));
    } catch (err) {
      // The URL constructor throws a TypeError for an href it cannot parse.
      // Skip that one link; an uncaught error here would abort the scan and
      // every top-level statement after it (including the hover listener).
      if (!(err instanceof TypeError)) {
        throw err;
      }
    }
  }
})();
// TODO Add an Event Listener
// Remove one tracking parameter, or one tracking fragment, from a hyperlink.
//
// link   - element exposing `href`, `getAttribute` and `setAttribute`
// target - parameter name (type 'para') or fragment prefix (type 'hash')
// type   - 'para' to delete the query parameter `target`,
//          'hash' to drop a fragment that starts with '#' + target
//
// When the link is changed, its original address is stored once in the
// `href-data` attribute, which the hover popup reads.
function cleanLink(link, target, type) {
  let url;
  try {
    url = new URL(link.href);
  } catch {
    // An href the URL constructor cannot parse is left untouched.
    return;
  }
  // Only web links are rewritten. For schemes such as mailto: or
  // javascript: the query part is not a tracking carrier, and rewriting it
  // would corrupt the link.
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    return;
  }
  let modify = false;
  switch (type) {
    case 'hash':
      if (url.hash.startsWith('#' + target)) {
        url.hash = '';
        modify = true;
      }
      break;
    case 'para':
      // has() also matches a parameter whose value is empty.
      if (url.searchParams.has(target)) {
        url.searchParams.delete(target);
        modify = true;
      }
      break;
  }
  if (!modify) {
    return;
  }
  // This function runs once per list entry, so keep the first value:
  // overwriting would store a partially cleaned address as the "original".
  if (link.getAttribute('href-data') === null) {
    link.setAttribute('href-data', link.href);
  }
  // url.href keeps a legitimate fragment (e.g. "#section") that
  // origin + pathname + search would discard.
  link.href = url.href;
}
// Event Listener
// When the pointer enters an element that carries an `href-data` attribute
// (set by cleanLink), show a small popup next to the pointer offering four
// variants of the original address: without parameters, with whitelisted
// parameters only, with blacklisted parameters removed, and unmodified.
// If the original address has a `url` parameter holding a web address, a
// direct link to it is placed first.
document.body.addEventListener("mouseover", function(e) {
  const target = e.target;
  if (!target || typeof target.getAttribute !== 'function') {
    return;
  }
  const hrefData = target.getAttribute('href-data');
  if (!hrefData) {
    return;
  }
  // Do not rebuild the popup while it already shows this very address.
  // The 'url-original' anchor (created by purgeURL) points at it.
  const shown = document.getElementById('url-original');
  if (shown && shown.href === hrefData) {
    return;
  }
  let original;
  try {
    original = new URL(hrefData);
  } catch {
    // `href-data` is a plain attribute the page can alter; without a
    // parseable address there is nothing to offer.
    return;
  }
  const previous = document.getElementById(namespace);
  if (previous) {
    previous.remove();
  }
  const selectionItem = createButton(e.pageX, e.pageY, hrefData);
  document.body.append(selectionItem);
  selectionItem.append(purgeURL(original));
  selectionItem.append(purgeURL(original, 'whitelist'));
  selectionItem.append(purgeURL(original, 'blacklist'));
  selectionItem.append(purgeURL(original, 'original'));
  // More possible parameters: 'source', 'utm_source'
  const urlParameter = original.searchParams.get('url');
  if (urlParameter) {
    let embedded = null;
    try {
      embedded = new URL(urlParameter);
    } catch {
      // Not an absolute URL: no direct link is offered.
      embedded = null;
    }
    // The value comes from the page, so only http(s) targets become a
    // clickable link (never javascript:, data:, ...).
    if (embedded && (embedded.protocol === 'http:' || embedded.protocol === 'https:')) {
      selectionItem.prepend(extractURL(urlParameter));
    }
  }
});
// Build the popup container that hosts the URL choice buttons.
//
// x, y - page coordinates of the pointer; the container is placed 5px to the
//        right of and 3px above that point
// url  - accepted for the caller's convenience; not used here
//
// Returns the new, still detached element. It removes itself from the
// document as soon as the pointer leaves it.
function createButton(x, y, url) {
  const popup = document.createElement(namespace);
  popup.id = namespace;
  Object.assign(popup.style, {
    // placement
    position: 'absolute',
    left: `${x + 5}px`,
    top: `${y - 3}px`,
    // look
    fontFamily: 'none', // emoji
    background: '#333',
    borderRadius: '5%',
    padding: '3px',
    zIndex: 10000,
    filter: 'brightness(0.7)', // dimmed until hovered
    // centre the content
    justifyContent: 'center',
    alignItems: 'center',
    display: 'flex',
    // no text selection marks
    userSelect: 'none',
    cursor: 'default',
  });
  // full brightness while hovered
  popup.onmouseover = function () {
    popup.style.filter = 'unset';
  };
  // TODO Wait a few seconds
  popup.onmouseleave = function () {
    popup.remove();
  };
  return popup;
}
// Create the link-icon anchor that points at an address extracted from a
// `url` query parameter.
//
// url - address assigned to the anchor's href
//
// Returns the new, detached <a> element.
function extractURL(url) {
  const anchor = Object.assign(document.createElement('a'), {
    textContent: '🔗',
    href: url,
  });
  anchor.style.outline = 'none';
  return anchor;
}
// TODO Use icons (with shapes) for cases when color is not optimal
// Create one round, colour-coded anchor for the hover popup.
//
// url      - URL object of the link's original address
// listType - selects the variant the anchor points to:
//            'original'  -> the address unchanged            (orangered)
//            'blacklist' -> blacklisted parameters removed   (yellow)
//            'whitelist' -> only whitelisted parameters kept (lawngreen)
//            anything else, including omitted ->
//                           origin + path, no parameters     (antiquewhite)
//
// Returns the new, detached <a> element.
function purgeURL(url, listType) {
  const item = document.createElement('a');
  // Declared locally; as an undeclared assignment it leaked onto the page's
  // global object.
  let resURL;
  switch (listType) {
    case 'original': // TODO dbclick (double-click)
      //item.textContent = '🔴';
      item.style.background = 'orangered';
      item.title = 'Original URL';
      // The hover listener looks this id up to tell which address is shown.
      item.id = 'url-original';
      resURL = url;
      break;
    case 'blacklist':
      //item.textContent = '🟡';
      item.style.background = 'yellow';
      item.title = 'Purged URL';
      resURL = hrefDataHandler(url, blacklist);
      break;
    case 'whitelist':
      //item.textContent = '🟢';
      item.style.background = 'lawngreen';
      item.title = 'URL with whitelisted parameters';
      item.id = 'url-known';
      resURL = hrefDataHandler(url, whitelist);
      break;
    default:
      //item.textContent = '⚪';
      item.style.background = 'antiquewhite';
      item.title = 'URL without parameters';
      resURL = url.origin + url.pathname;
      break;
  }
  item.style.borderRadius = '50%';
  item.style.outline = 'none';
  item.style.height = '15px';
  item.style.width = '15px';
  item.style.padding = '3px';
  item.style.margin = '3px';
  item.href = resURL;
  return item;
}
// Derive a filtered copy of a URL. The URL passed in is never modified.
//
// url      - URL object to start from
// listType - the list to apply, compared by identity:
//            whitelist -> the result has no fragment and only the
//                         whitelisted parameters, in whitelist order
//            blacklist -> blacklisted parameters are deleted, everything
//                         else (fragment included) is kept
//            any other value -> an unchanged copy
//
// Returns a new URL object.
function hrefDataHandler(url, listType) {
  const copy = new URL(url.href);
  if (listType === whitelist) {
    // Start from the same address with query and fragment cleared, rather
    // than from origin + pathname: origin is the string "null" for
    // non-hierarchical schemes, which the URL constructor rejects.
    const kept = new URL(copy.href);
    kept.search = '';
    kept.hash = '';
    for (const name of whitelist) {
      // getAll() keeps repeated parameters ("id=1&id=2") and parameters
      // with an empty value ("q="), which a truthy get() would drop.
      for (const value of copy.searchParams.getAll(name)) {
        kept.searchParams.append(name, value);
      }
    }
    return kept;
  }
  if (listType === blacklist) {
    for (const name of blacklist) {
      // delete() is a no-op for absent names and also removes parameters
      // whose value is empty.
      copy.searchParams.delete(name);
    }
  }
  return copy;
}