Clean URL Improved

Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.

当前为 2023-05-10 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Clean URL Improved
  3. // @namespace i2p.schimon.clean-url
  4. // @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
  5. // @homepageURL https://greasyfork.org/en/scripts/465933-clean-url-improved
  6. // @supportURL https://greasyfork.org/en/scripts/465933-clean-url-improved/feedback
  7. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  8. // @license MIT; https://opensource.org/licenses/MIT
  9. // @grant none
  10. // @run-at document-end
  11. // @include *
  12. // @version 23.05.10
  13. // @icon 
  14.  
  15. // ==/UserScript==
  16.  
  17. /*
  18.  
  19. Simple version of this Userscript
  20. let url = new URL(location.href);
  21. if (url.hash || url.search) {
  22. location.href = url.origin + url.pathname
  23. };
  24.  
  25. */
  26.  
  // Check whether HTML; otherwise, exit.
  // The test actually performed is "does the document have a doctype node";
  // documents without one make the script stop here.
  // NOTE(review): a bare top-level `return` is only valid when the userscript
  // manager wraps this script in a function - confirm for the target manager.
  //if (!document.contentType == 'text/html')
  if (document.doctype == null) return;

  //let point = [];
  // Used by createButton() both as the tag name and as the element id of the
  // hover popup, and by the mouseover listener to find/remove that popup.
  const namespace = 'i2p.schimon.cleanurl';
  33.  
  // List of whitelisted query parameters.
  // hrefDataHandler() uses this list to build the "URL with whitelisted
  // parameters" variant: only parameters named here are copied over.
  // The trailing comment on each entry notes where the parameter was seen
  // or what it is used for.
  const whitelist = [ // reserved
    'art', // article
    'action', // wiki
    'bill', // law
    'c', // cdn
    'category', // id
    'code', // code
    'dark', // yorik.uncreated.net
    'days', // wiki
    'district', // house.mo.gov
    'exp_time', // cdn
    'ezimgfmt', // cdn image processor
    'feedformat', // wiki
    'file_host', // cdn
    'format', // file type
    'guid', // guid
    'hidebots', // wiki
    'hl', // language
    'id', // id
    'ip', // ip address
    'key', // cdn
    'limit', // wiki
    'language', // language
    'lr', // cdn
    'lra', // cdn
    'news_id', // post
    'order', // bugzilla
    'p', // search query / page number
    'product', // bugzilla
    'q', // search query
    'query', // search query
    'query_format', // bugzilla
    'resolution', // bugzilla
    's', // search query
    'sign', // cdn
    'speed', // cdn
    'start_time', // media playback
    'state', // cdn
    'tag', // id
    'type', // file type
    'url', // url
    'urlversion', // wiki
    'v', // video
    'year' // year
  ];
  80.  
  // List of Hash
  // Fragment prefixes treated as tracking. Callers test
  // `url.hash.startsWith('#' + entry)`, so each entry matches any fragment
  // that merely begins with it (e.g. 'niche-' matches '#niche-anything').
  const hash = [
    'back-url',
    'intcid',
    'niche-',
    'src'];
  87.  
  // List of Parameters
  // Query parameter names treated as tracking. modifyURL() removes them from
  // the address bar URL, cleanLink() removes them from page links, and
  // hrefDataHandler() removes them to build the "Purged URL" variant.
  // Names are matched exactly and case-sensitively (hence both 'Partner' and
  // 'partner'). Commented-out entries are candidates that are currently
  // disabled; some carry a note saying which site they break.
  const blacklist = [
    //'__cf_chl_rt_tk',
    '_encoding',
    '___SID',
    '_t',
    'ad_medium',
    'ad_name',
    'ad_pvid',
    'ad_sub',
    //'ad_tags',
    'advertising-id',
    //'aem_p4p_detail',
    'af',
    'aff',
    'aff_fcid',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'affparams',
    'afSmartRedirect',
    //'aid',
    'algo_exp_id',
    'algo_pvid',
    //'ascsubtag',
    //'asc_contentid',
    'asgtbndr',
    //'b64e', // breaks yandex
    'bizType',
    //'block',
    'bta',
    'businessType',
    'campaign',
    'campaignId',
    'cid',
    'ck',
    //'clickid',
    //'client_id',
    //'cm_ven',
    'content-id',
    'crid',
    'cst',
    'cts',
    'curPageLogUid',
    //'data', // breaks yandex
    //'dchild',
    //'dclid',
    'deals-widget',
    'dicbo',
    //'dt',
    'edd',
    'edm_click_module',
    //'ei',
    //'embed',
    //'etext', // breaks yandex
    'fbclid',
    'feature',
    'forced_click',
    //'fr',
    'frs',
    //'from', // breaks yandex
    'ga_order',
    'ga_search_query',
    'ga_search_type',
    'ga_view_type',
    'gatewayAdapt',
    //'gclid',
    //'gclsrc',
    'gps-id',
    //'gs_lcp',
    'gt',
    'guccounter',
    'hdtime',
    'ICID',
    'ico',
    'ig_rid',
    //'idzone',
    //'iflsig',
    //'irgwc',
    //'irpid',
    'itid',
    //'itok',
    //'katds_labels',
    //'keywords',
    'keyno',
    'l10n',
    'linkCode',
    'mc',
    'mid',
    'mp',
    'nats',
    'nci',
    'obOrigUrl',
    'optout',
    'oq',
    'organic_search_click',
    'Partner',
    'partner',
    'partner_id',
    'pcampaignid',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'pk_campaign',
    'pdp_ext_f',
    'pkey',
    'platform',
    'plkey',
    'pqr',
    'pro',
    'prod',
    'promo',
    'promocode',
    'promoid',
    'psc',
    'psprogram',
    'pvid',
    'qid',
    //'r',
    'realDomain',
    'redirect',
    'ref',
    'ref_',
    'refcode',
    'referrer',
    'refinements',
    'reftag',
    'rowan_id1',
    'rowan_msg_id',
    //'sCh',
    'sclient',
    'scm',
    'scm_id',
    'scm-url',
    'shareId',
    'showVariations',
    'sid',
    //'site_id',
    'sk',
    'smid',
    'social_params',
    'source',
    'sourceId',
    'spLa',
    'spm',
    'spreadType',
    //'sprefix',
    'sr',
    'srcSns',
    //'tag',
    'tcampaign',
    'td',
    'terminal_id',
    //'text',
    'th', // Sometimes restored after page load
    //'title',
    'tracelog',
    'traffic_id',
    'traffic_type',
    'tt',
    'uact',
    'ug_edm_item_id',
    //'utm1',
    //'utm2',
    //'utm3',
    //'utm4',
    //'utm5',
    //'utm6',
    //'utm7',
    //'utm8',
    //'utm9',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
    'uuid',
    //'utype',
    //'ve',
    //'ved',
    //'zone'
  ];
  278.  
  // URL Indexers
  // Parameter names grouped under the "URL indexers" category.
  // NOTE(review): this array is not referenced by any code visible in this
  // file; presumably a planned per-category list - confirm before removing.
  const paraIDX = [
    'algo_exp_id',
    'algo_pvid',
    'b64e',
    'cst',
    'cts',
    'data',
    'ei',
    //'etext',
    'from',
    'iflsig',
    'gbv',
    'gs_lcp',
    'hdtime',
    'keyno',
    'l10n',
    'mc',
    'oq',
    //'q',
    'sei',
    'sclient',
    'sign',
    'source',
    'state',
    //'text',
    'uact',
    'uuid',
    'ved'];
  308.  
  // Market Places
  // Parameter names grouped under the "market places" category.
  // NOTE(review): this array is not referenced by any code visible in this
  // file; presumably a planned per-category list - confirm before removing.
  const paraMKT = [
    '___SID',
    '_t',
    'ad_pvid',
    'af',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'afSmartRedirect',
    'bizType',
    'businessType',
    'ck',
    'content-id',
    'crid',
    'curPageLogUid',
    'deals-widget',
    'edm_click_module',
    'gatewayAdapt',
    'gps-id',
    'keywords',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'platform',
    'pdp_ext_f',
    'ref_',
    'refinements',
    'rowan_id1',
    'rowan_msg_id',
    'scm',
    'scm_id',
    'scm-url',
    'shareId',
    //'showVariations',
    'sk',
    'smid',
    'social_params',
    'spLa',
    'spm',
    'spreadType',
    'sr',
    'srcSns',
    'terminal_id',
    'th', // Sometimes restored after page load
    'tracelog',
    'tt',
    'ug_edm_item_id'];
  365.  
  // IL
  // Parameter names grouped under the "IL" category (the abbreviation is not
  // explained in this file).
  // NOTE(review): this array is not referenced by any code visible in this
  // file - confirm before removing.
  const paraIL = [
    'dicbo',
    'obOrigUrl'];
  370.  
  // General
  // Parameter names grouped under the "general" category; every entry also
  // appears in `blacklist`.
  // NOTE(review): this array is not referenced by any code visible in this
  // file - confirm before removing.
  const paraWWW = [
    'aff',
    'promo',
    'promoid',
    'ref',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term'];
  382.  
  // For URL of the Address bar
  // Check and modify page address
  // TODO Add bar and ask to clean address bar
  /**
   * Strips tracking data from the current page address and rewrites the
   * address bar without reloading the page.
   *
   * - Every `blacklist` parameter that is present is removed, including
   *   parameters with an empty value (e.g. `?fbclid=`).
   * - The fragment is removed only when it starts with one of the `hash`
   *   prefixes; any other fragment (such as an in-page anchor) is kept.
   * - Nothing is written to the history when the address is already clean.
   */
  (function modifyURL() {

    const url = new URL(location.href);
    let changed = false;

    // TODO turn into boolean function
    for (const name of blacklist) {
      // has() rather than get(): get() returns '' for "?name=", which is falsy.
      if (url.searchParams.has(name)) {
        url.searchParams.delete(name);
        changed = true;
      }
    }

    // TODO turn into boolean function
    if (hash.some((prefix) => url.hash.startsWith('#' + prefix))) {
      url.hash = '';
      changed = true;
    }

    if (changed) {
      // url.href is used instead of origin + pathname + search because
      // `origin` serialises to "null" for some schemes (e.g. file:).
      // replaceState keeps the Back button from landing on the uncleaned
      // address of this same page, and the current state object is preserved.
      window.history.replaceState(window.history.state, '', url.href);
      //location.href = url.href;
    }

  })();
  416.  
  /**
   * One-off pass over the links of the document: each link is handed to
   * cleanLink() once per blacklisted parameter and then once per tracking
   * fragment prefix.
   */
  (function scanURLs() {
    // TODO callback, Mutation Observer, and Event Listener
    for (const anchor of document.links) {
      for (const parameter of blacklist) {
        cleanLink(anchor, parameter, 'para');
      }
      for (const prefix of hash) {
        cleanLink(anchor, prefix, 'hash');
      }
    }
  })();
  424.  
  // TODO Add an Event Listener
  /**
   * Removes one tracking token from a link, in place.
   *
   * The address the link had before its first modification is stored in the
   * `href-data` attribute; later calls on the same link never overwrite it,
   * so the attribute always holds the untouched original address.
   *
   * @param {Element} link - Element exposing `href` (scanURLs passes the
   *   entries of `document.links`).
   * @param {string} target - Query parameter name (type 'para') or fragment
   *   prefix (type 'hash') to remove.
   * @param {string} type - 'para' or 'hash'; any other value leaves the link
   *   unchanged.
   */
  function cleanLink(link, target, type) {

    let url;
    try {
      url = new URL(link.href);
    } catch {
      // An href that cannot be parsed is left alone on purpose, so that a
      // single malformed link does not abort the scan of all other links.
      return;
    }

    let modify = false;

    switch (type) {

      case 'hash':
        if (url.hash.startsWith('#' + target)) {
          url.hash = '';
          modify = true;
        }
        break;

      case 'para':
        // has() rather than get(): get() returns '' for "?name=", which is
        // falsy and would leave empty-valued tracking parameters in place.
        if (url.searchParams.has(target)) {
          url.searchParams.delete(target);
          modify = true;
        }
        break;

    }

    if (modify) {
      if (!link.hasAttribute('href-data')) {
        link.setAttribute('href-data', link.href);
      }
      // url.href keeps a fragment that is not a tracking fragment and also
      // works for schemes whose `origin` serialises to "null".
      link.href = url.href;
    }

  }
  476.  
  // Event Listener
  /**
   * Shows the link popup when the pointer moves over a link that cleanLink()
   * has modified (recognised by its `href-data` attribute).
   *
   * The popup offers, in order: the address without any parameters, the
   * address with whitelisted parameters only, the address with blacklisted
   * parameters removed, and the original address. When the original address
   * carries a `url` parameter pointing to an http(s) address, a direct link
   * to that address is placed first.
   */
  document.body.addEventListener("mouseover", function(e) { // mouseover works with keyboard too
    const target = e.target;
    if (!target || typeof target.closest !== 'function') return;

    // The pointer may be over a child of the link (image, span, ...), so look
    // up the nearest element carrying the attribute instead of the target only.
    const link = target.closest('[href-data]');
    if (!link) return;

    const hrefData = link.getAttribute('href-data');
    if (!hrefData) return;

    // A popup for this very address is already on screen; keep it.
    const shown = document.getElementById('url-original');
    if (shown && shown.href === hrefData) return;

    let original;
    try {
      original = new URL(hrefData);
    } catch {
      // Without a parsable address there is nothing to offer.
      return;
    }

    const previous = document.getElementById(namespace);
    if (previous) {
      previous.remove();
    }

    const selectionItem = createButton(e.pageX, e.pageY, hrefData);
    document.body.append(selectionItem);
    selectionItem.append(purgeURL(original));
    selectionItem.append(purgeURL(original, 'whitelist'));
    selectionItem.append(purgeURL(original, 'blacklist'));
    selectionItem.append(purgeURL(original, 'original'));

    // More possible parameters: 'source', 'utm_source'
    const urlParameter = original.searchParams.get('url');
    if (urlParameter) {
      let embedded = null;
      try {
        // Resolved against the document base, like an anchor href would be.
        embedded = new URL(urlParameter, document.baseURI);
      } catch {
        embedded = null; // value is not an address
      }
      // The value comes from the page, so only plain web addresses are
      // offered; javascript:, data: and similar schemes are not linked.
      if (embedded && (embedded.protocol === 'http:' || embedded.protocol === 'https:')) {
        selectionItem.prepend(extractURL(embedded.href));
      }
    }
  });
  505.  
  /**
   * Builds the floating container of the link popup.
   *
   * The element uses `namespace` as both tag name and id, is positioned
   * slightly right of and above the given page coordinates, is dimmed until
   * hovered, and removes itself when the pointer leaves it.
   *
   * @param {number} x - Horizontal page coordinate of the pointer.
   * @param {number} y - Vertical page coordinate of the pointer.
   * @param {string} url - Not used by this function.
   * @returns {HTMLElement} The container, not yet attached to the document.
   */
  function createButton(x, y, url) {
    const box = document.createElement(namespace);
    box.id = namespace;

    Object.assign(box.style, {
      // position
      position: 'absolute',
      left: (x + 5) + 'px',
      top: (y - 3) + 'px',
      // appearance
      fontFamily: 'none', // emoji
      background: '#333',
      borderRadius: '5%',
      padding: '3px',
      zIndex: 10000,
      filter: 'brightness(0.7)',
      // center character
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      userSelect: 'none',
      cursor: 'default',
    });

    // Full brightness while hovered.
    box.onmouseover = () => {
      box.style.filter = 'unset';
    };

    // TODO Wait a few seconds
    box.onmouseleave = () => {
      box.remove();
    };

    return box;
  }
  541.  
  /**
   * Builds the "embedded address" entry of the link popup: an anchor showing
   * a link emoji that points to the given address.
   *
   * @param {string} url - Address the anchor should point to.
   * @returns {HTMLAnchorElement} The anchor, not yet attached to the document.
   */
  function extractURL(url) {
    const anchor = Object.assign(document.createElement('a'), {
      textContent: '🔗',
      href: url,
    });
    anchor.style.outline = 'none';
    return anchor;
  }
  550.  
  // TODO Use icons (with shapes) for cases when color is not optimal
  /**
   * Builds one round, colour-coded anchor for the link popup.
   *
   * @param {URL} url - Original address of the link.
   * @param {string} [listType] - Which variant to build:
   *   'original'  - the address unchanged (orangered);
   *   'blacklist' - blacklisted parameters removed (yellow);
   *   'whitelist' - whitelisted parameters only (lawngreen);
   *   anything else, or omitted - no parameters at all (antiquewhite).
   * @returns {HTMLAnchorElement} The anchor, not yet attached to the document.
   */
  function purgeURL(url, listType) {
    const item = document.createElement('a');
    // Declared locally: assigning to an undeclared name would create a global
    // and throws a ReferenceError in strict mode.
    let resURL;
    switch (listType) {
      case 'original': // TODO dbclick (double-click)
        item.style.background = 'orangered';
        item.title = 'Original URL';
        // The mouseover listener looks this id up to tell whether a popup for
        // the same address is already shown.
        item.id = 'url-original';
        resURL = url;
        break;
      case 'blacklist':
        item.style.background = 'yellow';
        item.title = 'Purged URL';
        resURL = hrefDataHandler(url, blacklist);
        break;
      case 'whitelist':
        item.style.background = 'lawngreen';
        item.title = 'URL with whitelisted parameters';
        item.id = 'url-known';
        resURL = hrefDataHandler(url, whitelist);
        break;
      default:
        item.style.background = 'antiquewhite';
        item.title = 'URL without parameters';
        resURL = url.origin + url.pathname;
        break;
    }
    item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.height = '15px';
    item.style.width = '15px';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.href = resURL;
    return item;
  }
  593.  
  /**
   * Returns a filtered copy of an address; the argument itself is never
   * modified.
   *
   * @param {URL} url - Address to filter.
   * @param {string[]} listType - Must be the `whitelist` or the `blacklist`
   *   array itself (compared by identity):
   *   `whitelist` - result has the origin and path of `url` plus every
   *     whitelisted parameter it carries (all values of repeated parameters,
   *     empty values included); the fragment is not carried over;
   *   `blacklist` - result is `url` with every blacklisted parameter removed;
   *   anything else - result is an unfiltered copy.
   * @returns {URL} The filtered copy.
   */
  function hrefDataHandler(url, listType) {
    const copy = new URL(url.href);

    if (listType === whitelist) {
      const kept = new URL(copy.origin + copy.pathname);
      for (const name of whitelist) {
        // getAll + append keeps every value of a repeated parameter.
        for (const value of copy.searchParams.getAll(name)) {
          kept.searchParams.append(name, value);
        }
      }
      return kept;
    }

    if (listType === blacklist) {
      for (const name of blacklist) {
        // has() rather than get(): get() returns '' for "?name=", which is
        // falsy. The guard also avoids re-serialising an untouched query.
        if (copy.searchParams.has(name)) {
          copy.searchParams.delete(name);
        }
      }
    }

    return copy;
  }