Clean URL Improved

Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.

当前为 2023-05-14 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Clean URL Improved
  3. // @namespace i2p.schimon.cleanurl
  4. // @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
  5. // @homepageURL https://greasyfork.org/en/scripts/465933-clean-url-improved
  6. // @supportURL https://greasyfork.org/en/scripts/465933-clean-url-improved/feedback
  7. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  8. // @license MIT; https://opensource.org/licenses/MIT
  9. // @grant none
  10. // @run-at document-end
  11. // @include *
  12. // @version 23.05.14
  13. // @icon data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj48dGV4dCB5PSIuOWVtIiBmb250LXNpemU9IjkwIj7wn5qlPC90ZXh0Pjwvc3ZnPgo=
  14.  
  15. // ==/UserScript==
  16.  
  17. /*
  18.  
  19. Simple version of this Userscript
  20. let url = new URL(location.href);
  21. if (url.hash || url.search) {
  22. location.href = url.origin + url.pathname
  23. };
  24.  
  25. */
  26.  
  27. // Check whether HTML; otherwise, exit.
  28. //if (!document.contentType == 'text/html')
  29. if (document.doctype == null) return;
  30.  
  31. //let point = [];
  32. const namespace = 'i2p.schimon.cleanurl';
  33.  
  /**
   * Query parameter names whose value is probed for an embedded URL.
   * The mouseover handler reads each of these from the hovered link and, when
   * the value parses as a URL, offers it as a direct link in the popup.
   */
  const urls = ['ref', 'source', 'src', 'url', 'utm_source'];
  41.  
  /**
   * Reserved query parameters: the only ones kept when a "safe link" is built
   * from the whitelist. The order of this list is also the order in which the
   * kept parameters are written to the resulting URL.
   * The trailing note on each entry records where the parameter was seen or
   * what it carries.
   */
  const whitelist = [
    'art',                  // article
    'action',               // wiki
    'bill',                 // law
    'c',                    // cdn
    'category',             // id
    'code',                 // code
    'dark',                 // yorik.uncreated.net
    'date',                 // date
    'days',                 // wiki
    'district',             // house.mo.gov
    'exp_time',             // cdn
    'ezimgfmt',             // cdn image processor
    'feedformat',           // wiki
    'file_host',            // cdn
    'format',               // file type
    'guid',                 // guid
    'hidebots',             // wiki
    'hl',                   // language
    'id',                   // id
    'ie',                   // character encoding
    'ip',                   // ip address
    'item_class',           // greasyfork
    'item_id',              // greasyfork
    'key',                  // cdn
    'limit',                // wiki
    'language',             // language
    'library',              // oujs
    'lr',                   // cdn
    'lra',                  // cdn
    'news_id',              // post
    'order',                // bugzilla
    'orderBy',              // oujs
    'orderDir',             // oujs
    'p',                    // search query / page number
    'preferencesReturnUrl', // return url
    'product',              // bugzilla
    'q',                    // search query
    'query',                // search query
    'query_format',         // bugzilla
    'resolution',           // bugzilla
    's',                    // search query
    'show_all_versions',    // greasyfork
    'sign',                 // cdn
    'sort',                 // greasyfork
    'speed',                // cdn
    'start_time',           // media playback
    'state',                // cdn
    'tag',                  // id
    'type',                 // file type
    'url',                  // url
    'utf8',                 // encoding
    'urlversion',           // wiki
    'v',                    // video
    'version',              // greasyfork
    'year',                 // year
  ];
  100.  
  /**
   * Fragment prefixes considered useless. A URL fragment is treated as a match
   * when it starts with '#' followed by one of these strings.
   */
  const hash = ['back-url', 'intcid', 'niche-', 'src'];
  107.  
  /**
   * Query parameters treated as useless: they are stripped from the address
   * bar, from page links, and from the "clean link" offered in the popup.
   * Lines marked "disabled" list names that are deliberately NOT active; they
   * are kept at their original position as a record (with the reason where one
   * was given).
   */
  const blacklist = [
    // disabled: '__cf_chl_rt_tk'
    '_encoding', '___SID', '_t',
    'ad', 'ad_medium', 'ad_name', 'ad_pvid', 'ad_sub',
    // disabled: 'ad_tags'
    'advertising-id',
    // disabled: 'aem_p4p_detail'
    'af', 'aff', 'aff_fcid', 'aff_fsk', 'aff_platform', 'aff_trace_key', 'affparams', 'afSmartRedirect',
    // disabled: 'aid'
    'algo_exp_id', 'algo_pvid', 'ar',
    // disabled: 'ascsubtag', 'asc_contentid'
    'asgtbndr', 'ats',
    // disabled: 'b64e' (breaks yandex)
    'bizType',
    // disabled: 'block'
    'bta', 'businessType',
    'campaign', 'campaignId', 'cid', 'ck',
    // disabled: 'clickid', 'client_id', 'cm_ven'
    'content-id', 'crid', 'cst', 'cts', 'curPageLogUid',
    // disabled: 'data' (breaks yandex), 'dchild', 'dclid'
    'deals-widget', 'dicbo',
    // disabled: 'dt'
    'edd', 'edm_click_module',
    // disabled: 'ei', 'embed', 'etext' (breaks yandex)
    'fbclid', 'feature', 'forced_click',
    // disabled: 'fr'
    'frs',
    // disabled: 'from' (breaks yandex)
    'ga_order', 'ga_search_query', 'ga_search_type', 'ga_view_type', 'gatewayAdapt',
    // disabled: 'gclid', 'gclsrc'
    'gps-id',
    // disabled: 'gs_lcp'
    'gt', 'guccounter',
    'hdtime',
    'ICID', 'ico', 'ig_rid',
    // disabled: 'idzone', 'iflsig', 'irgwc', 'irpid'
    'itid',
    // disabled: 'itok', 'katds_labels', 'keywords'
    'keyno',
    'l10n', 'linkCode',
    'mc', 'mid', 'mp',
    'nats', 'nci',
    'obOrigUrl', 'optout', 'oq', 'organic_search_click',
    'pa', 'Partner', 'partner', 'partner_id', 'pcampaignid',
    'pd_rd_i', 'pd_rd_r', 'pd_rd_w', 'pd_rd_wg', 'pdp_npi',
    'pf_rd_i', 'pf_rd_m', 'pf_rd_p', 'pf_rd_r', 'pf_rd_s', 'pf_rd_t',
    'pg', 'pk_campaign', 'pdp_ext_f', 'pkey', 'platform', 'plkey', 'pqr', 'pr', 'pro', 'prod',
    'promo', 'promocode', 'promoid', 'psc', 'psprogram', 'pvid',
    'qid',
    // disabled: 'r'
    'realDomain', 'redirect', 'ref', 'ref_', 'refcode', 'referrer', 'refinements', 'reftag',
    'rowan_id1', 'rowan_msg_id',
    // disabled: 'sCh'
    'sclient', 'scm', 'scm_id', 'scm-url', 'si', 'src_cmp', 'src_src', 'shareId', 'showVariations', 'sid',
    // disabled: 'site_id'
    'sk', 'smid', 'social_params', 'source', 'sourceId', 'spLa', 'spm', 'spreadType',
    // disabled: 'sprefix'
    'sr', 'src', 'srcSns', 'su',
    // disabled: 'tag'
    'tcampaign', 'td', 'terminal_id',
    // disabled: 'text'
    'th', // Sometimes restored after page load
    // disabled: 'title'
    'tracelog', 'traffic_id', 'traffic_type', 'tt',
    'uact', 'ug_edm_item_id',
    // disabled: 'utm1' through 'utm9'
    'utm_campaign', 'utm_content', 'utm_medium', 'utm_source', 'utm_term', 'uuid',
    // disabled: 'utype', 've', 'ved', 'zone'
  ];
  309.  
  /**
   * Parameter names filed under "URL Indexers".
   * NOTE(review): nothing in the visible part of this file reads this list —
   * confirm whether it is still needed.
   */
  const paraIDX = [
    'algo_exp_id', 'algo_pvid', 'b64e', 'cst', 'cts', 'data', 'ei',
    // disabled: 'etext'
    'from', 'iflsig', 'gbv', 'gs_lcp', 'hdtime', 'keyno', 'l10n', 'mc', 'oq',
    // disabled: 'q'
    'sei', 'sclient', 'sign', 'source', 'state',
    // disabled: 'text'
    'uact', 'uuid', 'ved',
  ];
  339.  
  /**
   * Parameter names filed under "Market Places".
   * NOTE(review): nothing in the visible part of this file reads this list —
   * confirm whether it is still needed.
   */
  const paraMKT = [
    '___SID', '_t',
    'ad_pvid', 'af', 'aff_fsk', 'aff_platform', 'aff_trace_key', 'afSmartRedirect',
    'bizType', 'businessType',
    'ck', 'content-id', 'crid', 'curPageLogUid',
    'deals-widget',
    'edm_click_module',
    'gatewayAdapt', 'gps-id',
    'keywords',
    'pd_rd_i', 'pd_rd_r', 'pd_rd_w', 'pd_rd_wg', 'pdp_npi',
    'pf_rd_i', 'pf_rd_m', 'pf_rd_p', 'pf_rd_r', 'pf_rd_s', 'pf_rd_t',
    'platform', 'pdp_ext_f',
    'ref_', 'refinements', 'rowan_id1', 'rowan_msg_id',
    'scm', 'scm_id', 'scm-url', 'shareId',
    // disabled: 'showVariations'
    'sk', 'smid', 'social_params', 'spLa', 'spm', 'spreadType', 'sr', 'srcSns',
    'terminal_id',
    'th', // Sometimes restored after page load
    'tracelog', 'tt',
    'ug_edm_item_id',
  ];
  396.  
  /**
   * Parameter names filed under "IL".
   * NOTE(review): nothing in the visible part of this file reads this list —
   * confirm whether it is still needed.
   */
  const paraIL = ['dicbo', 'obOrigUrl'];
  401.  
  /**
   * Parameter names filed under "General".
   * NOTE(review): nothing in the visible part of this file reads this list —
   * confirm whether it is still needed.
   */
  const paraWWW = [
    'aff', 'promo', 'promoid', 'ref',
    'utm_campaign', 'utm_content', 'utm_medium', 'utm_source', 'utm_term',
  ];
  413.  
  // Address bar clean-up.
  // Strips blacklisted query parameters and useless fragments from the URL
  // shown in the address bar, without reloading the page.
  // TODO Add bar and ask to clean address bar
  (function modifyURL() {
    const url = new URL(location.href);
    let changed = false;

    for (const name of blacklist) {
      // has() rather than get(): a tracker with an empty value ("?fbclid=")
      // is still a tracker and must be removed as well.
      if (url.searchParams.has(name)) {
        url.searchParams.delete(name);
        changed = true;
      }
    }

    // Drop the fragment only when it is one of the known useless ones; an
    // ordinary "#section" anchor must survive the removal of a query parameter.
    if (hash.some((prefix) => url.hash.startsWith(`#${prefix}`))) {
      url.hash = '';
      changed = true;
    }

    if (changed) {
      // replaceState keeps the Back button meaningful: pushState would leave
      // the uncleaned address one step back in history for the very same page.
      // url.href is used (not origin + pathname) because origin is the string
      // "null" for non-hierarchical schemes such as file:.
      window.history.replaceState(window.history.state, '', url.href);
    }
  })();
  447.  
  // Remember the untouched address of every link that carries a query string
  // in a 'href-data' attribute, so the hover popup can later offer the
  // original alongside the cleaned variants.
  (function scanAllURLs() {
    for (const link of document.links) {
      let url;
      try {
        url = new URL(link.href);
      } catch {
        // An href the URL parser rejects must not abort the scan (and with it
        // the rest of the script); such a link is simply left alone.
        continue;
      }
      if (url.search) {
        link.setAttribute('href-data', link.href);
      }
    }
  })();
  457.  
  // Run every link of the document through cleanLink, once per useless
  // fragment prefix and once per blacklisted parameter.
  // TODO callback, Mutation Observer, and Event Listener
  (function scanBadURLs() {
    for (const link of document.links) {
      for (const prefix of hash) {
        cleanLink(link, prefix, 'hash');
      }
      for (const name of blacklist) {
        cleanLink(link, name, 'para');
      }
    }
  })();
  465.  
  /**
   * Remove one useless fragment or one query parameter from a link, in place.
   *
   * @param {{href: string}} link - Element (or object) whose `href` is read and,
   *   when something was removed, rewritten.
   * @param {string} target - Fragment prefix (type 'hash') or parameter name
   *   (type 'para') to remove.
   * @param {string} type - 'hash' or 'para'; any other value leaves the link
   *   untouched.
   * @returns {void}
   */
  // TODO Add an Event Listener
  function cleanLink(link, target, type) {
    let url;
    try {
      url = new URL(link.href);
    } catch {
      // Not a parsable URL: nothing to clean, and callers looping over many
      // links must not be interrupted by a single bad href.
      return;
    }

    switch (type) {
      case 'hash':
        if (url.hash.startsWith(`#${target}`)) {
          url.hash = '';
          link.href = url.href;
        }
        break;
      case 'para':
        // has() rather than get(): parameters with an empty value count too.
        if (url.searchParams.has(target)) {
          url.searchParams.delete(target);
          // url.href keeps the fragment and also works for schemes whose
          // origin is "null" (mailto:, file:, ...).
          link.href = url.href;
        }
        break;
    }
  }
  506.  
  /**
   * Try to read a query-parameter value as a web address.
   *
   * @param {string} value - Raw parameter value.
   * @returns {URL|null} The parsed http(s) URL, or null when the value is not
   *   one. Other schemes (javascript:, data:, ...) are rejected so a crafted
   *   parameter cannot become a clickable script link in the popup.
   */
  function parseEmbeddedURL(value) {
    const isWeb = (candidate) =>
      candidate.protocol === 'http:' || candidate.protocol === 'https:';

    try {
      const direct = new URL(value);
      if (isWeb(direct)) {
        return direct;
      }
    } catch {
      // Not an absolute URL; fall through to the scheme-less guess below.
    }

    // NOTE It is a guess: something containing a dot may be a host name
    // written without a scheme.
    if (value.includes('.')) {
      try {
        return new URL(`http:${value}`);
      } catch {
        return null;
      }
    }
    return null;
  }

  // Event Listener
  // When the pointer enters an element carrying a 'href-data' attribute, show
  // a small popup next to the pointer with link variants built from it: the
  // parameter-free link, the whitelist-only link, the blacklist-stripped link,
  // the original, and any URL found embedded in a parameter.
  document.body.addEventListener('mouseover', function (e) {
    const target = e.target;
    if (!target || typeof target.getAttribute !== 'function') {
      return;
    }
    const rawHref = target.getAttribute('href-data');
    if (!rawHref) {
      return;
    }

    let hrefData;
    try {
      hrefData = new URL(rawHref);
    } catch {
      // The attribute does not hold a parsable URL; nothing to offer.
      return;
    }

    // Only one popup at a time.
    const previous = document.getElementById(namespace);
    if (previous) {
      previous.remove();
    }

    const selectionItem = createButton(e.pageX, e.pageY, rawHref);
    document.body.append(selectionItem);
    selectionItem.append(purgeURL(hrefData));

    // Add the remaining variants, skipping any whose address duplicates a
    // button that is already shown.
    for (const type of ['whitelist', 'blacklist', 'original']) {
      const button = purgeURL(hrefData, type);
      const exists = Array.from(selectionItem.childNodes).some(
        (node) => node.href === button.href
      );
      if (!exists) {
        selectionItem.append(button);
      }
    }

    for (const name of urls) {
      const value = hrefData.searchParams.get(name);
      if (!value) {
        continue;
      }
      const embedded = parseEmbeddedURL(value);
      if (embedded) {
        selectionItem.prepend(extractURL(embedded));
      }
    }
  });
  555.  
  /**
   * Build the popup container shown next to the pointer.
   *
   * @param {number} x - Horizontal page coordinate; the popup is placed 5px to its right.
   * @param {number} y - Vertical page coordinate; the popup is placed 3px above it.
   * @param {string} url - Accepted but not used by this function.
   * @returns {HTMLElement} A custom element named and identified by `namespace`;
   *   it brightens while hovered and removes itself when the pointer leaves.
   */
  function createButton(x, y, url) {
    const popup = document.createElement(namespace);
    popup.id = namespace;

    Object.assign(popup.style, {
      // placement
      position: 'absolute',
      left: `${x + 5}px`,
      top: `${y - 3}px`,
      // look
      fontFamily: 'none', // emoji
      background: '#333',
      borderRadius: '5%',
      padding: '3px',
      zIndex: 10000,
      filter: 'brightness(0.7)',
      // centre the content
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // no text selection, plain cursor
      userSelect: 'none',
      cursor: 'default',
    });

    // Dimmed by default, full brightness while hovered.
    popup.onmouseover = () => {
      popup.style.filter = 'unset';
    };
    // TODO Wait a few seconds
    popup.onmouseleave = () => {
      popup.remove();
    };

    return popup;
  }
  591.  
  /**
   * Build the popup entry that points at a URL found inside a parameter.
   *
   * @param {URL|string} url - Address the entry links to.
   * @returns {HTMLAnchorElement} Anchor showing a link emoji, without outline.
   */
  function extractURL(url) {
    const anchor = document.createElement('a');
    anchor.href = url;
    anchor.style.outline = 'none';
    anchor.textContent = '🔗';
    return anchor;
  }
  600.  
  /**
   * Build one round, colour-coded popup button for a variant of a link.
   *
   * @param {URL} url - Parsed address of the hovered link.
   * @param {string} [listType] - Which variant to build:
   *   'blacklist' - yellow, "Clean link": blacklisted parameters removed;
   *   'original'  - orangered, "Unsafe link": the address as given;
   *   'whitelist' - lawngreen, "Safe link": only whitelisted parameters kept;
   *   anything else (or omitted) - antiquewhite, "Pure link": origin and path only.
   * @returns {HTMLAnchorElement} The configured anchor element.
   */
  // TODO Use icons (with shapes) for cases when color is not optimal
  function purgeURL(url, listType) {
    const item = document.createElement('a');
    // Declared locally on purpose: as an undeclared (global) variable this
    // would throw in strict mode, and the hover handler of the 'original'
    // button would pick up whatever colour the most recent call stored.
    let itemColor;
    let itemTitle;
    let itemId;
    let resUrl;

    switch (listType) {
      case 'blacklist':
        itemColor = 'yellow';
        itemTitle = 'Clean link'; // Purged URL
        itemId = 'url-purged';
        resUrl = hrefDataHandler(url, blacklist);
        break;
      case 'original': // TODO dbclick (double-click)
        itemColor = 'orangered';
        itemTitle = 'Unsafe link'; // Original URL
        itemId = 'url-original';
        resUrl = url;
        item.style.cursor = `not-allowed`; // no-drop
        // Glow in the button's own colour while hovered, as a warning.
        item.onmouseenter = () => {
          item.style.filter = `drop-shadow(2px 4px 6px ${itemColor})`;
        };
        item.onmouseout = () => {
          item.style.filter = 'unset';
        };
        break;
      case 'whitelist':
        itemColor = 'lawngreen';
        itemTitle = 'Safe link'; // Link with whitelisted parameters
        itemId = 'url-known';
        resUrl = hrefDataHandler(url, whitelist);
        break;
      default:
        itemColor = 'antiquewhite';
        itemTitle = 'Pure link'; // Link without parameters
        itemId = 'url-clean';
        resUrl = url.origin + url.pathname;
        break;
    }

    item.id = itemId;
    item.title = itemTitle;
    item.style.background = itemColor;
    item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.height = '15px';
    item.style.width = '15px';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.href = resUrl;
    return item;
  }
  655.  
  /**
   * Produce a filtered copy of a URL. The input is never modified.
   *
   * The copy's query parameters are sorted by name first. Then:
   *  - when `listType` is the `whitelist` array, the result keeps only the
   *    whitelisted parameters (every value of each, empty values included), in
   *    whitelist order, and carries no fragment;
   *  - when `listType` is the `blacklist` array, every blacklisted parameter is
   *    removed (empty-valued ones included) and the fragment is kept;
   *  - for any other value the sorted copy is returned as is.
   *
   * @param {URL} url - Address to filter.
   * @param {string[]} listType - The `whitelist` or `blacklist` array itself;
   *   it is compared by identity, not by content.
   * @returns {URL} A new URL object.
   */
  function hrefDataHandler(url, listType) {
    const sorted = new URL(url.href);
    sorted.searchParams.sort();

    if (listType === whitelist) {
      // Start from a copy with query and fragment cleared instead of parsing
      // origin + pathname, which fails when origin is "null".
      const kept = new URL(sorted.href);
      kept.search = '';
      kept.hash = '';
      for (const name of whitelist) {
        // getAll/append keeps repeated parameters (?id=1&id=2) and parameters
        // whose value is empty (?q=), which get/set would drop.
        for (const value of sorted.searchParams.getAll(name)) {
          kept.searchParams.append(name, value);
        }
      }
      return kept;
    }

    if (listType === blacklist) {
      for (const name of blacklist) {
        // delete() is a no-op for absent names and also removes parameters
        // with an empty value.
        sorted.searchParams.delete(name);
      }
    }

    return sorted;
  }