Clean URL Improved

Remove tracking parameters and redirect to the original URL. This Userscript uses the URL Interface instead of RegEx.

当前为 2023-05-16 提交的版本,查看 最新版本

// ==UserScript==
// @name Clean URL Improved
// @namespace i2p.schimon.cleanurl
// @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
// @homepageURL https://greasyfork.org/en/scripts/465933-clean-url-improved
// @supportURL https://greasyfork.org/en/scripts/465933-clean-url-improved/feedback
// @copyright 2023, Schimon Jehudah (http://schimon.i2p)
// @license MIT; https://opensource.org/licenses/MIT
// @grant none
// @run-at document-end
// @include *
// @version 23.05.16
// @icon data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj48dGV4dCB5PSIuOWVtIiBmb250LXNpemU9IjkwIj7wn5qlPC90ZXh0Pjwvc3ZnPgo=

// ==/UserScript==

/*

Simple version of this Userscript
let url = new URL(location.href);
if (url.hash || url.search) {
location.href = url.origin + url.pathname
};

*/

  27. // Check whether HTML; otherwise, exit.
  28. //if (!document.contentType == 'text/html')
  29. if (document.doctype == null) return;
  30.  
  31. //let point = [];
  32. const namespace = 'i2p.schimon.cleanurl';
  33.  
  // Query parameters whose value may itself be a URL. The mouseover handler
  // inspects them and, when the value parses as a URL, offers it as a link.
  const urls = [
    'ref',
    'source',
    'src',
    'url',
    'utm_source',
  ];

  // Reserved (functional) parameters. hrefDataHandler keeps only these when it
  // builds the "Safe link" variant of a URL.
  const whitelist = [
    'art', // article
    'action', // wiki
    'bill', // law
    'c', // cdn
    'category', // id
    'code', // code
    'content', // id
    'dark', // yorik.uncreated.net
    'date', // date
    'days', // wiki
    'district', // house.mo.gov
    'exp_time', // cdn
    'ezimgfmt', // cdn image processor
    'feedformat', // wiki
    'fid', // mybb
    'file_host', // cdn
    'filename', // filename
    'format', // file type
    'guid', // guid
    'hidebots', // wiki
    'hl', // language
    'id', // id
    'ie', // character encoding
    'ip', // ip address
    'item_class', // greasyfork
    'item_id', // greasyfork
    'key', // cdn
    'limit', // wiki
    'lang', // language
    'language', // language
    'library', // oujs
    'locale', // locale
    'lr', // cdn
    'lra', // cdn
    'mobileaction', // wiki
    'news_id', // post
    'order', // bugzilla
    'orderBy', // oujs
    'orderDir', // oujs
    'p', // search query / page number
    'page', // mybb
    'preferencesReturnUrl', // return url
    'product', // bugzilla
    'q', // search query
    'query', // search query
    'query_format', // bugzilla
    //'referer', // signin <-- provided pathname contains login (log-in) or signin (sign-in)
    'resolution', // bugzilla
    'return_to', // signin
    's', // search query
    'show_all_versions', // greasyfork
    'sign', // cdn
    'sort', // greasyfork
    'speed', // cdn
    'start_time', // media playback
    'state', // cdn
    'tag', // id
    'tid', // mybb
    'title', // wiki
    'type', // file type
    //'url', // url <-- not whitelisted nor blacklisted
    'utf8', // encoding
    'urlversion', // wiki
    'v', // video
    'version', // greasyfork
    //'_x_tr_sl', // translate online service
    //'_x_tr_tl=', // translate online service
    //'_x_tr_hl=', // translate online service
    //'_x_tr_pto', // translate online service
    //'_x_tr_hist', // translate online service
    'year', // year
  ];

  // Fragment prefixes regarded as useless: a URL fragment is dropped when it
  // starts with '#' followed by one of these strings.
  const hash = [
    'back-url',
    'intcid',
    'niche-',
    //'searchinput',
    'src',
  ];

  // Parameters regarded as useless (tracking). They are removed from the
  // address bar, from the links of the page and from the "Clean link" variant.
  // Commented-out entries are deliberate exclusions and are kept for reference.
  const blacklist = [
    'ad',
    'ad_medium',
    'ad_name',
    'ad_pvid',
    'ad_sub',
    //'ad_tags',
    'advertising-id',
    //'aem_p4p_detail',
    'af',
    'aff',
    'aff_fcid',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'affparams',
    'afSmartRedirect',
    //'aid',
    'algo_exp_id',
    'algo_pvid',
    'ar',
    //'ascsubtag',
    //'asc_contentid',
    'asgtbndr',
    'ats',
    //'b64e', // breaks yandex
    'bizType',
    //'block',
    'bta',
    'businessType',
    'campaign',
    'campaignId',
    //'__cf_chl_rt_tk',
    'cid',
    'ck',
    //'clickid',
    //'client_id',
    //'cm_ven',
    'content-id',
    'crid',
    'cst',
    'cts',
    'curPageLogUid',
    //'data', // breaks yandex
    //'dchild',
    //'dclid',
    'deals-widget',
    'dicbo',
    //'dt',
    'edd',
    'edm_click_module',
    //'ei',
    //'embed',
    '_encoding',
    //'etext', // breaks yandex
    'fbclid',
    'feature',
    'forced_click',
    //'fr',
    'frs',
    //'from', // breaks yandex
    '_ga',
    'ga_order',
    'ga_search_query',
    'ga_search_type',
    'ga_view_type',
    'gatewayAdapt',
    //'gclid',
    //'gclsrc',
    'gps-id',
    //'gs_lcp',
    'gt',
    'guccounter',
    'hdtime',
    'ICID',
    'ico',
    'ig_rid',
    //'idzone',
    //'iflsig',
    //'irgwc',
    //'irpid',
    'itid',
    //'itok',
    //'katds_labels',
    //'keywords',
    'keyno',
    'l10n',
    'linkCode',
    'mc',
    'mid',
    'mp',
    'nats',
    'nci',
    'obOrigUrl',
    'optout',
    'oq',
    'organic_search_click',
    'pa',
    'Partner',
    'partner',
    'partner_id',
    'pcampaignid',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'pg',
    'PHPSESSID',
    'pk_campaign',
    'pdp_ext_f',
    'pkey',
    'platform',
    'plkey',
    'pqr',
    'pr',
    'pro',
    'prod',
    'promo',
    'promocode',
    'promoid',
    'psc',
    'psprogram',
    'pvid',
    'qid',
    //'r',
    'realDomain',
    'redirect',
    'ref',
    'ref_',
    'ref_src',
    'refcode',
    'referrer',
    'refinements',
    'reftag',
    'rowan_id1',
    'rowan_msg_id',
    //'sCh',
    'sclient',
    'scm',
    'scm_id',
    'scm-url',
    'si',
    '___SID',
    '_src',
    'src_cmp',
    'src_src',
    'shareId',
    'showVariations',
    'sid',
    //'site_id',
    'sk',
    'smid',
    'social_params',
    'source',
    'sourceId',
    'sp_csd',
    'spLa',
    'spm',
    'spreadType',
    //'sprefix',
    'sr',
    'src',
    'srcSns',
    'su',
    '_t',
    //'tag',
    'tcampaign',
    'td',
    'terminal_id',
    //'text',
    'th', // Sometimes restored after page load
    //'title',
    'tracelog',
    'traffic_id',
    'traffic_type',
    'tt',
    'uact',
    'ug_edm_item_id',
    //'utm1',
    //'utm2',
    //'utm3',
    //'utm4',
    //'utm5',
    //'utm6',
    //'utm7',
    //'utm8',
    //'utm9',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
    'uuid',
    //'utype',
    //'ve',
    //'ved',
    //'zone'
  ];

  // URL Indexers
  // Categorised list of parameters; no code in this file references it.
  const paraIDX = [
    'algo_exp_id',
    'algo_pvid',
    'b64e',
    'cst',
    'cts',
    'data',
    'ei',
    //'etext',
    'from',
    'iflsig',
    'gbv',
    'gs_lcp',
    'hdtime',
    'keyno',
    'l10n',
    'mc',
    'oq',
    //'q',
    'sei',
    'sclient',
    'sign',
    'source',
    'state',
    //'text',
    'uact',
    'uuid',
    'ved',
  ];

  // Market Places
  // Categorised list of parameters; no code in this file references it.
  const paraMKT = [
    '___SID',
    '_t',
    'ad_pvid',
    'af',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'afSmartRedirect',
    'bizType',
    'businessType',
    'ck',
    'content-id',
    'crid',
    'curPageLogUid',
    'deals-widget',
    'edm_click_module',
    'gatewayAdapt',
    'gps-id',
    'keywords',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'platform',
    'pdp_ext_f',
    'ref_',
    'refinements',
    'rowan_id1',
    'rowan_msg_id',
    'scm',
    'scm_id',
    'scm-url',
    'shareId',
    //'showVariations',
    'sk',
    'smid',
    'social_params',
    'spLa',
    'spm',
    'spreadType',
    'sr',
    'srcSns',
    'terminal_id',
    'th', // Sometimes restored after page load
    'tracelog',
    'tt',
    'ug_edm_item_id',
  ];

  // IL
  // Categorised list of parameters; no code in this file references it.
  const paraIL = [
    'dicbo',
    'obOrigUrl',
  ];

  // General
  // Categorised list of parameters; no code in this file references it.
  const paraWWW = [
    'aff',
    'promo',
    'promoid',
    'ref',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
  ];

  // For URL of the Address bar
  // Remove blacklisted parameters and useless fragments from the page address
  // without reloading the page.
  // TODO Add bar and ask to clean address bar
  (function modifyURL() {
    const url = new URL(location.href);
    let changed = false;

    // has() instead of get(): a parameter with an empty value (`?fbclid=`)
    // is tracking clutter too, and get() would report it as falsy.
    for (const name of blacklist) {
      if (url.searchParams.has(name)) {
        url.searchParams.delete(name);
        changed = true;
      }
    }

    // Drop the fragment only when it is a listed one; an ordinary in-page
    // anchor (#section) survives the removal of query parameters.
    if (hash.some((prefix) => url.hash.startsWith('#' + prefix))) {
      url.hash = '';
      changed = true;
    }

    if (changed) {
      // url.href is used rather than origin + pathname + search because
      // `origin` is the string "null" for schemes such as file:.
      // replaceState swaps the current history entry, so the Back button does
      // not lead to the same page under its uncleaned address.
      window.history.replaceState(null, '', url.href);
    }
  })();

  // Remember the original address of every link that carries a query string,
  // in a 'href-data' attribute, before the links are cleaned.
  (function scanAllURLs() {
    for (const link of document.links) {
      // The element's own `search` property is read instead of parsing
      // `link.href` with `new URL()`: it is the empty string for an href that
      // cannot be parsed, so a single malformed link no longer throws and
      // stops the whole script.
      if (link.search) {
        link.setAttribute('href-data', link.href);
      }
    }
  })();

  // Strip useless fragments and blacklisted parameters from every link.
  (function scanBadURLs() {
    for (const link of document.links) {
      // cleanLink only ever acts on a non-empty fragment or query string, so
      // links without either are skipped. This avoids parsing each of them
      // once per list entry, and it also skips links whose href cannot be
      // parsed (their `search` and `hash` are empty strings).
      if (!link.search && !link.hash) {
        continue;
      }
      // TODO callback, Mutation Observer, and Event Listener
      hash.forEach((target) => cleanLink(link, target, 'hash'));
      blacklist.forEach((target) => cleanLink(link, target, 'para'));
    }
  })();

  // TODO Add an Event Listener
  // Remove one useless element from a link, rewriting `link.href` in place.
  //   link   - object with a readable and writable `href` (an <a> or <area>)
  //   target - parameter name (type 'para') or fragment prefix (type 'hash')
  //   type   - 'hash' or 'para'; any other value leaves the link untouched
  function cleanLink(link, target, type) {
    let url;
    try {
      url = new URL(link.href);
    } catch {
      // An href that is not a valid URL is left as it is.
      return;
    }

    // The result is always written back as url.href. Rebuilding it from
    // origin + pathname + search would lose the fragment and would turn links
    // with an opaque origin (mailto:, file:, ...) into "null...".
    switch (type) {
      case 'hash':
        if (url.hash.startsWith('#' + target)) {
          url.hash = '';
          link.href = url.href;
        }
        break;
      case 'para':
        // has() also catches parameters with an empty value.
        if (url.searchParams.has(target)) {
          url.searchParams.delete(target);
          link.href = url.href;
        }
        break;
    }
  }

  // TODO Hunt (for any) links within attributes using getAttributeNames()[i]

  // Event Listener
  // When the pointer moves over a link that has a 'href-data' attribute, show
  // a small pop-up next to the pointer with alternative versions of the link.
  document.body.addEventListener("mouseover", function(e) {
    // The event target can be a descendant of the link (e.g. an image inside
    // an anchor), so the nearest ancestor carrying 'href-data' is looked up.
    const anchor = e.target.closest('[href-data]');
    if (!anchor) return;

    const hrefData = anchor.getAttribute('href-data');
    if (!hrefData) return;

    // Do nothing when the pop-up on screen already belongs to this address.
    const shown = document.getElementById('url-original');
    if (shown && shown.href === hrefData) return;

    let linkURL;
    try {
      linkURL = new URL(hrefData);
    } catch {
      // The attribute does not hold a valid URL; there is nothing to offer.
      return;
    }

    const previous = document.getElementById(namespace);
    if (previous) {
      previous.remove();
    }

    const selectionItem = createButton(e.pageX, e.pageY, hrefData);
    document.body.append(selectionItem);

    // The parameter-free link always comes first; the other variants are only
    // added when they differ from every button already in the pop-up.
    selectionItem.append(purgeURL(linkURL));
    for (const listType of ['whitelist', 'blacklist', 'original']) {
      const button = purgeURL(linkURL, listType);
      const isDuplicate = Array.from(selectionItem.childNodes)
        .some((node) => node.href === button.href);
      if (!isDuplicate) {
        selectionItem.append(button);
      }
    }

    // Check for URLs carried inside parameters
    for (const name of urls) {
      const embedded = parseEmbeddedURL(linkURL.searchParams.get(name));
      if (embedded) {
        selectionItem.prepend(extractURL(embedded));
      }
    }
  });

  // Interpret a parameter value as a web address.
  // Returns a URL object, or null when the value is empty, cannot be parsed,
  // or does not use http(s). Other schemes (javascript:, data:, ...) are
  // refused so that page-supplied data never becomes a clickable script link.
  function parseEmbeddedURL(value) {
    if (!value) return null;
    let candidate;
    try {
      candidate = new URL(value);
    } catch {
      // NOTE It is a guess: a value containing a dot may be a host name
      // written without a scheme.
      if (!value.includes('.')) return null;
      try {
        candidate = new URL('http:' + value);
      } catch {
        return null;
      }
    }
    return candidate.protocol === 'http:' || candidate.protocol === 'https:'
      ? candidate
      : null;
  }

  // Build the (still empty) pop-up container positioned next to the pointer.
  //   x, y - page coordinates of the pointer
  //   url  - not used by this function; kept so existing calls stay valid
  // Returns the new element. The caller inserts it into the document.
  function createButton(x, y, url) {
    // The namespace doubles as tag name and as element id.
    const item = document.createElement(namespace);
    item.id = namespace;

    Object.assign(item.style, {
      // `all` has to be assigned first: it resets every property, including
      // any that were assigned before it.
      all: 'unset',
      // position
      position: 'absolute',
      left: `${x + 5}px`,
      top: `${y - 3}px`,
      // appearance
      fontFamily: 'none', // emoji
      background: '#333',
      borderRadius: '5%',
      padding: '3px',
      zIndex: 10000,
      filter: 'brightness(0.7)',
      // center character
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      userSelect: 'none',
      cursor: 'default',
    });

    // Full brightness while the pointer is over the pop-up.
    item.onmouseover = () => {
      item.style.filter = 'unset';
    };
    // The pop-up disappears as soon as the pointer leaves it.
    item.onmouseleave = () => {
      // TODO Wait a few seconds
      item.remove();
    };
    return item;
  }

  // Build the pop-up entry for a URL that was found inside a parameter.
  //   url - URL object (or string) the entry points to
  // Returns a new <a> element showing a link emoji.
  function extractURL(url) {
    const item = document.createElement('a');
    item.textContent = '🔗'; // 🧧 🏷️ 🔖
    // Tooltip, like the other entries of the pop-up have.
    item.title = 'Extracted link';
    Object.assign(item.style, {
      all: 'unset', // assigned first: it resets every other property
      outline: 'none',
      height: '15px',
      width: '15px',
      padding: '3px',
      margin: '3px',
      lineHeight: 'normal', // initial
    });
    item.href = String(url);
    return item;
  }

  // TODO Use icons (with shapes) for cases when color is not optimal
  // Build one coloured pop-up entry for a variant of a link.
  //   url      - URL object of the original link
  //   listType - 'blacklist' : blacklisted parameters removed    (yellow)
  //              'whitelist' : only whitelisted parameters kept  (green)
  //              'original'  : the link unchanged                (red)
  //              otherwise   : no parameters and no fragment     (white)
  // Returns a new <a> element whose href is the chosen variant.
  function purgeURL(url, listType) {
    // Every value is local. Assigning the colour to an undeclared name throws
    // in strict mode and otherwise leaks a global that all entries share.
    let itemColor, itemTitle, itemId, resUrl;
    const item = document.createElement('a');
    item.style.all = 'unset';
    switch (listType) {
      case 'blacklist':
        itemColor = 'yellow';
        itemTitle = 'Clean link'; // Purged URL
        itemId = 'url-purged';
        resUrl = hrefDataHandler(url, blacklist);
        break;
      case 'original': // TODO dbclick (double-click)
        itemColor = 'orangered';
        itemTitle = 'Unsafe link'; // Original URL
        itemId = 'url-original';
        resUrl = url;
        item.style.cursor = `not-allowed`; // no-drop
        item.onmouseenter = () => {
          item.style.filter = `drop-shadow(2px 4px 6px ${itemColor})`;
        };
        item.onmouseout = () => {
          item.style.filter = 'unset';
        };
        break;
      case 'whitelist':
        itemColor = 'lawngreen';
        itemTitle = 'Safe link'; // Link with whitelisted parameters
        itemId = 'url-known';
        resUrl = hrefDataHandler(url, whitelist);
        break;
      default: {
        itemColor = 'antiquewhite';
        itemTitle = 'Pure link'; // Link without parameters
        itemId = 'url-clean';
        // Clearing the query and fragment of a copy works for every scheme,
        // whereas origin + pathname gives "null..." for opaque origins.
        const bare = new URL(url.href);
        bare.search = '';
        bare.hash = '';
        resUrl = bare;
        break;
      }
    }
    item.id = itemId;
    item.title = itemTitle;
    item.style.background = itemColor;
    item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.height = '15px';
    item.style.width = '15px';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.href = String(resUrl);
    return item;
  }

  // Produce a filtered copy of a URL; the URL passed in is never modified.
  //   url      - URL object to filter
  //   listType - the `whitelist` array: keep only whitelisted parameters
  //              (the fragment is dropped as well);
  //              the `blacklist` array: remove blacklisted parameters;
  //              anything else: no parameter is removed
  // Returns a new URL object. Parameters are sorted by name first, so that
  // equal sets of parameters give equal addresses.
  function hrefDataHandler(url, listType) {
    const copy = new URL(url.href);
    copy.searchParams.sort();
    switch (listType) {
      case whitelist: {
        // Start from a copy with the query and fragment cleared instead of
        // `new URL(origin + pathname)`, which throws for opaque origins
        // (mailto:, file:, ...) where origin is the string "null".
        const kept = new URL(copy.href);
        kept.search = '';
        kept.hash = '';
        for (const name of whitelist) {
          // getAll() keeps every occurrence of a repeated parameter
          // (tag=a&tag=b) as well as parameters with an empty value.
          for (const value of copy.searchParams.getAll(name)) {
            kept.searchParams.append(name, value);
          }
        }
        return kept;
      }
      case blacklist:
        // delete() does nothing for absent names and, unlike a get() test,
        // also removes parameters with an empty value.
        for (const name of blacklist) {
          copy.searchParams.delete(name);
        }
        break;
    }
    return copy;
  }