CleanURLs

Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.

目前为 2023-06-01 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name CleanURLs
  3. // @namespace i2p.schimon.cleanurl
  4. // @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
  5. // @homepageURL https://greasyfork.org/en/scripts/465933-clean-url-improved
  6. // @supportURL https://greasyfork.org/en/scripts/465933-clean-url-improved/feedback
  7. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  8. // @license MIT; https://opensource.org/licenses/MIT
  9. // @grant none
  10. // @run-at document-end
  11. // @match *://*/*
  12. // @version 23.06.01
  13. // @icon 
  14.  
  15. // ==/UserScript==
  16.  
  17. /*
  18.  
  19. Simple version of this Userscript
  20. let url = new URL(location.href);
  21. if (url.hash || url.search) {
  22. location.href = url.origin + url.pathname
  23. };
  24.  
  25. */
  26.  
  27. // Check whether HTML; otherwise, exit.
  28. //if (!document.contentType == 'text/html')
  29. if (document.doctype == null) return;
  30.  
  31. //let point = [];
  32. const namespace = 'i2p.schimon.cleanurl';
  33.  
  // Query parameters whose value is probed for an embedded URL
  // (the mouseover handler offers such a value as an "extracted" link).
  const urls = [
    'ref',
    'source',
    'src',
    'url',
    'utm_source',
  ];
  41.  
  // Reserved query parameters: these are the only ones copied into the
  // "Safe link" variant of a URL. The trailing note on each entry names the
  // kind of site or purpose the parameter was seen with.
  const whitelist = [
    'art', // article
    'action', // wiki
    'bill', // law
    'c', // cdn
    'category', // id
    'code', // code
    'content', // id
    'dark', // yorik.uncreated.net
    'date', // date
    'days', // wiki
    'district', // house.mo.gov
    'exp_time', // cdn
    'expires', // cdn
    'ezimgfmt', // cdn image processor
    'feedformat', // wiki
    'fid', // mybb
    'file_host', // cdn
    'filename', // filename
    'for', // cdn
    'format', // file type
    'guid', // guid
    'hash', // cdn
    'hidebots', // wiki
    'hl', // language
    'id', // id
    'ie', // character encoding
    'ip', // ip address
    'item_class', // greasyfork
    'item_id', // greasyfork
    'jid', // jabber id (xmpp)
    'key', // cdn
    'limit', // wiki
    'lang', // language
    'language', // language
    'library', // oujs
    'locale', // locale
    'lr', // cdn
    'lra', // cdn
    'mobileaction', // wiki
    'news_id', // post
    'order', // bugzilla
    'orderBy', // oujs
    'orderDir', // oujs
    'p', // search query / page number
    'page', // mybb
    'preferencesReturnUrl', // return url
    'product', // bugzilla
    'q', // search query
    'query', // search query
    'query_format', // bugzilla
    //'referer', // signin <-- provided pathname contains login (log-in) or signin (sign-in)
    'resolution', // bugzilla
    'return_to', // signin
    's', // search query
    'search', // search query
    'show_all_versions', // greasyfork
    'sign', // cdn
    'signature', // cdn
    'sort', // greasyfork
    'speed', // cdn
    'start_time', // media playback
    'state', // cdn
    '__switch_theme', // theme (theanarchistlibrary.org)
    'tag', // id
    'tid', // mybb
    'title', // send (share) links and wiki
    'type', // file type
    //'url', // url <-- not whitelisted nor blacklisted
    'utf8', // encoding
    'urlversion', // wiki
    'v', // video
    'version', // greasyfork
    //'_x_tr_sl', // translate online service
    //'_x_tr_tl=', // translate online service
    //'_x_tr_hl=', // translate online service
    //'_x_tr_pto', // translate online service
    //'_x_tr_hist', // translate online service
    'year', // year
  ];
  123.  
  // Useless hash fragments: a URL fragment that starts with '#' followed by
  // one of these prefixes is stripped.
  const hash = [
    'back-url',
    'intcid',
    'niche-',
    //'searchinput',
    'src',
  ];
  131.  
  // Useless (tracking) query parameters: removed from the address bar and
  // from every hyperlink on the page. Commented-out entries are candidates
  // that are deliberately not removed.
  const blacklist = [
    'ad',
    'ad_medium',
    'ad_name',
    'ad_pvid',
    'ad_sub',
    //'ad_tags',
    'advertising-id',
    //'aem_p4p_detail',
    'af',
    'aff',
    'aff_fcid',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'affparams',
    'afSmartRedirect',
    //'aid',
    'algo_exp_id',
    'algo_pvid',
    'ar',
    //'ascsubtag',
    //'asc_contentid',
    'asgtbndr',
    'ats',
    //'b64e', // breaks yandex
    'bizType',
    //'block',
    'bta',
    'businessType',
    'campaign',
    'campaignId',
    //'__cf_chl_rt_tk',
    'cid',
    'ck',
    //'clickid',
    //'client_id',
    //'cm_ven',
    'content-id',
    'crid',
    'cst',
    'cts',
    'curPageLogUid',
    //'data', // breaks yandex
    //'dchild',
    //'dclid',
    'deals-widget',
    'dicbo',
    //'dt',
    'edd',
    'edm_click_module',
    //'ei',
    //'embed',
    '_encoding',
    //'etext', // breaks yandex
    'fbclid',
    'feature',
    'forced_click',
    //'fr',
    'frs',
    //'from', // breaks yandex
    '_ga',
    'ga_order',
    'ga_search_query',
    'ga_search_type',
    'ga_view_type',
    'gatewayAdapt',
    //'gclid',
    //'gclsrc',
    'gh_jid',
    'gps-id',
    //'gs_lcp',
    'gt',
    'guccounter',
    'hdtime',
    'ICID',
    'ico',
    'ig_rid',
    //'idzone',
    //'iflsig',
    //'irgwc',
    //'irpid',
    'itid',
    //'itok',
    //'katds_labels',
    //'keywords',
    'keyno',
    'l10n',
    'linkCode',
    'mc',
    'mid',
    'mp',
    'nats',
    'nci',
    'obOrigUrl',
    'optout',
    'oq',
    'organic_search_click',
    'pa',
    'Partner',
    'partner',
    'partner_id',
    'pcampaignid',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'pg',
    'PHPSESSID',
    'pk_campaign',
    'pdp_ext_f',
    'pkey',
    'platform',
    'plkey',
    'pqr',
    'pr',
    'pro',
    'prod',
    'promo',
    'promocode',
    'promoid',
    'psc',
    'psprogram',
    'pvid',
    'qid',
    //'r',
    'realDomain',
    'redirect',
    'ref',
    'ref_',
    'ref_src',
    'refcode',
    'referrer',
    'refinements',
    'reftag',
    'rowan_id1',
    'rowan_msg_id',
    //'sCh',
    'sclient',
    'scm',
    'scm_id',
    'scm-url',
    //'sd',
    'si',
    '___SID',
    '_src',
    'src_cmp',
    'src_player',
    'src_src',
    'shareId',
    'showVariations',
    'sid',
    //'site_id',
    'sk',
    'smid',
    'social_params',
    'source',
    'sourceId',
    'sp_csd',
    'spLa',
    'spm',
    'spreadType',
    //'sprefix',
    'sr',
    'src',
    'srcSns',
    'su',
    '_t',
    //'tag',
    'tcampaign',
    'td',
    'terminal_id',
    //'text',
    'th', // Sometimes restored after page load
    //'title',
    'tracelog',
    'traffic_id',
    'traffic_type',
    'tt',
    'uact',
    'ug_edm_item_id',
    //'utm1',
    //'utm2',
    //'utm3',
    //'utm4',
    //'utm5',
    //'utm6',
    //'utm7',
    //'utm8',
    //'utm9',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
    'uuid',
    //'utype',
    //'ve',
    //'ved',
    //'zone'
  ];
  341.  
  // URL Indexers
  // NOTE(review): this list is not referenced by any code visible in this
  // file; presumably kept for per-category filtering - confirm before use.
  const paraIDX = [
    'algo_exp_id',
    'algo_pvid',
    'b64e',
    'cst',
    'cts',
    'data',
    'ei',
    //'etext',
    'from',
    'iflsig',
    'gbv',
    'gs_lcp',
    'hdtime',
    'keyno',
    'l10n',
    'mc',
    'oq',
    //'q',
    'sei',
    'sclient',
    'sign',
    'source',
    'state',
    //'text',
    'uact',
    'uuid',
    'ved',
  ];
  371.  
  // Market Places
  // NOTE(review): this list is not referenced by any code visible in this
  // file; presumably kept for per-category filtering - confirm before use.
  const paraMKT = [
    '___SID',
    '_t',
    'ad_pvid',
    'af',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'afSmartRedirect',
    'bizType',
    'businessType',
    'ck',
    'content-id',
    'crid',
    'curPageLogUid',
    'deals-widget',
    'edm_click_module',
    'gatewayAdapt',
    'gps-id',
    'keywords',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'platform',
    'pdp_ext_f',
    'ref_',
    'refinements',
    'rowan_id1',
    'rowan_msg_id',
    'scm',
    'scm_id',
    'scm-url',
    'shareId',
    //'showVariations',
    'sk',
    'smid',
    'social_params',
    'spLa',
    'spm',
    'spreadType',
    'sr',
    'srcSns',
    'terminal_id',
    'th', // Sometimes restored after page load
    'tracelog',
    'tt',
    'ug_edm_item_id',
  ];
  428.  
  // IL
  // NOTE(review): not referenced by any code visible in this file.
  const paraIL = [
    'dicbo',
    'obOrigUrl',
  ];
  433.  
  // General
  // NOTE(review): not referenced by any code visible in this file.
  const paraWWW = [
    'aff',
    'promo',
    'promoid',
    'ref',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
  ];
  445.  
  // For URL of the Address bar
  // Check and modify page address: blacklisted query parameters and useless
  // hash fragments are removed from the address bar without reloading.
  // TODO Add bar and ask to clean address bar
  (function modifyURL() {
    const url = new URL(location.href);
    let changed = false;

    // has() instead of get(): a tracking parameter with an empty value
    // (e.g. "?fbclid=") is falsy under get() and would be left in place.
    for (const name of blacklist) {
      if (url.searchParams.has(name)) {
        url.searchParams.delete(name);
        changed = true;
      }
    }

    // Drop the fragment only when it is one of the known useless ones, so an
    // ordinary in-page anchor survives the removal of tracking parameters.
    if (hash.some((prefix) => url.hash.startsWith(`#${prefix}`))) {
      url.hash = '';
      changed = true;
    }

    if (changed) {
      // Serialise through the URL object rather than concatenating parts.
      window.history.pushState(null, null, url.href);
      //location.href = url.href;
    }
  })();
  479.  
  // Remember the untouched address of every link that carries a query string
  // in a 'href-data' attribute, before the links are cleaned. The mouseover
  // handler reads that attribute to build its popup.
  (function scanAllURLs() {
    for (const link of document.links) {
      let url;
      try {
        url = new URL(link.href);
      } catch {
        // An unparsable href is returned verbatim by the element and makes
        // the URL constructor throw; skip that link instead of aborting the
        // whole scan.
        continue;
      }
      if (url.search) {
        //if (url.search || url.hash) {
        link.setAttribute('href-data', link.href);
      }
    }
  })();
  489.  
  // Run every link of the document through cleanLink(), once per useless
  // hash prefix and once per blacklisted parameter.
  (function scanBadURLs() {
    // TODO callback, Mutation Observer, and Event Listener
    for (const link of document.links) {
      for (const fragment of hash) {
        cleanLink(link, fragment, 'hash');
      }
      for (const parameter of blacklist) {
        cleanLink(link, parameter, 'para');
      }
    }
  })();
  497.  
  // TODO Add an Event Listener
  /**
   * Remove a single tracking artefact from a hyperlink, in place.
   *
   * @param {{href: string}} link - element whose `href` is read and, when a
   *   match is found, rewritten (called with entries of `document.links`).
   * @param {string} target - hash prefix (without '#') for type 'hash', or
   *   query parameter name for type 'para'.
   * @param {string} type - 'hash' strips the fragment when it starts with
   *   '#' + target; 'para' deletes the query parameter named target. Any
   *   other value leaves the link untouched.
   */
  function cleanLink(link, target, type) {
    let url;
    try {
      url = new URL(link.href);
    } catch {
      // Unparsable href: nothing to clean, and nothing worth aborting for.
      return;
    }

    // The result is serialised with url.href rather than
    // origin + pathname + search: origin is the string "null" for schemes
    // such as mailto: or tel:, which would corrupt those links.
    switch (type) {
      case 'hash':
        if (url.hash.startsWith('#' + target)) {
          url.hash = '';
          link.href = url.href;
        }
        break;
      case 'para':
        // has() also catches parameters with an empty value; the fragment
        // is left alone because only the query string is being cleaned.
        if (url.searchParams.has(target)) {
          url.searchParams.delete(target);
          link.href = url.href;
        }
        break;
    }
  }
  538.  
  539. // TODO Hunt (for any) links within attributes using getAttributeNames()[i]
  540.  
  // Event Listener
  // When the pointer enters an element that carries a 'href-data' attribute
  // (the original address stored before cleaning), show a popup next to the
  // pointer offering the base, whitelisted, purged and original variants of
  // that address, plus any URL found inside its parameters.
  // TODO Scan 'e.target.childNodes' until 'href-data' (link) is found
  document.body.addEventListener("mouseover", function(e) { // mouseover works with keyboard too
    //if (e.target && e.target.nodeName == "A") {
    if (!e.target || typeof e.target.getAttribute !== 'function') {
      return;
    }
    const rawHref = e.target.getAttribute('href-data');
    if (!rawHref) {
      return;
    }

    // Do nothing when the popup currently shown already belongs to this
    // address (its "original" button points at the same href).
    const shownOriginal = document.getElementById('url-original');
    if (shownOriginal && shownOriginal.href === rawHref) {
      return;
    }

    let hrefData;
    try {
      hrefData = new URL(rawHref);
    } catch {
      // Attribute did not hold a parsable address; nothing to offer.
      return;
    }

    const previousPopup = document.getElementById(namespace);
    if (previousPopup) {
      previousPopup.remove();
    }

    const selectionItem = createButton(e.pageX, e.pageY, rawHref);
    selectionItem.append(purgeURL(hrefData));

    // Add one button per variant, skipping variants whose address equals
    // that of a button already present.
    for (const listType of ['whitelist', 'blacklist', 'original']) {
      const button = purgeURL(hrefData, listType);
      const isDuplicate = Array.from(selectionItem.childNodes)
        .some((node) => node.href === button.href);
      if (!isDuplicate) {
        selectionItem.append(button);
      }
    }

    // Check for URLs carried inside parameter values.
    // Parameter values are untrusted: never offer a script-bearing scheme
    // as a clickable link.
    const unsafeSchemes = ['javascript:', 'data:', 'vbscript:'];
    for (const name of urls) {
      const value = hrefData.searchParams.get(name);
      if (!value) {
        continue;
      }
      let extracted = null;
      try {
        extracted = new URL(value);
      } catch {
        if (value.includes('.')) { // NOTE It is a guess
          try {
            extracted = new URL('http:' + value);
          } catch {
            // Not an address after all; leave extracted as null.
          }
        }
      }
      if (extracted && !unsafeSchemes.includes(extracted.protocol)) {
        selectionItem.prepend(extractURL(extracted));
      }
    }

    // do not add element, if url has only whitelisted parameters and no potential url
    // add element, only if a potential url or non-whitelisted parameters are found
    const isWorthShowing = ['url-extracted', 'url-original', 'url-purged']
      .some((id) => selectionItem.querySelector(`#${id}`));
    if (isWorthShowing) {
      document.body.append(selectionItem);
    }
  });
  599.  
  /**
   * Create the (still detached) popup container placed next to the pointer.
   *
   * @param {number} x - horizontal page coordinate; the popup sits 5px right of it.
   * @param {number} y - vertical page coordinate; the popup sits 3px above it.
   * @param {string} url - accepted for the caller's convenience; not used here.
   * @returns {HTMLElement} element whose tag name and id are both `namespace`.
   */
  function createButton(x, y, url) {
    const popup = document.createElement(namespace);
    popup.id = namespace;

    // Properties are applied in the order written, so `all: unset` resets
    // the element before the declarations that follow it.
    Object.assign(popup.style, {
      all: 'unset',
      // position
      position: 'absolute',
      left: `${x + 5}px`,
      top: `${y - 3}px`,
      // appearance
      fontFamily: 'none', // emoji
      background: '#333',
      borderRadius: '5%',
      padding: '3px',
      zIndex: 10000,
      //opacity: 0.7,
      filter: 'brightness(0.7)',
      // center character
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      userSelect: 'none',
      cursor: 'default',
    });

    // Full brightness while hovered; the popup removes itself on leave.
    popup.onmouseover = () => {
      popup.style.filter = 'unset';
    };
    popup.onmouseleave = () => { // onmouseout
      // TODO Wait a few seconds
      popup.remove();
    };

    return popup;
  }
  636.  
  /**
   * Create the link-emoji anchor that points at a URL found inside a
   * parameter value.
   *
   * @param {URL|string} url - address assigned to the anchor's href.
   * @returns {HTMLAnchorElement} detached anchor with id 'url-extracted'.
   */
  function extractURL(url) {
    const anchor = document.createElement('a');
    anchor.textContent = '🔗'; // 🧧 🏷️ 🔖
    anchor.id = 'url-extracted';
    Object.assign(anchor.style, {
      all: 'unset',
      outline: 'none',
      height: '15px',
      width: '15px',
      padding: '3px',
      margin: '3px',
      //fontSize: '0.9rem', // 90%
      lineHeight: 'normal', // initial
      //height: 'fit-content',
    });
    anchor.href = url;
    return anchor;
  }
  653.  
  // TODO Use icons (with shapes) for cases when color is not optimal
  /**
   * Create one round, colour-coded anchor for a variant of `url`.
   *
   * @param {URL} url - the original address of the hovered link.
   * @param {string} [listType] - which variant to build:
   *   'blacklist' -> yellow "Clean link" (blacklisted parameters removed),
   *   'original'  -> orangered "Unsafe link" (address unchanged),
   *   'whitelist' -> lawngreen "Safe link" (whitelisted parameters only),
   *   anything else -> antiquewhite "Base link" (no parameters, no fragment).
   * @returns {HTMLAnchorElement} detached anchor with id 'url-purged',
   *   'url-original', 'url-known' or 'url-base' respectively.
   */
  function purgeURL(url, listType) {
    // itemColor is declared locally: it used to be an implicit global, which
    // leaks into the page and is a ReferenceError in strict mode.
    let itemColor, itemTitle, itemId, resUrl;
    const item = document.createElement('a');
    item.style.all = 'unset';
    switch (listType) {
      case 'blacklist':
        itemColor = 'yellow';
        //itemTextContent = '🟡';
        itemTitle = 'Clean link'; // Purged URL
        itemId = 'url-purged';
        resUrl = hrefDataHandler(url, blacklist);
        break;
      case 'original': // TODO dbclick (double-click)
        itemColor = 'orangered';
        //itemTextContent = '🔴';
        itemTitle = 'Unsafe link'; // Original URL
        itemId = 'url-original';
        resUrl = url;
        item.style.cursor = `not-allowed`; // no-drop
        item.onmouseenter = () => {
          item.style.filter = `drop-shadow(2px 4px 6px ${itemColor})`;
        };
        item.onmouseout = () => {
          item.style.filter = 'unset';
        };
        break;
      case 'whitelist':
        itemColor = 'lawngreen';
        //itemTextContent = '🟢';
        itemTitle = 'Safe link'; // Link with whitelisted parameters
        itemId = 'url-known';
        resUrl = hrefDataHandler(url, whitelist);
        break;
      default: {
        itemColor = 'antiquewhite';
        //itemTextContent = '⚪';
        itemTitle = 'Base link'; // Link without parameters
        itemId = 'url-base';
        // Strip a copy instead of concatenating origin + pathname: origin
        // is the string "null" for schemes such as mailto:, which used to
        // yield a broken address.
        const base = new URL(url.href);
        base.search = '';
        base.hash = '';
        base.username = '';
        base.password = '';
        resUrl = base;
        break;
      }
    }
    item.id = itemId;
    item.title = itemTitle;
    item.style.background = itemColor;
    //item.textContent = itemTextContent;
    item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.height = '15px';
    item.style.width = '15px';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.href = String(resUrl);
    return item;
  }
  709.  
  /**
   * Build a filtered copy of `url`; the argument itself is never modified.
   *
   * @param {URL} url - address to filter.
   * @param {string[]} listType - the `whitelist` or `blacklist` array itself
   *   (compared by identity).
   *   `whitelist`: the result keeps only whitelisted parameters, in
   *   whitelist order, and carries no fragment.
   *   `blacklist`: the result has every blacklisted parameter removed.
   *   Any other value: the result is the copy with its parameters sorted.
   * @returns {URL} the filtered copy (parameters sorted by name beforehand).
   */
  function hrefDataHandler(url, listType) {
    const sorted = new URL(url.href);
    sorted.searchParams.sort();

    switch (listType) {
      case whitelist: {
        // Start from a stripped copy rather than new URL(origin + pathname):
        // origin is the string "null" for schemes such as mailto:, which
        // made the constructor throw.
        const kept = new URL(sorted.href);
        kept.search = '';
        kept.hash = '';
        kept.username = '';
        kept.password = '';
        for (const name of whitelist) {
          // getAll() keeps repeated parameters (?tag=a&tag=b) and
          // parameters with an empty value, both of which get() loses.
          for (const value of sorted.searchParams.getAll(name)) {
            kept.searchParams.append(name, value);
          }
        }
        return kept;
      }
      case blacklist:
        // delete() is a no-op for absent names and, unlike a get() check,
        // also removes parameters with an empty value.
        for (const name of blacklist) {
          sorted.searchParams.delete(name);
        }
        break;
    }
    return sorted;
  }