CleanURLs

Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.

当前为 2023-06-09 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name CleanURLs
  3. // @namespace i2p.schimon.cleanurl
  4. // @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
  5. // @homepageURL https://greasyfork.org/en/scripts/465933-clean-url-improved
  6. // @supportURL https://greasyfork.org/en/scripts/465933-clean-url-improved/feedback
  7. // @copyright 2023, Schimon Jehudah (http://schimon.i2p)
  8. // @license MIT; https://opensource.org/licenses/MIT
  9. // @grant none
  10. // @run-at document-end
  11. // @match *://*/*
  12. // @version 23.06.09
  13. // @icon data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj48dGV4dCB5PSIuOWVtIiBmb250LXNpemU9IjkwIj7wn5qlPC90ZXh0Pjwvc3ZnPgo=
  14.  
  15. // ==/UserScript==
  16.  
  17. /*
  18.  
  19. Simple version of this Userscript
  20. let url = new URL(location.href);
  21. if (url.hash || url.search) {
  22. location.href = url.origin + url.pathname
  23. };
  24.  
  25. */
  26.  
  27. // https://openuserjs.org/scripts/tfr/YouTube_Link_Cleaner
  28.  
  29. // Check whether HTML; otherwise, exit.
  30. //if (!document.contentType == 'text/html')
  31. if (document.doctype == null) return;
  32.  
  33. //let point = [];
  34. const namespace = 'i2p.schimon.cleanurl';
  35.  
  // Query parameters whose value may itself be a URL; the hover popup tries to
  // parse each of them and offers the result as an extracted link.
  const urls = ['redirect', 'ref', 'source', 'src', 'url', 'utm_source'];
  44.  
  // Reserved query parameters: these are the only ones kept when the
  // "Safe link" variant of a URL is built. The order of this list is the order
  // in which kept parameters are written to the rebuilt URL, so do not re-sort.
  const whitelist = [
    'art',                  // article
    'action',               // wiki
    'bill',                 // law
    'c',                    // cdn
    'category',             // id
    'code',                 // code
    'content',              // id
    'dark',                 // yorik.uncreated.net
    'date',                 // date
    'days',                 // wiki
    'district',             // house.mo.gov
    'exp_time',             // cdn
    'expires',              // cdn
    'ezimgfmt',             // cdn image processor
    'feedformat',           // wiki
    'fid',                  // mybb
    'file_host',            // cdn
    'filename',             // filename
    'for',                  // cdn
    'format',               // file type
    'guid',                 // guid
    'hash',                 // cdn
    'hidebots',             // wiki
    'hl',                   // language
    'id',                   // id
    'ie',                   // character encoding
    'ip',                   // ip address
    'item_class',           // greasyfork
    'item_id',              // greasyfork
    'jid',                  // jabber id (xmpp)
    'key',                  // cdn
    'limit',                // wiki
    'lang',                 // language
    'language',             // language
    'library',              // oujs
    'locale',               // locale
    'lr',                   // cdn
    'lra',                  // cdn
    'mobileaction',         // wiki
    'news_id',              // post
    'order',                // bugzilla
    'orderBy',              // oujs
    'orderDir',             // oujs
    'p',                    // search query / page number
    'page',                 // mybb
    'preferencesReturnUrl', // return url
    'product',              // bugzilla
    'q',                    // search query
    'query',                // search query
    'query_format',         // bugzilla
    // 'referer' (signin) is left out on purpose: it is only meaningful when
    // the pathname contains login (log-in) or signin (sign-in).
    'resolution',           // bugzilla
    'return_to',            // signin
    's',                    // search query
    'search',               // search query
    'show_all_versions',    // greasyfork
    'sign',                 // cdn
    'signature',            // cdn
    'sort',                 // greasyfork
    'speed',                // cdn
    'start_time',           // media playback
    'state',                // cdn
    '__switch_theme',       // theme (theanarchistlibrary.org)
    'tag',                  // id
    'tid',                  // mybb
    'title',                // send (share) links and wiki
    'type',                 // file type
    // 'url' is neither whitelisted nor blacklisted.
    'utf8',                 // encoding
    'urlversion',           // wiki
    'v',                    // video
    'version',              // greasyfork
    // Online translation service parameters, currently not whitelisted:
    // '_x_tr_sl', '_x_tr_tl=', '_x_tr_hl=', '_x_tr_pto', '_x_tr_hist'
    'year',                 // year
  ];
  126.  
  // Fragment prefixes regarded as useless: a URL whose hash starts with
  // '#' + one of these gets its fragment stripped.
  // ('searchinput' is a disabled candidate and is intentionally not listed.)
  const hash = ['back-url', 'intcid', 'niche-', 'src'];
  134.  
  // Query parameters regarded as useless (tracking, affiliate, session, ...).
  // They are removed from the address bar, from page links, and from the
  // "Clean link" variant offered in the hover popup.
  // Entries that are commented out are disabled candidates; the note next to
  // some of them records why they must stay disabled.
  // Every active entry appears exactly once (a duplicated 'affparams' was
  // removed).
  const blacklist = [
    'ad',
    'ad_medium',
    'ad_name',
    'ad_pvid',
    'ad_sub',
    //'ad_tags',
    'advertising-id',
    //'aem_p4p_detail',
    'af',
    'aff',
    'aff_fcid',
    'aff_fsk',
    'aff_platform',
    'aff_trace_key',
    'affparams',
    'afSmartRedirect',
    'afftrack',
    //'aid',
    'algo_exp_id',
    'algo_pvid',
    'ar',
    //'ascsubtag',
    //'asc_contentid',
    'asgtbndr',
    'atc',
    'ats',
    'autostart',
    //'b64e', // breaks yandex
    'bizType',
    //'block',
    'bta',
    'businessType',
    'campaign',
    'campaignId',
    //'__cf_chl_rt_tk',
    'cid',
    'ck',
    //'clickid',
    //'client_id',
    //'cm_ven',
    'content-id',
    'crid',
    'cst',
    'cts',
    'curPageLogUid',
    //'data', // breaks yandex
    //'dchild',
    //'dclid',
    'deals-widget',
    'dicbo',
    //'dt',
    'edd',
    'edm_click_module',
    //'ei',
    //'embed',
    '_encoding',
    //'etext', // breaks yandex
    'eventSource',
    'fbclid',
    'feature',
    'forced_click',
    //'fr',
    'frs',
    //'from', // breaks yandex
    '_ga',
    'ga_order',
    'ga_search_query',
    'ga_search_type',
    'ga_view_type',
    'gatewayAdapt',
    //'gclid',
    //'gclsrc',
    'gh_jid',
    'gps-id',
    //'gs_lcp',
    'gt',
    'guccounter',
    'hdtime',
    'ICID',
    'ico',
    'ig_rid',
    //'idzone',
    //'iflsig',
    //'irgwc',
    //'irpid',
    'itid',
    //'itok',
    //'katds_labels',
    //'keywords',
    'keyno',
    'l10n',
    'linkCode',
    'mc',
    'mid',
    'mp',
    'nats',
    'nci',
    'obOrigUrl',
    'optout',
    'oq',
    'organic_search_click',
    'pa',
    'Partner',
    'partner',
    'partner_id',
    'pcampaignid',
    'pd_rd_i',
    'pd_rd_r',
    'pd_rd_w',
    'pd_rd_wg',
    'pdp_npi',
    'pf_rd_i',
    'pf_rd_m',
    'pf_rd_p',
    'pf_rd_r',
    'pf_rd_s',
    'pf_rd_t',
    'pg',
    'PHPSESSID',
    'pk_campaign',
    'pdp_ext_f',
    'pkey',
    'platform',
    'plkey',
    'pqr',
    'pr',
    'pro',
    'prod',
    'promo',
    'promocode',
    'promoid',
    'psc',
    'psprogram',
    'pvid',
    'qid',
    //'r',
    'realDomain',
    'recruiter_id',
    'redirect',
    'ref',
    'ref_',
    'ref_src',
    'refcode',
    'referrer',
    'refinements',
    'reftag',
    'rowan_id1',
    'rowan_msg_id',
    //'sCh',
    'sclient',
    'scm',
    'scm_id',
    'scm-url',
    //'sd',
    'si',
    '___SID',
    '_src',
    'src_cmp',
    'src_player',
    'src_src',
    'shareId',
    'showVariations',
    'sid',
    //'site_id',
    'sk',
    'smid',
    'social_params',
    'source',
    'sourceId',
    'sp_csd',
    'spLa',
    'spm',
    'spreadType',
    //'sprefix',
    'sr',
    'src',
    'srcSns',
    'su',
    '_t',
    //'tag',
    'tcampaign',
    'td',
    'terminal_id',
    //'text',
    'th', // Sometimes restored after page load
    //'title',
    'tracelog',
    'traffic_id',
    'traffic_type',
    'tt',
    'uact',
    'ug_edm_item_id',
    //'utm1', 'utm2', 'utm3', 'utm4', 'utm5', 'utm6', 'utm7', 'utm8', 'utm9',
    'utm_campaign',
    'utm_content',
    'utm_medium',
    'utm_source',
    'utm_term',
    'uuid',
    //'utype',
    //'ve',
    //'ved',
    //'zone'
  ];
  350.  
  // URL Indexers
  // Parameter names grouped under "URL indexers". Nothing in the visible part
  // of the script reads this list.
  // Disabled candidates, intentionally not listed: 'etext', 'q', 'text'.
  const paraIDX = [
    'algo_exp_id', 'algo_pvid',
    'b64e',
    'cst', 'cts',
    'data',
    'ei',
    'from',
    'iflsig',
    'gbv', 'gs_lcp',
    'hdtime',
    'keyno',
    'l10n',
    'mc',
    'oq',
    'sei', 'sclient', 'sign', 'source', 'state',
    'uact', 'uuid',
    'ved',
  ];
  380.  
  // Market Places
  // Parameter names grouped under "market places". Nothing in the visible part
  // of the script reads this list.
  // Disabled candidate, intentionally not listed: 'showVariations'.
  const paraMKT = [
    '___SID', '_t',
    'ad_pvid', 'af', 'aff_fsk', 'aff_platform', 'aff_trace_key', 'afSmartRedirect',
    'bizType', 'businessType',
    'ck', 'content-id', 'crid', 'curPageLogUid',
    'deals-widget',
    'edm_click_module',
    'gatewayAdapt', 'gps-id',
    'keywords',
    'pd_rd_i', 'pd_rd_r', 'pd_rd_w', 'pd_rd_wg', 'pdp_npi',
    'pf_rd_i', 'pf_rd_m', 'pf_rd_p', 'pf_rd_r', 'pf_rd_s', 'pf_rd_t',
    'platform', 'pdp_ext_f',
    'ref_', 'refinements', 'rowan_id1', 'rowan_msg_id',
    'scm', 'scm_id', 'scm-url', 'shareId', 'sk', 'smid', 'social_params',
    'spLa', 'spm', 'spreadType', 'sr', 'srcSns',
    'terminal_id',
    'th', // Sometimes restored after page load
    'tracelog', 'tt',
    'ug_edm_item_id',
  ];
  437.  
  // IL
  // Parameter names grouped under "IL". Nothing in the visible part of the
  // script reads this list.
  const paraIL = ['dicbo', 'obOrigUrl'];
  442.  
  // General
  // Parameter names grouped under "general". Nothing in the visible part of
  // the script reads this list.
  const paraWWW = [
    'aff',
    'promo', 'promoid',
    'ref',
    'utm_campaign', 'utm_content', 'utm_medium', 'utm_source', 'utm_term',
  ];
  454.  
  // For URL of the Address bar
  // Removes blacklisted query parameters and useless fragments from the
  // current page address, without reloading the page.
  //
  // - Parameters are detected with has(), so empty-valued trackers such as
  //   "?ref=" are removed too.
  // - The fragment is dropped only when it starts with '#' + an entry of
  //   `hash`; a legitimate "#section" survives parameter cleaning.
  // - The cleaned address replaces the current history entry, so the
  //   Back button does not lead to the same page under its uncleaned URL.
  // TODO Add bar and ask to clean address bar
  (function modifyURL() {
    const url = new URL(location.href);
    let changed = false;

    for (const name of blacklist) {
      if (url.searchParams.has(name)) {
        url.searchParams.delete(name);
        changed = true;
      }
    }

    const fragment = url.hash;
    if (hash.some((prefix) => fragment.startsWith('#' + prefix))) {
      url.hash = '';
      changed = true;
    }

    if (changed) {
      // url.href is used instead of origin + pathname + search because
      // url.origin is the string "null" for opaque origins.
      window.history.replaceState(window.history.state, '', url.href);
    }
  })();
  488.  
  // Remembers the original address of every link that carries a query string
  // by copying it into the link's 'href-data' attribute. This runs before the
  // links are cleaned; the hover popup reads the attribute later.
  (function scanAllURLs() {
    for (const link of document.links) {
      let url;
      try {
        url = new URL(link.href);
      } catch {
        // An unparsable href must not abort the scan (and with it the rest of
        // the script); such a link has nothing to clean.
        continue;
      }
      if (url.search) {
        link.setAttribute('href-data', link.href);
      }
    }
  })();
  498.  
  // Runs cleanLink() on every link of the document: first once per useless
  // fragment prefix, then once per blacklisted parameter.
  // TODO callback, Mutation Observer, and Event Listener
  (function scanBadURLs() {
    for (const link of document.links) {
      for (const fragment of hash) {
        cleanLink(link, fragment, 'hash');
      }
      for (const parameter of blacklist) {
        cleanLink(link, parameter, 'para');
      }
    }
  })();
  506.  
  // Removes one unwanted piece from a link's address, in place.
  //
  // link   - object with a readable and writable `href` (an <a> or <area>).
  // target - for type 'hash': a fragment prefix (without '#');
  //          for type 'para': a query parameter name.
  // type   - 'hash' strips the whole fragment when it starts with '#' + target;
  //          'para' deletes every occurrence of the parameter `target`.
  //          Any other value leaves the link untouched.
  //
  // The link is only written to when something was actually removed. Links
  // whose href cannot be parsed are ignored instead of throwing, so one bad
  // link cannot stop the caller's scan.
  // TODO Add an Event Listener
  function cleanLink(link, target, type) {
    let url;
    try {
      url = new URL(link.href);
    } catch {
      return;
    }

    switch (type) {
      case 'hash':
        if (url.hash.startsWith('#' + target)) {
          url.hash = '';
          link.href = url.href;
        }
        break;
      case 'para':
        // has() also matches empty-valued parameters such as "?ref=".
        if (url.searchParams.has(target)) {
          url.searchParams.delete(target);
          // url.href keeps the fragment and stays valid for URLs whose
          // origin serializes to "null" (e.g. mailto:).
          link.href = url.href;
        }
        break;
    }
  }
  547.  
  // TODO Hunt (for any) links within attributes using getAttributeNames()[i]

  // Event Listener
  // When the pointer moves over an element that carries a 'href-data'
  // attribute (the link's original address), builds a small popup next to the
  // pointer that offers variants of that address: base, whitelisted-only,
  // purged, original, and any URL found inside one of the `urls` parameters.
  // If the popup ends up with nothing useful to offer, it is not shown and the
  // 'href-data' attribute is removed from the element.
  // TODO Scan 'e.target.childNodes' until 'href-data' (link) is found
  document.body.addEventListener("mouseover", function(e) {
    const target = e.target;
    if (!target || typeof target.getAttribute !== 'function') {
      return;
    }

    const originalHref = target.getAttribute('href-data');
    if (!originalHref) {
      return;
    }

    // Do nothing when a popup for this very address is already on screen.
    const shownOriginal = document.getElementById('url-original');
    if (shownOriginal && shownOriginal.href === originalHref) {
      return;
    }

    let hrefData;
    try {
      hrefData = new URL(originalHref);
    } catch {
      return; // attribute does not hold a parsable address
    }

    // Tries to read a parameter value as a URL; returns null when it is not one.
    const parseEmbeddedURL = (value) => {
      try {
        return new URL(value);
      } catch {
        if (!value.includes('.')) { // NOTE It is a guess
          return null;
        }
        try {
          return new URL('http:' + value);
        } catch {
          return null;
        }
      }
    };

    // Only one popup at a time.
    const previous = document.getElementById(namespace);
    if (previous) {
      previous.remove();
    }

    const selectionItem = createButton(e.pageX, e.pageY, originalHref);
    selectionItem.append(purgeURL(hrefData));
    for (const type of ['whitelist', 'blacklist', 'original']) {
      const button = purgeURL(hrefData, type);
      // Skip variants that point to an address the popup already offers.
      const exists = Array.from(selectionItem.childNodes)
        .some((node) => node.href === button.href);
      if (!exists) {
        selectionItem.append(button);
      }
    }

    // Check for URLs
    for (const name of urls) {
      const value = hrefData.searchParams.get(name);
      if (!value) {
        continue;
      }
      const embedded = parseEmbeddedURL(value);
      if (embedded) {
        selectionItem.prepend(extractURL(embedded));
      }
    }

    // compare original against purged
    const purged = selectionItem.querySelector('#url-purged');
    if (purged) {
      const urlPurge = new URL(purged.href);
      // Sort both so the comparison ignores parameter order.
      urlPurge.searchParams.sort();
      hrefData.searchParams.sort();
      if (hrefData.search === urlPurge.search) {
        // The original button may never have been appended (duplicate of
        // another variant), hence the null check.
        const original = selectionItem.querySelector('#url-original');
        if (original) {
          original.remove();
        }
      }
    }

    // do not add element, if url has only whitelisted parameters and no potential url
    // add element, only if a potential url or non-whitelisted parameter was found
    const worthShowing = ['url-extracted', 'url-original', 'url-purged']
      .some((id) => selectionItem.querySelector(`#${id}`));
    if (worthShowing) {
      document.body.append(selectionItem);
      return;
    }

    // Nothing to offer for this element; stop reacting to it.
    target.removeAttribute('href-data');
  });
  625.  
  // Builds the popup container that will hold the link variants.
  //
  // x, y - page coordinates of the pointer; the popup is placed 5px to the
  //        right of and 3px above that point.
  // url  - accepted but not used by this function.
  //
  // Returns the new element (tag name and id are both `namespace`); it is not
  // attached to the document here. The element brightens while hovered and
  // removes itself as soon as the pointer leaves it.
  function createButton(x, y, url) {
    const popup = document.createElement(namespace);
    popup.id = namespace;

    Object.assign(popup.style, {
      // reset first, then position
      all: 'unset',
      position: 'absolute',
      left: x+5 + 'px',
      top: y-3 + 'px',
      // appearance
      fontFamily: 'none', // emoji
      background: '#333',
      borderRadius: '5%',
      padding: '3px',
      zIndex: 10000,
      filter: 'brightness(0.7)',
      // center character
      justifyContent: 'center',
      alignItems: 'center',
      display: 'flex',
      // disable selection marks
      userSelect: 'none',
      cursor: 'default',
    });

    popup.onmouseover = () => {
      popup.style.filter = 'unset';
    };
    // TODO Wait a few seconds
    popup.onmouseleave = () => {
      popup.remove();
    };

    return popup;
  }
  662.  
  // Creates the popup entry for a URL that was found inside a query parameter:
  // an anchor showing a link emoji, with id 'url-extracted', pointing at `url`.
  // The anchor is returned unattached.
  function extractURL(url) {
    const anchor = document.createElement('a');
    anchor.textContent = '🔗'; // 🧧 🏷️ 🔖
    anchor.id = 'url-extracted';
    Object.assign(anchor.style, {
      all: 'unset', // must come first so the following values are not reset
      outline: 'none',
      height: '15px',
      width: '15px',
      padding: '3px',
      margin: '3px',
      lineHeight: 'normal', // initial
    });
    anchor.href = url;
    return anchor;
  }
  679.  
  // Creates one round, colored popup entry (an anchor) for a variant of `url`.
  //
  // url      - URL object of the link's original address.
  // listType - which variant to build:
  //   'blacklist' -> yellow,       id 'url-purged',   blacklisted parameters removed
  //   'original'  -> orangered,    id 'url-original', the address unchanged
  //   'whitelist' -> lawngreen,    id 'url-known',    only whitelisted parameters kept
  //   otherwise   -> antiquewhite, id 'url-base',     origin + pathname only
  //
  // Returns the anchor, unattached.
  //
  // The color is a local variable: each anchor's hover handler keeps its own
  // color, and nothing is written to the global object (which would throw in
  // strict mode).
  // TODO Use icons (with shapes) for cases when color is not optimal
  function purgeURL(url, listType) {
    let itemColor, itemTitle, itemId, resUrl;
    const item = document.createElement('a');
    item.style.all = 'unset';
    switch (listType) {
      case 'blacklist':
        itemColor = 'yellow';
        itemTitle = 'Clean link'; // Purged URL
        itemId = 'url-purged';
        resUrl = hrefDataHandler(url, blacklist);
        break;
      case 'original': // TODO dbclick (double-click)
        itemColor = 'orangered';
        itemTitle = 'Unsafe link'; // Original URL
        itemId = 'url-original';
        resUrl = url;
        item.style.cursor = `not-allowed`; // no-drop
        item.onmouseenter = () => {
          item.style.filter = `drop-shadow(2px 4px 6px ${itemColor})`;
        };
        item.onmouseout = () => {
          item.style.filter = 'unset';
        };
        break;
      case 'whitelist':
        itemColor = 'lawngreen';
        itemTitle = 'Safe link'; // Link with whitelisted parameters
        itemId = 'url-known';
        resUrl = hrefDataHandler(url, whitelist);
        break;
      default:
        itemColor = 'antiquewhite';
        itemTitle = 'Base link'; // Link without parameters
        itemId = 'url-base';
        resUrl = url.origin + url.pathname;
        break;
    }
    item.id = itemId;
    item.title = itemTitle;
    item.style.background = itemColor;
    item.style.borderRadius = '50%';
    item.style.outline = 'none';
    item.style.height = '15px';
    item.style.width = '15px';
    item.style.padding = '3px';
    item.style.margin = '3px';
    item.href = resUrl;
    return item;
  }
  735.  
  // Returns a filtered copy of `url`; the argument itself is never modified.
  //
  // url      - URL object to filter.
  // listType - must be the `whitelist` or the `blacklist` array itself
  //            (compared by identity):
  //   whitelist -> a URL without fragment whose query holds only whitelisted
  //                parameters, in whitelist order;
  //   blacklist -> the URL with its parameters sorted and every blacklisted
  //                parameter removed;
  //   otherwise -> the URL with its parameters sorted.
  //
  // Parameters are matched by presence, not by truthiness of their value, so
  // empty values ("?q=") are handled like any other, and repeated parameters
  // ("?tag=a&tag=b") keep all of their values.
  function hrefDataHandler(url, listType) {
    const sorted = new URL(url.href);
    sorted.searchParams.sort();

    switch (listType) {
      case whitelist: {
        // Start from the same address without query and fragment; this also
        // works for URLs whose origin serializes to "null".
        const kept = new URL(sorted.href);
        kept.search = '';
        kept.hash = '';
        for (const name of whitelist) {
          for (const value of sorted.searchParams.getAll(name)) {
            kept.searchParams.append(name, value);
          }
        }
        return kept;
      }
      case blacklist:
        for (const name of blacklist) {
          sorted.searchParams.delete(name);
        }
        return sorted;
      default:
        return sorted;
    }
  }