URL Modifier for Search Engines

Modify URLs in search results of search engines

当前为 2024-01-01 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name URL Modifier for Search Engines
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.7
  5. // @description Modify URLs in search results of search engines
  6. // @author Domenic
  7. // @match *://searx.tiekoetter.com/search*
  8. // @match *://search.disroot.org/search*
  9. // @match *://www.startpage.com/search*
  10. // @match *://www.startpage.com/sp/search*
  11. // @match *://search.brave.com/search*
  12. // @grant none
  13. // @run-at document-end
  14. // @license GPL-2.0-only
  15. // ==/UserScript==
  16.  
  17. (function() {
  18. 'use strict';
  19.  
  20. // Define URL modification rules
  21. const urlModificationRules = [
  22. {
  23. matchRegex: /^https?:\/\/www\.reddit\.com(.*)/,
  24. replaceWith: 'https://old.reddit.com$1'
  25. },
  26. {
  27. matchRegex: /^https?:\/\/(en(.m)?|simple)\.wikipedia.org\/wiki\/(?!Special:Search)(\w+)/,
  28. replaceWith: 'https://www.wikiwand.com/en/$3'
  29. },
  30. {
  31. matchRegex: /^https?:\/\/zh(\.m)?\.wikipedia\.org\/(zh-hans|wiki)\/(.*)/,
  32. replaceWith: 'https://www.wikiwand.com/zh-hans/$3'
  33. },
  34. {
  35. matchRegex: /^https?:\/\/((\w+\.)?medium\.com\/.*)/,
  36. replaceWith: 'https://freedium.cfd/https://$1'
  37. },
  38. {
  39. matchRegex: /^https?:\/\/((.*)arxiv\.org\/pdf|arxiv-export-lb.library.cornell.edu\/(pdf|abs))\/(\d{4}\.\d{4,5}(v\d)?)(.*)/,
  40. replaceWith: 'https://arxiv.org/abs/$4'
  41. },
  42. {
  43. matchRegex: /^https?:\/\/(ieeexplore\.ieee\.org\/document\/\d+)\//,
  44. replaceWith: 'https://$1'
  45. }
  46. // Add more rules here as needed
  47. ];
  48.  
  // Per-engine description of which result links to rewrite.
  //
  // Keys are engine names (the same names used as keys of `searchEngines`).
  // Each entry is a list of rules consumed by `modifyUrls`:
  //   selector          CSS selector for the anchor elements to rewrite.
  //   updateText        when true, the anchor's own text is replaced too.
  //   childSelector     selector (relative to the anchor) of a child whose
  //                     text shows the URL.
  //   updateChildText   when true (and childSelector is set), that child's
  //                     text is replaced.
  //   useTopLevelDomain when true, the displayed text is the result of
  //                     extractTopLevelDomain() instead of the full new href.
  //   containProtocol   passed through to extractTopLevelDomain().
  const selectorRules = {
    searx: [
      {
        selector: 'article a.url_wrapper',
        childSelector: '.url_i1',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true
      },
      { selector: 'h3 a' }
    ],

    startpage: [
      {
        selector: 'a.w-gl__result-url.result-link',
        updateText: true
      },
      { selector: 'a.w-gl__result-title.result-link' }
    ],

    brave: [
      {
        selector: 'a.h.svelte-1dihpoi',
        childSelector: 'cite.snippet-url.svelte-1ygzem6 span.netloc.text-small-bold.svelte-1ygzem6',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: false
      }
    ],

    duckduckgo: [
      { selector: 'a.eVNpHGjtxRBq_gLOfGDr.LQNqh2U1kzYxREs65IJu' },
      {
        selector: 'div.mwuQiMOjmFJ5vmN6Vcqw.LQVY1Jpkk8nyJ6HBWKAk a.Rn_JXVtoPVAFyGkcaXyK',
        childSelector: 'span',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true
      }
    ]
    // Additional search engines can be defined here...
  };
  95.  
  // User-editable map from engine name to the hosts that run that engine.
  // getSearchEngine() walks this map in key order and returns the first
  // engine with a host matching the current page.
  // NOTE(review): the metadata header visible in this file has no @match for
  // duckduckgo.com, so that entry only takes effect if one is added.
  const searchEngines = {
    searx: ['searx.tiekoetter.com', 'search.disroot.org'],
    startpage: ['www.startpage.com'],
    brave: ['search.brave.com'],
    duckduckgo: ['duckduckgo.com']
    // ... more search engines
  };
  113.  
  /**
   * Rewrite result links for one search engine.
   *
   * For every selector rule registered for `engine`, each matching element
   * with an href is checked against every entry of `urlModificationRules`;
   * when an entry matches, the href is rewritten and, if the selector rule
   * asks for it, the element's text and/or a child's text is updated to show
   * the new URL. Engines without selector rules are ignored.
   *
   * @param {string} engine - key into `selectorRules`.
   */
  const modifyUrls = (engine) => {
    const engineRules = selectorRules[engine];
    if (!engineRules) {
      return;
    }

    // Text to display for a rewritten link: either the full new href or the
    // part returned by extractTopLevelDomain().
    const displayTextFor = (rule, href) => {
      if (rule.useTopLevelDomain) {
        return extractTopLevelDomain(href, rule.containProtocol);
      }
      return href;
    };

    for (const rule of engineRules) {
      for (const link of document.querySelectorAll(rule.selector)) {
        for (const { matchRegex, replaceWith } of urlModificationRules) {
          if (!link.href || !matchRegex.test(link.href)) {
            continue;
          }

          const rewritten = link.href.replace(matchRegex, replaceWith);
          link.href = rewritten;

          // Replace the anchor's own text when requested.
          if (rule.updateText) {
            link.textContent = displayTextFor(rule, rewritten);
          }

          // Replace the text of the designated child when requested.
          if (rule.updateChildText && rule.childSelector) {
            const urlLabel = link.querySelector(rule.childSelector);
            if (urlLabel) {
              urlLabel.textContent = displayTextFor(rule, rewritten);
            }
          }
        }
      }
    }
  };
  146.  
  /**
   * Extract the host part of a URL for display.
   *
   * Despite the name, this returns everything between the scheme and the
   * first '/', '?' or '#' (i.e. the whole host, not just the TLD). Stopping at
   * '?' and '#' as well as '/' keeps a query string or fragment out of the
   * result for URLs without a path, e.g. "https://example.com?q=1".
   *
   * @param {string} url - URL to shorten.
   * @param {boolean} containProtocol - when truthy the "http(s)://" prefix is
   *   kept in the result (and is required for a match); otherwise it is
   *   stripped if present.
   * @returns {string} the extracted host, or `url` unchanged when the
   *   pattern does not match.
   */
  const extractTopLevelDomain = (url, containProtocol) => {
    const hostPattern = containProtocol
      ? /^(https?:\/\/[^\/?#]+)/
      : /^(?:https?:\/\/)?([^\/?#]+)/;
    const matches = url.match(hostPattern);
    return matches ? matches[1] : url;
  };
  153.  
  /**
   * Determine which configured search engine the current page belongs to.
   *
   * Each host listed in `searchEngines` is compared with the page's
   * `location.host` and `location.hostname`. A listed host matches when it is
   * equal to the page host or when the page host is a subdomain of it. Plain
   * substring matching is deliberately avoided: it would also accept
   * unrelated hosts such as "duckduckgo.com.example.net" or
   * "notduckduckgo.com". Checking both `host` and `hostname` lets list
   * entries be written with or without a port.
   *
   * @returns {string|undefined} the first matching engine name in
   *   `searchEngines` key order, or undefined when nothing matches.
   */
  const getSearchEngine = () => {
    const pageHosts = [window.location.host, window.location.hostname];

    // True when `pageHost` is `instanceHost` itself or one of its subdomains.
    const belongsTo = (pageHost, instanceHost) =>
      pageHost === instanceHost || pageHost.endsWith('.' + instanceHost);

    for (const engine of Object.keys(searchEngines)) {
      const isInstance = searchEngines[engine].some(instanceHost =>
        pageHosts.some(pageHost => belongsTo(pageHost, instanceHost))
      );
      if (isInstance) {
        return engine;
      }
    }
    return undefined;
  };
  164.  
  // Entry point: identify the engine for this page and, if it is a known one,
  // rewrite the links that are already present, then keep rewriting whenever
  // nodes are added or removed anywhere under <body> (dynamically loaded
  // results).
  const currentEngine = getSearchEngine();

  if (currentEngine) {
    const rewriteLinks = () => modifyUrls(currentEngine);

    rewriteLinks();

    // Re-run on every child-list mutation in the body subtree.
    new MutationObserver(rewriteLinks).observe(document.body, {
      subtree: true,
      childList: true
    });
  }
  175. })();