URL Modifier for Search Engines

Modify URLs in search results of search engines

当前为 2024-01-01 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name URL Modifier for Search Engines
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.7.1
  5. // @description Modify URLs in search results of search engines
  6. // @author Domenic
  7. // @match *://searx.tiekoetter.com/search*
  8. // @match *://search.disroot.org/search*
  9. // @match *://www.startpage.com/search*
  10. // @match *://www.startpage.com/sp/search*
  11. // @match *://search.brave.com/search*
  12. // @match *://duckduckgo.com
  13. // @match *://duckduckgo.com/?q=*
  14. // @grant none
  15. // @run-at document-end
  16. // @license GPL-2.0-only
  17. // ==/UserScript==
  18.  
  19. (function() {
  20. 'use strict';
  21.  
  // URL rewrite rules.
  // Each rule's `matchRegex` is tested against a result link's href; when it
  // matches, the matched portion of the href is replaced with `replaceWith`,
  // which may reference capture groups ($1, $2, ...). Any part of the href
  // that the regex does not consume is kept as-is after the replacement.
  const urlModificationRules = [
    {
      // www.reddit.com -> old.reddit.com, keeping path and query.
      matchRegex: /^https?:\/\/www\.reddit\.com(.*)/,
      replaceWith: 'https://old.reddit.com$1'
    },
    {
      // English / Simple English Wikipedia articles -> Wikiwand (en).
      // Dots are escaped so that only the literal host names match.
      // Special:Search pages are deliberately left alone.
      matchRegex: /^https?:\/\/(en(\.m)?|simple)\.wikipedia\.org\/wiki\/(?!Special:Search)(\w+)/,
      replaceWith: 'https://www.wikiwand.com/en/$3'
    },
    {
      // Chinese Wikipedia (desktop or mobile) -> Wikiwand (zh-hans).
      matchRegex: /^https?:\/\/zh(\.m)?\.wikipedia\.org\/(zh-hans|wiki)\/(.*)/,
      replaceWith: 'https://www.wikiwand.com/zh-hans/$3'
    },
    {
      // medium.com and its subdomains -> Freedium proxy.
      matchRegex: /^https?:\/\/((\w+\.)?medium\.com\/.*)/,
      replaceWith: 'https://freedium.cfd/https://$1'
    },
    {
      // arXiv PDF links (arxiv.org or any of its subdomains) and the Cornell
      // export mirror -> the canonical abstract page.
      // - The host prefix only accepts dot-terminated labels, so unrelated
      //   hosts that merely end in "arxiv.org" (e.g. notarxiv.org) are ignored.
      // - `v\d+` keeps multi-digit version suffixes intact (v10, v12, ...).
      // Capture group 4 is the paper id, including the optional version.
      matchRegex: /^https?:\/\/(((?:[\w-]+\.)*)arxiv\.org\/pdf|arxiv-export-lb\.library\.cornell\.edu\/(pdf|abs))\/(\d{4}\.\d{4,5}(v\d+)?)(.*)/,
      replaceWith: 'https://arxiv.org/abs/$4'
    },
    {
      // IEEE Xplore: drop the trailing slash after the document id.
      // The lookahead restricts this to a slash that ends the path; without
      // it, ".../document/123/authors" would be mangled to ".../document/123authors".
      matchRegex: /^https?:\/\/(ieeexplore\.ieee\.org\/document\/\d+)\/(?=[?#]|$)/,
      replaceWith: 'https://$1'
    }
    // Add more rules here as needed
  ];
  50.  
  // Per-engine link selectors, keyed by the engine names used in `searchEngines`.
  // Each entry describes one kind of result link and how `modifyUrls` treats it:
  //   selector          - CSS selector for the anchor elements to rewrite
  //   updateText        - also replace the anchor's own text with the new URL
  //   childSelector     - descendant whose text shows the URL
  //   updateChildText   - replace that descendant's text (needs childSelector)
  //   useTopLevelDomain - show the result of extractTopLevelDomain() instead of
  //                       the full rewritten URL
  //   containProtocol   - forwarded to extractTopLevelDomain(): keep "https://"
  const selectorRules = {
    searx: [
      {
        selector: 'article a.url_wrapper',
        childSelector: '.url_i1',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true,
      },
      { selector: 'h3 a' },
    ],

    startpage: [
      { selector: 'a.w-gl__result-url.result-link', updateText: true },
      { selector: 'a.w-gl__result-title.result-link' },
    ],

    brave: [
      {
        selector: 'a.h.svelte-1dihpoi',
        childSelector: 'cite.snippet-url.svelte-1ygzem6 span.netloc.text-small-bold.svelte-1ygzem6',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: false,
      },
    ],

    duckduckgo: [
      { selector: 'a.eVNpHGjtxRBq_gLOfGDr.LQNqh2U1kzYxREs65IJu' },
      {
        selector: 'div.mwuQiMOjmFJ5vmN6Vcqw.LQVY1Jpkk8nyJ6HBWKAk a.Rn_JXVtoPVAFyGkcaXyK',
        childSelector: 'span',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true,
      },
    ],
    // Additional search engines can be defined here...
  };
  97.  
  // Host names of the supported search engine instances, grouped by engine.
  // The keys must match the keys of `selectorRules`; `getSearchEngine` returns
  // the first key whose list contains a host found in the current page's host.
  const searchEngines = {
    searx: ['searx.tiekoetter.com', 'search.disroot.org'],
    startpage: ['www.startpage.com'],
    brave: ['search.brave.com'],
    duckduckgo: ['duckduckgo.com'],
    // ... more search engines
  };
  115.  
  /**
   * Rewrite result links on the current page for one search engine.
   *
   * For every selector rule registered for `engine`, each matching element
   * is checked against every entry of `urlModificationRules` in order. When a
   * rule's regex matches the element's href, the href is rewritten and, if
   * the selector rule asks for it, the visible text of the element
   * (`updateText`) or of a descendant (`updateChildText` + `childSelector`)
   * is replaced with the new URL or with its scheme/host part.
   *
   * @param {string} engine - Key into `selectorRules`; unknown keys are a no-op.
   * @returns {void}
   */
  const modifyUrls = (engine) => {
    const engineRules = selectorRules[engine];
    if (!engineRules) {
      return;
    }

    for (const rule of engineRules) {
      for (const link of document.querySelectorAll(rule.selector)) {
        for (const urlRule of urlModificationRules) {
          // Skip elements without an href and hrefs this rule does not cover.
          if (!link.href || !urlRule.matchRegex.test(link.href)) {
            continue;
          }

          const rewritten = link.href.replace(urlRule.matchRegex, urlRule.replaceWith);
          link.href = rewritten;

          // Text to display for the rewritten link; evaluated only when needed.
          const displayText = () =>
            rule.useTopLevelDomain
              ? extractTopLevelDomain(rewritten, rule.containProtocol)
              : rewritten;

          if (rule.updateText) {
            link.textContent = displayText();
          }

          if (rule.updateChildText && rule.childSelector) {
            const urlLabel = link.querySelector(rule.childSelector);
            if (urlLabel) {
              urlLabel.textContent = displayText();
            }
          }
        }
      }
    }
  };
  148.  
  /**
   * Return the leading host part of a URL (everything up to the first "/"
   * after the scheme), for use as link display text.
   *
   * @param {string} url - URL to shorten.
   * @param {boolean} containProtocol - When truthy, the result keeps the
   *   "http://" / "https://" prefix (and the URL must start with one to
   *   match); otherwise an optional scheme prefix is stripped.
   * @returns {string} The extracted part, or `url` unchanged if nothing matches.
   */
  const extractTopLevelDomain = (url, containProtocol) => {
    const withScheme = /^(https?:\/\/[^\/]+)/;
    const withoutScheme = /^(?:https?:\/\/)?([^\/]+)/;

    const found = url.match(containProtocol ? withScheme : withoutScheme);
    if (!found) {
      return url;
    }
    return found[1];
  };
  155.  
  /**
   * Identify which configured search engine the current page belongs to.
   *
   * Compares `window.location.host` with every instance host listed in
   * `searchEngines`. The comparison is a substring check, so a page host that
   * merely contains a configured host also counts as a match.
   *
   * @returns {string|undefined} The first matching engine key, or `undefined`
   *   when no configured host is found in the current host.
   */
  const getSearchEngine = () => {
    const currentHost = window.location.host;

    return Object.keys(searchEngines).find((engineName) =>
      searchEngines[engineName].some((instanceHost) => currentHost.includes(instanceHost))
    );
  };
  166.  
  // Entry point: do nothing unless the page belongs to a configured engine.
  const currentEngine = getSearchEngine();

  if (currentEngine) {
    const rewriteLinks = () => modifyUrls(currentEngine);

    // Rewrite the links that are already on the page...
    rewriteLinks();

    // ...and rewrite again whenever nodes are added or removed anywhere under
    // <body>, so results inserted later are handled as well.
    const observer = new MutationObserver(rewriteLinks);
    observer.observe(document.body, { childList: true, subtree: true });
  }
  177. })();