URL Modifier for Search Engines

Modify URLs in search results of search engines

当前为 2024-01-11 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name URL Modifier for Search Engines
  3. // @namespace http://tampermonkey.net/
  4. // @version 2.0.1
  5. // @description Modify URLs in search results of search engines
  6. // @author Domenic
  7. // @match *://www.google.com/search?*q=*
  8. // @match *://search.disroot.org/search*
  9. // @match *://search.bus-hit.me/search*
  10. // @match *://search.inetol.net/search*
  11. // @match *://priv.au/search*
  12. // @match *://searx.be/search*
  13. // @match *://searxng.site/search*
  14. // @match *://search.hbubli.cc/search*
  15. // @match *://search.im-in.space/search*
  16. // @match *://opnxng.com/search*
  17. // @match *://search.upinmars.com/search*
  18. // @match *://search.sapti.me/search*
  19. // @match *://freesearch.club/search*
  20. // @match *://xo.wtf/search*
  21. // @match *://www.gruble.de/search*
  22. // @match *://searx.tuxcloud.net/search*
  23. // @match *://baresearch.org/search*
  24. // @match *://searx.daetalytica.io/search*
  25. // @match *://etsi.me/search*
  26. // @match *://search.leptons.xyz/search*
  27. // @match *://search.rowie.at/search*
  28. // @match *://search.mdosch.de/search*
  29. // @match *://searx.catfluori.de/search*
  30. // @match *://searx.si/search*
  31. // @match *://searx.namejeff.xyz/search*
  32. // @match *://search.itstechtime.com/search*
  33. // @match *://s.mble.dk/search*
  34. // @match *://searx.kutay.dev/search*
  35. // @match *://ooglester.com/search*
  36. // @match *://searx.ox2.fr/search*
  37. // @match *://searx.techsaviours.org/search*
  38. // @match *://searx.perennialte.ch/search*
  39. // @match *://s.trung.fun/search*
  40. // @match *://search.in.projectsegfau.lt/search*
  41. // @match *://search.projectsegfau.lt/search*
  42. // @match *://darmarit.org/searx/search*
  43. // @match *://searx.lunar.icu/search*
  44. // @match *://nyc1.sx.ggtyler.dev/search*
  45. // @match *://search.rhscz.eu/search*
  46. // @match *://paulgo.io/search*
  47. // @match *://northboot.xyz/search*
  48. // @match *://searx.zhenyapav.com/search*
  49. // @match *://searxng.ch/search*
  50. // @match *://copp.gg/search*
  51. // @match *://searx.sev.monster/search*
  52. // @match *://searx.oakleycord.dev/search*
  53. // @match *://searx.juancord.xyz/search*
  54. // @match *://searx.work/search*
  55. // @match *://search.ononoki.org/search*
  56. // @match *://search.demoniak.ch/search*
  57. // @match *://searx.cthd.icu/search*
  58. // @match *://searx.fmhy.net/search*
  59. // @match *://searx.headpat.exchange/search*
  60. // @match *://sex.finaltek.net/search*
  61. // @match *://search.gcomm.ch/search*
  62. // @match *://search.smnz.de/search*
  63. // @match *://searx.ankha.ac/search*
  64. // @match *://search.lvkaszus.pl/search*
  65. // @match *://searx.nobulart.com/search*
  66. // @match *://sx.t-1.org/search*
  67. // @match *://www.jabber-germany.de/searx/search*
  68. // @match *://sx.catgirl.cloud/search*
  69. // @match *://www.startpage.com/search*
  70. // @match *://www.startpage.com/sp/search*
  71. // @match *://search.brave.com/search*
  72. // @match *://duckduckgo.com
  73. // @match *://duckduckgo.com/?*q=*
  74. // @match *://metager.org/meta/meta.ger3*
  75. // @match *://metager.de/meta/meta.ger3*
  76. // @match *://www.mojeek.com/search?q=*
  77. // @match *://www.qwant.com/?q=*
  78. // @grant none
  79. // @run-at document-end
  80. // @license GPL-2.0-only
  81. // ==/UserScript==
  82.  
  83. // TODO: display a "goto original link" button.
  84.  
  85. (function() {
  86. 'use strict';
  87.  
  /**
   * Ordered table of link-rewriting rules.
   *
   * Each entry pairs a `matchRegex` (tested against a result link's href) with
   * a `replaceWith` template that is handed to `String.prototype.replace`, so
   * `$1`, `$2`, ... refer to the regex's capture groups.
   *
   * The rules are consulted in array order, so more specific patterns (for
   * example the `en`/`zh` Wikipedia rules) must stay ahead of the generic ones.
   */
  const urlModificationRules = [
    {
      matchRegex: /^https?:\/\/www\.reddit\.com(.*)/,
      replaceWith: 'https://old.reddit.com$1'
    },
    {
      matchRegex: /^https?:\/\/twitter\.com\/([A-Za-z_][\w]+)(\/status\/(\d+))?.*/,
      replaceWith: 'https://nitter.net/$1$2'
    },
    {
      matchRegex: /^https?:\/\/(?:www\.)?youtube\.com\/(@[\w-]+|watch\?v=[\w-]+|playlist\?list=[\w-]+)/,
      replaceWith: 'https://yewtu.be/$1'
      // replaceWith: 'https://piped.video/$1'
    },
    // {
    //   matchRegex: /^https?:\/\/stackoverflow\.com(\/questions\/\d+\/.*)/,
    //   replaceWith: 'https://code.whatever.social$1'
    // },
    {
      matchRegex: /^https?:\/\/(?:en\.?m?|simple)\.wikipedia\.org\/wiki\/(?!Special:Search)(.*)/,
      replaceWith: 'https://www.wikiwand.com/en/$1'
    },
    {
      matchRegex: /^https?:\/\/zh\.?m?\.wikipedia\.org\/(?:zh-hans|wiki)\/(.*)/,
      replaceWith: 'https://www.wikiwand.com/zh-hans/$1'
    },
    {
      matchRegex: /^https?:\/\/((?!test)[a-z]+)\.?m?\.wikipedia\.org\/(?:[a-z]+|wiki)\/(.*)/,
      replaceWith: 'https://www.wikiwand.com/$1/$2'
    },
    {
      matchRegex: /^https?:\/\/((?:(?:\w+\.)?medium|towardsdatascience)\.com\/.*)/,
      replaceWith: 'https://freedium.cfd/https://$1'
    },
    {
      // `$1` already carries the optional "a/" album prefix, so the template
      // must not add another "/a/" (that produced ".../a/a/<id>" for albums
      // and pushed single images into the album route).
      matchRegex: /^https?:\/\/imgur\.com\/(a\/)?((?!gallery)\w+)/,
      replaceWith: 'https://rimgo.totaldarkness.net/$1$2'
    },
    {
      matchRegex: /^https?:\/\/www\.npr\.org\/(?:\d{4}\/\d{2}\/\d{2}|sections)\/(?:[A-Za-z-]+\/\d{4}\/\d{2}\/\d{2}\/)?(\d+)\/.*/,
      replaceWith: 'https://text.npr.org/$1'
    },
    {
      matchRegex: /^https?:\/\/(?:m|www)\.imdb\.com(.*)/,
      replaceWith: 'https://ld.vern.cc$1'
    },
    {
      matchRegex: /^https?:\/\/(?:[a-z]+)\.slashdot\.org(.*)/,
      replaceWith: 'https://slashdot.org$1'
    },
    {
      matchRegex: /^https?:\/\/(?:(?:.*)arxiv\.org\/pdf|arxiv-export-lb\.library\.cornell\.edu\/(?:pdf|abs))\/(\d{4}\.\d{4,5}(v\d)?)(?:.*)/,
      replaceWith: 'https://arxiv.org/abs/$1'
    },
    {
      // Drop the slash after the document id only when it ends the path.
      // Without the lookahead, ".../document/123/authors" was rewritten to
      // the broken ".../document/123authors".
      matchRegex: /^https?:\/\/(ieeexplore\.ieee\.org\/document\/\d+)\/(?=$|[?#])/,
      replaceWith: 'https://$1'
    },
    {
      matchRegex: /^https?:\/\/github\.ink\/(.*)/,
      replaceWith: 'https://github.com/$1'
    }
    // Add more rules here as needed
  ];
  153.  
  /**
   * Per-engine selector rules, keyed by the engine names used in
   * `searchEngines`.
   *
   * Fields read by the functions in this script:
   *   - selector:          CSS selector for the result links to rewrite.
   *   - hasSubResults / subResultSelector: extra selector for nested results;
   *                        only used when both are set.
   *   - updateText:        also rewrite the text of the link element itself.
   *   - updateChildText / childSelector: rewrite the text of a descendant of
   *                        the link instead.
   *   - multiElementsForUrlDisplay: the visible URL is split across two
   *                        elements matched by `childSelector`.
   *   - containProtocol:   keep the protocol in the two-element display.
   *   - displayMethod:     1 = breadcrumb style, 2 = full URL,
   *                        3 = decoded URL without protocol.
   *
   * NOTE(review): `useTopLevelDomain` is set on several rules but is not read
   * by any function visible in this script.
   */
  const selectorRules = {
    google: [
      {
        selector: 'div.yuRUbf div span a',
        childSelector: 'div.byrV5b cite',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true,
        displayMethod: 1
      },
      // Sub-results nested under a main result
      { hasSubResults: true, subResultSelector: 'table tr h3 a' }
      // ... [Other rules for Google]
    ],

    searx: [
      {
        selector: 'article a.url_wrapper',
        childSelector: 'span span',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true,
        displayMethod: 1,
        multiElementsForUrlDisplay: true
      },
      { selector: 'h3 a' }
    ],

    startpage: [
      {
        selector: 'a.w-gl__result-url.result-link',
        updateText: true,
        displayMethod: 2
      },
      { selector: 'a.w-gl__result-title.result-link' }
    ],

    brave: [
      {
        selector: 'a.h.svelte-1dihpoi',
        childSelector: 'cite.snippet-url.svelte-1ygzem6 span',
        updateChildText: true,
        containProtocol: false,
        displayMethod: 1,
        multiElementsForUrlDisplay: true
      }
    ],

    duckduckgo: [
      { selector: 'a.eVNpHGjtxRBq_gLOfGDr.LQNqh2U1kzYxREs65IJu' },
      {
        selector: 'a.Rn_JXVtoPVAFyGkcaXyK',
        childSelector: 'span',
        updateChildText: true,
        containProtocol: true,
        displayMethod: 1,
        multiElementsForUrlDisplay: true
      },
      // Sub-results listed under a DuckDuckGo result
      {
        hasSubResults: true,
        subResultSelector: 'ul.b269SZlC2oyR13Fcc4Iy li a.f3uDrYrWF3Exrfp1m3Og'
      }
    ],

    qwant: [
      { selector: 'div._35zId._3A7p7.RMB_d.eoseI a.external' },
      {
        selector: 'div._35zId._3WA-c a.external',
        childSelector: 'span',
        updateChildText: true,
        containProtocol: false,
        displayMethod: 1,
        multiElementsForUrlDisplay: true
      },
      // Sub-results listed under a Qwant result
      {
        hasSubResults: true,
        subResultSelector: 'div._12BMd div._2-LMx._2E8gc._16lFV.Ks7KS.tCpbb.m_hqb a.external'
      }
    ],

    metager: [
      { selector: 'h2.result-title a' },
      {
        selector: 'div.result-subheadline a',
        updateText: true,
        containProtocol: false,
        displayMethod: 3
      }
    ],

    mojeek: [
      {
        selector: 'li a.ob',
        childSelector: 'span.url',
        updateChildText: true,
        useTopLevelDomain: true,
        containProtocol: true,
        displayMethod: 1
      }
      // ... [Other rules for Mojeek]
    ]
    // Additional search engines can be defined here...
  };
  263.  
  /**
   * User-editable registry of supported search engines.
   *
   * For every engine key:
   *   - hosts: host names of the instances that belong to this engine.
   *   - resultContainerSelectors (optional): CSS selectors of the containers
   *     that hold the search results. Delete the property if you do not want
   *     to set it; the default value is 'body'.
   */
  const searchEngines = {
    google: {
      hosts: ['www.google.com'],
      resultContainerSelectors: ['div.GyAeWb#rcnt']
    },

    searx: {
      hosts: [
        'search.disroot.org',
        'search.bus-hit.me',
        'search.inetol.net',
        'priv.au',
        'searx.be',
        'searxng.site',
        'search.hbubli.cc',
        'search.im-in.space',
        'opnxng.com',
        'search.upinmars.com',
        'search.sapti.me',
        'freesearch.club',
        'xo.wtf',
        'www.gruble.de',
        'searx.tuxcloud.net',
        'baresearch.org',
        'searx.daetalytica.io',
        'etsi.me',
        'search.leptons.xyz',
        'search.rowie.at',
        'search.mdosch.de',
        'searx.catfluori.de',
        'searx.si',
        'searx.namejeff.xyz',
        'search.itstechtime.com',
        's.mble.dk',
        'searx.kutay.dev',
        'ooglester.com',
        'searx.ox2.fr',
        'searx.techsaviours.org',
        'searx.perennialte.ch',
        's.trung.fun',
        'search.in.projectsegfau.lt',
        'search.projectsegfau.lt',
        'darmarit.org',
        'searx.lunar.icu',
        'nyc1.sx.ggtyler.dev',
        'search.rhscz.eu',
        'paulgo.io',
        'northboot.xyz',
        'searx.zhenyapav.com',
        'searxng.ch',
        'copp.gg',
        'searx.sev.monster',
        'searx.oakleycord.dev',
        'searx.juancord.xyz',
        'searx.work',
        'search.ononoki.org',
        'search.demoniak.ch',
        'searx.cthd.icu',
        'searx.fmhy.net',
        'searx.headpat.exchange',
        'sex.finaltek.net',
        'search.gcomm.ch',
        'search.smnz.de',
        'searx.ankha.ac',
        'search.lvkaszus.pl',
        'searx.nobulart.com',
        'sx.t-1.org',
        'www.jabber-germany.de',
        'sx.catgirl.cloud'
      ],
      resultContainerSelectors: [
        'main#main_results'
        // 'maindiv#main_results div#urls'
        // 'div#sidebar div#infoboxes'
      ]
    },

    startpage: {
      hosts: ['www.startpage.com'],
      resultContainerSelectors: [
        'div.show-results'
        // 'div.sidebar-results'
      ]
    },

    brave: {
      hosts: ['search.brave.com'],
      resultContainerSelectors: [
        'main.main-column'
        // 'aside.sidebar'
      ]
    },

    duckduckgo: {
      hosts: ['duckduckgo.com'],
      resultContainerSelectors: [
        'section[data-testid="mainline"][data-area="mainline"]'
        // 'section[data-testid="sidebar"][data-area="sidebar"]'
      ]
    },

    qwant: {
      hosts: ['qwant.com'],
      resultContainerSelectors: ['div._35zId']
    },

    metager: {
      hosts: ['metager.org', 'metager.de'],
      resultContainerSelectors: ['div#results']
    },

    // No container selectors: the 'body' default applies.
    mojeek: {
      hosts: ['mojeek.com']
    }
    // ... more search engines
  };
  382.  
  /**
   * Apply every selector rule registered for `engine` to the current page.
   *
   * For each rule the main `selector` is processed first, then the
   * `subResultSelector` when the rule sets both `hasSubResults` and
   * `subResultSelector`. Rules that only describe sub-results have no
   * `selector`; they are skipped for the main pass instead of querying the
   * document with `undefined`.
   *
   * Any error raised while processing is logged and swallowed so the host
   * page keeps working.
   *
   * @param {string} engine - Key into `selectorRules` (e.g. 'google').
   */
  const modifyUrls = (engine) => {
    try {
      const rules = selectorRules[engine];
      if (!rules) {
        return;
      }

      for (const rule of rules) {
        // Main results
        if (rule.selector) {
          processElements(rule.selector, rule, engine);
        }

        // Sub-results, if the rule declares them
        if (rule.hasSubResults && rule.subResultSelector) {
          processElements(rule.subResultSelector, rule, engine);
        }
      }
    } catch (error) {
      console.error("URL Modifier Script Error: ", error);
    }
  };
  402.  
  /**
   * Rewrite the href of every element matched by `selector`.
   *
   * All entries of `urlModificationRules` are tried in order against the
   * element's *current* href, so a rule further down the table sees the
   * result of an earlier rewrite. After each rewrite the visible text is
   * refreshed through `updateTextContent`.
   *
   * @param {string} selector - CSS selector passed to `document.querySelectorAll`.
   * @param {object} rule - Selector rule forwarded to `updateTextContent`.
   * @param {string} engine - Engine key (not used by this function).
   */
  const processElements = (selector, rule, engine) => {
    for (const link of document.querySelectorAll(selector)) {
      for (const urlRule of urlModificationRules) {
        const currentHref = link.href;
        if (!currentHref || !urlRule.matchRegex.test(currentHref)) {
          continue;
        }

        const rewrittenHref = currentHref.replace(urlRule.matchRegex, urlRule.replaceWith);
        link.href = rewrittenHref;
        updateTextContent(link, rule, rewrittenHref);
      }
    }
  };
  418.  
  /**
   * Refresh the URL text shown for a rewritten link, if the rule asks for it.
   *
   * Nothing happens unless the rule sets `updateText`, or sets both
   * `updateChildText` and `childSelector`. Rules flagged with
   * `multiElementsForUrlDisplay` are delegated to
   * `updateDoubleElementContent`; all others go to `updateSingleElementText`,
   * targeting the `childSelector` descendant when one is configured and the
   * link element itself otherwise.
   *
   * @param {Element} element - The rewritten link.
   * @param {object} rule - Selector rule that matched the link.
   * @param {string} newUrl - The rewritten URL.
   */
  const updateTextContent = (element, rule, newUrl) => {
    const wantsChildUpdate = rule.updateChildText && rule.childSelector;
    if (!rule.updateText && !wantsChildUpdate) {
      return;
    }

    // URL text spread over two elements
    if (rule.multiElementsForUrlDisplay) {
      updateDoubleElementContent(element, rule, newUrl);
      return;
    }

    // URL text held by a single element
    let targetElement = element;
    if (rule.childSelector) {
      targetElement = element.querySelector(rule.childSelector);
    }
    updateSingleElementText(targetElement, rule, newUrl);
  };
  432.  
  /**
   * Empty an element's text content; a missing element is ignored.
   *
   * @param {?Element} element - Element to clear, or a falsy value.
   */
  const clearElementContent = (element) => {
    if (!element) {
      return;
    }
    element.textContent = '';
  };
  439.  
  /**
   * Update the visible URL when it is split across two elements.
   *
   * The URL is formatted as a breadcrumb (at most 70 characters). The first
   * breadcrumb part goes into the first element matched by
   * `rule.childSelector`, and the remaining parts, prefixed with ' › ', go
   * into the second one. Every matched element is emptied first, so any
   * additional matches end up blank.
   *
   * When the URL has nothing after the host, the second element is left
   * empty rather than showing a dangling ' › ' separator.
   *
   * @param {Element} element - The rewritten link.
   * @param {object} rule - Selector rule; `childSelector` and
   *   `containProtocol` are read here.
   * @param {string} newUrl - The rewritten URL.
   */
  const updateDoubleElementContent = (element, rule, newUrl) => {
    // Drop the protocol when the rule says the display does not contain it
    const displayUrl = rule.containProtocol ? newUrl : removeProtocol(newUrl);

    const [firstPart, ...otherParts] = formatMethod1(displayUrl, 70).split(' › ');

    const spans = element.querySelectorAll(rule.childSelector);
    if (spans.length < 2) {
      console.error("Script: Expected structure not found for Double Element URL update!");
      return;
    }

    spans.forEach((span) => {
      span.textContent = '';
    });
    spans[0].textContent = firstPart;

    const remainder = otherParts.join(' › ');
    spans[1].textContent = remainder ? ' › ' + remainder : '';
  };
  459.  
  /**
   * Replace the text of a single element with the rewritten URL.
   *
   * The text format is chosen by `rule.displayMethod`:
   *   1 - breadcrumb style via `formatMethod1` (limited to `rule.maxLength`),
   *   2 - the full URL including the protocol,
   *   3 - the percent-decoded URL without the protocol.
   * Any other value falls back to the full URL instead of blanking the
   * element.
   *
   * The new text is computed before the element is touched, and a URL with
   * malformed percent-escapes is shown undecoded rather than throwing, so one
   * odd link cannot abort the rewrite pass for the rest of the page.
   *
   * @param {?Element} targetElement - Element whose text is replaced; an
   *   error is logged when it is missing.
   * @param {object} rule - Selector rule; `displayMethod` and `maxLength`
   *   are read here.
   * @param {string} newUrl - The rewritten URL.
   */
  const updateSingleElementText = (targetElement, rule, newUrl) => {
    if (!targetElement) {
      console.error("Script: Expected element not found for Single Element URL update!");
      return;
    }

    let formattedUrl;
    switch (rule.displayMethod) {
      case 1:
        formattedUrl = formatMethod1(newUrl, rule.maxLength);
        break;
      case 3: {
        // Full URL without protocol
        const withoutProtocol = removeProtocol(newUrl);
        try {
          formattedUrl = decodeURIComponent(withoutProtocol);
        } catch (error) {
          if (!(error instanceof URIError)) {
            throw error;
          }
          formattedUrl = withoutProtocol;
        }
        break;
      }
      case 2:
      default:
        // Full URL with protocol
        formattedUrl = newUrl;
        break;
    }

    // Assigning textContent replaces all existing children in one step.
    targetElement.textContent = formattedUrl;
  };
  481.  
  /**
   * Format a URL in breadcrumb style: "https://host › path › to › page".
   *
   * A leading "http://" or "https://" stays attached to the host; the rest
   * of the URL is split on '/' and joined with ' › '. Empty segments (for
   * example from a trailing slash) are kept. Every segment is
   * percent-decoded individually, and a segment with malformed escapes is
   * left as-is instead of throwing.
   *
   * Decoding happens *before* truncation so that the length limit applies to
   * the text that is actually displayed and a cut can never land in the
   * middle of a percent-escape.
   *
   * @param {string} url - URL to format, with or without protocol.
   * @param {number} [maxLength] - Maximum length of the result; longer text
   *   is cut and ends with '...'. No limit is applied when omitted.
   * @returns {string} The breadcrumb-formatted URL.
   */
  const formatMethod1 = (url, maxLength) => {
    const safeDecode = (segment) => {
      try {
        return decodeURIComponent(segment);
      } catch (error) {
        if (!(error instanceof URIError)) {
          throw error;
        }
        return segment;
      }
    };

    // Keep the protocol glued to the host so its slashes are not split
    const protocolMatch = /^https?:\/\//.exec(url);
    const protocol = protocolMatch ? protocolMatch[0] : '';

    const parts = url.slice(protocol.length).split('/').map(safeDecode);
    parts[0] = protocol + parts[0];

    const joinedUrl = parts.join(' › ');
    if (joinedUrl.length > maxLength) {
      // Reserve space for '...'
      return joinedUrl.slice(0, Math.max(0, maxLength - 3)) + '...';
    }
    return joinedUrl;
  };
  500.  
  /**
   * Strip a leading "http://" or "https://" from a URL.
   *
   * @param {string} url - URL to strip.
   * @returns {string} The URL without its protocol prefix; unchanged when it
   *   does not start with one.
   */
  const removeProtocol = (url) => url.replace(/^https?:\/\//, '');
  504.  
  /**
   * Work out which configured search engine the current page belongs to.
   *
   * The page's hostname must either equal one of an engine's `hosts` or be a
   * subdomain of it (so 'www.qwant.com' matches the entry 'qwant.com'). A
   * plain substring test is deliberately not used: it would also accept
   * unrelated hosts that merely contain a configured name.
   *
   * @returns {{engine: string, selectors: string[]}|undefined} The engine key
   *   and its result-container selectors (['body'] when none are
   *   configured), or undefined when no engine matches or an error occurs.
   */
  const getSearchEngineInfo = () => {
    try {
      // hostname, not host: a port suffix must not defeat the suffix match
      const hostname = window.location.hostname;
      const isInstanceHost = (instanceHost) =>
        hostname === instanceHost || hostname.endsWith('.' + instanceHost);

      for (const engine of Object.keys(searchEngines)) {
        const config = searchEngines[engine];
        if (config.hosts.some(isInstanceHost)) {
          return {
            engine,
            // Default to 'body' if not specified
            selectors: config.resultContainerSelectors || ['body']
          };
        }
      }
    } catch (error) {
      console.error("Error determining search engine: ", error);
    }
    return undefined;
  };
  522.  
  /**
   * Rewrite links now and keep rewriting them as results change.
   *
   * For every container matched by `selector`, the engine's rules are applied
   * once immediately, and a MutationObserver re-applies them whenever nodes
   * are added to or removed from the container's subtree.
   *
   * @param {string} engine - Engine key understood by `modifyUrls`
   *   (e.g. 'google').
   * @param {string} selector - CSS selector of the result container(s).
   */
  const observeToExecute = (engine, selector) => {
    // querySelectorAll always returns a list, so an empty result simply
    // means there is nothing to iterate.
    document.querySelectorAll(selector).forEach((resultContainer) => {
      // `engine` is already the key string; the initial pass must use it
      // directly (reading `.engine` from it yields undefined and the pass
      // silently does nothing).
      modifyUrls(engine);

      // Observe changes in each result container
      const observer = new MutationObserver(() => modifyUrls(engine));
      observer.observe(resultContainer, { childList: true, subtree: true });
    });
  };
  534.  
  // Entry point: detect the engine for the current page and start rewriting
  // links inside each of its result containers. Pages that match no
  // configured engine are left untouched.
  try {
    const engineInfo = getSearchEngineInfo();
    if (engineInfo) {
      for (const containerSelector of engineInfo.selectors) {
        observeToExecute(engineInfo.engine, containerSelector);
      }
    }
  } catch (error) {
    console.error("Error executing URL Modifier Script: ", error);
  }
  546. })();