Google Search Subdomain Extractor

Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。

  1. // ==UserScript==
  2. // @name Google Search Subdomain Extractor
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.4.3
  5. // @description Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。
  6. // @author 特让他也让
  7. // @match https://*.google.com/search*
  8. // @connect 127.0.0.1
  9. // @icon https://www.google.com/favicon.ico
  10. // @connect localhost
  11. // @grant GM_xmlhttpRequest
  12. // @grant GM_log
  13. // @run-at document-idle
  14. // @license GPL-3.0
  15. // ==/UserScript==
  16.  
  17. /*
  18. * Google Search Subdomain Extractor
  19. * Copyright (C) 2025 特让他也让
  20. *
  21. * This program is free software: you can redistribute it and/or modify
  22. * it under the terms of the GNU General Public License as published by
  23. * the Free Software Foundation, either version 3 of the License, or
  24. * (at your option) any later version.
  25. *
  26. * This program is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU General Public License
  32. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  33. */
  34.  
  35. (function () {
  36. "use strict";
  37.  
  // ---- Configuration -------------------------------------------------------

  /** Quiet period (ms) after the most recent DOM mutation before extraction runs. */
  const DEBOUNCE_DELAY = 700;
  /** Endpoint that receives the hostname list as a JSON POST body. */
  const PYTHON_SERVER_URL = "http://127.0.0.1:5123/save_subdomains";
  /** Tag prepended to this script's console messages. */
  const SCRIPT_PREFIX = "[Subdomain Extractor]";

  // CSS strings consumed by the `%c` directive in console messages.
  const STYLE_TITLE = "color: #1a73e8; font-weight: bold; font-size: 1.1em;";
  const STYLE_COUNT = "color: #1e8e3e; font-weight: bold;";
  const STYLE_INFO = "color: #5f6368;";
  const STYLE_HOSTNAME = "color: #202124;";
  const STYLE_SERVER_OK = "color: #1e8e3e;";
  const STYLE_SERVER_ERR = "color: #d93025; font-weight: bold;";

  // ---- Mutable state -------------------------------------------------------

  /** Handle of the pending debounced extraction timer, or null before the first one. */
  let extractionTimeoutId = null;
  /** Handle of the pending "send to server" timer, or null before the first one. */
  let serverSendTimeoutId = null;
  /**
   * Every hostname seen so far on this page. The binding is never reassigned
   * (the Set is only added to), hence `const`.
   */
  const foundHostnames = new Set();
  51.  
  /**
   * POSTs the given hostnames to PYTHON_SERVER_URL as `{ "hostnames": [...] }`
   * and logs the outcome to the console.
   *
   * Does nothing when the argument is not an array or is empty. Every failure
   * mode (network error, timeout, unparseable body, non-success reply) is
   * reported through `console.error`; nothing is thrown or returned.
   *
   * @param {string[]} hostnamesArray - Hostnames to submit.
   * @returns {void}
   */
  function sendHostnamesToServer(hostnamesArray) {
    if (!Array.isArray(hostnamesArray) || hostnamesArray.length === 0) return;

    GM_log(
      `%c${SCRIPT_PREFIX} Attempting to send ${hostnamesArray.length} hostnames to server...`,
      STYLE_INFO
    );

    GM_xmlhttpRequest({
      method: "POST",
      url: PYTHON_SERVER_URL,
      headers: { "Content-Type": "application/json" },
      data: JSON.stringify({ hostnames: hostnamesArray }),
      timeout: 5000,
      onload: function (response) {
        let parsed;
        try {
          parsed = JSON.parse(response.responseText);
        } catch (e) {
          console.error(
            `%c${SCRIPT_PREFIX} Failed to parse server response:`,
            STYLE_SERVER_ERR,
            response.responseText,
            e
          );
          return;
        }

        // Valid JSON can still be null or a primitive; treat that as an empty
        // reply so it is reported as a server error, not as a parse failure.
        const result =
          parsed !== null && typeof parsed === "object" ? parsed : {};

        if (response.status === 200 && result.status === "success") {
          console.log(
            `%c${SCRIPT_PREFIX} Server Response: OK - Received ${result.received}, Added ${result.newly_added} new, Total ${result.total_saved}`,
            STYLE_SERVER_OK
          );
          return;
        }

        // Fall back to the numeric status when neither the reply nor the
        // status line carries any text.
        const reason =
          result.message || response.statusText || `HTTP ${response.status}`;
        console.error(
          `%c${SCRIPT_PREFIX} Server Error: ${reason}`,
          STYLE_SERVER_ERR,
          response
        );
      },
      onerror: function (response) {
        console.error(
          `%c${SCRIPT_PREFIX} Network Error: Could not connect to server at ${PYTHON_SERVER_URL}. Is it running?`,
          STYLE_SERVER_ERR,
          response
        );
      },
      ontimeout: function () {
        console.error(
          `%c${SCRIPT_PREFIX} Timeout: No response from server at ${PYTHON_SERVER_URL}.`,
          STYLE_SERVER_ERR
        );
      },
    });
  }
  107.  
  /**
   * Derives a lowercase hostname from the visible text of a `<cite>` element.
   *
   * @param {string} citeText - Raw text content of the element.
   * @returns {string|null} The hostname, or null when the text is empty, has
   *   no dot and no http(s) scheme, or cannot be parsed as a URL.
   */
  function hostnameFromCiteText(citeText) {
    const trimmed = citeText.trim();
    if (!trimmed) return null;

    // Keep only the part before the " › " breadcrumb separator / " ..." ellipsis.
    let candidate = trimmed.split(" › ")[0].split(" ...")[0].trim();

    // Test for an actual scheme ("http:" / "https:", any case) rather than the
    // bare prefix "http", so hostnames such as "httpbin.org" still get a
    // scheme prepended and "HTTP://host" is not prefixed a second time.
    if (!/^https?:/i.test(candidate)) {
      if (!candidate.includes(".")) return null;
      candidate = `https://${candidate}`;
    }

    try {
      return new URL(candidate).hostname.toLowerCase() || null;
    } catch (e) {
      // Best effort: cite text that is not URL-like is skipped on purpose.
      return null;
    }
  }

  /**
   * Scans every `<cite>` element in the document, adds any hostnames found to
   * `foundHostnames`, logs a summary to the console and, when at least one
   * hostname is known, schedules the full sorted list to be sent to the server.
   *
   * @returns {void}
   */
  function extractAndLogSubdomains() {
    console.log(`%c${SCRIPT_PREFIX} Running extraction...`, STYLE_INFO);
    const initialSize = foundHostnames.size;

    document.querySelectorAll("cite").forEach((cite) => {
      const hostname = hostnameFromCiteText(cite.textContent);
      if (hostname) {
        foundHostnames.add(hostname);
      }
    });

    const newlyFoundCount = foundHostnames.size - initialSize;

    console.groupCollapsed(
      `%c${SCRIPT_PREFIX} Extraction Complete`,
      STYLE_TITLE
    );
    if (newlyFoundCount > 0) {
      console.log(
        `%cFound ${newlyFoundCount} new unique hostnames this pass.`,
        STYLE_INFO
      );
    } else if (foundHostnames.size > 0) {
      console.log(`%cNo new unique hostnames found this pass.`, STYLE_INFO);
    }

    if (foundHostnames.size > 0) {
      console.log(
        `%cTotal unique hostnames found (client-side): ${foundHostnames.size}`,
        STYLE_COUNT
      );
      console.log("--------------------");
      const sortedHostnames = Array.from(foundHostnames).sort();
      sortedHostnames.forEach((hostname) =>
        console.log(`%c ${hostname}`, STYLE_HOSTNAME)
      );
      console.log("--------------------");

      // Restart the short send timer so back-to-back passes produce a single
      // request. The complete list is sent each time, not just new entries.
      clearTimeout(serverSendTimeoutId);
      serverSendTimeoutId = setTimeout(() => {
        sendHostnamesToServer(sortedHostnames);
      }, 200);
    } else {
      console.log(`%cNo hostnames found yet.`, STYLE_INFO);
    }
    console.groupEnd();
  }
  169.  
  /**
   * Restarts the extraction countdown: cancels whatever extraction timer is
   * pending and arms a new one that fires after DEBOUNCE_DELAY milliseconds.
   *
   * @returns {void}
   */
  function debounceExtract() {
    const pendingTimer = extractionTimeoutId;
    clearTimeout(pendingTimer);

    const freshTimer = setTimeout(extractAndLogSubdomains, DEBOUNCE_DELAY);
    extractionTimeoutId = freshTimer;
  }
  174.  
  // Start-up: watch <body> for added/removed nodes anywhere in its subtree and
  // route every mutation batch through the debounced extractor.
  const targetNode = document.body;
  if (!targetNode) {
    console.warn(
      `%c${SCRIPT_PREFIX} Could not find target node for MutationObserver. Dynamic updates might not trigger extraction.`,
      "color: orange;"
    );
  } else {
    const observerOptions = { childList: true, subtree: true };
    const observer = new MutationObserver(debounceExtract);
    observer.observe(targetNode, observerOptions);
    console.log(
      `%c${SCRIPT_PREFIX} Initialized. Watching for page changes. Ready to send data to ${PYTHON_SERVER_URL}`,
      STYLE_INFO
    );
  }

  // One extraction is scheduled regardless of whether the observer was set up.
  setTimeout(extractAndLogSubdomains, 500);
  191.  
  /**
   * Closure-local `GM_log`: forwards the message and any trailing `%c` style
   * strings, unchanged and in order, to `console.log`.
   *
   * @param {string} message - Text to log (may contain `%c` directives).
   * @param {...string} styles - Values substituted for the directives.
   * @returns {void}
   */
  function GM_log(message, ...styles) {
    const consoleArgs = [message, ...styles];
    console.log(...consoleArgs);
  }
  195. })();