Google Search Subdomain Extractor

Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         Google Search Subdomain Extractor
// @namespace    http://tampermonkey.net/
// @version      0.4.3
// @description  Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。
// @author       特让他也让
// @match        https://*.google.com/search*
// @connect      127.0.0.1
// @icon         https://www.google.com/favicon.ico
// @connect      localhost
// @grant        GM_xmlhttpRequest
// @grant        GM_log
// @run-at       document-idle
// @license      GPL-3.0
// ==/UserScript==

/*
 * Google Search Subdomain Extractor
 * Copyright (C) 2025 特让他也让
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

(function () {
  "use strict";

  // Delay (ms) used by debounceExtract before a scheduled extraction runs.
  const DEBOUNCE_DELAY = 700;
  // Endpoint that receives the collected hostnames as a JSON POST body.
  const PYTHON_SERVER_URL = "http://127.0.0.1:5123/save_subdomains";
  // Prefix prepended to the script's console messages.
  const SCRIPT_PREFIX = "[Subdomain Extractor]";

  // CSS fragments consumed by the "%c" directive in console messages.
  const STYLE_TITLE = "color: #1a73e8; font-weight: bold; font-size: 1.1em;";
  const STYLE_COUNT = "color: #1e8e3e; font-weight: bold;";
  const STYLE_INFO = "color: #5f6368;";
  const STYLE_HOSTNAME = "color: #202124;";
  const STYLE_SERVER_OK = "color: #1e8e3e;";
  const STYLE_SERVER_ERR = "color: #d93025; font-weight: bold;";

  // Timer handle of the pending debounced extraction (null until one is set).
  let extractionTimeoutId = null;
  // Timer handle of the pending delayed upload (null until one is set).
  let serverSendTimeoutId = null;
  // Every hostname seen so far on this page. The binding is never reassigned,
  // only mutated through add(), so it is declared const.
  const foundHostnames = new Set();

  /**
   * POSTs the given hostnames to PYTHON_SERVER_URL as `{ hostnames: [...] }`
   * and reports the outcome on the console.
   *
   * Nothing is sent (and nothing is logged) for an empty array. The response
   * body is parsed as JSON; its `status`, `message`, `received`,
   * `newly_added` and `total_saved` fields are the only ones read.
   *
   * @param {string[]} hostnamesArray - Hostnames to upload.
   * @returns {void}
   */
  function sendHostnamesToServer(hostnamesArray) {
    const hostnameCount = hostnamesArray.length;
    if (hostnameCount === 0) {
      return;
    }

    GM_log(
      `%c${SCRIPT_PREFIX} Attempting to send ${hostnameCount} hostnames to server...`,
      STYLE_INFO
    );

    // The whole handler stays inside one try so that a malformed body and a
    // body of unexpected shape are both reported as a parse failure.
    const reportResponse = (response) => {
      try {
        const result = JSON.parse(response.responseText);
        const accepted =
          response.status === 200 && result.status === "success";

        if (!accepted) {
          const reason = result.message || response.statusText;
          console.error(
            `%c${SCRIPT_PREFIX} Server Error: ${reason}`,
            STYLE_SERVER_ERR,
            response
          );
          return;
        }

        console.log(
          `%c${SCRIPT_PREFIX} Server Response: OK - Received ${result.received}, Added ${result.newly_added} new, Total ${result.total_saved}`,
          STYLE_SERVER_OK
        );
      } catch (failure) {
        console.error(
          `%c${SCRIPT_PREFIX} Failed to parse server response:`,
          STYLE_SERVER_ERR,
          response.responseText,
          failure
        );
      }
    };

    const reportNetworkError = (response) => {
      console.error(
        `%c${SCRIPT_PREFIX} Network Error: Could not connect to server at ${PYTHON_SERVER_URL}. Is it running?`,
        STYLE_SERVER_ERR,
        response
      );
    };

    const reportTimeout = () => {
      console.error(
        `%c${SCRIPT_PREFIX} Timeout: No response from server at ${PYTHON_SERVER_URL}.`,
        STYLE_SERVER_ERR
      );
    };

    GM_xmlhttpRequest({
      method: "POST",
      url: PYTHON_SERVER_URL,
      headers: { "Content-Type": "application/json" },
      data: JSON.stringify({ hostnames: hostnamesArray }),
      timeout: 5000,
      onload: reportResponse,
      onerror: reportNetworkError,
      ontimeout: reportTimeout,
    });
  }

  /**
   * Turns the visible text of one result <cite> into a lowercase hostname.
   *
   * @param {string} citeText - Raw text content of a <cite> element.
   * @returns {string|null} The hostname, or null when the text is empty, has
   *   neither an http(s) scheme nor a dot, or cannot be parsed as a URL.
   */
  function hostnameFromCiteText(citeText) {
    // Keep only what precedes the first " › " separator and the first " ..."
    // marker in the cite text.
    let candidate = citeText.trim().split(" › ")[0].split(" ...")[0].trim();
    if (!candidate) {
      return null;
    }

    // Look for an actual "http:" / "https:" scheme rather than the bare
    // letters "http": scheme-less hosts such as "httpbin.org" or "http.cat"
    // still need a scheme prepended, otherwise new URL() rejects them.
    if (!/^https?:/i.test(candidate)) {
      if (!candidate.includes(".")) {
        return null;
      }
      candidate = `https://${candidate}`;
    }

    try {
      return new URL(candidate).hostname.toLowerCase() || null;
    } catch (parseError) {
      // Cite text is free-form; text the URL parser rejects is skipped on
      // purpose rather than reported.
      return null;
    }
  }

  /**
   * Scans every <cite> element in the document, adds each hostname it yields
   * to the shared foundHostnames set, and prints a collapsed console group
   * summarising the pass.
   *
   * When the set is non-empty, the full sorted list is handed to
   * sendHostnamesToServer after 200 ms; a still-pending earlier upload timer
   * is cancelled first.
   *
   * @returns {void}
   */
  function extractAndLogSubdomains() {
    console.log(`%c${SCRIPT_PREFIX} Running extraction...`, STYLE_INFO);
    const initialSize = foundHostnames.size;

    document.querySelectorAll("cite").forEach((cite) => {
      const hostname = hostnameFromCiteText(cite.textContent || "");
      if (hostname) {
        foundHostnames.add(hostname);
      }
    });

    const newlyFoundCount = foundHostnames.size - initialSize;

    console.groupCollapsed(
      `%c${SCRIPT_PREFIX} Extraction Complete`,
      STYLE_TITLE
    );
    if (newlyFoundCount > 0) {
      console.log(
        `%cFound ${newlyFoundCount} new unique hostnames this pass.`,
        STYLE_INFO
      );
    } else if (foundHostnames.size > 0) {
      console.log(`%cNo new unique hostnames found this pass.`, STYLE_INFO);
    }

    if (foundHostnames.size > 0) {
      console.log(
        `%cTotal unique hostnames found (client-side): ${foundHostnames.size}`,
        STYLE_COUNT
      );
      console.log("--------------------");
      const sortedHostnames = Array.from(foundHostnames).sort();
      sortedHostnames.forEach((hostname) =>
        console.log(`%c  ${hostname}`, STYLE_HOSTNAME)
      );
      console.log("--------------------");

      // Replace any upload that is still waiting so only the latest list goes out.
      clearTimeout(serverSendTimeoutId);
      serverSendTimeoutId = setTimeout(() => {
        sendHostnamesToServer(sortedHostnames);
      }, 200);
    } else {
      console.log(`%cNo hostnames found yet.`, STYLE_INFO);
    }
    console.groupEnd();
  }

  /**
   * Schedules extractAndLogSubdomains to run DEBOUNCE_DELAY ms from now,
   * cancelling the run scheduled by any earlier call that has not fired yet.
   *
   * @returns {void}
   */
  function debounceExtract() {
    const staleTimerId = extractionTimeoutId;
    clearTimeout(staleTimerId);

    const freshTimerId = setTimeout(extractAndLogSubdomains, DEBOUNCE_DELAY);
    extractionTimeoutId = freshTimerId;
  }

  // Watch <body> for added or removed nodes anywhere in its subtree; each
  // mutation batch goes through debounceExtract.
  const targetNode = document.body;
  if (!targetNode) {
    console.warn(
      `%c${SCRIPT_PREFIX} Could not find target node for MutationObserver. Dynamic updates might not trigger extraction.`,
      "color: orange;"
    );
  } else {
    const watchOptions = { childList: true, subtree: true };
    const pageWatcher = new MutationObserver(debounceExtract);
    pageWatcher.observe(targetNode, watchOptions);
    console.log(
      `%c${SCRIPT_PREFIX} Initialized. Watching for page changes. Ready to send data to ${PYTHON_SERVER_URL}`,
      STYLE_INFO
    );
  }

  // Run one extraction shortly after start-up, independent of the observer.
  setTimeout(extractAndLogSubdomains, 500);

  /**
   * Local GM_log: forwards the message and any extra arguments to
   * console.log unchanged. Being declared in this scope, it is the GM_log
   * that calls inside this script resolve to.
   * NOTE(review): presumably defined so "%c" style arguments are honoured —
   * confirm against the userscript manager's own GM_log.
   *
   * @param {*} message - First argument passed to console.log.
   * @param {...*} styles - Remaining arguments, passed through in order.
   * @returns {void}
   */
  function GM_log(message, ...styles) {
    const consoleArgs = [message, ...styles];
    console.log(...consoleArgs);
  }
})();