// ==UserScript==
// @name Google Search Subdomain Extractor
// @namespace http://tampermonkey.net/
// @version 0.4.3
// @description Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。
// @author 特让他也让
// @match https://*.google.com/search*
// @connect 127.0.0.1
// @icon https://www.google.com/favicon.ico
// @connect localhost
// @grant GM_xmlhttpRequest
// @grant GM_log
// @run-at document-idle
// @license GPL-3.0
// ==/UserScript==
/*
* Google Search Subdomain Extractor
* Copyright (C) 2025 特让他也让
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
(function () {
"use strict";
// Delay in milliseconds that debounceExtract waits after the last DOM mutation
// before running an extraction pass.
const DEBOUNCE_DELAY = 700;
// Endpoint that receives the collected hostnames as a JSON POST body.
const PYTHON_SERVER_URL = "http://127.0.0.1:5123/save_subdomains";
// Prefix placed in front of this script's console messages.
const SCRIPT_PREFIX = "[Subdomain Extractor]";
// CSS snippets applied through the console's "%c" format directive.
const STYLE_TITLE = "color: #1a73e8; font-weight: bold; font-size: 1.1em;";
const STYLE_COUNT = "color: #1e8e3e; font-weight: bold;";
const STYLE_INFO = "color: #5f6368;";
const STYLE_HOSTNAME = "color: #202124;";
const STYLE_SERVER_OK = "color: #1e8e3e;";
const STYLE_SERVER_ERR = "color: #d93025; font-weight: bold;";
// Timer handle for the pending debounced extraction pass (null until one is scheduled).
let extractionTimeoutId = null;
// Timer handle for the pending delayed upload to the server (null until one is scheduled).
let serverSendTimeoutId = null;
// Every unique hostname collected so far. The Set is only mutated, never
// replaced, so the binding itself is constant.
const foundHostnames = new Set();
/**
 * POSTs the given hostnames to PYTHON_SERVER_URL as JSON of the form
 * `{ "hostnames": [...] }` and reports the outcome on the console.
 *
 * Nothing is sent (and nothing is logged) when the array is empty.
 *
 * @param {string[]} hostnamesArray - Hostnames to upload.
 * @returns {void}
 */
function sendHostnamesToServer(hostnamesArray) {
  if (hostnamesArray.length === 0) {
    return;
  }

  // Handles any completed HTTP exchange, successful or not.
  const handleLoad = (response) => {
    try {
      const payload = JSON.parse(response.responseText);
      const accepted =
        response.status === 200 && payload.status === "success";

      if (!accepted) {
        // Prefer the server-supplied message; fall back to the HTTP status text.
        const reason = payload.message || response.statusText;
        console.error(
          `%c${SCRIPT_PREFIX} Server Error: ${reason}`,
          STYLE_SERVER_ERR,
          response
        );
        return;
      }

      console.log(
        `%c${SCRIPT_PREFIX} Server Response: OK - Received ${payload.received}, Added ${payload.newly_added} new, Total ${payload.total_saved}`,
        STYLE_SERVER_OK
      );
    } catch (parseFailure) {
      // Reached when the body is not valid JSON (or not an object we can read).
      console.error(
        `%c${SCRIPT_PREFIX} Failed to parse server response:`,
        STYLE_SERVER_ERR,
        response.responseText,
        parseFailure
      );
    }
  };

  // Handles transport-level failures reported by the request API.
  const handleError = (response) => {
    console.error(
      `%c${SCRIPT_PREFIX} Network Error: Could not connect to server at ${PYTHON_SERVER_URL}. Is it running?`,
      STYLE_SERVER_ERR,
      response
    );
  };

  // Handles the request exceeding its configured timeout.
  const handleTimeout = () => {
    console.error(
      `%c${SCRIPT_PREFIX} Timeout: No response from server at ${PYTHON_SERVER_URL}.`,
      STYLE_SERVER_ERR
    );
  };

  GM_log(
    `%c${SCRIPT_PREFIX} Attempting to send ${hostnamesArray.length} hostnames to server...`,
    STYLE_INFO
  );

  const requestBody = JSON.stringify({ hostnames: hostnamesArray });
  GM_xmlhttpRequest({
    method: "POST",
    url: PYTHON_SERVER_URL,
    headers: { "Content-Type": "application/json" },
    data: requestBody,
    timeout: 5000,
    onload: handleLoad,
    onerror: handleError,
    ontimeout: handleTimeout,
  });
}
/**
 * Derives a lowercase hostname from the text content of a <cite> element.
 *
 * Only the text before the first " › " or " ..." separator is considered.
 * Text that does not already carry an explicit http:// or https:// scheme is
 * prefixed with "https://" so that the URL parser accepts it; such text must
 * contain a "." to be considered at all.
 *
 * @param {string} citeText - Raw text content of the <cite> element.
 * @returns {string|null} The lowercase hostname, or null when none can be derived.
 */
function hostnameFromCiteText(citeText) {
  if (typeof citeText !== "string") {
    return null;
  }
  const trimmed = citeText.trim();
  if (!trimmed) {
    return null;
  }

  let candidate = trimmed.split(" › ")[0].split(" ...")[0].trim();

  // Test for a real scheme rather than a bare "http" prefix: hosts such as
  // "httpbin.org" start with "http" but still need a scheme added, otherwise
  // the URL constructor rejects them and the hostname is lost.
  if (!/^https?:\/\//i.test(candidate)) {
    if (!candidate.includes(".")) {
      return null;
    }
    candidate = `https://${candidate}`;
  }

  try {
    return new URL(candidate).hostname.toLowerCase() || null;
  } catch (e) {
    // Best effort: cite text that is not URL-like is simply skipped.
    return null;
  }
}

/**
 * Scans every <cite> element in the document, adds each hostname it can derive
 * to the shared foundHostnames set, logs a summary in a collapsed console
 * group, and schedules an upload of the full sorted hostname list.
 *
 * The upload is delayed by 200 ms and any previously scheduled upload is
 * cancelled first, so only the most recent list is sent.
 *
 * @returns {void}
 */
function extractAndLogSubdomains() {
  console.log(`%c${SCRIPT_PREFIX} Running extraction...`, STYLE_INFO);
  const initialSize = foundHostnames.size;

  document.querySelectorAll("cite").forEach((cite) => {
    const hostname = hostnameFromCiteText(cite.textContent);
    if (hostname) {
      foundHostnames.add(hostname);
    }
  });

  const newlyFoundCount = foundHostnames.size - initialSize;
  console.groupCollapsed(
    `%c${SCRIPT_PREFIX} Extraction Complete`,
    STYLE_TITLE
  );
  if (newlyFoundCount > 0) {
    console.log(
      `%cFound ${newlyFoundCount} new unique hostnames this pass.`,
      STYLE_INFO
    );
  } else if (foundHostnames.size > 0) {
    console.log(`%cNo new unique hostnames found this pass.`, STYLE_INFO);
  }

  if (foundHostnames.size > 0) {
    console.log(
      `%cTotal unique hostnames found (client-side): ${foundHostnames.size}`,
      STYLE_COUNT
    );
    console.log("--------------------");
    const sortedHostnames = Array.from(foundHostnames).sort();
    sortedHostnames.forEach((hostname) =>
      console.log(`%c ${hostname}`, STYLE_HOSTNAME)
    );
    console.log("--------------------");
    // Replace any pending upload so only the latest snapshot is sent.
    clearTimeout(serverSendTimeoutId);
    serverSendTimeoutId = setTimeout(() => {
      sendHostnamesToServer(sortedHostnames);
    }, 200);
  } else {
    console.log(`%cNo hostnames found yet.`, STYLE_INFO);
  }
  console.groupEnd();
}
/**
 * Restarts the extraction countdown: cancels the pending extraction timer, if
 * any, and schedules a new extraction pass DEBOUNCE_DELAY milliseconds from
 * now. Used as the MutationObserver callback so that a burst of DOM changes
 * results in a single pass.
 *
 * @returns {void}
 */
function debounceExtract() {
  const pendingTimer = extractionTimeoutId;
  clearTimeout(pendingTimer);

  extractionTimeoutId = setTimeout(
    extractAndLogSubdomains,
    DEBOUNCE_DELAY
  );
}
// Observe the whole <body> subtree so that nodes added or removed after the
// initial load trigger a (debounced) re-extraction.
const targetNode = document.body;
if (!targetNode) {
  console.warn(
    `%c${SCRIPT_PREFIX} Could not find target node for MutationObserver. Dynamic updates might not trigger extraction.`,
    "color: orange;"
  );
} else {
  const domWatcher = new MutationObserver(debounceExtract);
  domWatcher.observe(targetNode, { subtree: true, childList: true });
  console.log(
    `%c${SCRIPT_PREFIX} Initialized. Watching for page changes. Ready to send data to ${PYTHON_SERVER_URL}`,
    STYLE_INFO
  );
}

// Run one extraction pass shortly after start-up, whether or not the observer
// could be attached.
setTimeout(extractAndLogSubdomains, 500);
/**
 * Console-backed logging helper. Because it is declared inside this script's
 * function scope, calls to GM_log within the script resolve to this function,
 * which forwards all of its arguments to console.log unchanged.
 *
 * @param {string} message - Message text, optionally containing "%c" directives.
 * @param {...string} styles - CSS strings consumed by the "%c" directives.
 * @returns {void}
 */
function GM_log(message, ...styles) {
  const consoleArgs = [message, ...styles];
  console.log(...consoleArgs);
}
})();