Google Search Subdomain Extractor

Extracts unique subdomains from the <cite> tags of Google search results, logs them to the console with styled output, and sends them to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。

คุณจะต้องติดตั้งส่วนขยาย เช่น Tampermonkey, Greasemonkey หรือ Violentmonkey เพื่อติดตั้งสคริปต์นี้

You will need to install an extension such as Tampermonkey to install this script.

คุณจะต้องติดตั้งส่วนขยาย เช่น Tampermonkey หรือ Violentmonkey เพื่อติดตั้งสคริปต์นี้

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name         Google Search Subdomain Extractor
// @namespace    http://tampermonkey.net/
// @version      0.4.3
// @description  Extracts unique subdomains from Google search result <cite> tags, logs beautifully, and sends to a local Python server. / 从 Google 搜索结果提取子域名,并发送到本地 Python 服务器。
// @author       特让他也让
// @match        https://*.google.com/search*
// @connect      127.0.0.1
// @icon         https://www.google.com/favicon.ico
// @connect      localhost
// @grant        GM_xmlhttpRequest
// @grant        GM_log
// @run-at       document-idle
// @license      GPL-3.0
// ==/UserScript==

/*
 * Google Search Subdomain Extractor
 * Copyright (C) 2025 特让他也让
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

(function () {
  "use strict";

  // Delay (ms) used when debouncing extraction runs triggered by DOM mutations.
  const DEBOUNCE_DELAY = 700;
  // Endpoint that receives the POSTed JSON payload of collected hostnames.
  const PYTHON_SERVER_URL = "http://127.0.0.1:5123/save_subdomains";
  // Tag placed at the start of log messages so they are easy to filter.
  const SCRIPT_PREFIX = "[Subdomain Extractor]";

  // CSS strings applied through the "%c" console format directive.
  const STYLE_TITLE = "color: #1a73e8; font-weight: bold; font-size: 1.1em;";
  const STYLE_COUNT = "color: #1e8e3e; font-weight: bold;";
  const STYLE_INFO = "color: #5f6368;";
  const STYLE_HOSTNAME = "color: #202124;";
  const STYLE_SERVER_OK = "color: #1e8e3e;";
  const STYLE_SERVER_ERR = "color: #d93025; font-weight: bold;";

  // Timer handles for the pending extraction run and the pending server send;
  // null until the first timer is scheduled.
  let extractionTimeoutId = null;
  let serverSendTimeoutId = null;

  // Every hostname seen so far. The Set is only mutated (via add), never
  // rebound, so it is declared const to make accidental reassignment an error.
  const foundHostnames = new Set();

  /**
   * POSTs the given hostnames to PYTHON_SERVER_URL as JSON
   * (`{ hostnames: [...] }`) and logs the outcome to the console.
   *
   * An empty array is ignored: nothing is logged and no request is made.
   * The response body is parsed as JSON; a 200 response whose `status` field
   * is "success" is logged as OK, anything else is logged as a server error.
   *
   * @param {string[]} hostnamesArray - Hostnames to submit.
   * @returns {void}
   */
  function sendHostnamesToServer(hostnamesArray) {
    if (hostnamesArray.length === 0) {
      return;
    }

    GM_log(
      `%c${SCRIPT_PREFIX} Attempting to send ${hostnamesArray.length} hostnames to server...`,
      STYLE_INFO
    );

    // The whole handler stays inside the try so that a body that is not
    // JSON, or JSON that is not an object, ends up in the parse-failure log.
    const handleLoad = (response) => {
      try {
        const result = JSON.parse(response.responseText);
        const succeeded =
          response.status === 200 && result.status === "success";

        if (!succeeded) {
          const reason = result.message || response.statusText;
          console.error(
            `%c${SCRIPT_PREFIX} Server Error: ${reason}`,
            STYLE_SERVER_ERR,
            response
          );
          return;
        }

        console.log(
          `%c${SCRIPT_PREFIX} Server Response: OK - Received ${result.received}, Added ${result.newly_added} new, Total ${result.total_saved}`,
          STYLE_SERVER_OK
        );
      } catch (e) {
        console.error(
          `%c${SCRIPT_PREFIX} Failed to parse server response:`,
          STYLE_SERVER_ERR,
          response.responseText,
          e
        );
      }
    };

    const handleError = (response) => {
      console.error(
        `%c${SCRIPT_PREFIX} Network Error: Could not connect to server at ${PYTHON_SERVER_URL}. Is it running?`,
        STYLE_SERVER_ERR,
        response
      );
    };

    const handleTimeout = () => {
      console.error(
        `%c${SCRIPT_PREFIX} Timeout: No response from server at ${PYTHON_SERVER_URL}.`,
        STYLE_SERVER_ERR
      );
    };

    const payload = JSON.stringify({ hostnames: hostnamesArray });

    GM_xmlhttpRequest({
      method: "POST",
      url: PYTHON_SERVER_URL,
      headers: { "Content-Type": "application/json" },
      data: payload,
      timeout: 5000,
      onload: handleLoad,
      onerror: handleError,
      ontimeout: handleTimeout,
    });
  }

  /**
   * Derives a lowercase hostname from the visible text of a <cite> element.
   *
   * The text is cut at the first " › " breadcrumb separator and at the first
   * " ..." truncation marker. If what remains has no "scheme://" prefix it is
   * treated as a bare host/path and "https://" is prepended, provided it
   * contains a dot.
   *
   * @param {string} citeText - Raw text content of the <cite> element.
   * @returns {string|null} The hostname, or null when none can be derived.
   */
  function hostnameFromCiteText(citeText) {
    const trimmed = citeText.trim();
    if (!trimmed) {
      return null;
    }

    let candidate = trimmed.split(" › ")[0].split(" ...")[0].trim();

    // Look for an actual "scheme://" prefix, case-insensitively. A plain
    // startsWith("http") check misclassifies bare hosts such as "httpbin.org"
    // (left without a scheme, so URL parsing fails and the host is lost) and
    // upper-case schemes such as "HTTPS://host" (prefixed a second time, so
    // the parsed hostname becomes "https").
    const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(candidate);
    if (!hasScheme) {
      if (!candidate.includes(".")) {
        return null;
      }
      candidate = `https://${candidate}`;
    }

    try {
      return new URL(candidate).hostname.toLowerCase() || null;
    } catch (e) {
      // Deliberate best-effort: text that is not URL-shaped is skipped
      // rather than reported, since many <cite> elements are not URLs.
      return null;
    }
  }

  /**
   * Scans every <cite> element in the document, adds any hostnames found to
   * the shared foundHostnames set, logs a summary group to the console, and
   * schedules a send of the full sorted hostname list to the server.
   *
   * The send is delayed by 200 ms and replaces any send still pending from a
   * previous pass. It is scheduled whenever the set is non-empty, including
   * passes that found nothing new.
   *
   * @returns {void}
   */
  function extractAndLogSubdomains() {
    console.log(`%c${SCRIPT_PREFIX} Running extraction...`, STYLE_INFO);
    const initialSize = foundHostnames.size;

    document.querySelectorAll("cite").forEach((cite) => {
      const hostname = hostnameFromCiteText(cite.textContent);
      if (hostname) {
        foundHostnames.add(hostname);
      }
    });

    const newlyFoundCount = foundHostnames.size - initialSize;

    console.groupCollapsed(
      `%c${SCRIPT_PREFIX} Extraction Complete`,
      STYLE_TITLE
    );
    if (newlyFoundCount > 0) {
      console.log(
        `%cFound ${newlyFoundCount} new unique hostnames this pass.`,
        STYLE_INFO
      );
    } else if (foundHostnames.size > 0) {
      console.log(`%cNo new unique hostnames found this pass.`, STYLE_INFO);
    }

    if (foundHostnames.size > 0) {
      console.log(
        `%cTotal unique hostnames found (client-side): ${foundHostnames.size}`,
        STYLE_COUNT
      );
      console.log("--------------------");
      const sortedHostnames = Array.from(foundHostnames).sort();
      sortedHostnames.forEach((hostname) =>
        console.log(`%c  ${hostname}`, STYLE_HOSTNAME)
      );
      console.log("--------------------");

      // Restart the short send timer so that back-to-back passes result in
      // a single request carrying the latest list.
      clearTimeout(serverSendTimeoutId);
      serverSendTimeoutId = setTimeout(() => {
        sendHostnamesToServer(sortedHostnames);
      }, 200);
    } else {
      console.log(`%cNo hostnames found yet.`, STYLE_INFO);
    }
    console.groupEnd();
  }

  /**
   * Schedules an extraction run DEBOUNCE_DELAY ms from now, cancelling any
   * run that was already pending, so a burst of calls produces one run.
   *
   * @returns {void}
   */
  function debounceExtract() {
    if (extractionTimeoutId !== null) {
      clearTimeout(extractionTimeoutId);
    }
    extractionTimeoutId = setTimeout(extractAndLogSubdomains, DEBOUNCE_DELAY);
  }

  // Watch the whole <body> subtree for added or removed nodes and trigger a
  // debounced extraction on every change; warn when there is no body.
  const targetNode = document.body;
  if (!targetNode) {
    console.warn(
      `%c${SCRIPT_PREFIX} Could not find target node for MutationObserver. Dynamic updates might not trigger extraction.`,
      "color: orange;"
    );
  } else {
    const observerOptions = { childList: true, subtree: true };
    const domWatcher = new MutationObserver(debounceExtract);
    domWatcher.observe(targetNode, observerOptions);
    console.log(
      `%c${SCRIPT_PREFIX} Initialized. Watching for page changes. Ready to send data to ${PYTHON_SERVER_URL}`,
      STYLE_INFO
    );
  }

  // Initial pass shortly after start-up; it runs whether or not the
  // observer could be attached.
  setTimeout(extractAndLogSubdomains, 500);

  /**
   * Local GM_log implementation that forwards to console.log, so "%c" style
   * arguments are passed through unchanged.
   *
   * @param {*} message - First argument handed to console.log.
   * @param {...*} styles - Further arguments, passed through in order.
   * @returns {void}
   */
  function GM_log(message, ...styles) {
    const consoleArgs = [message, ...styles];
    console.log(...consoleArgs);
  }
})();