Greasy Fork is available in English.

Sequential Link Content Finder

Collect links from an index page, visit them sequentially, and save URLs whose page text matches a target string.

이 스크립트를 설치하려면 Tampermonkey, Greasemonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램을 설치해야 합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Userscripts와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 유저 스크립트 관리자 확장 프로그램이 필요합니다.

(이미 유저 스크립트 관리자가 설치되어 있습니다. 설치를 진행합니다!)

Advertisement:

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

(이미 유저 스타일 관리자가 설치되어 있습니다. 설치를 진행합니다!)

Advertisement:

// ==UserScript==
// @name         Sequential Link Content Finder
// @namespace    https://example.com/
// @version      1.0.0
// @description  Collect links from an index page, visit them sequentially, and save URLs whose page text matches a target string.
// @match        https://example.com/*
// @grant        GM.getValue
// @grant        GM.setValue
// @grant        GM.deleteValue
// @grant        GM.registerMenuCommand
// @run-at       document-idle
// ==/UserScript==

(async function () {
  "use strict";

  // Keys under which the scan state and the user config are persisted through
  // GM.getValue / GM.setValue / GM.deleteValue.
  const STATE_STORAGE_KEY = "sequential-link-content-finder-state-v1";
  const CONFIG_STORAGE_KEY = "sequential-link-content-finder-config-v1";

  // Fallback value for every config field. Frozen so that no caller can mutate
  // the shared defaults by accident; consumers copy it with object spread.
  const DEFAULT_CONFIG = Object.freeze({
    // RegExp source (not a /literal/) that the index page URL must match.
    indexUrlPattern: "^https://example\\.com/index",
    // RegExp source tested against each absolute link URL on the index page.
    linkHrefPattern: "/target-pages/",
    // Plain substring searched for in the text of every visited page.
    targetText: "Text to search for",
    // Delay, in milliseconds, before a visited page's text is read.
    waitAfterLoadMs: 2000,
    // Upper bound on the number of collected links.
    maxUrls: 500,
  });

  /**
   * Prints a console message tagged with the script's prefix.
   *
   * @param {...*} args - Values passed to console.log after the prefix.
   */
  function log(...args) {
    const tagged = ["[LinkContentFinder]", ...args];
    console.log(...tagged);
  }

  /**
   * Returns a promise that resolves (with no value) once a timer of `ms`
   * milliseconds has fired.
   *
   * @param {number} ms - Delay handed to setTimeout.
   * @returns {Promise<void>}
   */
  function sleep(ms) {
    return new Promise(function scheduleWakeUp(wakeUp) {
      setTimeout(() => {
        wakeUp();
      }, ms);
    });
  }

  /**
   * Loads the user config from storage and layers it over DEFAULT_CONFIG.
   *
   * Only a non-null, non-array object is merged. Any other stored value
   * (for example a string or an array) is ignored, because spreading it would
   * add numeric index keys to the config instead of real settings.
   *
   * @returns {Promise<object>} A fresh config object; never the defaults object itself.
   */
  async function loadConfig() {
    const saved = await GM.getValue(CONFIG_STORAGE_KEY);

    const isRecord =
      saved !== null && typeof saved === "object" && !Array.isArray(saved);

    if (!isRecord) {
      return { ...DEFAULT_CONFIG };
    }

    return {
      ...DEFAULT_CONFIG,
      ...saved,
    };
  }

  /**
   * Persists the given config object under CONFIG_STORAGE_KEY.
   *
   * @param {object} config - Config to store.
   * @returns {Promise<void>}
   */
  async function saveConfig(config) {
    const pendingWrite = GM.setValue(CONFIG_STORAGE_KEY, config);
    await pendingWrite;
  }

  /**
   * Deletes the stored config, so the next loadConfig() yields the defaults.
   *
   * @returns {Promise<void>}
   */
  async function resetConfig() {
    const pendingDelete = GM.deleteValue(CONFIG_STORAGE_KEY);
    await pendingDelete;
  }

  /**
   * Reads the persisted scan state.
   *
   * @returns {Promise<*>} The stored value, or null when nothing truthy is stored.
   */
  async function loadState() {
    const stored = await GM.getValue(STATE_STORAGE_KEY);
    return stored ? stored : null;
  }

  /**
   * Persists the given scan state under STATE_STORAGE_KEY.
   *
   * @param {object} state - Scan state to store.
   * @returns {Promise<void>}
   */
  async function saveState(state) {
    const pendingWrite = GM.setValue(STATE_STORAGE_KEY, state);
    await pendingWrite;
  }

  /**
   * Deletes the persisted scan state.
   *
   * @returns {Promise<void>}
   */
  async function clearState() {
    const pendingDelete = GM.deleteValue(STATE_STORAGE_KEY);
    await pendingDelete;
  }

  /**
   * Resolves `href` against the current page URL.
   *
   * @param {string} href - Absolute or relative URL text.
   * @returns {string|null} The absolute URL, or null when it cannot be parsed.
   */
  function normalizeUrl(href) {
    let absolute = null;

    try {
      const parsed = new URL(href, location.href);
      absolute = parsed.href;
    } catch {
      absolute = null;
    }

    return absolute;
  }

  /**
   * Compiles a RegExp from its source text (no flags).
   *
   * @param {string} patternText - RegExp source, not a /literal/.
   * @param {string} label - Setting name used as the prefix of the error message.
   * @returns {RegExp} The compiled expression.
   * @throws {Error} When the source is not a valid RegExp. The message is
   *   prefixed with `label`, and the engine's original error is kept on
   *   `cause` so its type and stack are not lost.
   */
  function compileRegExp(patternText, label) {
    try {
      return new RegExp(patternText);
    } catch (error) {
      throw new Error(`${label} is invalid RegExp: ${error.message}`, {
        cause: error,
      });
    }
  }

  /**
   * Non-throwing structural check of a config object.
   *
   * Requires the three text fields to be strings, `waitAfterLoadMs` to coerce
   * to a finite number >= 0, `maxUrls` to coerce to an integer >= 1, and both
   * pattern fields to compile. Unlike validateConfig(), an empty `targetText`
   * is accepted here.
   *
   * @param {*} config - Candidate config.
   * @returns {boolean} True when every check passes.
   */
  function isValidConfig(config) {
    const isObject = typeof config === "object" && config !== null;
    if (!isObject) return false;

    const textFields = ["indexUrlPattern", "linkHrefPattern", "targetText"];
    if (textFields.some((field) => typeof config[field] !== "string")) {
      return false;
    }

    const waitMs = Number(config.waitAfterLoadMs);
    if (!(Number.isFinite(waitMs) && waitMs >= 0)) return false;

    const maxUrlCount = Number(config.maxUrls);
    if (!(Number.isInteger(maxUrlCount) && maxUrlCount >= 1)) return false;

    const patterns = [
      [config.indexUrlPattern, "INDEX_URL_PATTERN"],
      [config.linkHrefPattern, "LINK_HREF_PATTERN"],
    ];

    try {
      for (const [patternText, label] of patterns) {
        compileRegExp(patternText, label);
      }
    } catch {
      return false;
    }

    return true;
  }

  /**
   * Throwing validation used before a config is saved or a scan is started.
   *
   * Checks run in this order: both patterns compile, `targetText` is
   * non-empty, `waitAfterLoadMs` coerces to a finite number >= 0, `maxUrls`
   * coerces to an integer >= 1. The first failing check throws.
   *
   * @param {object} config - Config to validate.
   * @throws {Error} With a message naming the offending setting.
   */
  function validateConfig(config) {
    const patterns = [
      [config.indexUrlPattern, "INDEX_URL_PATTERN"],
      [config.linkHrefPattern, "LINK_HREF_PATTERN"],
    ];

    for (const [patternText, label] of patterns) {
      compileRegExp(patternText, label);
    }

    if (!config.targetText) {
      throw new Error("TARGET_TEXT is empty.");
    }

    const waitMs = Number(config.waitAfterLoadMs);
    const hasValidWait = Number.isFinite(waitMs) && waitMs >= 0;

    if (!hasValidWait) {
      throw new Error("WAIT_AFTER_LOAD_MS must be 0 or greater.");
    }

    const maxUrlCount = Number(config.maxUrls);
    const hasValidMax = Number.isInteger(maxUrlCount) && maxUrlCount >= 1;

    if (!hasValidMax) {
      throw new Error("MAX_URLS must be an integer greater than 0.");
    }
  }

  /**
   * Structural check for a persisted scan state, running or finished.
   *
   * - active: true  -> the scan is in progress and may be auto-resumed.
   * - active: false -> the scan is done and is kept only to show its result;
   *   such a state must also carry a `finishedAt` string and have
   *   `currentIndex` equal to the queue length.
   *
   * @param {*} state - Value read from storage.
   * @returns {boolean} True when every field has the expected shape.
   */
  function isValidStoredState(state) {
    const isNonEmptyString = (value) =>
      typeof value === "string" && value !== "";
    const isNonEmptyStringList = (value) =>
      Array.isArray(value) && value.every((item) => isNonEmptyString(item));

    if (state === null || typeof state !== "object") return false;
    if (typeof state.active !== "boolean") return false;
    if (!isNonEmptyString(state.indexUrl)) return false;

    if (!isNonEmptyStringList(state.queue) || state.queue.length === 0) {
      return false;
    }

    const { currentIndex, queue } = state;
    const isIndexInRange =
      Number.isInteger(currentIndex) &&
      currentIndex >= 0 &&
      currentIndex <= queue.length;

    if (!isIndexInRange) return false;
    if (!isNonEmptyStringList(state.matchedUrls)) return false;
    if (!isValidConfig(state.configSnapshot)) return false;
    if (typeof state.startedAt !== "string") return false;

    if (state.active) return true;

    // Finished scans carry extra invariants.
    return typeof state.finishedAt === "string" && currentIndex === queue.length;
  }

  /**
   * Tells whether `state` is a well-formed state of a scan still in progress.
   *
   * @param {*} state - Value read from storage.
   * @returns {boolean}
   */
  function isActiveScanState(state) {
    if (!isValidStoredState(state)) {
      return false;
    }

    return state.active === true;
  }

  /**
   * Tells whether `url` belongs to the scan: it is either the index page URL
   * recorded in the state or one of the queued URLs (exact string match).
   *
   * @param {string} url - URL to test.
   * @param {*} state - Scan state; anything without an array `queue` yields false.
   * @returns {boolean}
   */
  function isUrlInScanScope(url, state) {
    const queue = state ? state.queue : undefined;

    if (!Array.isArray(queue)) {
      return false;
    }

    if (url === state.indexUrl) {
      return true;
    }

    return queue.includes(url);
  }

  /**
   * Collects the scan queue from the `<a href>` elements of the current page.
   *
   * Each href is resolved to an absolute URL, kept only if it is an http(s)
   * URL that matches `config.linkHrefPattern`, de-duplicated in document
   * order, and finally capped at `config.maxUrls` entries.
   *
   * Non-http(s) schemes are dropped on purpose: the scan later assigns every
   * queued URL to `location.href`, and a `javascript:` or `mailto:` link that
   * happens to match the pattern would not load a page for the scan to
   * continue on.
   *
   * @param {object} config - Config providing `linkHrefPattern` and `maxUrls`.
   * @returns {string[]} Unique absolute URLs.
   * @throws {Error} When `linkHrefPattern` is not a valid RegExp.
   */
  function collectLinksFromIndexPage(config) {
    const linkHrefRegExp = compileRegExp(
      config.linkHrefPattern,
      "LINK_HREF_PATTERN"
    );

    const navigableProtocols = new Set(["http:", "https:"]);

    const urls = [...document.querySelectorAll("a[href]")]
      .map((a) => normalizeUrl(a.getAttribute("href")))
      .filter(Boolean)
      // normalizeUrl() only returns parseable absolute URLs, so this cannot throw.
      .filter((url) => navigableProtocols.has(new URL(url).protocol))
      .filter((url) => linkHrefRegExp.test(url));

    return [...new Set(urls)].slice(0, Number(config.maxUrls));
  }

  /**
   * Starts a new scan from the current page.
   *
   * Steps, in order:
   * 1. If a state is already stored: an invalid one is cleared silently; a
   *    valid one (running or finished) is cleared only after the user confirms.
   * 2. The saved config is loaded and validated.
   * 3. The current URL must match `indexUrlPattern`.
   * 4. Matching links are collected; at least one is required.
   * 5. A fresh active state is saved and the page navigates to the first link.
   *
   * Every failure is reported with alert() and log(), and ends the attempt.
   *
   * @returns {Promise<void>}
   */
  async function startScan() {
    const previous = await loadState();

    if (previous) {
      if (!isValidStoredState(previous)) {
        log("Invalid existing scan state found. Clearing it:", previous);
      } else {
        const isRunning = previous.active === true;

        const promptLines = isRunning
          ? [
              "An active scan state already exists.",
              "",
              `Total URLs: ${previous.queue.length}`,
              `Processed URLs: ${previous.currentIndex}`,
              "",
              "Discard the existing state and start a new scan?",
            ]
          : [
              "A previous scan result is saved.",
              "",
              `Total URLs: ${previous.queue.length}`,
              `Matched URLs: ${previous.matchedUrls.length}`,
              "",
              "Discard the previous result and start a new scan?",
            ];

        if (!confirm(promptLines.join("\n"))) {
          log(
            isRunning
              ? "Start canceled because an active scan state exists."
              : "Start canceled because a completed scan result exists."
          );
          return;
        }
      }

      // Reached for an invalid state and for a confirmed discard alike.
      await clearState();
    }

    const config = await loadConfig();

    // Shared reporting for the two steps that may throw.
    const reportFailure = (error) => {
      alert(error.message);
      log(error);
    };

    try {
      validateConfig(config);
    } catch (error) {
      reportFailure(error);
      return;
    }

    const pageUrl = location.href;
    const isIndexPage = compileRegExp(
      config.indexUrlPattern,
      "INDEX_URL_PATTERN"
    ).test(pageUrl);

    if (!isIndexPage) {
      log("This page is not recognized as the index page:", pageUrl);
      alert(
        [
          "This page is not recognized as the index page.",
          "",
          `Current URL: ${pageUrl}`,
          `INDEX_URL_PATTERN: ${config.indexUrlPattern}`,
        ].join("\n")
      );
      return;
    }

    let urls;

    try {
      urls = collectLinksFromIndexPage(config);
    } catch (error) {
      reportFailure(error);
      return;
    }

    if (urls.length === 0) {
      log("No matching links found.");
      alert(
        [
          "No matching links were found.",
          "",
          `LINK_HREF_PATTERN: ${config.linkHrefPattern}`,
        ].join("\n")
      );
      return;
    }

    await saveState({
      active: true,
      indexUrl: pageUrl,
      queue: urls,
      currentIndex: 0,
      matchedUrls: [],
      configSnapshot: config,
      startedAt: new Date().toISOString(),
      finishedAt: null,
    });

    const firstUrl = urls[0];

    log("Scan started.");
    log("Config:", config);
    log("Collected URLs:", urls.length);
    log("First URL:", firstUrl);

    location.href = firstUrl;
  }

  /**
   * Processes the current page as one step of the running scan.
   *
   * After waiting `waitAfterLoadMs`, the page text is searched for
   * `targetText`, the current URL is recorded on a match, the queue index is
   * advanced and saved, and the page navigates either to the next queued URL
   * or, once the queue is exhausted, back to the index page after marking the
   * state as completed.
   *
   * The persisted state is re-read after the wait. Menu commands (reset,
   * emergency stop, a new scan) or another tab may have changed or removed it
   * in the meantime; writing the stale in-memory copy back would silently
   * resurrect a scan the user just stopped. If the stored state is no longer
   * the same scan at the same index, this step ends without saving or
   * navigating.
   *
   * @param {object} state - Scan state loaded when the page started; mutated in place.
   * @returns {Promise<void>}
   */
  async function continueScanOnTargetPage(state) {
    if (!isActiveScanState(state)) {
      log("Invalid active scan state before processing target page. Clearing state:", state);
      await clearState();
      return;
    }

    const config = state.configSnapshot;
    const currentUrl = location.href;
    const expectedUrl = state.queue[state.currentIndex];

    log(`Processing ${state.currentIndex + 1}/${state.queue.length}`);
    log("Current URL:", currentUrl);
    log("Expected URL:", expectedUrl);

    await sleep(Number(config.waitAfterLoadMs));

    // Make sure this step still owns the scan before touching storage.
    const persisted = await loadState();
    const isStillCurrentStep =
      isActiveScanState(persisted) &&
      persisted.startedAt === state.startedAt &&
      persisted.currentIndex === state.currentIndex;

    if (!isStillCurrentStep) {
      log("Scan state was cleared or changed while waiting. Stopping without saving or navigating.");
      return;
    }

    const pageText = document.body ? document.body.innerText : "";
    const matched = pageText.includes(config.targetText);

    if (matched) {
      if (!state.matchedUrls.includes(currentUrl)) {
        state.matchedUrls.push(currentUrl);
      }
      log("Matched:", currentUrl);
    } else {
      log("Not matched:", currentUrl);
    }

    state.currentIndex += 1;

    if (state.currentIndex >= state.queue.length) {
      // Queue exhausted: keep the state, flagged as completed, for result display.
      state.active = false;
      state.currentIndex = state.queue.length;
      state.finishedAt = new Date().toISOString();

      await saveState(state);

      log("Scan finished.");
      log("Matched URLs:", state.matchedUrls);
      console.table(state.matchedUrls);

      alert(
        [
          "Scan completed.",
          `Total URLs: ${state.queue.length}`,
          `Matched URLs: ${state.matchedUrls.length}`,
          "",
          "The result has been saved to Tampermonkey storage.",
          "Use the Tampermonkey menu item “Show scan results” to view it.",
        ].join("\n")
      );

      location.href = state.indexUrl;
      return;
    }

    await saveState(state);

    const nextUrl = state.queue[state.currentIndex];

    if (typeof nextUrl !== "string" || !nextUrl) {
      log("Invalid next URL. Clearing scan state:", nextUrl);
      await clearState();
      return;
    }

    log("Next URL:", nextUrl);

    location.href = nextUrl;
  }

  /**
   * Shows a summary of the stored scan (running or completed) in an alert and
   * prints the matched URLs with console.table.
   *
   * A missing or invalid stored state is reported instead and left untouched.
   *
   * @returns {Promise<void>}
   */
  async function showResults() {
    const saved = await loadState();

    if (!saved) {
      log("No saved state.");
      alert("No saved state was found.");
      return;
    }

    if (!isValidStoredState(saved)) {
      log("Invalid saved state found:", saved);
      alert("The saved state is invalid. Run “Reset scan state” if necessary.");
      return;
    }

    const { active, queue, currentIndex, matchedUrls } = saved;

    console.table(matchedUrls || []);

    const summaryLines = [
      `Status: ${active ? "Running" : "Completed"}`,
      `Total URLs: ${queue.length}`,
      `Processed URLs: ${currentIndex}`,
      `Matched URLs: ${matchedUrls.length}`,
      "",
      "See console.table output for details.",
    ];

    alert(summaryLines.join("\n"));
  }

  /**
   * Menu action: deletes the stored scan state and confirms it to the user.
   *
   * @returns {Promise<void>}
   */
  async function resetScanState() {
    const pendingClear = clearState();
    await pendingClear;

    log("State cleared.");
    alert("The scan state has been cleared.");
  }

  /**
   * Menu/panel action: deletes the stored scan state so that no further page
   * load resumes the scan, then tells the user.
   *
   * @returns {Promise<void>}
   */
  async function emergencyStopScan() {
    const pendingClear = clearState();
    await pendingClear;

    log("Emergency stop: scan state cleared.");
    alert("The scan state has been cleared. Reload the page if necessary.");
  }

  /**
   * Toggles the floating config panel.
   *
   * If the panel is already in the document it is removed together with its
   * stylesheet. Otherwise a new panel is built, pre-filled from the saved
   * config, and wired to these button actions: close, save, save-and-start,
   * reset-config and emergency-stop.
   *
   * The stylesheet carries its own id so that every way of closing the panel
   * can remove it; without that, closing by toggling left one extra `<style>`
   * element in the page per open/close cycle.
   *
   * @returns {Promise<void>}
   */
  async function showConfigPanel() {
    const PANEL_ID = "lc-finder-config-panel";
    const STYLE_ID = "lc-finder-config-style";

    const existing = document.getElementById(PANEL_ID);
    if (existing) {
      existing.remove();
      document.getElementById(STYLE_ID)?.remove();
      return;
    }

    const config = await loadConfig();

    const panel = document.createElement("div");
    panel.id = PANEL_ID;

    // Static markup only; config values are assigned through `.value` below
    // rather than interpolated into the HTML.
    panel.innerHTML = `
      <div class="lc-finder-header">
        <strong>Link Content Finder</strong>
        <button type="button" data-action="close">×</button>
      </div>

      <label>
        INDEX_URL_PATTERN
        <input type="text" data-field="indexUrlPattern">
      </label>

      <label>
        LINK_HREF_PATTERN
        <input type="text" data-field="linkHrefPattern">
      </label>

      <label>
        TARGET_TEXT
        <textarea data-field="targetText" rows="4"></textarea>
      </label>

      <label>
        WAIT_AFTER_LOAD_MS
        <input type="number" data-field="waitAfterLoadMs" min="0" step="100">
      </label>

      <label>
        MAX_URLS
        <input type="number" data-field="maxUrls" min="1" step="1">
      </label>

      <div class="lc-finder-actions">
        <button type="button" data-action="save">Save</button>
        <button type="button" data-action="save-and-start">Save & Start</button>
        <button type="button" data-action="reset-config">Reset Config</button>
        <button type="button" data-action="emergency-stop">Emergency Stop</button>
      </div>

      <div class="lc-finder-help">
        <p>
          Enter the body of the regular expression, not a JavaScript regex literal.
        </p>
        <p>
          Example: <code>^https://example\\.com/index</code>
        </p>
        <p>
          Example: <code>/articles/\\d+</code>
        </p>
      </div>
    `;

    const style = document.createElement("style");
    style.id = STYLE_ID;
    style.textContent = `
      #lc-finder-config-panel {
        position: fixed;
        right: 16px;
        bottom: 16px;
        z-index: 999999;
        width: 420px;
        max-width: calc(100vw - 32px);
        padding: 16px;
        box-sizing: border-box;
        background: #fff;
        color: #222;
        border: 1px solid #ccc;
        border-radius: 8px;
        box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2);
        font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
        font-size: 13px;
        line-height: 1.4;
      }

      #lc-finder-config-panel .lc-finder-header {
        display: flex;
        justify-content: space-between;
        align-items: center;
        margin-bottom: 12px;
      }

      #lc-finder-config-panel .lc-finder-header button {
        font-size: 18px;
        line-height: 1;
      }

      #lc-finder-config-panel label {
        display: block;
        margin: 10px 0;
        font-weight: 600;
      }

      #lc-finder-config-panel input,
      #lc-finder-config-panel textarea {
        display: block;
        width: 100%;
        box-sizing: border-box;
        margin-top: 4px;
        padding: 6px 8px;
        border: 1px solid #aaa;
        border-radius: 4px;
        font: 13px ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
      }

      #lc-finder-config-panel textarea {
        resize: vertical;
      }

      #lc-finder-config-panel .lc-finder-actions {
        display: flex;
        gap: 8px;
        flex-wrap: wrap;
        margin-top: 12px;
      }

      #lc-finder-config-panel button {
        cursor: pointer;
        padding: 6px 10px;
        border: 1px solid #999;
        border-radius: 4px;
        background: #f7f7f7;
        color: #222;
      }

      #lc-finder-config-panel button:hover {
        background: #eee;
      }

      #lc-finder-config-panel .lc-finder-help {
        margin-top: 12px;
        color: #555;
        font-size: 12px;
      }

      #lc-finder-config-panel .lc-finder-help p {
        margin: 4px 0;
      }

      #lc-finder-config-panel code {
        font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
      }
    `;

    document.documentElement.appendChild(style);
    document.body.appendChild(panel);

    panel.querySelector('[data-field="indexUrlPattern"]').value =
      config.indexUrlPattern;
    panel.querySelector('[data-field="linkHrefPattern"]').value =
      config.linkHrefPattern;
    panel.querySelector('[data-field="targetText"]').value = config.targetText;
    panel.querySelector('[data-field="waitAfterLoadMs"]').value =
      config.waitAfterLoadMs;
    panel.querySelector('[data-field="maxUrls"]').value = config.maxUrls;

    // Removes the panel and its stylesheet together.
    function closePanel() {
      panel.remove();
      style.remove();
    }

    // Builds a config object from the current field values. Patterns are
    // trimmed; the target text is taken verbatim.
    function readConfigFromPanel() {
      return {
        indexUrlPattern: panel
          .querySelector('[data-field="indexUrlPattern"]')
          .value.trim(),
        linkHrefPattern: panel
          .querySelector('[data-field="linkHrefPattern"]')
          .value.trim(),
        targetText: panel.querySelector('[data-field="targetText"]').value,
        waitAfterLoadMs: Number(
          panel.querySelector('[data-field="waitAfterLoadMs"]').value
        ),
        maxUrls: Number(panel.querySelector('[data-field="maxUrls"]').value),
      };
    }

    // Validates and persists the panel's values; throws when validation fails.
    async function saveFromPanel() {
      const nextConfig = readConfigFromPanel();
      validateConfig(nextConfig);
      await saveConfig(nextConfig);
      log("Config saved:", nextConfig);
      alert("Config saved.");
      return nextConfig;
    }

    // One delegated listener serves every button through its data-action.
    panel.addEventListener("click", async (event) => {
      const action = event.target?.dataset?.action;
      if (!action) return;

      try {
        if (action === "close") {
          closePanel();
          return;
        }

        if (action === "save") {
          await saveFromPanel();
          return;
        }

        if (action === "save-and-start") {
          await saveFromPanel();
          closePanel();
          await startScan();
          return;
        }

        if (action === "reset-config") {
          const confirmed = confirm("Reset the config to the default values?");
          if (!confirmed) return;

          await resetConfig();
          closePanel();
          // Re-open so the fields show the default values.
          await showConfigPanel();
          return;
        }

        if (action === "emergency-stop") {
          await emergencyStopScan();
          return;
        }
      } catch (error) {
        log(error);
        alert(error.message);
      }
    });
  }

  // Userscript-manager menu entries, registered in the listed order.
  const menuCommands = [
    ["Show config panel", showConfigPanel],
    ["Start link content scan", startScan],
    ["Show scan results", showResults],
    ["Reset scan state", resetScanState],
    ["Emergency stop scan", emergencyStopScan],
  ];

  for (const [caption, handler] of menuCommands) {
    GM.registerMenuCommand(caption, handler);
  }

  /**
   * Auto-resume guard.
   *
   * This script never starts a new scan automatically on page load.
   * Completed states are kept for result display.
   * Only active states are eligible for auto-resume.
   */
  const state = await loadState();

  if (!state) {
    log("Idle. Use the Tampermonkey menu: Show config panel or Start link content scan.");
    return;
  }

  if (!isValidStoredState(state)) {
    log("Invalid scan state found. Clearing state:", state);
    await clearState();
    return;
  }

  if (state.active === false) {
    log("Completed scan result exists. Not auto-resuming.");
    return;
  }

  if (!isActiveScanState(state)) {
    log("No active scan state. Doing nothing.");
    return;
  }

  if (!isUrlInScanScope(location.href, state)) {
    log(
      "Active scan state exists, but the current URL is outside the scan scope. Doing nothing.",
      location.href
    );
    return;
  }

  const config = state.configSnapshot;

  let indexUrlRegExp;

  try {
    indexUrlRegExp = compileRegExp(
      config.indexUrlPattern,
      "INDEX_URL_PATTERN"
    );
  } catch (error) {
    log("Invalid config snapshot. Clearing scan state:", error);
    await clearState();
    alert(
      [
        "The saved scan state's config is invalid, so the state was cleared.",
        "",
        error.message,
      ].join("\n")
    );
    return;
  }

  if (indexUrlRegExp.test(location.href)) {
    if (state.currentIndex < state.queue.length) {
      const nextUrl = state.queue[state.currentIndex];

      if (typeof nextUrl !== "string" || !nextUrl) {
        log("Invalid next URL. Clearing scan state:", nextUrl);
        await clearState();
        return;
      }

      log("Resuming from index page. Next URL:", nextUrl);
      location.href = nextUrl;
    } else {
      log("Active state is complete but not marked as finished. Marking it as completed.");

      state.active = false;
      state.currentIndex = state.queue.length;
      state.finishedAt = new Date().toISOString();

      await saveState(state);
    }

    return;
  }

  await continueScanOnTargetPage(state);
})();