// Collect links from an index page, visit them sequentially, and save URLs whose page text matches a target string.
// ==UserScript==
// @name Sequential Link Content Finder
// @namespace https://example.com/
// @version 1.0.0
// @description Collect links from an index page, visit them sequentially, and save URLs whose page text matches a target string.
// @match https://example.com/*
// @grant GM.getValue
// @grant GM.setValue
// @grant GM.deleteValue
// @grant GM.registerMenuCommand
// @run-at document-idle
// ==/UserScript==
(async function () {
"use strict";
// GM storage key under which the scan progress/result object is persisted.
const STATE_STORAGE_KEY = "sequential-link-content-finder-state-v1";

// GM storage key under which the user's saved config is persisted.
const CONFIG_STORAGE_KEY = "sequential-link-content-finder-config-v1";

// Baseline settings; loadConfig() layers any saved values on top of these.
const DEFAULT_CONFIG = {
  // RegExp source the current URL must match to be accepted as the index page.
  indexUrlPattern: "^https://example\\.com/index",

  // RegExp source an absolute link URL must match to be queued.
  linkHrefPattern: "/target-pages/",

  // Plain substring searched for in each visited page's text.
  targetText: "Text to search for",

  // Delay (ms) applied on each visited page before its text is read.
  waitAfterLoadMs: 2000,

  // Upper bound on how many unique links are queued for one scan.
  maxUrls: 500,
};
/**
 * Writes a console message tagged with this script's prefix.
 *
 * @param {...*} args - Values forwarded to console.log after the prefix.
 */
function log(...args) {
  const taggedArgs = ["[LinkContentFinder]", ...args];
  console.log(...taggedArgs);
}
/**
 * Returns a promise that settles once a timer of the given length fires.
 *
 * @param {number} ms - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Reads the saved config from GM storage and merges it over the defaults.
 *
 * @returns {Promise<object>} A new object: DEFAULT_CONFIG with any stored
 *   properties overriding the default ones.
 */
async function loadConfig() {
  // A missing (or otherwise falsy) stored value contributes no overrides.
  const storedOverrides = (await GM.getValue(CONFIG_STORAGE_KEY)) || {};
  return { ...DEFAULT_CONFIG, ...storedOverrides };
}
/**
 * Persists the given config object in GM storage.
 *
 * @param {object} config - Config to store under CONFIG_STORAGE_KEY.
 * @returns {Promise<void>}
 */
async function saveConfig(config) {
  const storageKey = CONFIG_STORAGE_KEY;
  await GM.setValue(storageKey, config);
}
/**
 * Deletes the saved config from GM storage, so the next loadConfig() call
 * yields only the defaults.
 *
 * @returns {Promise<void>}
 */
async function resetConfig() {
  const storageKey = CONFIG_STORAGE_KEY;
  await GM.deleteValue(storageKey);
}
/**
 * Reads the persisted scan state from GM storage.
 *
 * @returns {Promise<*>} The stored value, or null when nothing truthy is stored.
 */
async function loadState() {
  const stored = await GM.getValue(STATE_STORAGE_KEY);
  return stored ? stored : null;
}
/**
 * Persists the given scan state object in GM storage.
 *
 * @param {object} state - Scan state to store under STATE_STORAGE_KEY.
 * @returns {Promise<void>}
 */
async function saveState(state) {
  const storageKey = STATE_STORAGE_KEY;
  await GM.setValue(storageKey, state);
}
/**
 * Deletes the persisted scan state from GM storage.
 *
 * @returns {Promise<void>}
 */
async function clearState() {
  const storageKey = STATE_STORAGE_KEY;
  await GM.deleteValue(storageKey);
}
/**
 * Resolves an href (absolute or relative) to an absolute URL string.
 *
 * Relative values are resolved against the document's base URL so that a
 * <base href> element is honored, matching how the browser resolves link
 * targets. If no base URL is available, the page address is used instead.
 *
 * @param {string} href - Raw href value, e.g. read from an anchor attribute.
 * @returns {string|null} The absolute URL, or null when it cannot be parsed.
 */
function normalizeUrl(href) {
  try {
    const baseUrl = document.baseURI || location.href;
    return new URL(href, baseUrl).href;
  } catch {
    // Unparseable hrefs are reported as null so callers can filter them out.
    return null;
  }
}
/**
 * Builds a RegExp from pattern source text.
 *
 * @param {string} patternText - Regular expression source (no delimiters or flags).
 * @param {string} label - Setting name used to prefix the error message.
 * @returns {RegExp} The compiled expression.
 * @throws {Error} When the pattern cannot be compiled; the message names `label`.
 */
function compileRegExp(patternText, label) {
  let compiled;
  try {
    compiled = new RegExp(patternText);
  } catch (error) {
    const reason = error.message;
    throw new Error(`${label} is invalid RegExp: ${reason}`);
  }
  return compiled;
}
/**
 * Non-throwing structural check for a config object (used for stored snapshots).
 *
 * Requires the three text settings to be strings, waitAfterLoadMs to convert
 * to a finite number >= 0, maxUrls to convert to an integer >= 1, and both
 * pattern settings to compile. An empty targetText is accepted here.
 *
 * @param {*} config - Candidate config value.
 * @returns {boolean} true when every check passes.
 */
function isValidConfig(config) {
  if (!config || typeof config !== "object") return false;

  const textSettings = [
    config.indexUrlPattern,
    config.linkHrefPattern,
    config.targetText,
  ];
  if (!textSettings.every((value) => typeof value === "string")) return false;

  const waitMs = Number(config.waitAfterLoadMs);
  if (!Number.isFinite(waitMs) || waitMs < 0) return false;

  const urlLimit = Number(config.maxUrls);
  if (!Number.isInteger(urlLimit) || urlLimit < 1) return false;

  try {
    compileRegExp(config.indexUrlPattern, "INDEX_URL_PATTERN");
    compileRegExp(config.linkHrefPattern, "LINK_HREF_PATTERN");
  } catch {
    // A pattern that does not compile makes the config invalid.
    return false;
  }
  return true;
}
/**
 * Throwing validation used before a config is saved or a scan is started.
 *
 * Checks run in this order: index pattern, link pattern, target text,
 * wait time, URL limit. The first failure is thrown.
 *
 * @param {object} config - Config to validate.
 * @throws {Error} With a message naming the offending setting.
 */
function validateConfig(config) {
  const patternSettings = [
    [config.indexUrlPattern, "INDEX_URL_PATTERN"],
    [config.linkHrefPattern, "LINK_HREF_PATTERN"],
  ];
  for (const [patternText, label] of patternSettings) {
    compileRegExp(patternText, label);
  }

  if (!config.targetText) {
    throw new Error("TARGET_TEXT is empty.");
  }

  const waitMs = Number(config.waitAfterLoadMs);
  const isWaitUsable = Number.isFinite(waitMs) && waitMs >= 0;
  if (!isWaitUsable) {
    throw new Error("WAIT_AFTER_LOAD_MS must be 0 or greater.");
  }

  const urlLimit = Number(config.maxUrls);
  const isLimitUsable = Number.isInteger(urlLimit) && urlLimit >= 1;
  if (!isLimitUsable) {
    throw new Error("MAX_URLS must be an integer greater than 0.");
  }
}
/**
 * Structural check for a scan state read back from storage. Accepts both
 * running and finished scans:
 *
 * - active: true  -> the scan is in progress and may be auto-resumed.
 * - active: false -> the scan has finished and is kept for result display;
 *   it must then carry a finishedAt string and a currentIndex equal to the
 *   queue length.
 *
 * @param {*} state - Candidate state value.
 * @returns {boolean} true when every field has the expected shape.
 */
function isValidStoredState(state) {
  const isNonEmptyString = (value) => typeof value === "string" && value !== "";

  if (!state || typeof state !== "object") return false;
  if (typeof state.active !== "boolean") return false;
  if (!isNonEmptyString(state.indexUrl)) return false;

  const { queue, currentIndex, matchedUrls } = state;

  // The queue must hold at least one URL, all of them non-empty strings.
  if (!Array.isArray(queue) || queue.length === 0) return false;
  if (!queue.every(isNonEmptyString)) return false;

  // currentIndex may equal queue.length (everything processed) but not exceed it.
  if (!Number.isInteger(currentIndex)) return false;
  if (currentIndex < 0 || currentIndex > queue.length) return false;

  if (!Array.isArray(matchedUrls) || !matchedUrls.every(isNonEmptyString)) {
    return false;
  }

  if (!isValidConfig(state.configSnapshot)) return false;
  if (typeof state.startedAt !== "string") return false;

  if (state.active === false) {
    const isFullyProcessed = currentIndex === queue.length;
    if (typeof state.finishedAt !== "string" || !isFullyProcessed) return false;
  }
  return true;
}
/**
 * Tells whether a value is a well-formed scan state that is still running.
 *
 * @param {*} state - Candidate state value.
 * @returns {boolean} true only for a valid state whose `active` flag is true.
 */
function isActiveScanState(state) {
  if (!isValidStoredState(state)) {
    return false;
  }
  return state.active === true;
}
/**
 * Tells whether a URL belongs to the given scan: it is either the scan's
 * index URL or one of the queued URLs (exact string comparison).
 *
 * @param {string} url - URL to look up.
 * @param {object} state - Scan state providing `indexUrl` and `queue`.
 * @returns {boolean} false when the state is missing or has no queue array.
 */
function isUrlInScanScope(url, state) {
  const queuedUrls = state ? state.queue : undefined;
  if (!Array.isArray(queuedUrls)) {
    return false;
  }
  if (url === state.indexUrl) {
    return true;
  }
  return queuedUrls.includes(url);
}
/**
 * Gathers the scan queue from the current document's anchors.
 *
 * Each `a[href]` attribute is made absolute via normalizeUrl(); unparseable
 * values are skipped, URLs not matching config.linkHrefPattern are dropped,
 * duplicates are removed (first occurrence wins) and the result is cut to
 * config.maxUrls entries.
 *
 * @param {object} config - Provides `linkHrefPattern` and `maxUrls`.
 * @returns {string[]} Unique absolute URLs in document order.
 * @throws {Error} When config.linkHrefPattern does not compile.
 */
function collectLinksFromIndexPage(config) {
  const hrefMatcher = compileRegExp(config.linkHrefPattern, "LINK_HREF_PATTERN");
  const uniqueUrls = new Set();

  for (const anchor of document.querySelectorAll("a[href]")) {
    const absoluteUrl = normalizeUrl(anchor.getAttribute("href"));
    if (absoluteUrl && hrefMatcher.test(absoluteUrl)) {
      uniqueUrls.add(absoluteUrl);
    }
  }

  const urlLimit = Number(config.maxUrls);
  return Array.from(uniqueUrls).slice(0, urlLimit);
}
/**
 * Asks the user whether a previously saved, valid scan state may be replaced
 * by a new scan. The dialog text depends on whether that state is still
 * running or already finished.
 *
 * @param {object} existingState - A state that passed isValidStoredState().
 * @returns {boolean} true when the user agreed to discard it.
 */
function confirmDiscardExistingState(existingState) {
  const isRunning = existingState.active === true;
  const messageLines = isRunning
    ? [
        "An active scan state already exists.",
        "",
        `Total URLs: ${existingState.queue.length}`,
        `Processed URLs: ${existingState.currentIndex}`,
        "",
        "Discard the existing state and start a new scan?",
      ]
    : [
        "A previous scan result is saved.",
        "",
        `Total URLs: ${existingState.queue.length}`,
        `Matched URLs: ${existingState.matchedUrls.length}`,
        "",
        "Discard the previous result and start a new scan?",
      ];

  if (confirm(messageLines.join("\n"))) {
    return true;
  }
  log(
    isRunning
      ? "Start canceled because an active scan state exists."
      : "Start canceled because a completed scan result exists."
  );
  return false;
}

/**
 * Starts a new scan from the current page.
 *
 * Steps: validate the saved config, check that the current URL matches the
 * index pattern, collect the matching links, and only then ask the user
 * whether an existing saved state may be replaced. Finally the new state is
 * saved and the browser navigates to the first queued URL.
 *
 * An existing valid state is left untouched unless a new scan really starts:
 * if the config is invalid, the page is not the index page, no links match,
 * or the user declines, the previous progress/result is still in storage.
 * An existing state that fails validation is cleared right away.
 *
 * @returns {Promise<void>}
 */
async function startScan() {
  const existingState = await loadState();
  let replaceableState = null;
  if (existingState) {
    if (isValidStoredState(existingState)) {
      replaceableState = existingState;
    } else {
      // Unusable data carries no results worth protecting.
      log("Invalid existing scan state found. Clearing it:", existingState);
      await clearState();
    }
  }

  const config = await loadConfig();
  try {
    validateConfig(config);
  } catch (error) {
    alert(error.message);
    log(error);
    return;
  }

  const indexUrlRegExp = compileRegExp(
    config.indexUrlPattern,
    "INDEX_URL_PATTERN"
  );
  if (!indexUrlRegExp.test(location.href)) {
    log("This page is not recognized as the index page:", location.href);
    alert(
      [
        "This page is not recognized as the index page.",
        "",
        `Current URL: ${location.href}`,
        `INDEX_URL_PATTERN: ${config.indexUrlPattern}`,
      ].join("\n")
    );
    return;
  }

  let urls;
  try {
    urls = collectLinksFromIndexPage(config);
  } catch (error) {
    alert(error.message);
    log(error);
    return;
  }
  if (urls.length === 0) {
    log("No matching links found.");
    alert(
      [
        "No matching links were found.",
        "",
        `LINK_HREF_PATTERN: ${config.linkHrefPattern}`,
      ].join("\n")
    );
    return;
  }

  // Ask only now, when the new scan is known to be startable, so that a
  // failed start can never cost the user the previous progress or result.
  if (replaceableState && !confirmDiscardExistingState(replaceableState)) {
    return;
  }

  const state = {
    active: true,
    indexUrl: location.href,
    queue: urls,
    currentIndex: 0,
    matchedUrls: [],
    configSnapshot: config,
    startedAt: new Date().toISOString(),
    finishedAt: null,
  };
  // Saving under the same key replaces whatever state was stored before.
  await saveState(state);

  log("Scan started.");
  log("Config:", config);
  log("Collected URLs:", urls.length);
  log("First URL:", urls[0]);
  location.href = urls[0];
}
/**
 * Processes the page currently loaded as one step of a running scan.
 *
 * Waits configSnapshot.waitAfterLoadMs, checks whether the page text contains
 * configSnapshot.targetText, records the current URL on a match, advances
 * currentIndex, saves the state, and navigates to the next queued URL. After
 * the last URL the state is marked completed, a summary is shown, and the
 * browser returns to the index URL.
 *
 * The stored state is re-read after the wait. If it was cleared or replaced
 * in the meantime (for example by "Reset scan state" or "Emergency stop scan"),
 * the function stops without saving or navigating; otherwise the stale
 * in-memory copy would be written back and the stopped scan would continue.
 *
 * @param {object} state - Scan state loaded for this page; it is mutated.
 * @returns {Promise<void>}
 */
async function continueScanOnTargetPage(state) {
  if (!isActiveScanState(state)) {
    log("Invalid active scan state before processing target page. Clearing state:", state);
    await clearState();
    return;
  }

  const config = state.configSnapshot;
  const currentUrl = location.href;
  const expectedUrl = state.queue[state.currentIndex];
  log(`Processing ${state.currentIndex + 1}/${state.queue.length}`);
  log("Current URL:", currentUrl);
  log("Expected URL:", expectedUrl);

  await sleep(Number(config.waitAfterLoadMs));

  // The user may have stopped or restarted the scan during the wait; the
  // storage content, not the in-memory copy, decides whether to carry on.
  const storedState = await loadState();
  const isSameScanStep =
    isActiveScanState(storedState) &&
    storedState.startedAt === state.startedAt &&
    storedState.currentIndex === state.currentIndex;
  if (!isSameScanStep) {
    log("Scan state was cleared or replaced while waiting. Stopping without navigating.");
    return;
  }

  const pageText = document.body ? document.body.innerText : "";
  const matched = pageText.includes(config.targetText);
  if (matched) {
    if (!state.matchedUrls.includes(currentUrl)) {
      state.matchedUrls.push(currentUrl);
    }
    log("Matched:", currentUrl);
  } else {
    log("Not matched:", currentUrl);
  }

  state.currentIndex += 1;
  if (state.currentIndex >= state.queue.length) {
    // Last URL handled: keep the state as a completed result.
    state.active = false;
    state.currentIndex = state.queue.length;
    state.finishedAt = new Date().toISOString();
    await saveState(state);
    log("Scan finished.");
    log("Matched URLs:", state.matchedUrls);
    console.table(state.matchedUrls);
    alert(
      [
        "Scan completed.",
        `Total URLs: ${state.queue.length}`,
        `Matched URLs: ${state.matchedUrls.length}`,
        "",
        "The result has been saved to Tampermonkey storage.",
        "Use the Tampermonkey menu item “Show scan results” to view it.",
      ].join("\n")
    );
    location.href = state.indexUrl;
    return;
  }

  // Save progress before leaving the page so the next page load resumes here.
  await saveState(state);
  const nextUrl = state.queue[state.currentIndex];
  if (typeof nextUrl !== "string" || !nextUrl) {
    log("Invalid next URL. Clearing scan state:", nextUrl);
    await clearState();
    return;
  }
  log("Next URL:", nextUrl);
  location.href = nextUrl;
}
/**
 * Shows a summary of the saved scan (running or completed) in an alert and
 * prints the matched URLs with console.table.
 *
 * Reports separately when no state is saved or when the saved state fails
 * validation; in neither case is the stored data modified.
 *
 * @returns {Promise<void>}
 */
async function showResults() {
  const savedState = await loadState();

  if (!savedState) {
    log("No saved state.");
    alert("No saved state was found.");
    return;
  }

  if (!isValidStoredState(savedState)) {
    log("Invalid saved state found:", savedState);
    alert("The saved state is invalid. Run “Reset scan state” if necessary.");
    return;
  }

  console.table(savedState.matchedUrls || []);

  const statusLabel = savedState.active ? "Running" : "Completed";
  const summaryLines = [
    `Status: ${statusLabel}`,
    `Total URLs: ${savedState.queue.length}`,
    `Processed URLs: ${savedState.currentIndex}`,
    `Matched URLs: ${savedState.matchedUrls.length}`,
    "",
    "See console.table output for details.",
  ];
  alert(summaryLines.join("\n"));
}
/**
 * Menu action: deletes the saved scan state and confirms it to the user.
 *
 * @returns {Promise<void>}
 */
async function resetScanState() {
  const userNotice = "The scan state has been cleared.";
  await clearState();
  log("State cleared.");
  alert(userNotice);
}
/**
 * Menu/panel action: deletes the saved scan state and tells the user that a
 * page reload may be needed.
 *
 * @returns {Promise<void>}
 */
async function emergencyStopScan() {
  const userNotice = "The scan state has been cleared. Reload the page if necessary.";
  await clearState();
  log("Emergency stop: scan state cleared.");
  alert(userNotice);
}
/**
 * Toggles the floating config panel.
 *
 * If the panel is already present it is removed; otherwise it is built,
 * filled with the currently saved config, and wired to its buttons
 * (close, save, save and start, reset config, emergency stop).
 *
 * The panel's stylesheet carries its own id and is removed on every close
 * path, including the toggle-off path, so repeated open/close cycles do not
 * leave extra <style> elements in the page.
 *
 * @returns {Promise<void>}
 */
async function showConfigPanel() {
  const PANEL_ID = "lc-finder-config-panel";
  const STYLE_ID = "lc-finder-config-style";
  const FIELD_NAMES = [
    "indexUrlPattern",
    "linkHrefPattern",
    "targetText",
    "waitAfterLoadMs",
    "maxUrls",
  ];

  const existing = document.getElementById(PANEL_ID);
  if (existing) {
    // Toggle off: remove the stylesheet together with the panel.
    existing.remove();
    document.getElementById(STYLE_ID)?.remove();
    return;
  }

  const config = await loadConfig();

  const panel = document.createElement("div");
  panel.id = PANEL_ID;
  panel.innerHTML = `
<div class="lc-finder-header">
<strong>Link Content Finder</strong>
<button type="button" data-action="close">×</button>
</div>
<label>
INDEX_URL_PATTERN
<input type="text" data-field="indexUrlPattern">
</label>
<label>
LINK_HREF_PATTERN
<input type="text" data-field="linkHrefPattern">
</label>
<label>
TARGET_TEXT
<textarea data-field="targetText" rows="4"></textarea>
</label>
<label>
WAIT_AFTER_LOAD_MS
<input type="number" data-field="waitAfterLoadMs" min="0" step="100">
</label>
<label>
MAX_URLS
<input type="number" data-field="maxUrls" min="1" step="1">
</label>
<div class="lc-finder-actions">
<button type="button" data-action="save">Save</button>
<button type="button" data-action="save-and-start">Save & Start</button>
<button type="button" data-action="reset-config">Reset Config</button>
<button type="button" data-action="emergency-stop">Emergency Stop</button>
</div>
<div class="lc-finder-help">
<p>
Enter the body of the regular expression, not a JavaScript regex literal.
</p>
<p>
Example: <code>^https://example\\.com/index</code>
</p>
<p>
Example: <code>/articles/\\d+</code>
</p>
</div>
`;

  const style = document.createElement("style");
  style.id = STYLE_ID;
  style.textContent = `
#lc-finder-config-panel {
position: fixed;
right: 16px;
bottom: 16px;
z-index: 999999;
width: 420px;
max-width: calc(100vw - 32px);
padding: 16px;
box-sizing: border-box;
background: #fff;
color: #222;
border: 1px solid #ccc;
border-radius: 8px;
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2);
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
font-size: 13px;
line-height: 1.4;
}
#lc-finder-config-panel .lc-finder-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 12px;
}
#lc-finder-config-panel .lc-finder-header button {
font-size: 18px;
line-height: 1;
}
#lc-finder-config-panel label {
display: block;
margin: 10px 0;
font-weight: 600;
}
#lc-finder-config-panel input,
#lc-finder-config-panel textarea {
display: block;
width: 100%;
box-sizing: border-box;
margin-top: 4px;
padding: 6px 8px;
border: 1px solid #aaa;
border-radius: 4px;
font: 13px ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
#lc-finder-config-panel textarea {
resize: vertical;
}
#lc-finder-config-panel .lc-finder-actions {
display: flex;
gap: 8px;
flex-wrap: wrap;
margin-top: 12px;
}
#lc-finder-config-panel button {
cursor: pointer;
padding: 6px 10px;
border: 1px solid #999;
border-radius: 4px;
background: #f7f7f7;
color: #222;
}
#lc-finder-config-panel button:hover {
background: #eee;
}
#lc-finder-config-panel .lc-finder-help {
margin-top: 12px;
color: #555;
font-size: 12px;
}
#lc-finder-config-panel .lc-finder-help p {
margin: 4px 0;
}
#lc-finder-config-panel code {
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
`;
  document.documentElement.appendChild(style);
  document.body.appendChild(panel);

  // Looks up the form control bound to one config property.
  const fieldElement = (name) => panel.querySelector(`[data-field="${name}"]`);

  // Removes this panel instance and its stylesheet.
  const closePanel = () => {
    panel.remove();
    style.remove();
  };

  for (const name of FIELD_NAMES) {
    fieldElement(name).value = config[name];
  }

  // Builds a config object from the current form values. Patterns are
  // trimmed; the target text is taken verbatim; numeric fields are converted.
  function readConfigFromPanel() {
    return {
      indexUrlPattern: fieldElement("indexUrlPattern").value.trim(),
      linkHrefPattern: fieldElement("linkHrefPattern").value.trim(),
      targetText: fieldElement("targetText").value,
      waitAfterLoadMs: Number(fieldElement("waitAfterLoadMs").value),
      maxUrls: Number(fieldElement("maxUrls").value),
    };
  }

  // Validates and persists the form values; throws when validation fails.
  async function saveFromPanel() {
    const nextConfig = readConfigFromPanel();
    validateConfig(nextConfig);
    await saveConfig(nextConfig);
    log("Config saved:", nextConfig);
    alert("Config saved.");
    return nextConfig;
  }

  panel.addEventListener("click", async (event) => {
    const action = event.target?.dataset?.action;
    if (!action) return;
    try {
      if (action === "close") {
        closePanel();
        return;
      }
      if (action === "save") {
        await saveFromPanel();
        return;
      }
      if (action === "save-and-start") {
        await saveFromPanel();
        closePanel();
        await startScan();
        return;
      }
      if (action === "reset-config") {
        const confirmed = confirm("Reset the config to the default values?");
        if (!confirmed) return;
        await resetConfig();
        // Rebuild the panel so the form shows the default values again.
        closePanel();
        await showConfigPanel();
        return;
      }
      if (action === "emergency-stop") {
        await emergencyStopScan();
        return;
      }
    } catch (error) {
      // Validation and storage errors are surfaced instead of being lost.
      log(error);
      alert(error.message);
    }
  });
}
// Expose every user-triggered action through the userscript manager's menu,
// in the order listed here.
const menuCommands = [
  ["Show config panel", showConfigPanel],
  ["Start link content scan", startScan],
  ["Show scan results", showResults],
  ["Reset scan state", resetScanState],
  ["Emergency stop scan", emergencyStopScan],
];
for (const [caption, handler] of menuCommands) {
  GM.registerMenuCommand(caption, handler);
}
/**
* Auto-resume guard.
*
* This script never starts a new scan automatically on page load.
* Completed states are kept for result display.
* Only active states are eligible for auto-resume.
*/
const state = await loadState();
if (!state) {
log("Idle. Use the Tampermonkey menu: Show config panel or Start link content scan.");
return;
}
if (!isValidStoredState(state)) {
log("Invalid scan state found. Clearing state:", state);
await clearState();
return;
}
if (state.active === false) {
log("Completed scan result exists. Not auto-resuming.");
return;
}
if (!isActiveScanState(state)) {
log("No active scan state. Doing nothing.");
return;
}
if (!isUrlInScanScope(location.href, state)) {
log(
"Active scan state exists, but the current URL is outside the scan scope. Doing nothing.",
location.href
);
return;
}
const config = state.configSnapshot;
let indexUrlRegExp;
try {
indexUrlRegExp = compileRegExp(
config.indexUrlPattern,
"INDEX_URL_PATTERN"
);
} catch (error) {
log("Invalid config snapshot. Clearing scan state:", error);
await clearState();
alert(
[
"The saved scan state's config is invalid, so the state was cleared.",
"",
error.message,
].join("\n")
);
return;
}
if (indexUrlRegExp.test(location.href)) {
if (state.currentIndex < state.queue.length) {
const nextUrl = state.queue[state.currentIndex];
if (typeof nextUrl !== "string" || !nextUrl) {
log("Invalid next URL. Clearing scan state:", nextUrl);
await clearState();
return;
}
log("Resuming from index page. Next URL:", nextUrl);
location.href = nextUrl;
} else {
log("Active state is complete but not marked as finished. Marking it as completed.");
state.active = false;
state.currentIndex = state.queue.length;
state.finishedAt = new Date().toISOString();
await saveState(state);
}
return;
}
await continueScanOnTargetPage(state);
})();