// Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package.
// ==UserScript==
// @name CSV Hyperlink HTML Exporter
// @namespace http://tampermonkey.net/
// @version 1.0
// @description Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package
// @author You
// @match *://*/*
// @grant GM_xmlhttpRequest
// @grant GM_registerMenuCommand
// @license MIT
// @connect *
// ==/UserScript==
(function() {
'use strict';
// Register an entry in the userscript manager's menu; selecting it calls openPanel,
// which opens the exporter panel or closes it when it is already open.
GM_registerMenuCommand('\u{1F4C4} CSV HTML Exporter', openPanel);
// Reference to the currently displayed panel element; null while no panel is open.
let panel = null;
/**
 * Toggle the exporter panel.
 *
 * When a panel is already open it is removed and the function returns.
 * Otherwise the panel is built (file picker, "full HTML" checkbox, delay
 * input, start button, progress line and log area), appended to
 * document.body, made draggable, and wired up so that:
 *   - choosing a CSV file parses it with parseCSV and enables the start
 *     button when at least one URL was found;
 *   - clicking the start button calls startExport with the parsed URLs,
 *     the checkbox state and the configured delay.
 */
function openPanel() {
  // Selecting the menu command a second time closes the open panel.
  if (panel) {
    panel.remove();
    panel = null;
    return;
  }

  // Create an element and apply its inline CSS in one step.
  function createStyled(tag, css) {
    const node = document.createElement(tag);
    if (css) {
      node.style.cssText = css;
    }
    return node;
  }

  panel = createStyled('div', 'position:fixed;top:50px;right:50px;width:500px;max-height:80vh;overflow-y:auto;background:#fff;border:2px solid #333;border-radius:8px;padding:20px;z-index:999999;font-family:Arial,sans-serif;box-shadow:0 4px 20px rgba(0,0,0,0.3);cursor:move;');
  panel.id = 'csv-html-exporter-panel';

  // Header row: title on the left, close button on the right.
  const header = createStyled('div', 'display:flex;justify-content:space-between;align-items:center;margin-bottom:15px;');
  const title = createStyled('h3', 'margin:0;font-size:16px;');
  title.textContent = '\u{1F4C4} CSV Hyperlink HTML Exporter';
  const closeBtn = createStyled('button', 'background:none;border:none;font-size:20px;cursor:pointer;');
  closeBtn.textContent = '\u2715';
  closeBtn.addEventListener('click', function() {
    panel.remove();
    panel = null;
  });
  header.appendChild(title);
  header.appendChild(closeBtn);
  panel.appendChild(header);

  // File input section
  const fileSection = createStyled('div', 'margin-bottom:15px;');
  const fileLabel = createStyled('label', 'font-size:13px;font-weight:bold;display:block;margin-bottom:5px;');
  fileLabel.textContent = 'Upload CSV (one URL per row, or column header "url" / "link" / "hyperlink"):';
  const fileInput = createStyled('input', 'font-size:13px;');
  fileInput.type = 'file';
  fileInput.accept = '.csv';
  fileInput.id = 'csv-file-input';
  fileSection.appendChild(fileLabel);
  fileSection.appendChild(fileInput);
  panel.appendChild(fileSection);

  // Full HTML checkbox
  const checkSection = createStyled('div', 'margin-bottom:15px;');
  const checkLabel = createStyled('label', 'font-size:13px;display:flex;align-items:center;gap:8px;');
  const checkbox = document.createElement('input');
  checkbox.type = 'checkbox';
  checkbox.id = 'csv-exporter-full-html';
  checkbox.checked = true;
  checkLabel.appendChild(checkbox);
  checkLabel.appendChild(document.createTextNode('Export full HTML (uncheck for body only)'));
  checkSection.appendChild(checkLabel);
  panel.appendChild(checkSection);

  // Delay input
  const delaySection = createStyled('div', 'margin-bottom:15px;');
  const delayLabel = createStyled('label', 'font-size:13px;display:flex;align-items:center;gap:8px;');
  const delayInput = createStyled('input', 'width:80px;');
  delayInput.type = 'number';
  delayInput.id = 'csv-exporter-delay';
  delayInput.value = '1000';
  delayInput.min = '0';
  delayInput.step = '500';
  delayLabel.appendChild(delayInput);
  delayLabel.appendChild(document.createTextNode('Delay between requests (ms)'));
  delaySection.appendChild(delayLabel);
  panel.appendChild(delaySection);

  // Start button (disabled until a CSV with at least one URL is loaded)
  const startBtn = createStyled('button', 'background:#0073bb;color:#fff;border:none;padding:10px 20px;border-radius:4px;cursor:pointer;font-size:14px;width:100%;');
  startBtn.id = 'csv-exporter-start';
  startBtn.disabled = true;
  startBtn.textContent = 'Start Export';
  panel.appendChild(startBtn);

  // Progress line
  const progress = createStyled('div', 'margin-top:15px;font-size:12px;color:#555;');
  progress.id = 'csv-exporter-progress';
  panel.appendChild(progress);

  // Log area (hidden until an export starts)
  const log = createStyled('div', 'margin-top:10px;max-height:200px;overflow-y:auto;font-size:11px;font-family:monospace;background:#f5f5f5;padding:8px;border-radius:4px;display:none;');
  log.id = 'csv-exporter-log';
  panel.appendChild(log);

  document.body.appendChild(panel);
  makeDraggable(panel);

  // URLs parsed from the most recently loaded CSV file.
  let urls = [];
  fileInput.addEventListener('change', function(e) {
    const file = e.target.files[0];
    if (!file) return;
    const reader = new FileReader();
    reader.onload = function(evt) {
      urls = parseCSV(evt.target.result);
      if (urls.length > 0) {
        startBtn.disabled = false;
        startBtn.textContent = 'Start Export (' + urls.length + ' URLs found)';
      } else {
        startBtn.disabled = true;
        startBtn.textContent = 'No valid URLs found in CSV';
      }
    };
    reader.readAsText(file);
  });

  startBtn.addEventListener('click', function() {
    const fullHtml = checkbox.checked;
    // Only fall back to 1000 ms when the field is not a number: an explicit 0
    // (allowed by the input's min) must be honoured. Negative values are
    // clamped to 0, which is how setTimeout would treat them anyway.
    const parsedDelay = Number.parseInt(delayInput.value, 10);
    const delay = Number.isNaN(parsedDelay) ? 1000 : Math.max(0, parsedDelay);
    startExport(urls, fullHtml, delay, startBtn, progress, log);
  });
}
/**
 * Extract http(s) URLs from CSV text.
 *
 * Blank lines are ignored and lines may end in CRLF, LF or a lone CR.
 * Three layouts are handled:
 *   1. The first cell of the first row is already a URL: there is no header,
 *      and the first URL-looking cell of every row is taken.
 *   2. The first row contains a recognised header name (url, link, hyperlink,
 *      href, address, uri; case-insensitive): that column is read from every
 *      following row.
 *   3. Neither applies: every row, including the first, is scanned for its
 *      first URL-looking cell, so a header-less file such as
 *      "Name,https://example.com" is not silently discarded.
 *
 * @param {string} text - Raw CSV file contents.
 * @returns {string[]} URLs in file order (at most one per row).
 */
function parseCSV(text) {
  const urlPattern = /^https?:\/\//i;
  const urlHeaders = ['url', 'link', 'hyperlink', 'href', 'address', 'uri'];

  const rows = String(text == null ? '' : text)
    .split(/\r\n|\r|\n/)
    .filter(function(line) { return line.trim(); })
    .map(function(line) {
      // Use the quote-aware splitter for every row, header included.
      return parseCSVLine(line).map(function(cell) { return cell.trim(); });
    });
  if (rows.length === 0) return [];

  const firstRow = rows[0];
  const firstCellIsUrl = urlPattern.test(firstRow[0]);
  const urlColIndex = firstCellIsUrl
    ? -1
    : firstRow.findIndex(function(cell) {
      return urlHeaders.indexOf(cell.toLowerCase()) !== -1;
    });

  if (urlColIndex !== -1) {
    // Recognised header: read only that column from the data rows.
    return rows.slice(1)
      .map(function(row) { return row[urlColIndex] || ''; })
      .filter(function(value) { return urlPattern.test(value); });
  }

  // No recognised header: take the first URL-looking cell of each row.
  // A header row with unknown names simply contributes nothing.
  const urls = [];
  rows.forEach(function(row) {
    const hit = row.find(function(cell) { return urlPattern.test(cell); });
    if (hit !== undefined) {
      urls.push(hit);
    }
  });
  return urls;
}
/**
 * Split a single CSV record into its fields.
 *
 * Commas inside double-quoted sections do not split the field, the quoting
 * characters themselves are not part of the result, and a doubled quote
 * ("") inside a quoted section yields one literal quote character
 * (the RFC 4180 escape). Fields are returned untrimmed.
 *
 * @param {string} line - One line of CSV text, without its line terminator.
 * @returns {string[]} The fields of the line; always at least one element.
 */
function parseCSVLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: keep one quote, skip its twin.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (ch === ',' && !inQuotes) {
      fields.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  fields.push(current);
  return fields;
}
/**
 * Fetch every URL sequentially with GM_xmlhttpRequest and export the results.
 *
 * Requests are issued one at a time; after each one settles (load, error or
 * timeout) the next is scheduled `delay` ms later. Every URL produces one
 * result entry `{ url, status, html, filename }` where `status` is the HTTP
 * status of the response, or the string 'ERROR' / 'TIMEOUT' when no response
 * was received (in which case `html` is ''). When all URLs are processed the
 * results are handed to exportResults and the UI is updated with the number
 * of pages that were actually fetched.
 *
 * @param {string[]} urls - URLs to fetch, in order.
 * @param {boolean} fullHtml - true keeps the whole document; false keeps only
 *   the contents of the <body> element when one can be matched.
 * @param {number} delay - Pause between requests in milliseconds.
 * @param {HTMLButtonElement} startBtn - Button disabled while exporting.
 * @param {HTMLElement} progress - Element receiving progress text.
 * @param {HTMLElement} log - Element receiving one log line per event.
 */
function startExport(urls, fullHtml, delay, startBtn, progress, log) {
  log.style.display = 'block';
  log.innerHTML = '';
  startBtn.disabled = true;
  startBtn.textContent = 'Exporting...';

  const results = [];
  // Number of URLs for which a response was received. results.length cannot
  // be used for the summary: it also counts errors and timeouts and therefore
  // always equals urls.length at the end.
  let fetchedCount = 0;
  let index = 0;

  // Store the outcome for one URL, log it, and schedule the next request.
  function record(url, status, html, logMessage) {
    results.push({
      url: url,
      status: status,
      html: html,
      filename: urlToFilename(url)
    });
    addLog(log, logMessage);
    index++;
    setTimeout(fetchNext, delay);
  }

  function fetchNext() {
    if (index >= urls.length) {
      exportResults(results);
      startBtn.disabled = false;
      startBtn.textContent = 'Export Complete! (' + fetchedCount + ' pages)';
      progress.textContent = '\u2705 Done! ' + fetchedCount + '/' + urls.length + ' pages exported.';
      return;
    }
    const url = urls[index];
    progress.textContent = 'Fetching ' + (index + 1) + ' of ' + urls.length + '...';
    addLog(log, '\u23F3 Fetching: ' + url);
    GM_xmlhttpRequest({
      method: 'GET',
      url: url,
      timeout: 30000,
      onload: function(response) {
        // Guard against a missing body so a single odd response cannot throw
        // inside the callback and stall the whole queue.
        let html = typeof response.responseText === 'string' ? response.responseText : '';
        if (!fullHtml) {
          const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
          if (bodyMatch) {
            html = bodyMatch[1];
          }
        }
        fetchedCount++;
        record(url, response.status, html, '\u2705 ' + response.status + ' - ' + url);
      },
      onerror: function() {
        record(url, 'ERROR', '', '\u274C Error - ' + url);
      },
      ontimeout: function() {
        record(url, 'TIMEOUT', '', '\u23F0 Timeout - ' + url);
      }
    });
  }

  fetchNext();
}
/**
 * Derive a filesystem-friendly ".html" filename from a URL.
 *
 * The name is built from host, path and query string so that URLs differing
 * only in their query (e.g. "?id=1" vs "?id=2") do not collide. Every
 * character outside [a-zA-Z0-9_-] becomes "_", runs of "_" are collapsed,
 * and the stem is capped at 100 characters before ".html" is appended.
 *
 * @param {string} url - Absolute URL of the fetched page.
 * @returns {string} The generated filename; "page_<timestamp>.html" when the
 *   URL cannot be parsed.
 */
function urlToFilename(url) {
  try {
    const parsed = new URL(url);
    let name = (parsed.hostname + parsed.pathname + parsed.search)
      .replace(/[^a-zA-Z0-9_\-]/g, '_')
      .replace(/_+/g, '_');
    if (name.length > 100) {
      name = name.substring(0, 100);
    }
    return name + '.html';
  } catch (e) {
    // new URL() throws on unparsable input; fall back to a timestamped name.
    return 'page_' + Date.now() + '.html';
  }
}
/**
 * Download the collected results.
 *
 * Three kinds of download are triggered through downloadFile:
 *   1. "html_export_bundle.json" - every result (url, status, filename, html)
 *      as pretty-printed JSON;
 *   2. "export_summary.csv" - one quoted row per result with url, status and
 *      filename; embedded double quotes are doubled so the CSV stays valid;
 *   3. when there are at most 20 results, one ".html" file per result that
 *      has non-empty HTML, staggered 200 ms apart by result position.
 *
 * @param {Array<{url: string, status: (number|string), filename: string, html: string}>} results
 */
function exportResults(results) {
  const exportData = results.map(function(r) {
    return { url: r.url, status: r.status, filename: r.filename, html: r.html };
  });
  downloadFile('html_export_bundle.json', JSON.stringify(exportData, null, 2), 'application/json');

  // Quote a CSV field, escaping embedded quotes by doubling them.
  function quoteField(value) {
    return '"' + String(value).replace(/"/g, '""') + '"';
  }
  const csvRows = ['url,status,filename'].concat(results.map(function(r) {
    return [r.url, r.status, r.filename].map(quoteField).join(',');
  }));
  downloadFile('export_summary.csv', csvRows.join('\n'), 'text/csv');

  // Individual files are only offered for small exports; larger runs rely on
  // the JSON bundle.
  if (results.length <= 20) {
    results.forEach(function(r, i) {
      if (r.html) {
        setTimeout(function() {
          downloadFile(r.filename, r.html, 'text/html');
        }, i * 200);
      }
    });
  }
}
/**
 * Offer `content` to the user as a file download.
 *
 * The content is wrapped in a Blob of the given MIME type, exposed through an
 * object URL, and downloaded by clicking a temporary anchor element that is
 * attached to document.body. The anchor is detached and the object URL
 * revoked 100 ms after the click.
 *
 * @param {string} filename - Name suggested for the downloaded file.
 * @param {string} content - File contents.
 * @param {string} mimeType - MIME type assigned to the Blob.
 */
function downloadFile(filename, content, mimeType) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: mimeType }));
  const anchor = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: filename,
  });

  document.body.appendChild(anchor);
  anchor.click();

  const cleanUp = () => {
    document.body.removeChild(anchor);
    URL.revokeObjectURL(objectUrl);
  };
  setTimeout(cleanUp, 100);
}
/**
 * Append one message to the log element as its own <div> and scroll the log
 * so the newest entry is visible.
 *
 * @param {HTMLElement} logEl - Scrollable container holding the log lines.
 * @param {string} message - Text of the new entry (set via textContent).
 */
function addLog(logEl, message) {
  const entry = Object.assign(document.createElement('div'), { textContent: message });
  logEl.appendChild(entry);
  // Jump to the bottom so the latest line is in view.
  logEl.scrollTop = logEl.scrollHeight;
}
/**
 * Let the user drag `el` around the viewport with the mouse.
 *
 * A mousedown on the element starts a drag, except when it lands on an INPUT,
 * BUTTON or LABEL so the form controls stay usable. While dragging, the
 * element's left/top follow the pointer (right is reset to 'auto') and the
 * cursor shows 'grabbing'; releasing the button restores the 'move' cursor.
 *
 * The document-level mousemove/mouseup listeners exist only for the duration
 * of a drag. Registering them permanently would leak one pair of listeners,
 * each holding on to a removed panel, every time a panel is created.
 *
 * @param {HTMLElement} el - Positioned element to make draggable.
 */
function makeDraggable(el) {
  let offsetX = 0;
  let offsetY = 0;

  function onMouseMove(e) {
    el.style.left = (e.clientX - offsetX) + 'px';
    el.style.top = (e.clientY - offsetY) + 'px';
    el.style.right = 'auto';
  }

  function onMouseUp() {
    document.removeEventListener('mousemove', onMouseMove);
    document.removeEventListener('mouseup', onMouseUp);
    el.style.cursor = 'move';
  }

  el.addEventListener('mousedown', function(e) {
    const tag = e.target.tagName;
    if (tag === 'INPUT' || tag === 'BUTTON' || tag === 'LABEL') return;
    // Remember where inside the element the pointer grabbed it.
    const rect = el.getBoundingClientRect();
    offsetX = e.clientX - rect.left;
    offsetY = e.clientY - rect.top;
    el.style.cursor = 'grabbing';
    // Re-adding the same function references is a no-op, so a missed mouseup
    // cannot stack duplicate listeners.
    document.addEventListener('mousemove', onMouseMove);
    document.addEventListener('mouseup', onMouseUp);
  });
}
})();