CSV Hyperlink HTML Exporter

Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package

Você precisará instalar uma extensão como Tampermonkey, Greasemonkey ou Violentmonkey para instalar este script.

Você precisará instalar uma extensão como Tampermonkey para instalar este script.

Você precisará instalar uma extensão como Tampermonkey ou Violentmonkey para instalar este script.

Você precisará instalar uma extensão como Tampermonkey ou Userscripts para instalar este script.

Você precisará instalar uma extensão como o Tampermonkey para instalar este script.

Você precisará instalar um gerenciador de scripts de usuário para instalar este script.

(Eu já tenho um gerenciador de scripts de usuário, me deixe instalá-lo!)

Você precisará instalar uma extensão como o Stylus para instalar este estilo.

Você precisará instalar uma extensão como o Stylus para instalar este estilo.

Você precisará instalar uma extensão como o Stylus para instalar este estilo.

Você precisará instalar um gerenciador de estilos de usuário para instalar este estilo.

Você precisará instalar um gerenciador de estilos de usuário para instalar este estilo.

Você precisará instalar um gerenciador de estilos de usuário para instalar este estilo.

(Eu já possuo um gerenciador de estilos de usuário, me deixe fazer a instalação!)

// ==UserScript==
// @name         CSV Hyperlink HTML Exporter
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package
// @author       You
// @match        *://*/*
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @license MIT
// @connect      *
// ==/UserScript==

(function() {
    'use strict';

    // Add an entry to the userscript manager's menu; choosing it calls
    // openPanel, which opens the panel or closes it if it is already open.
    GM_registerMenuCommand('\u{1F4C4} CSV HTML Exporter', openPanel);

    // The panel element currently attached to the page, or null when closed.
    let panel = null;

    /**
     * Toggle the exporter panel.
     *
     * If a panel is already open it is removed and the function returns.
     * Otherwise a fixed-position panel is built (file picker, "full HTML"
     * checkbox, delay field, start button, progress line and log box),
     * appended to document.body, made draggable, and wired so that:
     *   - choosing a file parses it with parseCSV and enables the start
     *     button when at least one URL was found;
     *   - clicking the start button calls startExport with the parsed URLs
     *     and the current option values.
     */
    function openPanel() {
        // Acts as a toggle: a second invocation closes the existing panel.
        if (panel) {
            panel.remove();
            panel = null;
            return;
        }

        // Create an element, apply its inline CSS and copy the given properties.
        const make = (tag, cssText, props) => {
            const node = document.createElement(tag);
            if (cssText) {
                node.style.cssText = cssText;
            }
            return Object.assign(node, props);
        };

        panel = make(
            'div',
            'position:fixed;top:50px;right:50px;width:500px;max-height:80vh;overflow-y:auto;background:#fff;border:2px solid #333;border-radius:8px;padding:20px;z-index:999999;font-family:Arial,sans-serif;box-shadow:0 4px 20px rgba(0,0,0,0.3);cursor:move;',
            { id: 'csv-html-exporter-panel' }
        );

        // Header: title plus close button.
        const header = make('div', 'display:flex;justify-content:space-between;align-items:center;margin-bottom:15px;');
        const title = make('h3', 'margin:0;font-size:16px;', {
            textContent: '\u{1F4C4} CSV Hyperlink HTML Exporter'
        });
        const closeBtn = make('button', 'background:none;border:none;font-size:20px;cursor:pointer;', {
            textContent: '\u2715'
        });
        closeBtn.addEventListener('click', () => {
            panel.remove();
            panel = null;
        });
        header.appendChild(title);
        header.appendChild(closeBtn);
        panel.appendChild(header);

        // File input section.
        const fileSection = make('div', 'margin-bottom:15px;');
        const fileLabel = make('label', 'font-size:13px;font-weight:bold;display:block;margin-bottom:5px;', {
            textContent: 'Upload CSV (one URL per row, or column header "url" / "link" / "hyperlink"):'
        });
        const fileInput = make('input', 'font-size:13px;', {
            type: 'file',
            accept: '.csv',
            id: 'csv-file-input'
        });
        fileSection.appendChild(fileLabel);
        fileSection.appendChild(fileInput);
        panel.appendChild(fileSection);

        // "Full HTML" checkbox (checked by default).
        const checkSection = make('div', 'margin-bottom:15px;');
        const checkLabel = make('label', 'font-size:13px;display:flex;align-items:center;gap:8px;');
        const checkbox = make('input', '', {
            type: 'checkbox',
            id: 'csv-exporter-full-html',
            checked: true
        });
        checkLabel.appendChild(checkbox);
        checkLabel.appendChild(document.createTextNode('Export full HTML (uncheck for body only)'));
        checkSection.appendChild(checkLabel);
        panel.appendChild(checkSection);

        // Delay between requests, in milliseconds.
        const delaySection = make('div', 'margin-bottom:15px;');
        const delayLabel = make('label', 'font-size:13px;display:flex;align-items:center;gap:8px;');
        const delayInput = make('input', 'width:80px;', {
            type: 'number',
            id: 'csv-exporter-delay',
            value: '1000',
            min: '0',
            step: '500'
        });
        delayLabel.appendChild(delayInput);
        delayLabel.appendChild(document.createTextNode('Delay between requests (ms)'));
        delaySection.appendChild(delayLabel);
        panel.appendChild(delaySection);

        // Start button; stays disabled until a CSV with URLs has been loaded.
        const startBtn = make(
            'button',
            'background:#0073bb;color:#fff;border:none;padding:10px 20px;border-radius:4px;cursor:pointer;font-size:14px;width:100%;',
            { id: 'csv-exporter-start', disabled: true, textContent: 'Start Export' }
        );
        panel.appendChild(startBtn);

        // Progress line.
        const progress = make('div', 'margin-top:15px;font-size:12px;color:#555;', {
            id: 'csv-exporter-progress'
        });
        panel.appendChild(progress);

        // Log box; hidden until an export starts.
        const log = make(
            'div',
            'margin-top:10px;max-height:200px;overflow-y:auto;font-size:11px;font-family:monospace;background:#f5f5f5;padding:8px;border-radius:4px;display:none;',
            { id: 'csv-exporter-log' }
        );
        panel.appendChild(log);

        document.body.appendChild(panel);
        makeDraggable(panel);

        // URLs parsed from the most recently loaded CSV.
        let urls = [];

        fileInput.addEventListener('change', (e) => {
            const file = e.target.files[0];
            if (!file) return;

            const reader = new FileReader();
            reader.onload = (evt) => {
                urls = parseCSV(evt.target.result);
                if (urls.length > 0) {
                    startBtn.disabled = false;
                    startBtn.textContent = 'Start Export (' + urls.length + ' URLs found)';
                } else {
                    startBtn.disabled = true;
                    startBtn.textContent = 'No valid URLs found in CSV';
                }
            };
            // A failed read must not leave URLs from a previous file armed.
            reader.onerror = () => {
                urls = [];
                startBtn.disabled = true;
                startBtn.textContent = 'Could not read CSV file';
            };
            reader.readAsText(file);
        });

        startBtn.addEventListener('click', () => {
            const parsedDelay = Number.parseInt(delayInput.value, 10);
            // Honour an explicit 0 ms delay (the field allows it via min="0");
            // fall back to 1000 only for empty, non-numeric or negative input.
            const delay = Number.isNaN(parsedDelay) || parsedDelay < 0 ? 1000 : parsedDelay;
            startExport(urls, checkbox.checked, delay, startBtn, progress, log);
        });
    }

    /**
     * Extract http(s) URLs from CSV text.
     *
     * Blank lines are ignored. Two layouts are supported:
     *   - The first row is a header containing a column named "url", "link",
     *     "hyperlink", "href", "address" or "uri" (case-insensitive): only that
     *     column is read from the remaining rows.
     *   - Otherwise every row, including the first, contributes its first cell
     *     that starts with http:// or https://.
     *
     * @param {string} text - Raw CSV file contents.
     * @returns {string[]} The URLs found, in file order (duplicates kept).
     */
    function parseCSV(text) {
        const lines = text.split(/\r?\n/).filter((line) => line.trim());
        if (lines.length === 0) return [];

        const urlPattern = /^https?:\/\//i;
        const urlHeaders = ['url', 'link', 'hyperlink', 'href', 'address', 'uri'];

        // Parse the first row with the CSV-aware splitter so quoted header
        // cells containing commas do not shift the column index.
        const firstCells = parseCSVLine(lines[0]).map((cell) => cell.trim().toLowerCase());

        // Only look for a header when the first cell is not itself a URL.
        const urlColIndex = urlPattern.test(firstCells[0])
            ? -1
            : firstCells.findIndex((cell) => urlHeaders.includes(cell));

        if (urlColIndex >= 0) {
            // Recognised header: read just that column from the data rows.
            return lines
                .slice(1)
                .map((line) => (parseCSVLine(line)[urlColIndex] || '').trim())
                .filter((value) => urlPattern.test(value));
        }

        // No recognised header. Scan every row (the first one included, since
        // it may be data) and take its first URL-looking cell, so URLs that
        // live in a column other than the first are not lost.
        const urls = [];
        for (const line of lines) {
            const match = parseCSVLine(line)
                .map((cell) => cell.trim())
                .find((cell) => urlPattern.test(cell));
            if (match !== undefined) {
                urls.push(match);
            }
        }
        return urls;
    }

    /**
     * Split one CSV line into its fields.
     *
     * Commas inside double-quoted sections do not split the field, the
     * enclosing quotes are removed, and a doubled quote ("") inside a quoted
     * section yields one literal quote character.
     *
     * @param {string} line - A single CSV record without its line terminator.
     * @returns {string[]} The field values; always at least one element.
     */
    function parseCSVLine(line) {
        const fields = [];
        let current = '';
        let inQuotes = false;

        for (let i = 0; i < line.length; i++) {
            const ch = line[i];
            if (ch === '"') {
                if (inQuotes && line[i + 1] === '"') {
                    // Escaped quote inside a quoted field: keep one quote and
                    // skip the second character of the pair.
                    current += '"';
                    i++;
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (ch === ',' && !inQuotes) {
                fields.push(current);
                current = '';
            } else {
                current += ch;
            }
        }

        fields.push(current);
        return fields;
    }

    /**
     * Fetch every URL in order, one at a time, then hand the collected
     * results to exportResults.
     *
     * Each URL produces exactly one result object
     * `{ url, status, html, filename }`; failed requests are recorded with
     * status 'ERROR', 'TIMEOUT' or 'ABORTED' and an empty html string.
     *
     * @param {string[]} urls - URLs to fetch.
     * @param {boolean} fullHtml - When false, keep only the markup between
     *     <body> and </body> if such a section is found.
     * @param {number} delay - Milliseconds to wait before the next request.
     * @param {HTMLButtonElement} startBtn - Disabled while running; its label
     *     reflects the state.
     * @param {HTMLElement} progress - Receives the progress text.
     * @param {HTMLElement} log - Made visible, cleared, then appended to via addLog.
     */
    function startExport(urls, fullHtml, delay, startBtn, progress, log) {
        log.style.display = 'block';
        log.textContent = '';
        startBtn.disabled = true;
        startBtn.textContent = 'Exporting...';

        const results = [];
        let index = 0;

        // Apply the "body only" option; tolerates a missing response text.
        function extractHtml(responseText) {
            const html = responseText || '';
            if (fullHtml) return html;
            const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
            return bodyMatch ? bodyMatch[1] : html;
        }

        // Export everything gathered so far and restore the UI.
        function finish() {
            exportResults(results);
            startBtn.disabled = false;
            startBtn.textContent = 'Export Complete! (' + results.length + ' pages)';
            progress.textContent = '\u2705 Done! ' + results.length + '/' + urls.length + ' pages exported.';
        }

        function fetchNext() {
            if (index >= urls.length) {
                finish();
                return;
            }

            const url = urls[index];
            progress.textContent = 'Fetching ' + (index + 1) + ' of ' + urls.length + '...';
            addLog(log, '\u23F3 Fetching: ' + url);

            // Defensive: should more than one terminal callback fire for the
            // same request, only the first may advance the queue.
            let settled = false;
            const record = (status, html, logMessage) => {
                if (settled) return;
                settled = true;
                results.push({
                    url: url,
                    status: status,
                    html: html,
                    filename: urlToFilename(url)
                });
                addLog(log, logMessage);
                index++;
                setTimeout(fetchNext, delay);
            };

            GM_xmlhttpRequest({
                method: 'GET',
                url: url,
                timeout: 30000,
                onload: (response) => {
                    record(
                        response.status,
                        extractHtml(response.responseText),
                        '\u2705 ' + response.status + ' - ' + url
                    );
                },
                onerror: () => record('ERROR', '', '\u274C Error - ' + url),
                ontimeout: () => record('TIMEOUT', '', '\u23F0 Timeout - ' + url),
                // Without this an aborted request would never advance the
                // queue and the start button would stay disabled.
                onabort: () => record('ABORTED', '', '\u26D4 Aborted - ' + url)
            });
        }

        fetchNext();
    }

    /**
     * Derive a filesystem-friendly ".html" filename from a URL.
     *
     * The name is built from host, path and query string; every character
     * other than letters, digits, "_" and "-" becomes "_", runs of "_" are
     * collapsed, and the stem is capped at 100 characters. Including the
     * query string keeps URLs that differ only in their parameters from
     * mapping to the same file (names can still coincide after truncation).
     *
     * @param {string} url - Absolute URL.
     * @returns {string} The filename, or "page_<timestamp>.html" when the URL
     *     cannot be parsed.
     */
    function urlToFilename(url) {
        try {
            const parsed = new URL(url);
            let name = (parsed.hostname + parsed.pathname + parsed.search)
                .replace(/[^a-zA-Z0-9_\-]/g, '_')
                .replace(/_+/g, '_');
            if (name.length > 100) {
                name = name.substring(0, 100);
            }
            return name + '.html';
        } catch (e) {
            // Deliberate fallback: an unparsable URL still gets a usable name.
            return 'page_' + Date.now() + '.html';
        }
    }

    /**
     * Download the collected results.
     *
     * Always produces two files via downloadFile:
     *   - html_export_bundle.json: every result as { url, status, filename, html };
     *   - export_summary.csv: one "url,status,filename" row per result.
     * When there are at most 20 results, each non-empty page is additionally
     * downloaded as its own .html file, staggered 200 ms apart.
     *
     * @param {Array<{url: string, status: (number|string), filename: string, html: string}>} results
     */
    function exportResults(results) {
        const exportData = results.map(({ url, status, filename, html }) => ({ url, status, filename, html }));
        downloadFile('html_export_bundle.json', JSON.stringify(exportData, null, 2), 'application/json');

        // Quote a CSV field, doubling embedded quotes so a value containing
        // '"' cannot break out of its column.
        const csvField = (value) => '"' + String(value).replace(/"/g, '""') + '"';

        const csvRows = ['url,status,filename'];
        for (const r of results) {
            csvRows.push([r.url, r.status, r.filename].map(csvField).join(','));
        }
        downloadFile('export_summary.csv', csvRows.join('\n'), 'text/csv');

        // Individual files only for small batches; the stagger spaces out the
        // separate download triggers.
        if (results.length <= 20) {
            results.forEach((r, i) => {
                if (r.html) {
                    setTimeout(() => {
                        downloadFile(r.filename, r.html, 'text/html');
                    }, i * 200);
                }
            });
        }
    }

    /**
     * Trigger a browser download of in-memory content.
     *
     * Wraps the content in a Blob, points a temporary <a download> element at
     * an object URL for it, clicks that element, and 100 ms later removes the
     * element and revokes the object URL.
     *
     * @param {string} filename - Suggested name for the downloaded file.
     * @param {string} content - File contents.
     * @param {string} mimeType - MIME type assigned to the Blob.
     */
    function downloadFile(filename, content, mimeType) {
        const objectUrl = URL.createObjectURL(new Blob([content], { type: mimeType }));

        const anchor = document.createElement('a');
        anchor.href = objectUrl;
        anchor.download = filename;

        document.body.appendChild(anchor);
        anchor.click();

        // Clean up after the click has been dispatched.
        const cleanUp = () => {
            document.body.removeChild(anchor);
            URL.revokeObjectURL(objectUrl);
        };
        setTimeout(cleanUp, 100);
    }

    /**
     * Append one line of plain text to the log box and scroll to the bottom
     * so the newest entry is in view.
     *
     * @param {HTMLElement} logEl - The log container.
     * @param {string} message - Text of the new entry (inserted as text, not HTML).
     */
    function addLog(logEl, message) {
        const entry = Object.assign(document.createElement('div'), { textContent: message });
        logEl.appendChild(entry);
        logEl.scrollTop = logEl.scrollHeight;
    }

    /**
     * Let the user drag an element around with the mouse.
     *
     * A mousedown on the element starts a drag, unless it lands on an INPUT,
     * BUTTON or LABEL so those controls stay usable. While dragging, the
     * element's inline left/top follow the pointer and right is reset to
     * 'auto'. The document-level mousemove/mouseup listeners exist only for
     * the duration of a drag, so repeatedly opening and closing the panel
     * does not pile up listeners that keep firing for detached elements.
     *
     * @param {HTMLElement} el - Element to make draggable.
     */
    function makeDraggable(el) {
        // Pointer position relative to the element's top-left corner at grab time.
        let offsetX = 0;
        let offsetY = 0;

        function onMouseMove(e) {
            el.style.left = (e.clientX - offsetX) + 'px';
            el.style.top = (e.clientY - offsetY) + 'px';
            // The element may be anchored with `right`; release it so `left` applies.
            el.style.right = 'auto';
        }

        function onMouseUp() {
            document.removeEventListener('mousemove', onMouseMove);
            document.removeEventListener('mouseup', onMouseUp);
            el.style.cursor = 'move';
        }

        el.addEventListener('mousedown', function(e) {
            const tag = e.target.tagName;
            if (tag === 'INPUT' || tag === 'BUTTON' || tag === 'LABEL') return;

            const rect = el.getBoundingClientRect();
            offsetX = e.clientX - rect.left;
            offsetY = e.clientY - rect.top;
            el.style.cursor = 'grabbing';

            // Re-adding the same function references is a no-op, so a drag whose
            // mouseup was missed cannot register the handlers twice.
            document.addEventListener('mousemove', onMouseMove);
            document.addEventListener('mouseup', onMouseUp);
        });
    }

})();