CSV Hyperlink HTML Exporter

Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package

K instalaci tohoto skriptu si budete muset nainstalovat rozšíření jako Tampermonkey, Greasemonkey nebo Violentmonkey.

You will need to install an extension such as Tampermonkey to install this script.

K instalaci tohoto skriptu si budete muset nainstalovat rozšíření jako Tampermonkey nebo Violentmonkey.

K instalaci tohoto skriptu si budete muset nainstalovat rozšíření jako Tampermonkey nebo Userscripts.

You will need to install an extension such as Tampermonkey to install this script.

K instalaci tohoto skriptu si budete muset nainstalovat manažer uživatelských skriptů.

(Už mám manažer uživatelských skriptů, nechte mě ho nainstalovat!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(Už mám manažer uživatelských stylů, nechte mě ho nainstalovat!)

// ==UserScript==
// @name         CSV Hyperlink HTML Exporter
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package
// @author       You
// @match        *://*/*
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @license MIT
// @connect      *
// ==/UserScript==

(function() {
    'use strict';

    // Register the userscript-manager menu entry; selecting it calls openPanel,
    // which opens the panel or closes it if it is already open.
    GM_registerMenuCommand('\u{1F4C4} CSV HTML Exporter', openPanel);

    // The panel element currently attached to the page, or null when no panel is open.
    let panel = null;

    /**
     * Toggle the exporter panel.
     *
     * If a panel is already open it is removed and the function returns.
     * Otherwise a floating panel is built and appended to document.body with:
     * a CSV file input, a "full HTML" checkbox, a request-delay input, a start
     * button (disabled until a CSV with at least one URL is loaded), a progress
     * line and a log area. Selecting a file parses it with parseCSV; clicking
     * the start button hands the parsed URLs to startExport.
     */
    function openPanel() {
        // Second invocation of the menu command closes the panel.
        if (panel) {
            panel.remove();
            panel = null;
            return;
        }

        panel = document.createElement('div');
        panel.id = 'csv-html-exporter-panel';
        panel.style.cssText = 'position:fixed;top:50px;right:50px;width:500px;max-height:80vh;overflow-y:auto;background:#fff;border:2px solid #333;border-radius:8px;padding:20px;z-index:999999;font-family:Arial,sans-serif;box-shadow:0 4px 20px rgba(0,0,0,0.3);cursor:move;';

        // Header: title + close button
        const header = document.createElement('div');
        header.style.cssText = 'display:flex;justify-content:space-between;align-items:center;margin-bottom:15px;';

        const title = document.createElement('h3');
        title.style.cssText = 'margin:0;font-size:16px;';
        title.textContent = '\u{1F4C4} CSV Hyperlink HTML Exporter';

        const closeBtn = document.createElement('button');
        closeBtn.style.cssText = 'background:none;border:none;font-size:20px;cursor:pointer;';
        closeBtn.textContent = '\u2715';
        closeBtn.addEventListener('click', function() {
            panel.remove();
            panel = null;
        });

        header.appendChild(title);
        header.appendChild(closeBtn);
        panel.appendChild(header);

        // File input section
        const fileSection = document.createElement('div');
        fileSection.style.cssText = 'margin-bottom:15px;';

        const fileLabel = document.createElement('label');
        fileLabel.style.cssText = 'font-size:13px;font-weight:bold;display:block;margin-bottom:5px;';
        fileLabel.textContent = 'Upload CSV (one URL per row, or column header "url" / "link" / "hyperlink"):';

        const fileInput = document.createElement('input');
        fileInput.type = 'file';
        fileInput.accept = '.csv';
        fileInput.id = 'csv-file-input';
        fileInput.style.cssText = 'font-size:13px;';

        fileSection.appendChild(fileLabel);
        fileSection.appendChild(fileInput);
        panel.appendChild(fileSection);

        // Full HTML checkbox
        const checkSection = document.createElement('div');
        checkSection.style.cssText = 'margin-bottom:15px;';

        const checkLabel = document.createElement('label');
        checkLabel.style.cssText = 'font-size:13px;display:flex;align-items:center;gap:8px;';

        const checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.id = 'csv-exporter-full-html';
        checkbox.checked = true;

        checkLabel.appendChild(checkbox);
        checkLabel.appendChild(document.createTextNode('Export full HTML (uncheck for body only)'));
        checkSection.appendChild(checkLabel);
        panel.appendChild(checkSection);

        // Delay input
        const delaySection = document.createElement('div');
        delaySection.style.cssText = 'margin-bottom:15px;';

        const delayLabel = document.createElement('label');
        delayLabel.style.cssText = 'font-size:13px;display:flex;align-items:center;gap:8px;';

        const delayInput = document.createElement('input');
        delayInput.type = 'number';
        delayInput.id = 'csv-exporter-delay';
        delayInput.value = '1000';
        delayInput.min = '0';
        delayInput.step = '500';
        delayInput.style.cssText = 'width:80px;';

        delayLabel.appendChild(delayInput);
        delayLabel.appendChild(document.createTextNode('Delay between requests (ms)'));
        delaySection.appendChild(delayLabel);
        panel.appendChild(delaySection);

        // Start button
        const startBtn = document.createElement('button');
        startBtn.id = 'csv-exporter-start';
        startBtn.disabled = true;
        startBtn.style.cssText = 'background:#0073bb;color:#fff;border:none;padding:10px 20px;border-radius:4px;cursor:pointer;font-size:14px;width:100%;';
        startBtn.textContent = 'Start Export';
        panel.appendChild(startBtn);

        // Progress
        const progress = document.createElement('div');
        progress.id = 'csv-exporter-progress';
        progress.style.cssText = 'margin-top:15px;font-size:12px;color:#555;';
        panel.appendChild(progress);

        // Log (hidden until an export starts)
        const log = document.createElement('div');
        log.id = 'csv-exporter-log';
        log.style.cssText = 'margin-top:10px;max-height:200px;overflow-y:auto;font-size:11px;font-family:monospace;background:#f5f5f5;padding:8px;border-radius:4px;display:none;';
        panel.appendChild(log);

        document.body.appendChild(panel);

        // Make draggable
        makeDraggable(panel);

        // File input handler: parse the chosen CSV and enable the start button
        // only when at least one URL was found.
        let urls = [];
        fileInput.addEventListener('change', function(e) {
            const file = e.target.files[0];
            if (!file) return;
            const reader = new FileReader();
            reader.onload = function(evt) {
                urls = parseCSV(evt.target.result);
                if (urls.length > 0) {
                    startBtn.disabled = false;
                    startBtn.textContent = 'Start Export (' + urls.length + ' URLs found)';
                } else {
                    startBtn.disabled = true;
                    startBtn.textContent = 'No valid URLs found in CSV';
                }
            };
            reader.readAsText(file);
        });

        // Start button handler
        startBtn.addEventListener('click', function() {
            const fullHtml = checkbox.checked;
            // An explicit 0 is a valid delay (the input allows min=0), so only a
            // non-numeric value falls back to the 1000 ms default.
            const parsedDelay = Number.parseInt(delayInput.value, 10);
            const delay = Number.isNaN(parsedDelay) ? 1000 : Math.max(0, parsedDelay);
            startExport(urls, fullHtml, delay, startBtn, progress, log);
        });
    }

    /**
     * Extract http(s) URLs from raw CSV text, at most one per row.
     *
     * Strategy:
     *  - If the first cell of the first row is not a URL and the first row
     *    contains a recognized header name (url, link, hyperlink, href,
     *    address, uri; case-insensitive), only that column is read from the
     *    remaining rows.
     *  - Otherwise every row (including the first) is scanned and the first
     *    cell that looks like a URL is taken. This also covers header-less
     *    files whose URL is not in the first column, and files with an
     *    unrecognized header row (which simply contributes no URL).
     *
     * @param {string} text Raw CSV file contents.
     * @returns {string[]} URLs in file order; empty when none are found.
     */
    function parseCSV(text) {
        // Accept CRLF, LF and bare-CR line endings; ignore blank lines.
        const lines = text.split(/\r\n?|\n/).filter(function(line) { return line.trim(); });
        if (lines.length === 0) return [];

        const urlPattern = /^https?:\/\//i;
        const urlHeaders = ['url', 'link', 'hyperlink', 'href', 'address', 'uri'];

        // Use the real field splitter for the header so quoted commas cannot
        // shift the detected column index.
        const firstCells = parseCSVLine(lines[0]).map(function(cell) {
            return cell.trim().toLowerCase();
        });

        let urlColIndex = -1;
        if (!urlPattern.test(firstCells[0])) {
            for (let h = 0; h < firstCells.length; h++) {
                if (urlHeaders.indexOf(firstCells[h]) !== -1) {
                    urlColIndex = h;
                    break;
                }
            }
        }

        const urls = [];

        if (urlColIndex !== -1) {
            // Recognized header: read only that column, skipping the header row.
            for (let i = 1; i < lines.length; i++) {
                const value = (parseCSVLine(lines[i])[urlColIndex] || '').trim();
                if (urlPattern.test(value)) {
                    urls.push(value);
                }
            }
            return urls;
        }

        // No recognized header: take the first URL-looking cell of every row.
        for (let j = 0; j < lines.length; j++) {
            const cells = parseCSVLine(lines[j]);
            for (let k = 0; k < cells.length; k++) {
                const candidate = cells[k].trim();
                if (urlPattern.test(candidate)) {
                    urls.push(candidate);
                    break;
                }
            }
        }

        return urls;
    }

    /**
     * Split a single CSV line into its fields.
     *
     * Commas inside double-quoted sections do not split. Quote characters
     * that delimit a quoted section are removed from the output, and a
     * doubled quote ("") inside a quoted section is decoded to one literal
     * quote, as described by RFC 4180.
     *
     * @param {string} line One line of CSV text (no line terminator).
     * @returns {string[]} The fields in order; always at least one element.
     */
    function parseCSVLine(line) {
        const fields = [];
        let current = '';
        let inQuotes = false;

        for (let i = 0; i < line.length; i++) {
            const ch = line[i];
            if (ch === '"') {
                if (inQuotes && line[i + 1] === '"') {
                    // Escaped quote inside a quoted field: emit one quote and
                    // skip the second character of the pair.
                    current += '"';
                    i++;
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (ch === ',' && !inQuotes) {
                fields.push(current);
                current = '';
            } else {
                current += ch;
            }
        }

        fields.push(current);
        return fields;
    }

    /**
     * Fetch every URL sequentially and export the collected results.
     *
     * Requests are issued one at a time through GM_xmlhttpRequest with a
     * 30 s timeout. After each request settles (load, error, timeout or
     * abort) one result entry is recorded and the next request is scheduled
     * after `delay` ms. When all URLs are processed, exportResults is called
     * and the button / progress text are updated.
     *
     * @param {string[]} urls URLs to fetch, in order.
     * @param {boolean} fullHtml When false, only the content between the
     *     <body> tags is kept (if a body element is found).
     * @param {number} delay Milliseconds to wait between requests.
     * @param {HTMLButtonElement} startBtn Button disabled during the export.
     * @param {HTMLElement} progress Element receiving progress text.
     * @param {HTMLElement} log Element receiving one log line per event.
     */
    function startExport(urls, fullHtml, delay, startBtn, progress, log) {
        log.style.display = 'block';
        log.innerHTML = '';
        startBtn.disabled = true;
        startBtn.textContent = 'Exporting...';

        const results = [];
        let index = 0;

        // Record one outcome, log it, and schedule the next request. Every
        // terminal callback goes through here so the queue cannot stall.
        function settle(url, status, html, logMessage) {
            results.push({
                url: url,
                status: status,
                html: html,
                filename: urlToFilename(url)
            });
            addLog(log, logMessage);
            index++;
            setTimeout(fetchNext, delay);
        }

        // Return the HTML to store for a response, honoring the fullHtml flag.
        function extractHtml(responseText) {
            // A response may carry no text at all; treat that as empty.
            const html = responseText || '';
            if (fullHtml) return html;
            const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
            return bodyMatch ? bodyMatch[1] : html;
        }

        function fetchNext() {
            if (index >= urls.length) {
                exportResults(results);
                startBtn.disabled = false;
                startBtn.textContent = 'Export Complete! (' + results.length + ' pages)';
                progress.textContent = '\u2705 Done! ' + results.length + '/' + urls.length + ' pages exported.';
                return;
            }

            const url = urls[index];
            progress.textContent = 'Fetching ' + (index + 1) + ' of ' + urls.length + '...';
            addLog(log, '\u23F3 Fetching: ' + url);

            GM_xmlhttpRequest({
                method: 'GET',
                url: url,
                timeout: 30000,
                onload: function(response) {
                    settle(url, response.status, extractHtml(response.responseText),
                        '\u2705 ' + response.status + ' - ' + url);
                },
                onerror: function() {
                    settle(url, 'ERROR', '', '\u274C Error - ' + url);
                },
                ontimeout: function() {
                    settle(url, 'TIMEOUT', '', '\u23F0 Timeout - ' + url);
                },
                // Without this handler an aborted request would end the chain
                // silently and the export would never complete.
                onabort: function() {
                    settle(url, 'ABORTED', '', '\u274C Aborted - ' + url);
                }
            });
        }

        fetchNext();
    }

    /**
     * Derive a filesystem-safe ".html" filename from a URL.
     *
     * The name is built from hostname + path + query string, with every
     * character outside [a-zA-Z0-9_-] replaced by "_" (runs collapsed to one)
     * and truncated to 100 characters before the extension is appended.
     * The query string is included so that URLs differing only in their
     * parameters (e.g. ?id=1 vs ?id=2) do not map to the same file.
     *
     * @param {string} url Absolute URL.
     * @returns {string} Sanitized filename, or "page_<timestamp>.html" when
     *     the URL cannot be parsed.
     */
    function urlToFilename(url) {
        try {
            const parsed = new URL(url);
            const raw = parsed.hostname + parsed.pathname + parsed.search;
            const safe = raw.replace(/[^a-zA-Z0-9_\-]/g, '_').replace(/_+/g, '_');
            return safe.substring(0, 100) + '.html';
        } catch (e) {
            // Unparseable URL: fall back to a timestamp-based name rather than
            // failing the whole export.
            return 'page_' + Date.now() + '.html';
        }
    }

    /**
     * Download the export artifacts for a finished run.
     *
     * Always produces two downloads via downloadFile:
     *  - html_export_bundle.json: array of {url, status, filename, html}
     *  - export_summary.csv: one "url,status,filename" row per result
     * When there are at most 20 results, each result with non-empty HTML is
     * additionally downloaded as its own file, staggered 200 ms apart.
     *
     * @param {Array<{url: string, status: (number|string), filename: string, html: string}>} results
     */
    function exportResults(results) {
        const exportData = results.map(function(r) {
            return { url: r.url, status: r.status, filename: r.filename, html: r.html };
        });

        downloadFile('html_export_bundle.json', JSON.stringify(exportData, null, 2), 'application/json');

        // Quote every field and double embedded quotes so URLs containing
        // '"' or ',' still yield well-formed CSV.
        function csvField(value) {
            return '"' + String(value).replace(/"/g, '""') + '"';
        }

        const csvRows = ['url,status,filename'];
        results.forEach(function(r) {
            csvRows.push([r.url, r.status, r.filename].map(csvField).join(','));
        });
        downloadFile('export_summary.csv', csvRows.join('\n'), 'text/csv');

        // Individual files only for small runs, to avoid flooding the browser
        // with download prompts.
        if (results.length <= 20) {
            results.forEach(function(r, i) {
                if (r.html) {
                    setTimeout(function() {
                        downloadFile(r.filename, r.html, 'text/html');
                    }, i * 200);
                }
            });
        }
    }

    /**
     * Trigger a browser download of in-memory content.
     *
     * Wraps the content in a Blob, points a temporary anchor element at an
     * object URL for it, clicks the anchor, and 100 ms later removes the
     * anchor and revokes the object URL.
     *
     * @param {string} filename Suggested name for the downloaded file.
     * @param {string} content File contents.
     * @param {string} mimeType MIME type assigned to the Blob.
     */
    function downloadFile(filename, content, mimeType) {
        const objectUrl = URL.createObjectURL(new Blob([content], { type: mimeType }));

        const link = document.createElement('a');
        link.href = objectUrl;
        link.download = filename;

        // The anchor is attached to the document before it is clicked.
        document.body.appendChild(link);
        link.click();

        const cleanUp = () => {
            document.body.removeChild(link);
            URL.revokeObjectURL(objectUrl);
        };
        setTimeout(cleanUp, 100);
    }

    /**
     * Append one line of text to the log element and scroll it to the bottom.
     *
     * @param {HTMLElement} logEl Container that receives the new line.
     * @param {string} message Text of the log line (inserted as plain text).
     */
    function addLog(logEl, message) {
        const entry = Object.assign(document.createElement('div'), { textContent: message });
        logEl.appendChild(entry);
        // Scroll after appending so the new entry's height is included.
        logEl.scrollTop = logEl.scrollHeight;
    }

    /**
     * Let the user drag an element around the viewport with the mouse.
     *
     * A mousedown on the element starts a drag, unless it lands on an
     * INPUT, BUTTON or LABEL so those controls stay usable. While dragging,
     * the element's left/top styles follow the pointer and right is reset
     * to "auto".
     *
     * The document-level mousemove/mouseup listeners are attached only for
     * the duration of a drag and removed on mouseup, so repeatedly opening
     * and closing the panel does not accumulate listeners on the page or
     * keep removed panels referenced.
     *
     * @param {HTMLElement} el Element to make draggable.
     */
    function makeDraggable(el) {
        // Pointer position relative to the element's top-left corner at the
        // moment the drag started.
        let offsetX = 0;
        let offsetY = 0;

        function onMouseMove(e) {
            el.style.left = (e.clientX - offsetX) + 'px';
            el.style.top = (e.clientY - offsetY) + 'px';
            // The element may initially be anchored via `right`; release that
            // so `left` takes effect.
            el.style.right = 'auto';
        }

        function onMouseUp() {
            document.removeEventListener('mousemove', onMouseMove);
            document.removeEventListener('mouseup', onMouseUp);
            el.style.cursor = 'move';
        }

        el.addEventListener('mousedown', function(e) {
            const tag = e.target.tagName;
            if (tag === 'INPUT' || tag === 'BUTTON' || tag === 'LABEL') return;

            const rect = el.getBoundingClientRect();
            offsetX = e.clientX - rect.left;
            offsetY = e.clientY - rect.top;
            el.style.cursor = 'grabbing';

            document.addEventListener('mousemove', onMouseMove);
            document.addEventListener('mouseup', onMouseUp);
        });
    }

})();