CSV Hyperlink HTML Exporter

Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package

Vous devrez installer une extension telle que Tampermonkey, Greasemonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Userscripts pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey pour installer ce script.

Vous devrez installer une extension de gestionnaire de script utilisateur pour installer ce script.

(J'ai déjà un gestionnaire de scripts utilisateur, laissez-moi l'installer !)

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

(J'ai déjà un gestionnaire de style utilisateur, laissez-moi l'installer!)

// ==UserScript==
// @name         CSV Hyperlink HTML Exporter
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  Upload a CSV of hyperlinks, fetch each page's HTML, and export as a downloadable package
// @author       You
// @match        *://*/*
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @license MIT
// @connect      *
// ==/UserScript==

(function() {
    'use strict';

    GM_registerMenuCommand('\u{1F4C4} CSV HTML Exporter', openPanel);

    let panel = null;

    function openPanel() {
        if (panel) {
            panel.remove();
            panel = null;
            return;
        }

        panel = document.createElement('div');
        panel.id = 'csv-html-exporter-panel';
        panel.style.cssText = 'position:fixed;top:50px;right:50px;width:500px;max-height:80vh;overflow-y:auto;background:#fff;border:2px solid #333;border-radius:8px;padding:20px;z-index:999999;font-family:Arial,sans-serif;box-shadow:0 4px 20px rgba(0,0,0,0.3);cursor:move;';

        var header = document.createElement('div');
        header.style.cssText = 'display:flex;justify-content:space-between;align-items:center;margin-bottom:15px;';

        var title = document.createElement('h3');
        title.style.cssText = 'margin:0;font-size:16px;';
        title.textContent = '\u{1F4C4} CSV Hyperlink HTML Exporter';

        var closeBtn = document.createElement('button');
        closeBtn.style.cssText = 'background:none;border:none;font-size:20px;cursor:pointer;';
        closeBtn.textContent = '\u2715';
        closeBtn.addEventListener('click', function() {
            panel.remove();
            panel = null;
        });

        header.appendChild(title);
        header.appendChild(closeBtn);
        panel.appendChild(header);

        // File input section
        var fileSection = document.createElement('div');
        fileSection.style.cssText = 'margin-bottom:15px;';

        var fileLabel = document.createElement('label');
        fileLabel.style.cssText = 'font-size:13px;font-weight:bold;display:block;margin-bottom:5px;';
        fileLabel.textContent = 'Upload CSV (one URL per row, or column header "url" / "link" / "hyperlink"):';

        var fileInput = document.createElement('input');
        fileInput.type = 'file';
        fileInput.accept = '.csv';
        fileInput.id = 'csv-file-input';
        fileInput.style.cssText = 'font-size:13px;';

        fileSection.appendChild(fileLabel);
        fileSection.appendChild(fileInput);
        panel.appendChild(fileSection);

        // Full HTML checkbox
        var checkSection = document.createElement('div');
        checkSection.style.cssText = 'margin-bottom:15px;';

        var checkLabel = document.createElement('label');
        checkLabel.style.cssText = 'font-size:13px;display:flex;align-items:center;gap:8px;';

        var checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.id = 'csv-exporter-full-html';
        checkbox.checked = true;

        checkLabel.appendChild(checkbox);
        checkLabel.appendChild(document.createTextNode('Export full HTML (uncheck for body only)'));
        checkSection.appendChild(checkLabel);
        panel.appendChild(checkSection);

        // Delay input
        var delaySection = document.createElement('div');
        delaySection.style.cssText = 'margin-bottom:15px;';

        var delayLabel = document.createElement('label');
        delayLabel.style.cssText = 'font-size:13px;display:flex;align-items:center;gap:8px;';

        var delayInput = document.createElement('input');
        delayInput.type = 'number';
        delayInput.id = 'csv-exporter-delay';
        delayInput.value = '1000';
        delayInput.min = '0';
        delayInput.step = '500';
        delayInput.style.cssText = 'width:80px;';

        delayLabel.appendChild(delayInput);
        delayLabel.appendChild(document.createTextNode('Delay between requests (ms)'));
        delaySection.appendChild(delayLabel);
        panel.appendChild(delaySection);

        // Start button
        var startBtn = document.createElement('button');
        startBtn.id = 'csv-exporter-start';
        startBtn.disabled = true;
        startBtn.style.cssText = 'background:#0073bb;color:#fff;border:none;padding:10px 20px;border-radius:4px;cursor:pointer;font-size:14px;width:100%;';
        startBtn.textContent = 'Start Export';
        panel.appendChild(startBtn);

        // Progress
        var progress = document.createElement('div');
        progress.id = 'csv-exporter-progress';
        progress.style.cssText = 'margin-top:15px;font-size:12px;color:#555;';
        panel.appendChild(progress);

        // Log
        var log = document.createElement('div');
        log.id = 'csv-exporter-log';
        log.style.cssText = 'margin-top:10px;max-height:200px;overflow-y:auto;font-size:11px;font-family:monospace;background:#f5f5f5;padding:8px;border-radius:4px;display:none;';
        panel.appendChild(log);

        document.body.appendChild(panel);

        // Make draggable
        makeDraggable(panel);

        // File input handler
        var urls = [];
        fileInput.addEventListener('change', function(e) {
            var file = e.target.files[0];
            if (!file) return;
            var reader = new FileReader();
            reader.onload = function(evt) {
                urls = parseCSV(evt.target.result);
                if (urls.length > 0) {
                    startBtn.disabled = false;
                    startBtn.textContent = 'Start Export (' + urls.length + ' URLs found)';
                } else {
                    startBtn.disabled = true;
                    startBtn.textContent = 'No valid URLs found in CSV';
                }
            };
            reader.readAsText(file);
        });

        // Start button handler
        startBtn.addEventListener('click', function() {
            var fullHtml = checkbox.checked;
            var delay = parseInt(delayInput.value) || 1000;
            startExport(urls, fullHtml, delay, startBtn, progress, log);
        });
    }

    function parseCSV(text) {
    var lines = text.split(new RegExp('\r?' + String.fromCharCode(10))).filter(function(line) { return line.trim(); });
        if (lines.length === 0) return [];

        var urls = [];
        var urlPattern = /^https?:\/\//i;
        var startIndex = 0;

        var firstLine = lines[0].toLowerCase().trim();
        var firstCols = firstLine.split(',').map(function(h) { return h.replace(/"/g, '').trim(); });

        if (!urlPattern.test(firstCols[0])) {
            var urlColIndex = -1;
            var urlHeaders = ['url', 'link', 'hyperlink', 'href', 'address', 'uri'];
            for (var h = 0; h < firstCols.length; h++) {
                if (urlHeaders.indexOf(firstCols[h]) !== -1) {
                    urlColIndex = h;
                    break;
                }
            }
            startIndex = 1;

            for (var i = startIndex; i < lines.length; i++) {
                var cols = parseCSVLine(lines[i]);
                var colIdx = urlColIndex >= 0 ? urlColIndex : 0;
                var val = (cols[colIdx] || '').trim();
                if (urlPattern.test(val)) {
                    urls.push(val);
                }
            }
        } else {
            for (var j = 0; j < lines.length; j++) {
                var cols2 = parseCSVLine(lines[j]);
                for (var k = 0; k < cols2.length; k++) {
                    var val2 = cols2[k].trim();
                    if (urlPattern.test(val2)) {
                        urls.push(val2);
                        break;
                    }
                }
            }
        }

        return urls;
    }

    function parseCSVLine(line) {
        var result = [];
        var current = '';
        var inQuotes = false;
        for (var i = 0; i < line.length; i++) {
            var ch = line[i];
            if (ch === '"') {
                inQuotes = !inQuotes;
            } else if (ch === ',' && !inQuotes) {
                result.push(current);
                current = '';
            } else {
                current += ch;
            }
        }
        result.push(current);
        return result;
    }

    function startExport(urls, fullHtml, delay, startBtn, progress, log) {
        log.style.display = 'block';
        log.innerHTML = '';
        startBtn.disabled = true;
        startBtn.textContent = 'Exporting...';

        var results = [];
        var index = 0;

        function fetchNext() {
            if (index >= urls.length) {
                exportResults(results);
                startBtn.disabled = false;
                startBtn.textContent = 'Export Complete! (' + results.length + ' pages)';
                progress.textContent = '\u2705 Done! ' + results.length + '/' + urls.length + ' pages exported.';
                return;
            }

            var url = urls[index];
            progress.textContent = 'Fetching ' + (index + 1) + ' of ' + urls.length + '...';
            addLog(log, '\u23F3 Fetching: ' + url);

            GM_xmlhttpRequest({
                method: 'GET',
                url: url,
                timeout: 30000,
                onload: function(response) {
                    var html = response.responseText;
                    if (!fullHtml) {
                        var bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
                        if (bodyMatch) {
                            html = bodyMatch[1];
                        }
                    }
                    results.push({
                        url: url,
                        status: response.status,
                        html: html,
                        filename: urlToFilename(url)
                    });
                    addLog(log, '\u2705 ' + response.status + ' - ' + url);
                    index++;
                    setTimeout(fetchNext, delay);
                },
                onerror: function() {
                    results.push({
                        url: url,
                        status: 'ERROR',
                        html: '',
                        filename: urlToFilename(url)
                    });
                    addLog(log, '\u274C Error - ' + url);
                    index++;
                    setTimeout(fetchNext, delay);
                },
                ontimeout: function() {
                    results.push({
                        url: url,
                        status: 'TIMEOUT',
                        html: '',
                        filename: urlToFilename(url)
                    });
                    addLog(log, '\u23F0 Timeout - ' + url);
                    index++;
                    setTimeout(fetchNext, delay);
                }
            });
        }

        fetchNext();
    }

    function urlToFilename(url) {
        try {
            var u = new URL(url);
            var name = u.hostname + u.pathname;
            name = name.replace(/[^a-zA-Z0-9_\-]/g, '_').replace(/_+/g, '_');
            if (name.length > 100) name = name.substring(0, 100);
            return name + '.html';
        } catch(e) {
            return 'page_' + Date.now() + '.html';
        }
    }

    function exportResults(results) {
        var exportData = results.map(function(r) {
            return { url: r.url, status: r.status, filename: r.filename, html: r.html };
        });

        downloadFile('html_export_bundle.json', JSON.stringify(exportData, null, 2), 'application/json');

        var csvRows = ['url,status,filename'];
        results.forEach(function(r) {
            csvRows.push('"' + r.url + '","' + r.status + '","' + r.filename + '"');
        });
        downloadFile('export_summary.csv', csvRows.join(String.fromCharCode(10)), 'text/csv');


        if (results.length <= 20) {
            results.forEach(function(r, i) {
                if (r.html) {
                    setTimeout(function() {
                        downloadFile(r.filename, r.html, 'text/html');
                    }, i * 200);
                }
            });
        }
    }

    function downloadFile(filename, content, mimeType) {
        var blob = new Blob([content], { type: mimeType });
        var url = URL.createObjectURL(blob);
        var a = document.createElement('a');
        a.href = url;
        a.download = filename;
        document.body.appendChild(a);
        a.click();
        setTimeout(function() {
            document.body.removeChild(a);
            URL.revokeObjectURL(url);
        }, 100);
    }

    function addLog(logEl, message) {
        var line = document.createElement('div');
        line.textContent = message;
        logEl.appendChild(line);
        logEl.scrollTop = logEl.scrollHeight;
    }

    function makeDraggable(el) {
        var offsetX, offsetY, isDragging = false;
        el.addEventListener('mousedown', function(e) {
            if (e.target.tagName === 'INPUT' || e.target.tagName === 'BUTTON' || e.target.tagName === 'LABEL') return;
            isDragging = true;
            offsetX = e.clientX - el.getBoundingClientRect().left;
            offsetY = e.clientY - el.getBoundingClientRect().top;
            el.style.cursor = 'grabbing';
        });
        document.addEventListener('mousemove', function(e) {
            if (!isDragging) return;
            el.style.left = (e.clientX - offsetX) + 'px';
            el.style.top = (e.clientY - offsetY) + 'px';
            el.style.right = 'auto';
        });
        document.addEventListener('mouseup', function() {
            isDragging = false;
            el.style.cursor = 'move';
        });
    }

})();