Webpage to EPUB Converter

Converts webpage articles to EPUB

// ==UserScript==
// @name         Webpage to EPUB Converter
// @namespace    http://tampermonkey.net/
// @version      5.7
// @description  Converts webpage articles to EPUB
// @author       Gemini/You
// @match        *://*/*
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @require      https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js
// @require      https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @require      https://unpkg.com/@mozilla/readability@0.5.0/Readability.js
// @run-at       document-idle
// @license      MIT
// ==/UserScript==

(function () {
    'use strict';

    // --- 1. UI CREATION ---

    // The trigger is a plain <div>, so `disabled` (read below and in generateEPUB)
    // is an ad-hoc flag maintained by showLoadingIndicator()/resetButton(), not a
    // native form-control state.
    const floatingButton = document.createElement('div');
    floatingButton.innerHTML = '📚 EPUB';
    Object.assign(floatingButton.style, {
        position: 'fixed', bottom: '20px', right: '20px',
        width: '60px', height: '60px', backgroundColor: '#2c3e50',
        color: 'white', borderRadius: '50%', display: 'flex',
        alignItems: 'center', justifyContent: 'center', cursor: 'pointer',
        zIndex: '10000', fontSize: '12px', fontWeight: 'bold',
        boxShadow: '0 4px 12px rgba(0,0,0,0.3)', transition: 'all 0.3s ease'
    });
    // Hover feedback is suppressed while a conversion is in progress.
    floatingButton.addEventListener('mouseenter', () => {
        if (!floatingButton.disabled) {
            floatingButton.style.transform = 'scale(1.1)';
            floatingButton.style.backgroundColor = '#34495e';
        }
    });
    floatingButton.addEventListener('mouseleave', () => {
        floatingButton.style.transform = 'scale(1)';
        floatingButton.style.backgroundColor = '#2c3e50';
    });
    floatingButton.addEventListener('click', generateEPUB);

    // The script matches every URL, so the userscript manager also injects it
    // into iframes (ads, embeds, widgets). Only the top-level window gets the
    // button and the menu entry; otherwise every frame would render its own copy.
    if (window.top === window.self) {
        // Keyframes referenced by showLoadingIndicator() for the spinning state.
        const styleSheet = document.createElement("style");
        styleSheet.textContent = `@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }`;
        document.head.appendChild(styleSheet);
        document.body.appendChild(floatingButton);

        // Register a menu command for Tampermonkey so the user can trigger the
        // EPUB generation from the extension menu without clicking the floating
        // button. The typeof check keeps the script working where the API is absent.
        if (typeof GM_registerMenuCommand !== 'undefined') {
            GM_registerMenuCommand("Generate EPUB from Current Page", generateEPUB);
        }
    }

    // --- 2. EPUB GENERATION LOGIC ---

    /**
     * Builds the cover descriptor consumed by createEpubZip() and getPackageOPF().
     * @param {Blob} blob Image data for the cover.
     * @param {string} [originalSrc] URL the cover was downloaded from, if any.
     * @returns {{id: string, filename: string, mimetype: string, blob: Blob, originalSrc: (string|undefined)}}
     */
    function buildCoverImage(blob, originalSrc) {
        return {
            id: 'cover-image',
            filename: 'cover.' + (blob.type.split('/')[1] || 'jpg'),
            // Locally chosen files can report an empty type, which would produce an
            // empty media-type in the manifest; fall back to the same guess the
            // filename extension already makes.
            mimetype: blob.type || 'image/jpeg',
            blob,
            originalSrc
        };
    }

    /**
     * Entry point: extracts the article, collects its images, lets the user pick
     * a cover, builds the EPUB archive and hands it to saveAs().
     * Does nothing while a previous run is still in progress. Errors are logged,
     * reported through showMessageBox() and never rethrown.
     * @returns {Promise<void>}
     */
    async function generateEPUB() {
        if (floatingButton.disabled) return;
        showLoadingIndicator('⏳');

        try {
            const article = extractArticle();
            if (!article) {
                // The catch block below shows the dialog; opening one here as well
                // would stack two message boxes on top of each other.
                throw new Error('Could not extract any article content from this page.');
            }

            const rawTitle = article.title || 'Untitled Article';
            const cleanTitle = decodeHtmlEntities(rawTitle);
            const fileName = sanitizeFileName(cleanTitle); // Filename sanitization
            // Apply content title sanitization BEFORE XML escaping
            const contentSanitizedTitle = sanitizeContentTitle(cleanTitle);
            const epubTitle = sanitizeTextForXML(contentSanitizedTitle); // EPUB internal title sanitization

            const author = sanitizeTextForXML(article.byline) || window.location.hostname;
            const contentDiv = processContent(article.content);
            const images = await extractImagesFromContent(contentDiv);

            let coverImage = null;
            // True once the user made a decision in the first dialog (picked an
            // image or chose "No Cover"), so the manual dialog is not shown.
            let userSelectedCover = false;

            const detectedImageLinks = findAllImageLinks(document.body);
            if (detectedImageLinks.length > 0) {
                const selectedCover = await promptForCoverSelection(detectedImageLinks);
                if (selectedCover === false) {
                    userSelectedCover = true;
                } else if (selectedCover) {
                    userSelectedCover = true;
                    const contentMatch = images.find(img => img.originalSrc === selectedCover);
                    if (contentMatch) {
                        // The cover is already embedded as a content image. Reuse its
                        // file so the <img> in the article (which points at
                        // contentMatch.filename) and the cover manifest entry resolve
                        // to the same archive entry; a separate "cover.*" name would
                        // leave the in-article reference dangling.
                        coverImage = {
                            id: 'cover-image',
                            filename: contentMatch.filename,
                            mimetype: contentMatch.mimetype,
                            blob: contentMatch.blob,
                            originalSrc: selectedCover
                        };
                    } else {
                        const coverData = await fetchImage(selectedCover);
                        if (coverData) {
                            coverImage = buildCoverImage(coverData.blob, selectedCover);
                            images.unshift({ ...coverImage, id: 'img_0' });
                        } else {
                            showMessageBox('Failed to load the selected cover image. Proceeding without cover.');
                        }
                    }
                }
            }

            if (!userSelectedCover && !coverImage) {
                const userCover = await promptForCover();
                if (userCover) {
                    let coverBlob = null;
                    if (userCover.blob) {
                        coverBlob = userCover.blob;
                    } else if (userCover.url) {
                        const coverData = await fetchImage(userCover.url);
                        coverBlob = coverData ? coverData.blob : null;
                    }

                    if (coverBlob) {
                        coverImage = buildCoverImage(coverBlob);
                        images.unshift({ ...coverImage, id: 'img_0' });
                    } else {
                        showMessageBox('Failed to load the selected cover image. Proceeding without cover.');
                    }
                }
            }

            const zip = await createEpubZip(epubTitle, author, contentDiv.innerHTML, images, coverImage);
            const blob = await zip.generateAsync({ type: 'blob' });
            saveAs(blob, `${fileName}.epub`);
            showLoadingIndicator('Done!');

        } catch (error) {
            console.error('EPUB Generation Error:', error);
            showMessageBox(`Failed to create EPUB: ${error.message}`);
            showLoadingIndicator('❌');
        } finally {
            // Leave the final status visible briefly before re-enabling the button.
            setTimeout(() => { resetButton(); }, 2000);
        }
    }

    // --- 3. CONTENT & IMAGE HANDLING ---

    /**
     * Collects candidate cover-image URLs, in discovery order and without
     * duplicates, from three places: links inside `container` whose path ends in
     * an image extension, <img> elements inside `container`, and the document's
     * og:image / twitter:image meta tags.
     * Every URL is resolved against the current page so relative meta values are
     * usable and de-duplicate against the absolute link/img URLs. data: URLs and
     * values that cannot be parsed as URLs are skipped.
     * @param {Element} container Element whose descendants are scanned.
     * @returns {string[]} Absolute image URLs.
     */
    function findAllImageLinks(container) {
        const IMAGE_PATH_PATTERN = /\.(jpg|jpeg|png|gif|webp|bmp|svg|tiff|ico)$/i;
        const imageLinks = [];
        const seenUrls = new Set();

        // Returns a URL object, or null when the value is empty or unparsable.
        const parseUrl = (rawUrl) => {
            if (!rawUrl) return null;
            try {
                return new URL(rawUrl, window.location.href);
            } catch (e) {
                return null;
            }
        };

        const addCandidate = (parsed) => {
            if (!parsed || parsed.protocol === 'data:' || seenUrls.has(parsed.href)) return;
            seenUrls.add(parsed.href);
            imageLinks.push(parsed.href);
        };

        container.querySelectorAll('a[href]').forEach(link => {
            const parsed = parseUrl(link.href);
            // Test the pathname only, so query strings and fragments are ignored.
            if (parsed && IMAGE_PATH_PATTERN.test(parsed.pathname)) {
                addCandidate(parsed);
            }
        });
        container.querySelectorAll('img[src]').forEach(img => {
            addCandidate(parseUrl(img.src));
        });
        document.querySelectorAll('meta[property="og:image"], meta[name="twitter:image"], meta[property="twitter:image"]').forEach(meta => {
            addCandidate(parseUrl(meta.getAttribute('content')));
        });
        return imageLinks;
    }

    /**
     * Runs Readability over a deep copy of the current document, leaving the
     * live page untouched.
     * @returns {*} Whatever Readability's parse() returns for the cloned document.
     */
    function extractArticle() {
        return new Readability(document.cloneNode(true)).parse();
    }

    /**
     * Parses the article markup into a detached <div> and rewrites every image
     * source to an absolute URL based on the current page address.
     * Images whose source cannot be turned into a URL are removed (with a console
     * warning); images without a source are left as they are.
     * @param {string} htmlContent Article markup.
     * @returns {HTMLDivElement} The detached wrapper holding the processed content.
     */
    function processContent(htmlContent) {
        const wrapper = document.createElement('div');
        wrapper.innerHTML = htmlContent;

        for (const image of wrapper.querySelectorAll('img')) {
            if (!image.src) {
                continue;
            }
            try {
                const absolute = new URL(image.src, window.location.href);
                image.src = absolute.href;
            } catch (e) {
                console.warn("Invalid image src, removing:", image.src);
                image.remove();
            }
        }

        return wrapper;
    }

    /**
     * Downloads the images referenced by `contentDiv` and points each <img> at
     * the file name it will have inside the EPUB.
     *
     * - Images without a source or with a data: URL are left untouched.
     * - Each distinct URL is downloaded once; all downloads run in parallel.
     * - Images that cannot be downloaded are removed from `contentDiv`.
     * - `srcset`/`sizes` are dropped from rewritten images so a reader cannot
     *   prefer a remote candidate over the embedded file.
     *
     * @param {Element} contentDiv Container whose <img> elements are processed in place.
     * @returns {Promise<Array<{id: string, filename: string, mimetype: string, blob: Blob, originalSrc: string}>>}
     *          One entry per distinct downloaded URL, numbered from 1 in document order.
     */
    async function extractImagesFromContent(contentDiv) {
        const imgElements = Array.from(contentDiv.querySelectorAll('img'))
            .filter(img => img.src && !img.src.startsWith('data:'));

        // fetchImage() resolves with null on failure instead of rejecting, so
        // Promise.all cannot abort the whole batch because of one bad image.
        const uniqueSources = [...new Set(imgElements.map(img => img.src))];
        const downloads = await Promise.all(uniqueSources.map(src => fetchImage(src)));
        const downloadBySource = new Map(uniqueSources.map((src, index) => [src, downloads[index]]));

        const images = [];
        const filenameBySource = new Map();

        for (const img of imgElements) {
            const originalSrc = img.src;
            const imageData = downloadBySource.get(originalSrc);
            if (!imageData) {
                img.remove();
                continue;
            }

            let filename = filenameBySource.get(originalSrc);
            if (filename === undefined) {
                const imageNumber = images.length + 1;
                const ext = imageData.blob.type.split('/')[1] || 'jpg';
                filename = `image_${imageNumber}.${ext}`;
                filenameBySource.set(originalSrc, filename);
                images.push({
                    id: `img_${imageNumber}`,
                    filename,
                    mimetype: imageData.blob.type,
                    blob: imageData.blob,
                    originalSrc
                });
            }

            img.removeAttribute('srcset');
            img.removeAttribute('sizes');
            img.src = filename;
        }
        return images;
    }

    /**
     * Downloads an image through GM_xmlhttpRequest.
     * The returned promise never rejects: any failure (bad URL, non-2xx status,
     * non-image response, network error, abort, timeout) is logged as a warning
     * and resolves to null.
     * @param {string} url Image URL; relative URLs are resolved against the page.
     * @param {number} [timeoutMs=15000] Maximum time to wait for the response.
     *        Without a timeout the `ontimeout` handler can never fire and a
     *        stalled request would block EPUB generation indefinitely.
     * @returns {Promise<{blob: Blob}|null>}
     */
    function fetchImage(url, timeoutMs = 15000) {
        return new Promise((resolve) => {
            try {
                const absUrl = new URL(url, window.location.href).href;
                GM_xmlhttpRequest({
                    method: 'GET', url: absUrl, responseType: 'blob',
                    timeout: timeoutMs,
                    onload: function(response) {
                        const body = response.response;
                        const isImage = Boolean(body) && typeof body.type === 'string' && body.type.startsWith('image/');
                        if (response.status >= 200 && response.status < 300 && isImage) {
                            resolve({ blob: body });
                        } else {
                            console.warn("GM_xmlhttpRequest failed or resource is not an image:", absUrl, "Status:", response.status);
                            resolve(null);
                        }
                    },
                    onerror: (error) => { console.warn("GM_xmlhttpRequest error:", absUrl, error); resolve(null); },
                    onabort: () => { console.warn("GM_xmlhttpRequest aborted:", absUrl); resolve(null); },
                    ontimeout: () => { console.warn("GM_xmlhttpRequest timeout:", absUrl); resolve(null); }
                });
            } catch (err) {
                // Reached for an unparsable URL or when GM_xmlhttpRequest is unavailable.
                console.warn("Image fetch failed (pre-request error):", url, err);
                resolve(null);
            }
        });
    }

    // --- 4. EPUB ZIP CREATION ---

    /**
     * Assembles the EPUB archive: the uncompressed `mimetype` entry first, then
     * the images, stylesheet, content document, navigation document and package
     * file under OEBPS/, and finally META-INF/container.xml.
     * An image that matches the cover (same originalSrc or same id) is stored
     * under the cover's file name instead of its own.
     * @param {string} title XML-escaped book title.
     * @param {string} author XML-escaped author name.
     * @param {string} contentHTML Article markup.
     * @param {Array<Object>} images Image descriptors (id, filename, blob, originalSrc).
     * @param {Object|null} coverImage Cover descriptor, or null when there is no cover.
     * @returns {Promise<JSZip>} The populated archive (not yet generated).
     */
    async function createEpubZip(title, author, contentHTML, images, coverImage) {
        const CONTAINER_XML = `<?xml version="1.0" encoding="UTF-8"?><container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"><rootfiles><rootfile full-path="OEBPS/package.opf" media-type="application/oebps-package+xml"/></rootfiles></container>`;

        const archive = new JSZip();
        archive.file('mimetype', 'application/epub+zip', { compression: 'STORE' });
        const contentRoot = archive.folder('OEBPS');
        const imageDir = contentRoot.folder('images');

        const matchesCover = (img) =>
            Boolean(coverImage) && (img.originalSrc === coverImage.originalSrc || img.id === coverImage.id);

        images.forEach((img) => {
            const storedName = matchesCover(img) ? coverImage.filename : img.filename;
            imageDir.file(storedName, img.blob);
        });

        // Each document is produced right before it is written, in this order.
        const documents = [
            ['style.css', () => getCSS()],
            ['content.xhtml', () => getContentXHTML(title, contentHTML)],
            ['nav.xhtml', () => getNavXHTML(title)],
            ['package.opf', () => getPackageOPF(title, author, images, coverImage)],
        ];
        for (const [entryName, produce] of documents) {
            contentRoot.file(entryName, produce());
        }

        archive.folder('META-INF').file('container.xml', CONTAINER_XML);
        return archive;
    }

    /**
     * Produces the package document (package.opf): metadata with a fresh UUID and
     * the current timestamp, the manifest, and a spine holding the single content
     * document.
     * The cover, when present, is listed once with the `cover-image` property;
     * entries in `images` that match it (same originalSrc or same id) are skipped.
     * @param {string} title XML-escaped title.
     * @param {string} author XML-escaped author.
     * @param {Array<Object>} images Image descriptors (id, filename, mimetype, originalSrc).
     * @param {Object|null} coverImage Cover descriptor or null.
     * @returns {string} The package document as a string.
     */
    function getPackageOPF(title, author, images, coverImage) {
        const matchesCover = (img) =>
            Boolean(coverImage) && (img.originalSrc === coverImage.originalSrc || img.id === coverImage.id);

        const coverItems = coverImage
            ? [`<item id="cover-image" href="images/${coverImage.filename}" media-type="${coverImage.mimetype}" properties="cover-image"/>`]
            : [];
        const imageItems = images
            .filter((img) => !matchesCover(img))
            .map((img) => `<item id="${img.id}" href="images/${img.filename}" media-type="${img.mimetype}"/>`);
        const manifestItems = coverItems.concat(imageItems);

        const identifier = generateUUID();
        // Timestamp without the fractional seconds that toISOString() emits.
        const modified = new Date().toISOString().replace(/\.\d+Z$/,'Z');
        const coverMeta = coverImage ? '<meta name="cover" content="cover-image"/>' : '';

        return [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="book-id">',
            '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">',
            `<dc:identifier id="book-id">urn:uuid:${identifier}</dc:identifier>`,
            `<dc:title>${title}</dc:title>`,
            `<dc:creator>${author}</dc:creator>`,
            '<dc:language>en</dc:language>',
            `<meta property="dcterms:modified">${modified}</meta>`,
            coverMeta,
            '</metadata>',
            '<manifest>',
            '<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>',
            '<item id="content" href="content.xhtml" media-type="application/xhtml+xml"/>',
            '<item id="css" href="style.css" media-type="text/css"/>',
            manifestItems.join('\n    '),
            '</manifest>',
            '<spine><itemref idref="content"/></spine>',
            '</package>',
        ].join('');
    }

    /**
     * Produces the navigation document (nav.xhtml): a table of contents with a
     * single entry that links to content.xhtml.
     * @param {string} title XML-escaped title used as the entry's label.
     * @returns {string} The navigation document as a string.
     */
    function getNavXHTML(title) {
        const tocEntry = `<li><a href="content.xhtml">${title}</a></li>`;
        return [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">',
            '<head><title>Navigation</title></head>',
            '<body><nav epub:type="toc" id="toc"><h1>Table of Contents</h1>',
            `<ol>${tocEntry}</ol>`,
            '</nav></body></html>',
        ].join('');
    }
    /**
     * Wraps the article markup in the EPUB content document (content.xhtml).
     *
     * - When a DOM is available, the markup is re-serialised with XMLSerializer
     *   so HTML-only syntax (unclosed <img>/<br>, named entities such as &nbsp;)
     *   becomes well-formed XML. A <template> is used for parsing so nothing in
     *   the markup is loaded or executed.
     * - Every relative <img src> is prefixed with `images/`, wherever `src` sits
     *   among the attributes. Sources that carry a scheme (e.g. data: URIs left
     *   in place by the image extraction step) or are protocol-relative are not
     *   touched, because they do not refer to a file inside the archive.
     *
     * @param {string} title XML-escaped title, used for <title> and the <h1>.
     * @param {string} contentHTML Article markup whose downloaded images use bare
     *        file names as `src`.
     * @returns {string} The content document as a string.
     */
    function getContentXHTML(title, contentHTML) {
        let bodyMarkup = contentHTML;
        if (typeof document !== 'undefined' && typeof XMLSerializer !== 'undefined') {
            const template = document.createElement('template');
            template.innerHTML = contentHTML;
            const serializer = new XMLSerializer();
            bodyMarkup = Array.from(template.content.childNodes, (node) => serializer.serializeToString(node)).join('');
        }

        // Group 1: "<img" plus any attributes before src (quoted values are
        // skipped as a unit so a ">" inside them cannot end the match early).
        const imgSrcPattern = /(<img\b(?:[^>"']|"[^"]*"|'[^']*')*?\s)src="([^"]*)"/g;
        const hasSchemeOrHost = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
        const finalHTML = bodyMarkup.replace(imgSrcPattern, (match, beforeSrc, src) => {
            if (src === '' || hasSchemeOrHost.test(src)) {
                return match;
            }
            return `${beforeSrc}src="images/${src}"`;
        });

        return `<?xml version="1.0" encoding="UTF-8"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"><head><title>${title}</title><link href="style.css" rel="stylesheet" type="text/css"/></head><body><h1>${title}</h1>${finalHTML}</body></html>`;
    }
    /**
     * Returns the minified stylesheet bundled into the EPUB as style.css.
     * The rules are listed one per line here and concatenated without separators.
     * @returns {string} The stylesheet text.
     */
    function getCSS() {
        const rules = [
            'body{font-family:Georgia,serif;line-height:1.6;margin:1em;color:#111}',
            'h1,h2,h3,h4,h5,h6{font-family:sans-serif;color:#2c3e50;margin-top:1.5em;margin-bottom:.5em;line-height:1.2}',
            'h1{font-size:2em;text-align:center;border-bottom:2px solid #3498db;padding-bottom:10px;margin-bottom:1em}',
            'p{margin:0 0 1em 0;text-align:justify}',
            'img{max-width:100%;height:auto;margin:1em auto;display:block;border-radius:4px}',
            'blockquote{border-left:4px solid #3498db;margin:1em 0;padding:.1em 1em;font-style:italic;background-color:#f9f9f9}',
            'a{color:#2980b9;text-decoration:none}',
            'a:hover{text-decoration:underline}',
            'pre,code{background-color:#f4f4f4;padding:.5em;border-radius:4px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word}',
        ];
        return rules.join('');
    }

    // --- 5. HELPERS & UI FEEDBACK ---

    /**
     * Puts the floating button into its busy state: shows `text`, marks the
     * button as disabled and spins it while the hourglass is displayed.
     * @param {string} text Label to show on the button.
     */
    function showLoadingIndicator(text) {
        const isHourglass = text === '⏳';
        floatingButton.innerHTML = text;
        floatingButton.disabled = true;
        if (isHourglass) {
            floatingButton.style.animation = 'spin 1.5s linear infinite';
        } else {
            floatingButton.style.animation = '';
        }
    }
    /**
     * Returns the floating button to its idle state: default label, enabled,
     * no animation.
     */
    function resetButton() {
        Object.assign(floatingButton, { innerHTML: '📚 EPUB', disabled: false });
        floatingButton.style.animation = '';
    }

    /**
     * Shows a centred message box with an OK button. The box closes when OK is
     * clicked or automatically after five seconds, whichever comes first.
     * The message is inserted as plain text: callers pass error messages and
     * other strings that must never be interpreted as markup.
     * @param {string} message Text to display.
     */
    function showMessageBox(message) {
        const messageBox = document.createElement('div');
        Object.assign(messageBox.style, {
            position: 'fixed',
            top: '50%',
            left: '50%',
            transform: 'translate(-50%, -50%)',
            backgroundColor: 'white',
            padding: '20px',
            borderRadius: '8px',
            boxShadow: '0 4px 12px rgba(0,0,0,0.3)',
            zIndex: '10002',
            textAlign: 'center',
            fontFamily: 'sans-serif',
            color: '#2c3e50',
            maxWidth: '80%',
            wordBreak: 'break-word'
        });

        const messageText = document.createElement('p');
        messageText.textContent = message;

        const okButton = document.createElement('button');
        okButton.textContent = 'OK';
        okButton.style.cssText = 'margin-top: 15px; padding: 8px 15px; background: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer;';

        messageBox.appendChild(messageText);
        messageBox.appendChild(okButton);
        document.body.appendChild(messageBox);

        // remove() is a no-op on an already detached element, so the timer and
        // the click handler cannot interfere with each other.
        const autoDismissTimer = setTimeout(() => {
            messageBox.remove();
        }, 5000);

        okButton.addEventListener('click', () => {
            clearTimeout(autoDismissTimer);
            messageBox.remove();
        });
    }

    /**
     * Decodes HTML entities by letting a detached <textarea> parse the text and
     * reading back its value.
     * @param {string} text Text that may contain HTML entities.
     * @returns {string} The decoded text, or '' for empty/missing input.
     */
    function decodeHtmlEntities(text) {
        if (text) {
            const decoder = document.createElement('textarea');
            decoder.innerHTML = text;
            return decoder.value;
        }
        return '';
    }

    /**
     * Sanitizes a string for use as a valid filename using a whitelist.
     * Removes emojis and symbols, but keeps a wide range of Unicode language characters,
     * including combining marks for scripts like Bengali. Runs of whitespace and
     * hyphens are collapsed into a single space.
     * The result is limited to 150 code points (not UTF-16 units, so a character
     * outside the Basic Multilingual Plane is never cut in half) and never ends
     * with a space.
     * @param {string} name The decoded, clean string to sanitize.
     * @returns {string} The sanitized string, or 'Untitled' when nothing usable remains.
     */
    function sanitizeFileName(name) {
        if (!name || typeof name !== 'string') return 'Untitled';

        const MAX_CODE_POINTS = 150;

        // Whitelist:
        // \p{L}: Any Unicode letter
        // \p{N}: Any Unicode number
        // \p{M}: Any Unicode combining mark (for diacritics, vowel signs, conjuncts)
        // \s: Whitespace characters
        // -: Hyphen
        // The 'u' flag is crucial for \p{} (Unicode property escapes) to work correctly.
        const invalidCharsRegex = /[^\p{L}\p{N}\p{M}\s-]/gu;

        // 1. Remove all characters NOT on our whitelist (emojis, other symbols, etc.).
        const cleaned = name.replace(invalidCharsRegex, '');

        // 2. Replace runs of spaces or hyphens with a single space and trim.
        const collapsed = cleaned.replace(/[\s-]+/g, ' ').trim();

        // 3. Truncate by code point, then trim again because the cut may land
        //    right after a space.
        const truncated = Array.from(collapsed).slice(0, MAX_CODE_POINTS).join('').trim();

        // 4. Provide a fallback if the name became empty.
        return truncated || 'Untitled';
    }

    /**
     * Sanitizes a string for use as content within the EPUB title.
     * Removes emojis and most symbols, but keeps a wide range of Unicode language characters
     * and common punctuation that might appear in a title (including hyphens,
     * so "Spider-Man" stays hyphenated). Runs of whitespace are collapsed into a
     * single space.
     * The result is limited to 250 code points (not UTF-16 units, so a character
     * outside the Basic Multilingual Plane is never cut in half) and never ends
     * with a space.
     * @param {string} text The decoded, clean string to sanitize for title content.
     * @returns {string} The sanitized string; '' for empty input, 'Untitled Article'
     *          when sanitizing leaves nothing.
     */
    function sanitizeContentTitle(text) {
        if (!text) return '';

        const MAX_CODE_POINTS = 250;

        // Whitelist for title content:
        // \p{L}: Any Unicode letter
        // \p{N}: Any Unicode number
        // \p{M}: Any Unicode combining mark
        // \s: Whitespace characters
        // Common punctuation that is usually part of a title:
        // .,?!:;'"()[]{}/-_
        // The 'u' flag is crucial for \p{} (Unicode property escapes) to work correctly.
        const invalidCharsRegex = /[^\p{L}\p{N}\p{M}\s.,?!:;'"(){}[\]/\-_]/gu;

        // Remove all characters NOT on our whitelist.
        const cleaned = text.replace(invalidCharsRegex, '');

        // Collapse whitespace only. Hyphens are whitelisted punctuation and must
        // survive; collapsing them here would strip every hyphen from the title.
        const collapsed = cleaned.replace(/\s+/g, ' ').trim();

        // Truncate by code point, then trim again because the cut may land right
        // after a space.
        const truncated = Array.from(collapsed).slice(0, MAX_CODE_POINTS).join('').trim();

        return truncated || 'Untitled Article';
    }

    /**
     * Sanitizes text for safe inclusion in XML/XHTML.
     * Characters that XML 1.0 does not allow at all (control characters other
     * than tab, line feed and carriage return, unpaired surrogates, U+FFFE and
     * U+FFFF) are removed, because no escape sequence can represent them; the
     * five special characters & < > " ' are replaced by their entities.
     * This should be applied *after* content-level sanitization.
     * @param {string} text The text to sanitize. Other truthy values are converted
     *        to a string first.
     * @returns {string} The sanitized text, or '' for empty/missing input.
     */
    function sanitizeTextForXML(text) {
        if (!text) return '';

        const XML_ENTITIES = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&apos;'
        };

        // The 'u' flag makes \uD800-\uDFFF match only unpaired surrogates, so
        // valid astral characters (e.g. emoji) are preserved.
        const illegalXmlChars = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]/gu;

        return String(text)
            .replace(illegalXmlChars, '')
            .replace(/[&<>"']/g, (character) => XML_ENTITIES[character]);
    }

    /**
     * Generates a version-4 UUID string for the EPUB's unique identifier.
     * Uses `randomUUID()` when the crypto object provides it (secure contexts),
     * otherwise fills the version-4 template from `getRandomValues()`, and only
     * falls back to Math.random() when no crypto object is available.
     * @param {Object|null} [cryptoSource] Crypto implementation to use; defaults
     *        to the global `crypto` object when one exists.
     * @returns {string} A UUID of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx.
     */
    function generateUUID(cryptoSource = (typeof crypto !== 'undefined' ? crypto : null)) {
        if (cryptoSource && typeof cryptoSource.randomUUID === 'function') {
            return cryptoSource.randomUUID();
        }

        const hasRandomValues = Boolean(cryptoSource) && typeof cryptoSource.getRandomValues === 'function';
        const randomNibble = () => (
            hasRandomValues
                ? cryptoSource.getRandomValues(new Uint8Array(1))[0] & 0x0f
                : Math.random() * 16 | 0
        );

        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (placeholder) => {
            const nibble = randomNibble();
            // 'y' carries the variant bits: the two high bits must be 10.
            const value = placeholder === 'x' ? nibble : (nibble & 0x3 | 0x8);
            return value.toString(16);
        });
    }

    /**
     * Shows a modal gallery of candidate cover images and waits for a choice.
     * The dialog is built with DOM APIs instead of an HTML string: the URLs come
     * from the page and must not be interpolated into markup, and the
     * broken-thumbnail handling must not depend on inline event attributes.
     * @param {string[]} imageLinks Candidate image URLs.
     * @returns {Promise<string|null|false>} The chosen URL; null when the user
     *          picks "Skip & Add Manually" or clicks the backdrop; false when
     *          the user picks "No Cover".
     */
    function promptForCoverSelection(imageLinks) {
        return new Promise((resolve) => {
            // Small element factory: tag, inline style and optional text content.
            const createStyled = (tag, cssText, text) => {
                const element = document.createElement(tag);
                if (cssText) element.style.cssText = cssText;
                if (text !== undefined) element.textContent = text;
                return element;
            };

            const modal = createStyled('div', 'position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0,0,0,0.8); z-index: 10001; display: flex; align-items: center; justify-content: center; font-family: sans-serif;');
            const dialog = createStyled('div', 'background: white; padding: 20px; border-radius: 12px; width: 90%; max-width: 800px; max-height: 80vh; overflow-y: auto; text-align: center; box-shadow: 0 8px 32px rgba(0,0,0,0.3);');

            const close = (result) => {
                modal.remove();
                resolve(result);
            };

            dialog.appendChild(createStyled('h3', 'margin-top: 0; color: #2c3e50;', 'Select Cover Image'));
            dialog.appendChild(createStyled('p', 'color: #666; margin-bottom: 20px;', `Found ${imageLinks.length} potential cover images. Click one to select it.`));

            const gallery = createStyled('div', 'margin-bottom: 20px; text-align: center;');
            imageLinks.forEach((url) => {
                const option = createStyled('div', 'display: inline-block; margin: 10px; padding: 10px; border: 2px solid #ddd; border-radius: 8px; cursor: pointer; transition: all 0.3s ease;');
                option.className = 'image-option';

                const thumbnail = createStyled('img', 'max-width: 150px; max-height: 150px; display: block; margin-bottom: 5px;');
                // Hide candidates whose preview fails to load. The listener is
                // attached before src is set so the error cannot be missed.
                thumbnail.addEventListener('error', () => { option.style.display = 'none'; });
                thumbnail.src = url;

                const caption = createStyled('div', 'font-size: 12px; color: #666; word-break: break-all; max-width: 150px;', url.substring(url.lastIndexOf('/') + 1));

                option.appendChild(thumbnail);
                option.appendChild(caption);
                option.addEventListener('click', () => close(url));
                option.addEventListener('mouseenter', () => {
                    option.style.borderColor = '#3498db';
                    option.style.backgroundColor = '#f8f9fa';
                });
                option.addEventListener('mouseleave', () => {
                    option.style.borderColor = '#ddd';
                    option.style.backgroundColor = 'white';
                });
                gallery.appendChild(option);
            });
            dialog.appendChild(gallery);

            const actions = createStyled('div', 'margin-top: 20px;');
            const skipButton = createStyled('button', 'padding: 10px 20px; background: #95a5a6; color: white; border: none; border-radius: 6px; cursor: pointer; margin-right: 10px;', 'Skip & Add Manually');
            skipButton.id = 'skipSelection';
            skipButton.addEventListener('click', () => close(null));
            const noCoverButton = createStyled('button', 'padding: 10px 20px; background: #e74c3c; color: white; border: none; border-radius: 6px; cursor: pointer;', 'No Cover');
            noCoverButton.id = 'cancelSelection';
            noCoverButton.addEventListener('click', () => close(false));
            actions.appendChild(skipButton);
            actions.appendChild(noCoverButton);
            dialog.appendChild(actions);

            // Clicking the dimmed backdrop (not the dialog itself) counts as "skip".
            modal.addEventListener('click', (event) => {
                if (event.target === modal) close(null);
            });

            modal.appendChild(dialog);
            document.body.appendChild(modal);
        });
    }

    /**
     * Shows the "Add Cover Manually" dialog, where the user can type an image URL
     * or choose a local file, and waits for the outcome.
     * A chosen file takes precedence over a typed URL.
     * @returns {Promise<{blob: File}|{url: string}|null>} `{ blob }` for a local
     *          file, `{ url }` for a typed (trimmed) URL, or null when the user
     *          skips, clicks the backdrop, or confirms without providing either.
     */
    function promptForCover() {
        return new Promise((resolve) => {
            const overlay = document.createElement('div');
            overlay.style.cssText = `position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0,0,0,0.7); z-index: 10001; display: flex; align-items: center; justify-content: center; font-family: sans-serif;`;
            const panel = document.createElement('div');
            panel.style.cssText = `background: white; padding: 30px; border-radius: 12px; width: 90%; max-width: 500px; text-align: center; box-shadow: 0 8px 32px rgba(0,0,0,0.3);`;
            panel.innerHTML = `<h3 style="margin-top: 0; color: #2c3e50;">Add Cover Manually</h3><p style="color: #666; margin-bottom: 25px;">Enter an image URL or upload a file from your computer.</p><div style="margin-bottom: 20px;"><label for="coverUrl" style="display: block; margin-bottom: 8px; font-weight: bold; text-align: left;">Image URL:</label><input type="url" id="coverUrl" placeholder="https://example.com/image.jpg" style="width: 100%; box-sizing: border-box; padding: 10px; border: 1px solid #ccc; border-radius: 4px;"></div><p style="font-weight: bold; color: #666;">OR</p><div style="margin-bottom: 25px;"><input type="file" id="coverFile" accept="image/*" style="display: none;"><button id="uploadButton" style="width: 100%; padding: 12px; border: 2px dashed #3498db; background: #ecf0f1; color: #2c3e50; border-radius: 4px; cursor: pointer; font-weight: bold;">Choose a Local File</button></div><div><button id="useCover" style="margin-right: 10px; padding: 10px 20px; background: #2c3e50; color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: bold;">Use Cover</button><button id="skipCover" style="padding: 10px 20px; background: #95a5a6; color: white; border: none; border-radius: 6px; cursor: pointer;">Skip</button></div>`;
            overlay.appendChild(panel);
            document.body.appendChild(overlay);

            const byId = (id) => panel.querySelector(`#${id}`);
            const filePicker = byId('coverFile');
            const urlField = byId('coverUrl');

            // Every exit path closes the dialog first and then settles the promise.
            const finish = (result) => {
                document.body.removeChild(overlay);
                resolve(result);
            };

            // The real file input is hidden; the styled button forwards to it.
            byId('uploadButton').onclick = () => filePicker.click();
            byId('skipCover').onclick = () => finish(null);
            overlay.onclick = (event) => {
                if (event.target === overlay) finish(null);
            };

            byId('useCover').onclick = () => {
                const chosenFile = filePicker.files.length > 0 ? filePicker.files[0] : null;
                const typedUrl = urlField.value.trim();

                if (chosenFile) {
                    finish({ blob: chosenFile });
                    return;
                }
                finish(typedUrl ? { url: typedUrl } : null);
            };
        });
    }
})();