// ==UserScript==
// @name Webpage to EPUB Converter
// @namespace http://tampermonkey.net/
// @version 5.7
// @description Converts webpage articles to EPUB
// @author Gemini/You
// @match *://*/*
// @grant GM_xmlhttpRequest
// @grant GM_registerMenuCommand
// @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @require https://unpkg.com/@mozilla/readability@0.5.0/Readability.js
// @run-at document-idle
// @license MIT
// ==/UserScript==
(function () {
'use strict';
// --- 1. UI CREATION ---
// Floating launcher: a fixed, round button pinned to the bottom-right corner of the page.
const floatingButton = document.createElement('div');
floatingButton.innerHTML = '📚 EPUB';
const launcherStyle = {
  position: 'fixed',
  bottom: '20px',
  right: '20px',
  width: '60px',
  height: '60px',
  backgroundColor: '#2c3e50',
  color: 'white',
  borderRadius: '50%',
  display: 'flex',
  alignItems: 'center',
  justifyContent: 'center',
  cursor: 'pointer',
  zIndex: '10000',
  fontSize: '12px',
  fontWeight: 'bold',
  boxShadow: '0 4px 12px rgba(0,0,0,0.3)',
  transition: 'all 0.3s ease',
};
Object.assign(floatingButton.style, launcherStyle);

// Hover feedback. `disabled` is a plain flag this script stores on the <div>;
// while it is set, the button does not grow or lighten on hover.
const onPointerEnter = () => {
  if (floatingButton.disabled) return;
  floatingButton.style.transform = 'scale(1.1)';
  floatingButton.style.backgroundColor = '#34495e';
};
const onPointerLeave = () => {
  floatingButton.style.transform = 'scale(1)';
  floatingButton.style.backgroundColor = '#2c3e50';
};
floatingButton.addEventListener('mouseenter', onPointerEnter);
floatingButton.addEventListener('mouseleave', onPointerLeave);

// Keyframes for the `spin` animation applied to the button while work is in progress.
const spinKeyframes = document.createElement("style");
spinKeyframes.textContent = `@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }`;
document.head.appendChild(spinKeyframes);
document.body.appendChild(floatingButton);
floatingButton.addEventListener('click', generateEPUB);

// Also expose the action in the userscript manager's menu, when that API is granted,
// so the EPUB can be generated without clicking the floating button.
if (typeof GM_registerMenuCommand !== 'undefined') {
  GM_registerMenuCommand("Generate EPUB from Current Page", generateEPUB);
}
// --- 2. EPUB GENERATION LOGIC ---
/**
 * Builds an EPUB from the current page and offers it as a download.
 *
 * Steps: extract the article, derive file name / title / author, download the
 * images referenced by the article, let the user pick or supply a cover,
 * assemble the zip and hand it to saveAs. The floating button shows progress
 * while this runs and is reset two seconds after the run ends, whether it
 * succeeded or failed.
 *
 * Every failure is reported to the user exactly once, from the catch block.
 *
 * @returns {Promise<void>}
 */
async function generateEPUB() {
  if (floatingButton.disabled) return; // a run is already in progress
  showLoadingIndicator('⏳');
  try {
    const article = extractArticle();
    if (!article) {
      // Thrown rather than shown here, so the catch block displays a single message box.
      throw new Error('Could not extract any article content from this page.');
    }
    const cleanTitle = decodeHtmlEntities(article.title || 'Untitled Article');
    const fileName = sanitizeFileName(cleanTitle);
    // Content-level sanitization first, XML escaping last.
    const epubTitle = sanitizeTextForXML(sanitizeContentTitle(cleanTitle));
    const author = sanitizeTextForXML(article.byline) || window.location.hostname;
    const contentDiv = processContent(article.content);
    const images = await extractImagesFromContent(contentDiv);
    const coverImage = await chooseCoverImage(images);
    const zip = await createEpubZip(epubTitle, author, contentDiv.innerHTML, images, coverImage);
    const blob = await zip.generateAsync({ type: 'blob' });
    saveAs(blob, `${fileName}.epub`);
    showLoadingIndicator('Done!');
  } catch (error) {
    console.error('EPUB Generation Error:', error);
    showMessageBox(`Failed to create EPUB: ${error.message}`);
    showLoadingIndicator('❌');
  } finally {
    setTimeout(() => { resetButton(); }, 2000);
  }
}

/**
 * Asks the user for a cover image: first from the images detected on the page,
 * then (unless the user already decided) via the manual URL / file dialog.
 *
 * When a cover is chosen that is not already one of the article images, a copy
 * of it with id 'img_0' is inserted at the front of `images` (the array is
 * modified in place).
 *
 * @param {Array<Object>} images Image records collected from the article content.
 * @returns {Promise<Object|null>} The cover record, or null when there is no cover.
 */
async function chooseCoverImage(images) {
  let coverImage = null;
  let coverDecided = false;
  const detectedImageLinks = findAllImageLinks(document.body);
  if (detectedImageLinks.length > 0) {
    const selectedCover = await promptForCoverSelection(detectedImageLinks);
    if (selectedCover === false) {
      // "No Cover": an explicit decision, so the manual dialog is not shown.
      coverDecided = true;
    } else if (selectedCover) {
      coverDecided = true;
      const coverData = await fetchImage(selectedCover);
      if (coverData) {
        coverImage = buildCoverImage(coverData.blob, selectedCover);
        const isCoverInContent = images.some((img) => img.originalSrc === selectedCover);
        if (!isCoverInContent) {
          images.unshift({ ...coverImage, id: 'img_0' });
        }
      } else {
        showMessageBox('Failed to load the selected cover image. Proceeding without cover.');
      }
    }
  }
  if (coverDecided) return coverImage;

  const userCover = await promptForCover();
  if (!userCover) return null;
  let coverBlob = null;
  if (userCover.blob) {
    coverBlob = userCover.blob;
  } else if (userCover.url) {
    const coverData = await fetchImage(userCover.url);
    coverBlob = coverData ? coverData.blob : null;
  }
  if (!coverBlob) {
    showMessageBox('Failed to load the cover image. Proceeding without cover.');
    return null;
  }
  coverImage = buildCoverImage(coverBlob);
  images.unshift({ ...coverImage, id: 'img_0' });
  return coverImage;
}

/**
 * Creates the cover record used by the zip and manifest builders.
 *
 * @param {Blob} blob Image data of the cover.
 * @param {string} [originalSrc] URL the cover was loaded from; omitted for local files.
 * @returns {Object} Record with id, filename, mimetype, blob and, when given, originalSrc.
 */
function buildCoverImage(blob, originalSrc) {
  // A blob without a type falls back to JPEG, matching the 'jpg' extension fallback
  // (assumption: an untyped upload is most likely a JPEG; the manifest needs some media type).
  const mimetype = blob.type || 'image/jpeg';
  // 'svg+xml' -> 'svg'; anything after '+' or ';' is not part of a usable extension.
  const extension = (blob.type.split('/')[1] || 'jpg').split(/[+;]/)[0].trim() || 'jpg';
  const cover = {
    id: 'cover-image',
    filename: `cover.${extension}`,
    mimetype,
    blob,
  };
  if (originalSrc !== undefined) {
    cover.originalSrc = originalSrc;
  }
  return cover;
}
// --- 3. CONTENT & IMAGE HANDLING ---
/**
 * Collects candidate cover image URLs, in this order: links inside `container`
 * whose path ends in an image extension, <img> sources inside `container`, and
 * the page's og:image / twitter:image meta tags.
 *
 * Every URL is resolved against the page URL before it is recorded, so relative
 * meta-tag values become absolute and the same image reached through different
 * spellings is listed once. data: URIs and URLs that cannot be parsed are skipped.
 *
 * @param {Element} container Element whose links and images are scanned.
 * @returns {string[]} Absolute URLs, de-duplicated, in discovery order.
 */
function findAllImageLinks(container) {
  const imageLinks = [];
  const seenUrls = new Set();
  const imageExtension = /\.(jpg|jpeg|png|gif|webp|bmp|svg|tiff|ico)$/i;

  // Returns the absolute form of `url`, or null when it is not a parseable URL.
  const toAbsolute = (url) => {
    try {
      return new URL(url, window.location.href).href;
    } catch (e) {
      return null;
    }
  };

  // The extension is tested on the path only, so query strings and fragments are ignored.
  const hasImageExtension = (absoluteUrl) => imageExtension.test(new URL(absoluteUrl).pathname);

  const addCandidate = (rawUrl, requireImageExtension) => {
    if (typeof rawUrl !== 'string') return;
    const trimmed = rawUrl.trim();
    if (trimmed === '' || trimmed.startsWith('data:')) return;
    const absolute = toAbsolute(trimmed);
    if (absolute === null || seenUrls.has(absolute)) return;
    if (requireImageExtension && !hasImageExtension(absolute)) return;
    seenUrls.add(absolute);
    imageLinks.push(absolute);
  };

  container.querySelectorAll('a[href]').forEach((link) => addCandidate(link.href, true));
  container.querySelectorAll('img[src]').forEach((img) => addCandidate(img.src, false));
  document
    .querySelectorAll('meta[property="og:image"], meta[name="twitter:image"], meta[property="twitter:image"]')
    .forEach((meta) => addCandidate(meta.getAttribute('content'), false));
  return imageLinks;
}
/**
 * Runs Readability over the current page.
 *
 * The parser is given a deep clone of the document rather than the live one,
 * so whatever it does to its input, the page itself is left untouched.
 *
 * @returns {*} Whatever Readability's parse() returns for the cloned document.
 */
function extractArticle() {
  return new Readability(document.cloneNode(true)).parse();
}
/**
 * Loads the article HTML into a detached <div> and rewrites every image source
 * as an absolute URL based on the current page location.
 *
 * Images without a source are left alone; an image whose source cannot be
 * parsed as a URL is logged and removed from the content.
 *
 * @param {string} htmlContent Article markup.
 * @returns {HTMLDivElement} Detached element holding the processed content.
 */
function processContent(htmlContent) {
  const contentDiv = document.createElement('div');
  contentDiv.innerHTML = htmlContent;
  for (const image of Array.from(contentDiv.querySelectorAll('img'))) {
    if (!image.src) continue;
    try {
      const absolute = new URL(image.src, window.location.href);
      image.src = absolute.href;
    } catch (e) {
      console.warn("Invalid image src, removing:", image.src);
      image.remove();
    }
  }
  return contentDiv;
}
/**
 * Downloads the images referenced by the article and points each <img> at the
 * file name it will have inside the EPUB.
 *
 * - Images without a source and data: URIs are left untouched.
 * - Each distinct URL is requested once, and all requests are started up front
 *   so they run concurrently; repeated uses of one URL share a single file.
 * - An image whose download fails is removed from the content.
 * - The file extension is the MIME subtype without any '+suffix' or parameters
 *   (image/svg+xml -> svg), falling back to 'jpg'.
 *
 * @param {Element} contentDiv Element holding the article content; modified in place.
 * @returns {Promise<Array<{id: string, filename: string, mimetype: string, blob: Blob, originalSrc: string}>>}
 *   One record per downloaded image, in document order of first use.
 */
async function extractImagesFromContent(contentDiv) {
  const candidates = Array.from(contentDiv.querySelectorAll('img'))
    .filter((img) => img.src && !img.src.startsWith('data:'));

  // fetchImage reports failure by resolving to null, so these promises can be awaited later.
  const pendingByUrl = new Map();
  for (const img of candidates) {
    if (!pendingByUrl.has(img.src)) {
      pendingByUrl.set(img.src, fetchImage(img.src));
    }
  }

  const images = [];
  const filenameByUrl = new Map();
  for (const img of candidates) {
    const originalSrc = img.src;
    const imageData = await pendingByUrl.get(originalSrc);
    if (!imageData) {
      img.remove();
      continue;
    }
    let filename = filenameByUrl.get(originalSrc);
    if (filename === undefined) {
      const index = images.length + 1;
      const mimetype = imageData.blob.type;
      const extension = (mimetype.split('/')[1] || 'jpg').split(/[+;]/)[0].trim() || 'jpg';
      filename = `image_${index}.${extension}`;
      filenameByUrl.set(originalSrc, filename);
      images.push({
        id: `img_${index}`,
        filename,
        mimetype,
        blob: imageData.blob,
        originalSrc,
      });
    }
    img.src = filename;
  }
  return images;
}
/**
 * Downloads an image through GM_xmlhttpRequest.
 *
 * The returned promise never rejects. It resolves to `{ blob }` for a 2xx
 * response whose blob type starts with 'image/', and to null in every other
 * case: non-image response, HTTP error, network error, abort, timeout, or a
 * URL that cannot be parsed. A timeout is set on the request so that a stalled
 * download ends in the timeout handler instead of leaving the promise pending.
 *
 * @param {string} url Image URL; relative URLs are resolved against the page URL.
 * @returns {Promise<{blob: Blob}|null>}
 */
function fetchImage(url) {
  const REQUEST_TIMEOUT_MS = 30000;
  return new Promise((resolve) => {
    try {
      const absUrl = new URL(url, window.location.href).href;
      GM_xmlhttpRequest({
        method: 'GET',
        url: absUrl,
        responseType: 'blob',
        timeout: REQUEST_TIMEOUT_MS,
        onload: (response) => {
          const body = response.response;
          const isSuccess = response.status >= 200 && response.status < 300;
          const isImage = Boolean(body) && typeof body.type === 'string' && body.type.startsWith('image/');
          if (isSuccess && isImage) {
            resolve({ blob: body });
          } else {
            console.warn("GM_xmlhttpRequest failed or resource is not an image:", absUrl, "Status:", response.status);
            resolve(null);
          }
        },
        onerror: (error) => { console.warn("GM_xmlhttpRequest error:", absUrl, error); resolve(null); },
        onabort: () => { console.warn("GM_xmlhttpRequest aborted:", absUrl); resolve(null); },
        ontimeout: () => { console.warn("GM_xmlhttpRequest timeout:", absUrl); resolve(null); }
      });
    } catch (err) {
      console.warn("Image fetch failed (pre-request error):", url, err);
      resolve(null);
    }
  });
}
// --- 4. EPUB ZIP CREATION ---
/**
 * Tells whether an image record is the cover's own file, i.e. it would be
 * stored under the same file name as the cover.
 *
 * Only such records are folded into the cover entry. A content image that
 * merely shares the cover's source URL keeps its own file, because the
 * article markup refers to it by its own file name.
 *
 * @param {Object} img Image record with a `filename`.
 * @param {Object|null} coverImage Cover record, or null when there is no cover.
 * @returns {boolean}
 */
function isCoverFile(img, coverImage) {
  return Boolean(coverImage) && img.filename === coverImage.filename;
}

/**
 * Assembles the EPUB container as a JSZip instance.
 *
 * The 'mimetype' entry is added first and stored uncompressed. The cover, when
 * present, is written once under its own file name; every other image is
 * written under the file name recorded on it.
 *
 * @param {string} title XML-escaped book title.
 * @param {string} author XML-escaped author name.
 * @param {string} contentHTML Article markup whose images use bare file names.
 * @param {Array<Object>} images Image records (id, filename, mimetype, blob).
 * @param {Object|null} coverImage Cover record, or null when there is no cover.
 * @returns {Promise<Object>} The populated JSZip instance (not yet generated).
 */
async function createEpubZip(title, author, contentHTML, images, coverImage) {
  const zip = new JSZip();
  zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' });
  const oebps = zip.folder('OEBPS');
  const imageFolder = oebps.folder('images');
  if (coverImage) {
    imageFolder.file(coverImage.filename, coverImage.blob);
  }
  for (const img of images) {
    if (isCoverFile(img, coverImage)) continue; // already written as the cover
    imageFolder.file(img.filename, img.blob);
  }
  oebps.file('style.css', getCSS());
  oebps.file('content.xhtml', getContentXHTML(title, contentHTML));
  oebps.file('nav.xhtml', getNavXHTML(title));
  oebps.file('package.opf', getPackageOPF(title, author, images, coverImage));
  zip.folder('META-INF').file('container.xml', `<?xml version="1.0" encoding="UTF-8"?><container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"><rootfiles><rootfile full-path="OEBPS/package.opf" media-type="application/oebps-package+xml"/></rootfiles></container>`);
  return zip;
}

/**
 * Builds the package document (package.opf).
 *
 * The manifest lists the navigation document, the content document, the
 * stylesheet, the cover (when present) and one item per image file written by
 * createEpubZip, so every image referenced from the content is declared.
 *
 * @param {string} title XML-escaped book title.
 * @param {string} author XML-escaped author name.
 * @param {Array<Object>} images Image records (id, filename, mimetype).
 * @param {Object|null} coverImage Cover record, or null when there is no cover.
 * @returns {string} The package document as XML text.
 */
function getPackageOPF(title, author, images, coverImage) {
  const manifestItems = [];
  if (coverImage) {
    manifestItems.push(`<item id="cover-image" href="images/${coverImage.filename}" media-type="${coverImage.mimetype}" properties="cover-image"/>`);
  }
  images.forEach((img) => {
    if (!isCoverFile(img, coverImage)) {
      manifestItems.push(`<item id="${img.id}" href="images/${img.filename}" media-type="${img.mimetype}"/>`);
    }
  });
  // dcterms:modified is written without fractional seconds.
  const modified = new Date().toISOString().replace(/\.\d+Z$/, 'Z');
  const coverMeta = coverImage ? '<meta name="cover" content="cover-image"/>' : '';
  return `<?xml version="1.0" encoding="UTF-8"?><package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="book-id"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier id="book-id">urn:uuid:${generateUUID()}</dc:identifier><dc:title>${title}</dc:title><dc:creator>${author}</dc:creator><dc:language>en</dc:language><meta property="dcterms:modified">${modified}</meta>${coverMeta}</metadata><manifest><item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/><item id="content" href="content.xhtml" media-type="application/xhtml+xml"/><item id="css" href="style.css" media-type="text/css"/>${manifestItems.join('\n ')}</manifest><spine><itemref idref="content"/></spine></package>`;
}
/**
 * Builds the EPUB navigation document: a table of contents with a single
 * entry that links to content.xhtml.
 *
 * @param {string} title Text of the table-of-contents entry; inserted as-is.
 * @returns {string} The navigation document as XHTML text.
 */
function getNavXHTML(title) {
  const parts = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">',
    '<head><title>Navigation</title></head>',
    '<body><nav epub:type="toc" id="toc"><h1>Table of Contents</h1>',
    '<ol><li><a href="content.xhtml">',
    title,
    '</a></li></ol></nav></body></html>',
  ];
  return parts.join('');
}
/**
 * Re-serializes an HTML fragment with XMLSerializer so that the markup placed
 * in content.xhtml is XML serialization (for example, empty elements such as
 * <br> and <img> are written in closed form) rather than HTML serialization.
 *
 * When no DOM is available, or the conversion fails, the input is returned
 * unchanged.
 *
 * @param {string} html HTML fragment.
 * @returns {string} The fragment as XHTML text, or `html` itself as a fallback.
 */
function serializeAsXhtml(html) {
  if (typeof document === 'undefined' || typeof XMLSerializer === 'undefined') {
    return html;
  }
  try {
    // A separate document created through the DOM implementation is used as a scratch area.
    const scratch = document.implementation.createHTMLDocument('');
    scratch.body.innerHTML = html;
    const serializer = new XMLSerializer();
    return Array.from(scratch.body.childNodes, (node) => serializer.serializeToString(node)).join('');
  } catch (err) {
    console.warn('Could not re-serialize article content as XHTML; using the HTML markup as-is:', err);
    return html;
  }
}

/**
 * Builds the content document (content.xhtml).
 *
 * Image sources that are bare file names are pointed at the EPUB's images/
 * folder, wherever the src attribute sits inside the tag. Sources that carry a
 * URL scheme (data:, http:, ...) or start with '//' are left untouched.
 *
 * @param {string} title XML-escaped title, used for <title> and the leading <h1>.
 * @param {string} contentHTML Article markup.
 * @returns {string} The content document as XHTML text.
 */
function getContentXHTML(title, contentHTML) {
  const hasSchemeOrAuthority = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
  const finalHTML = serializeAsXhtml(contentHTML).replace(
    /(<img\b[^>]*?\ssrc=")([^"]+)(")/gi,
    (match, head, src, tail) => (hasSchemeOrAuthority.test(src) ? match : `${head}images/${src}${tail}`),
  );
  return `<?xml version="1.0" encoding="UTF-8"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"><head><title>${title}</title><link href="style.css" rel="stylesheet" type="text/css"/></head><body><h1>${title}</h1>${finalHTML}</body></html>`;
}
/**
 * Returns the stylesheet stored in the EPUB as style.css.
 *
 * @returns {string} Minified CSS, one rule per array entry below, joined without separators.
 */
function getCSS() {
  const rules = [
    'body{font-family:Georgia,serif;line-height:1.6;margin:1em;color:#111}',
    'h1,h2,h3,h4,h5,h6{font-family:sans-serif;color:#2c3e50;margin-top:1.5em;margin-bottom:.5em;line-height:1.2}',
    'h1{font-size:2em;text-align:center;border-bottom:2px solid #3498db;padding-bottom:10px;margin-bottom:1em}',
    'p{margin:0 0 1em 0;text-align:justify}',
    'img{max-width:100%;height:auto;margin:1em auto;display:block;border-radius:4px}',
    'blockquote{border-left:4px solid #3498db;margin:1em 0;padding:.1em 1em;font-style:italic;background-color:#f9f9f9}',
    'a{color:#2980b9;text-decoration:none}',
    'a:hover{text-decoration:underline}',
    'pre,code{background-color:#f4f4f4;padding:.5em;border-radius:4px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word}',
  ];
  return rules.join('');
}
// --- 5. HELPERS & UI FEEDBACK ---
/**
 * Puts a status label on the floating button and marks it as disabled.
 * The spin animation runs only while the label is the hourglass ('⏳').
 *
 * @param {string} text Label to display on the button.
 */
function showLoadingIndicator(text) {
  const isBusy = text === '⏳';
  floatingButton.innerHTML = text;
  floatingButton.disabled = true;
  if (isBusy) {
    floatingButton.style.animation = 'spin 1.5s linear infinite';
  } else {
    floatingButton.style.animation = '';
  }
}
/**
 * Restores the floating button to its idle state: default label, enabled,
 * no animation.
 */
function resetButton() {
  Object.assign(floatingButton, { innerHTML: '📚 EPUB', disabled: false });
  floatingButton.style.animation = '';
}
/**
 * Shows a centered message box with an OK button.
 *
 * The message is inserted as plain text, so markup characters in it (for
 * example inside an error message) are displayed literally and never parsed.
 * The box closes when OK is clicked, or by itself after five seconds,
 * whichever happens first.
 *
 * @param {string} message Text to display.
 */
function showMessageBox(message) {
  const messageBox = document.createElement('div');
  Object.assign(messageBox.style, {
    position: 'fixed',
    top: '50%',
    left: '50%',
    transform: 'translate(-50%, -50%)',
    backgroundColor: 'white',
    padding: '20px',
    borderRadius: '8px',
    boxShadow: '0 4px 12px rgba(0,0,0,0.3)',
    zIndex: '10002',
    textAlign: 'center',
    fontFamily: 'sans-serif',
    color: '#2c3e50',
    maxWidth: '80%',
    wordBreak: 'break-word'
  });

  const text = document.createElement('p');
  text.textContent = String(message);
  const okButton = document.createElement('button');
  okButton.textContent = 'OK';
  okButton.style.cssText = 'margin-top: 15px; padding: 8px 15px; background: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer;';
  messageBox.appendChild(text);
  messageBox.appendChild(okButton);
  document.body.appendChild(messageBox);

  // Single exit path for both the button and the timer; safe to call more than once.
  const dismiss = () => {
    clearTimeout(autoCloseTimer);
    if (messageBox.parentNode) {
      messageBox.parentNode.removeChild(messageBox);
    }
  };
  okButton.onclick = dismiss;
  const autoCloseTimer = setTimeout(dismiss, 5000);
}
/**
 * Decodes HTML entities by assigning the text to a detached <textarea> as
 * markup and reading its value back.
 *
 * @param {string} text Text that may contain HTML entities.
 * @returns {string} The textarea's value after the assignment; '' for empty or missing input.
 */
function decodeHtmlEntities(text) {
  if (!text) {
    return '';
  }
  const decoder = Object.assign(document.createElement('textarea'), { innerHTML: text });
  return decoder.value;
}
/**
 * Sanitizes a string for use as a file name using a whitelist.
 *
 * Keeps Unicode letters, numbers and combining marks (so scripts such as
 * Bengali stay intact) and removes emojis and symbols. Every run of whitespace
 * and/or hyphens is then replaced by a single space. The result is limited to
 * 150 characters, counted in code points so that a character outside the Basic
 * Multilingual Plane is never cut in half, and is trimmed again after the cut.
 *
 * @param {string} name The decoded, clean string to sanitize.
 * @returns {string} The sanitized string, or 'Untitled' when nothing usable remains.
 */
function sanitizeFileName(name) {
  if (!name || typeof name !== 'string') return 'Untitled';
  // Whitelist: \p{L} letters, \p{N} numbers, \p{M} combining marks, whitespace, hyphen.
  // The 'u' flag is required for the \p{} Unicode property escapes.
  const invalidCharsRegex = /[^\p{L}\p{N}\p{M}\s-]/gu;
  // 1. Drop everything that is not on the whitelist.
  const cleaned = name.replace(invalidCharsRegex, '');
  // 2. Collapse runs of whitespace/hyphens into one space and trim.
  const collapsed = cleaned.replace(/[\s-]+/g, ' ').trim();
  // 3. Truncate by code point; the cut may expose a trailing space, so trim once more.
  const truncated = Array.from(collapsed).slice(0, 150).join('').trim();
  return truncated || 'Untitled';
}
/**
 * Sanitizes a string for use as the title text inside the EPUB.
 *
 * Keeps Unicode letters, numbers, combining marks, whitespace and the common
 * title punctuation .,?!:;'"()[]{}/-_ and removes everything else (emojis and
 * other symbols). Runs of whitespace are collapsed to a single space; hyphens
 * are part of the whitelist and are kept, so "COVID-19" stays "COVID-19". The
 * result is limited to 250 characters, counted in code points so a character
 * outside the Basic Multilingual Plane is never cut in half.
 *
 * @param {string} text The decoded, clean string to sanitize for title content.
 * @returns {string} The sanitized title; '' for empty or non-string input, and
 *   'Untitled Article' when the input contains nothing that survives sanitization.
 */
function sanitizeContentTitle(text) {
  if (!text || typeof text !== 'string') return '';
  // The 'u' flag is required for the \p{} Unicode property escapes.
  const invalidCharsRegex = /[^\p{L}\p{N}\p{M}\s.,?!:;'"(){}[\]/\-_]/gu;
  // Remove all characters NOT on the whitelist.
  const cleaned = text.replace(invalidCharsRegex, '');
  // Collapse whitespace only; punctuation on the whitelist, hyphens included, is preserved.
  const collapsed = cleaned.replace(/\s+/g, ' ').trim();
  // Truncate by code point and trim again in case the cut landed after a space.
  const truncated = Array.from(collapsed).slice(0, 250).join('').trim();
  return truncated || 'Untitled Article';
}
/**
 * Escapes text for safe inclusion in XML/XHTML.
 *
 * The five characters with special meaning in XML (ampersand, less-than,
 * greater-than, double quote, apostrophe) are replaced by their predefined
 * entity references (amp, lt, gt, quot, apos) in a single pass, so an
 * ampersand produced by one replacement is never escaped a second time.
 * This should be applied *after* content-level sanitization.
 *
 * @param {string} text The text to escape.
 * @returns {string} The escaped text; '' for empty or missing input.
 */
function sanitizeTextForXML(text) {
  if (!text) return '';
  // Entity references are assembled from their names so that tooling which decodes
  // entities in source text cannot silently turn these replacements into no-ops.
  const entity = (name) => `&${name};`;
  const replacements = {
    '&': entity('amp'),
    '<': entity('lt'),
    '>': entity('gt'),
    '"': entity('quot'),
    "'": entity('apos'),
  };
  return String(text).replace(/[&<>"']/g, (character) => replacements[character]);
}
/**
 * Produces a random identifier in the 8-4-4-4-12 hexadecimal layout, with the
 * third group starting with '4' and the fourth group starting with 8, 9, a or b.
 * Digits are drawn from Math.random().
 *
 * @returns {string} The generated identifier, in lowercase.
 */
function generateUUID() {
  const template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';
  const hexDigit = (placeholder) => {
    const nibble = Math.floor(Math.random() * 16);
    // 'y' positions are restricted to 8..b; 'x' positions take any hex digit.
    const value = placeholder === 'y' ? ((nibble & 0x3) | 0x8) : nibble;
    return value.toString(16);
  };
  return template.replace(/[xy]/g, (placeholder) => hexDigit(placeholder));
}
/**
 * Shows a modal gallery of candidate cover images and waits for the user's choice.
 *
 * The dialog is built with DOM calls only; URLs are set as properties,
 * attributes and text, never spliced into markup, so a URL containing quotes
 * or angle brackets cannot alter the dialog and is returned unchanged.
 *
 * @param {string[]} imageLinks Candidate image URLs.
 * @returns {Promise<string|null|false>} The chosen URL; null for "Skip & Add
 *   Manually" or a click on the backdrop; false for "No Cover".
 */
function promptForCoverSelection(imageLinks) {
  return new Promise((resolve) => {
    // Creates an element with optional inline style and optional plain-text content.
    const make = (tag, cssText, text) => {
      const el = document.createElement(tag);
      if (cssText) el.style.cssText = cssText;
      if (text !== undefined) el.textContent = text;
      return el;
    };

    const modal = make('div', 'position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0,0,0,0.8); z-index: 10001; display: flex; align-items: center; justify-content: center; font-family: sans-serif;');
    const dialog = make('div', 'background: white; padding: 20px; border-radius: 12px; width: 90%; max-width: 800px; max-height: 80vh; overflow-y: auto; text-align: center; box-shadow: 0 8px 32px rgba(0,0,0,0.3);');

    // Closes the dialog and settles the promise; later calls are ignored.
    let settled = false;
    const finish = (choice) => {
      if (settled) return;
      settled = true;
      if (modal.parentNode) {
        modal.parentNode.removeChild(modal);
      }
      resolve(choice);
    };

    dialog.appendChild(make('h3', 'margin-top: 0; color: #2c3e50;', 'Select Cover Image'));
    dialog.appendChild(make('p', 'color: #666; margin-bottom: 20px;', `Found ${imageLinks.length} potential cover images. Click one to select it.`));

    const gallery = make('div', 'margin-bottom: 20px; text-align: center;');
    imageLinks.forEach((url) => {
      const option = make('div', 'display: inline-block; margin: 10px; padding: 10px; border: 2px solid #ddd; border-radius: 8px; cursor: pointer; transition: all 0.3s ease;');
      option.className = 'image-option';
      option.setAttribute('data-url', url);

      const thumbnail = make('img', 'max-width: 150px; max-height: 150px; display: block; margin-bottom: 5px;');
      // A candidate whose thumbnail cannot be loaded is hidden from the gallery.
      thumbnail.onerror = () => { option.style.display = 'none'; };
      thumbnail.src = url;

      const label = make('div', 'font-size: 12px; color: #666; word-break: break-all; max-width: 150px;', url.substring(url.lastIndexOf('/') + 1));

      option.appendChild(thumbnail);
      option.appendChild(label);
      option.onclick = () => finish(url);
      option.onmouseenter = () => { option.style.borderColor = '#3498db'; option.style.backgroundColor = '#f8f9fa'; };
      option.onmouseleave = () => { option.style.borderColor = '#ddd'; option.style.backgroundColor = 'white'; };
      gallery.appendChild(option);
    });
    dialog.appendChild(gallery);

    const actions = make('div', 'margin-top: 20px;');
    const skipButton = make('button', 'padding: 10px 20px; background: #95a5a6; color: white; border: none; border-radius: 6px; cursor: pointer; margin-right: 10px;', 'Skip & Add Manually');
    skipButton.id = 'skipSelection';
    skipButton.onclick = () => finish(null);
    const cancelButton = make('button', 'padding: 10px 20px; background: #e74c3c; color: white; border: none; border-radius: 6px; cursor: pointer;', 'No Cover');
    cancelButton.id = 'cancelSelection';
    cancelButton.onclick = () => finish(false);
    actions.appendChild(skipButton);
    actions.appendChild(cancelButton);
    dialog.appendChild(actions);

    // Only a click on the backdrop itself (not on the dialog) counts as "skip".
    modal.onclick = (e) => { if (e.target === modal) finish(null); };

    modal.appendChild(dialog);
    document.body.appendChild(modal);
  });
}
/**
 * Shows the "Add Cover Manually" dialog and waits for the user's answer.
 *
 * @returns {Promise<{blob: File}|{url: string}|null>} `{ blob }` when a local
 *   file was chosen (a file takes precedence over a typed URL), `{ url }` when
 *   only a URL was entered, and null when the dialog is skipped, dismissed via
 *   the backdrop, or confirmed with nothing provided.
 */
function promptForCover() {
  return new Promise((resolve) => {
    const overlay = document.createElement('div');
    overlay.style.cssText = `position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0,0,0,0.7); z-index: 10001; display: flex; align-items: center; justify-content: center; font-family: sans-serif;`;
    const panel = document.createElement('div');
    panel.style.cssText = `background: white; padding: 30px; border-radius: 12px; width: 90%; max-width: 500px; text-align: center; box-shadow: 0 8px 32px rgba(0,0,0,0.3);`;
    panel.innerHTML = `<h3 style="margin-top: 0; color: #2c3e50;">Add Cover Manually</h3><p style="color: #666; margin-bottom: 25px;">Enter an image URL or upload a file from your computer.</p><div style="margin-bottom: 20px;"><label for="coverUrl" style="display: block; margin-bottom: 8px; font-weight: bold; text-align: left;">Image URL:</label><input type="url" id="coverUrl" placeholder="https://example.com/image.jpg" style="width: 100%; box-sizing: border-box; padding: 10px; border: 1px solid #ccc; border-radius: 4px;"></div><p style="font-weight: bold; color: #666;">OR</p><div style="margin-bottom: 25px;"><input type="file" id="coverFile" accept="image/*" style="display: none;"><button id="uploadButton" style="width: 100%; padding: 12px; border: 2px dashed #3498db; background: #ecf0f1; color: #2c3e50; border-radius: 4px; cursor: pointer; font-weight: bold;">Choose a Local File</button></div><div><button id="useCover" style="margin-right: 10px; padding: 10px 20px; background: #2c3e50; color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: bold;">Use Cover</button><button id="skipCover" style="padding: 10px 20px; background: #95a5a6; color: white; border: none; border-radius: 6px; cursor: pointer;">Skip</button></div>`;
    overlay.appendChild(panel);
    document.body.appendChild(overlay);

    const filePicker = panel.querySelector('#coverFile');
    const urlField = panel.querySelector('#coverUrl');

    // Removes the dialog from the page, then settles the promise with `answer`.
    const closeWith = (answer) => {
      document.body.removeChild(overlay);
      resolve(answer);
    };

    // The visible button forwards to the hidden file input.
    panel.querySelector('#uploadButton').onclick = () => filePicker.click();
    panel.querySelector('#skipCover').onclick = () => { closeWith(null); };
    // Only a click on the backdrop itself dismisses the dialog.
    overlay.onclick = (event) => {
      if (event.target !== overlay) return;
      closeWith(null);
    };
    panel.querySelector('#useCover').onclick = () => {
      const chosenFile = filePicker.files.length > 0 ? filePicker.files[0] : null;
      const typedUrl = urlField.value.trim();
      if (chosenFile) {
        closeWith({ blob: chosenFile });
        return;
      }
      closeWith(typedUrl ? { url: typedUrl } : null);
    };
  });
}
})();