Pixiv novel to Epub

Download pixiv novels in Epub format

/* eslint-disable no-multi-spaces */
/* eslint-disable no-return-assign */

// ==UserScript==
// @name               Pixiv novel to Epub
// @name:zh-CN         Pixiv小说Epub合成器
// @name:en            Pixiv novel to Epub
// @namespace          PY-DNG userscripts
// @version            0.1.7.1
// @description        Download pixiv novels in Epub format
// @description:zh-CN  以Epub格式下载Pixiv小说
// @description:en     Download pixiv novels in Epub format
// @author             PY-DNG
// @license            GPL-3.0-or-later
// @match              *://www.pixiv.net/*
// @match              *://pixiv.net/*
// @connect            pximg.net
// @require            https://update.greasyfork.org/scripts/456034/1348286/Basic%20Functions%20%28For%20userscripts%29.js
// @require            data:application/javascript,window.setImmediate%20%3D%20window.setImmediate%20%7C%7C%20((f%2C%20...args)%20%3D%3E%20window.setTimeout(()%20%3D%3E%20f(args)%2C%200))%3B
// @require            https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js
// @require            https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js
// @require            https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js
// @icon               
// @grant              GM_xmlhttpRequest
// @grant              GM_registerMenuCommand
// @run-at             document-start
// ==/UserScript==

// @require            https://fastly.jsdelivr.net/npm/setimmediate@1.0.5/setImmediate.min.js
// @require            https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js
// @require            https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js
// @require            https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js

/* global LogLevel DoLog Err $ $All $CrE $AEL $$CrE addStyle detectDom destroyEvent copyProp copyProps parseArgs escJsStr replaceText getUrlArgv dl_browser dl_GM AsyncManager queueTask testChecker registerChecker loadFuncs */
/* global jEpub, JSZip, ejs */

/**
 * Small client for the Pixiv web AJAX endpoints used by this script.
 *
 * Exposes:
 *   - get(url, params, responseType, retry): one GM_xmlhttpRequest with automatic retries
 *   - safeGet(...): same as get, but scheduled through queueTask under the 'GM_xmlhttpRequest' queue
 *   - novel / insert_illusts / series / series_content: endpoint shortcuts (all go through safeGet)
 *   - utils: { toAbsURL, toSearch, queueTask }
 */
let PixivAPI = (function() {
    // Settings for the 'GM_xmlhttpRequest' queue used by safeGet.
    // NOTE(review): presumably `sleep` is a delay in ms and `max` a concurrency limit;
    // the meaning is defined by the external queueTask helper, confirm there.
    queueTask.GM_xmlhttpRequest = {
        sleep: 200,
        max: 10
    };

    return {
        get, safeGet, utils: { toAbsURL, toSearch, queueTask },

        // https://www.pixiv.net/ajax/novel/18673574
        novel: id => safeGet(`/ajax/novel/${id}`),

        // https://www.pixiv.net/ajax/novel/7522350/insert_illusts?id%5B%5D=60139778-1&lang=zh&version=1efff679631a40a674235820806f7431d67065d9
        insert_illusts: (novel_id, illust_ids, lang='zh') => {
            const url = `/ajax/novel/${novel_id}/insert_illusts`;
            const query = { lang };
            if (Array.isArray(illust_ids)) {
                // One `id[i]` parameter per requested illust
                illust_ids.forEach((id, i) => {
                    query[`id[${i}]`] = id;
                });
            } else {
                query[`id[]`] = illust_ids;
            }
            return safeGet(url, query);
        },

        // https://www.pixiv.net/ajax/novel/series/9649276?lang=zh&version=a48f2f681629909b885608393916b81989accf5b
        // 'version' removed due to unspecified meaning
        series: (id, lang='zh') => safeGet(`/ajax/novel/series/${id}`, { id, lang }),

        // https://www.pixiv.net/ajax/novel/series_content/9649276?limit=30&last_order=0&order_by=asc
        series_content: (id, limit=30, last_order=0, order_by='asc') => safeGet(`/ajax/novel/series_content/${id}`, { limit, last_order, order_by }),
    };

    /**
     * Queue a call to get() through queueTask.
     * Accepts exactly the same arguments as get() and returns whatever queueTask returns.
     */
    function safeGet(...args) {
        return queueTask(() => get.call(this, ...args), 'GM_xmlhttpRequest');
    }

    /**
     * Perform a GET request via GM_xmlhttpRequest.
     *
     * @param {string} url - absolute URL, or a path relative to https://www.pixiv.net/
     * @param {Object|null} [params] - query parameters appended to the URL
     * @param {string} [responseType='json'] - GM_xmlhttpRequest responseType
     * @param {number} [retry=2] - how many additional attempts are made after a failure
     * @returns {Promise<*>} resolves with `res.response`; rejects with the last failed
     *     response/error object once all attempts are used up
     */
    function get(url, params, responseType='json', retry=2) {
        return new Promise((resolve, reject) => {
            // A response counts as success when it is HTTP 200 and, for JSON, reports `error: false`
            const succeeded = res => res.status === 200 && (responseType !== 'json' || res.response?.error === false);

            GM_xmlhttpRequest({
                method: 'GET', responseType,
                headers: {
                    Referer: /^(www\.)?pixiv\.net$/.test(location.host) ? location.href : 'https://www.pixiv.net/'
                },
                url: toAbsURL(url, params),
                onload: res => succeeded(res) ? resolve(res.response) : checkRetry(res),
                onerror: checkRetry,
                ontimeout: checkRetry
            });

            function checkRetry(err) {
                if (retry > 0) {
                    // Resolve with the retry's promise so that BOTH its fulfilment and its
                    // rejection settle this promise (awaiting it here would leave this
                    // promise pending forever when the last retry fails).
                    resolve(get(url, params, responseType, retry - 1));
                } else {
                    reject(err);
                }
            }
        });
    }

    /**
     * Resolve `pathname` against https://www.pixiv.net/ and append `searchOptions` as query parameters.
     * Parameters are merged into any query string already present in `pathname`.
     *
     * @param {string} pathname - path or absolute URL
     * @param {Object|null} [searchOptions] - anything accepted by the URLSearchParams constructor
     * @returns {string} absolute URL
     */
    function toAbsURL(pathname, searchOptions) {
        const url = new URL(pathname, `https://www.pixiv.net/`);
        if (searchOptions) {
            for (const [key, value] of new URLSearchParams(searchOptions)) {
                url.searchParams.append(key, value);
            }
        }
        return url.href;
    }

    /**
     * Serialize `options` into a query string (without the leading '?').
     */
    function toSearch(options) {
        return new URLSearchParams(options).toString();
    }
}) ();

(async function __MAIN__() {
    'use strict';

    // Script-wide constants: localized UI texts, project links and sentinel symbols
    const CONST = {
        // Texts per language code; DEFAULT names the language used when the browser language is not listed
        TextAllLang: {
            DEFAULT: 'zh-CN',
            'zh-CN': {
                DownloadEpub: '下载当前小说Epub',
                DownloadEpub_Short: '下载Epub',
                DownloadEpub_Progress: 'Epub (C/A)',
                DownloadComplete: 'Epub下载完成',
                // Two-way lookup table between restriction codes and restriction names
                RestrictData: {
                    0: 'Enable',
                    1: 'NotFound',
                    2: 'Mypixiv',
                    3: 'R18',
                    4: 'R18G',
                    Enable: 0,
                    NotFound: 1,
                    Mypixiv: 2,
                    R18: 3,
                    R18G: 4
                },
                // Chapter titles shown for restricted chapters, keyed by restriction name
                RestrictInfo: {
                    NotFound: '#%(order)は非公開作品です', // No translation provided by pixiv yet
                    Mypixiv: '#%(order)是好P友限定作品',
                    R18: '#%(order)是R-18作品',
                    R18G: '#%(order)是R-18G作品'
                },
                UnvieableTitle: '该章节无法查看', // currently unused, can be removed
                UnvieableContent: '此章节Pixiv并未开放查看,请到Pixiv网站或app检查该章节是否设置了阅读限制\n如果是R18/R18G阅读限制,可到Pixiv网站打开R18/R18G开关'
            }
        },
        GFURL: 'https://greasyfork.org/scripts/483999',
        GFAuthorURL: 'https://greasyfork.org/users/667968',
        Symbol: {
            // Marks a series entry whose content could not be fetched
            CHAPTER_NOT_VIEWABLE: Symbol('CHAPTER_NOT_VIEWABLE')
        }
    };

    // Pick the text table: the browser language when it is listed, otherwise the default one
    const browserLang = navigator.language;
    const hasBrowserLang = Object.prototype.hasOwnProperty.call(CONST.TextAllLang, browserLang);
    const i18n = hasBrowserLang ? browserLang : CONST.TextAllLang.DEFAULT;
    CONST.Text = CONST.TextAllLang[i18n];

    // @require fallbacks
    // For every library whose global is still undefined, inject a <script> tag pointing at the
    // CDN copy and wait until all of them have loaded (rejects if any of them fails to load).
    await Promise.all([
        { missing: typeof setImmediate === 'undefined', src: 'https://fastly.jsdelivr.net/npm/setimmediate@1.0.5/setImmediate.min.js' },
        { missing: typeof JSZip === 'undefined', src: 'https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js' },
        { missing: typeof ejs === 'undefined', src: 'https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js' },
        { missing: typeof jEpub === 'undefined', src: 'https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js' }
    ].filter(script => script.missing).map(({ src }) => new Promise((resolve, reject) => {
        // Destructure `src` from the descriptor: passing the whole descriptor object as the
        // script's `src` property would request a bogus URL.
        const script = $$CrE({
            tagName: 'script',
            props: { src },
            listeners: [
                ['load', resolve],
                ['error', reject]
            ]
        });
        // The script runs at document-start, where <head> may not exist yet
        (document.head || document.documentElement).appendChild(script);
    })));

    // Progress
    // Tracks the state of the current download and notifies registered listeners.
    // Every listener is called as listener(cur, all, finished) with the stored state.
    const progress = {
        __finished: true, // {boolean} All tasks finished
        __cur: 0,         // {number}  current task number
        __all: 0,         // {number}  all tasks count
        __listeners: {},  // {Object}  listener id -> listener function
        __id: 0,          // {number}  next listener id

        // Call every registered listener with the current state.
        // Object.values takes a snapshot, so listeners may remove themselves while being notified.
        __notify() {
            Object.values(this.__listeners).forEach(l => l(this.__cur, this.__all, this.__finished));
        },

        // Begin a new run: reset counters, mark as unfinished, notify listeners
        start() {
            this.__finished = false;
            this.__cur = 0;
            this.__all = 0;
            this.__notify();
        },

        // Mark the run as finished and notify listeners
        finish() {
            this.__finished = true;
            this.__notify();
        },

        /**
         * Store the current step and, optionally, the total step count, then notify listeners.
         * @param {number} cur - current task number
         * @param {number|false} [all=false] - total task count; `false` keeps the stored total
         */
        update(cur, all=false) {
            this.__cur = cur;
            all !== false && (this.__all = all);
            // Report the stored total rather than the raw argument, so a call without `all`
            // does not hand `false` to listeners as the total
            this.__notify();
        },

        // Register a listener; returns the id to pass to remove()
        listen(l) {
            const id = this.__id++;
            this.__listeners[id] = l;
            return id;
        },

        // Unregister the listener with the given id
        remove(id) {
            delete this.__listeners[id];
        },

        get finished() {
            return this.__finished;
        },
        get cur() {
            return this.__cur;
        },
        get all() {
            return this.__all;
        },
        get listeners() {
            return this.__listeners;
        }
    };

    // User Interface
    // Two entry points: a userscript-manager menu command, and a button injected into the page
    GM_registerMenuCommand(CONST.Text.DownloadEpub, downloadEpub);
    loadFuncs([{
        func: () => {
            // Append a download button to each matched section, but only on novel / series pages
            detectDom({
                selector: 'main>section section',
                callback: section => {
                    const onNovelPage = testChecker({
                        type: 'regpath',
                        value: [
                            /^\/novel\/show\.php$/,
                            /^\/novel\/series\/\d+$/
                        ]
                    });
                    if (!onNovelPage) { return; }

                    section.appendChild(makeDownloadButton());
                }
            });

            // Build the <div><button><svg/><span/></button></div> download control
            function makeDownloadButton() {
                const DOWNLOAD = '<svg class="epub-download-svg" viewBox="0 0 32 32" width="32" height="32">\n            <mask id="mask">\n                <rect x="0" y="0" width="32" height="32" fill="white"></rect>\n                <path d="M21.358 6.7v6.39H27L16 25.7 5 13.09h5.642V6.7z"></path>\n            </mask>\n            <path d="M10.64 5.1c-1.104 0-2 .716-2 1.6v4.8H5c-.745 0-1.428.332-1.773.86s-.294 1.167.133 1.656l11 12.61c.374.43.987.685 1.64.685s1.266-.256 1.64-.685l11-12.61c.426-.49.477-1.127.133-1.656S27.745 11.5 27 11.5h-3.644V6.7c-.001-.883-.895-1.6-2-1.6z" mask="url(#mask)"></path>\n        </svg>';
                // Cancel icon, not referenced anywhere in this function yet
                const CANCEL = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 384 512"><!--!Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free Copyright 2024 Fonticons, Inc.--><path d="M342.6 150.6c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L192 210.7 86.6 105.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3L146.7 256 41.4 361.4c-12.5 12.5-12.5 32.8 0 45.3s32.8 12.5 45.3 0L192 301.3 297.4 406.6c12.5 12.5 32.8 12.5 45.3 0s12.5-32.8 0-45.3L237.3 256 342.6 150.6z"/></svg>';
                const STYLE = '.epub-download { margin-right: 20px; line-height: 32px; font-weight: 700; cursor: pointer; padding: 0px; background: none; border: none; } .epub-download-button { display: inline-block; padding: 0; color: inherit; background: none; height: 32px; line-height: 32px; border: none; font-weight: 700; cursor: pointer; } .epub-download-svg { vertical-align: middle; overflow: visible !important; margin-right: 4px; width: 12px; font-size: 0; -webkit-transition: fill .2s; transition: fill .2s; fill: currentColor; } .epub-download-span { vertical-align: middle; }';
                addStyle(STYLE, 'novel-epub-download');

                const wrapper = $$CrE({
                    tagName: 'div',
                    classes: 'epub-download'
                });
                const btn = $$CrE({
                    tagName: 'button',
                    classes: 'epub-download-button',
                    props: { innerHTML: DOWNLOAD }
                });
                const label = $$CrE({
                    tagName: 'span',
                    classes: 'epub-download-span',
                    props: { innerText: CONST.Text.DownloadEpub_Short }
                });

                // Click handler: start a download unless one is already running,
                // and mirror its progress in the button label
                const download = () => {
                    if (!progress.finished) { return; }
                    const listenerId = progress.listen((cur, all, finished) => {
                        if (!finished) {
                            label.innerText = replaceText(CONST.Text.DownloadEpub_Progress, { C: cur, A: all });
                            return;
                        }
                        label.innerText = CONST.Text.DownloadComplete;
                        progress.remove(listenerId);
                    });
                    downloadEpub();
                };

                $AEL(wrapper, 'click', download);
                btn.appendChild(label);
                wrapper.appendChild(btn);
                return wrapper;
            }
        }
    }])

    /**
     * Start the download that matches the current page:
     * a whole series on /novel/series/<id>, a single novel on /novel/show.php.
     * Does nothing on any other path.
     */
    function downloadEpub() {
        const { pathname } = location;

        // Novel series, e.g. https://www.pixiv.net/novel/series/9649276
        if (/^\/novel\/series\/\d+$/.test(pathname)) {
            downloadSeries();
        }

        // Single novel, e.g. https://www.pixiv.net/novel/show.php?id=18673574
        if (/^\/novel\/show\.php$/.test(pathname)) {
            downloadNovel();
        }
    }

    /**
     * Download the novel series of the current page (/novel/series/<id>) as one Epub.
     *
     * Steps (reported through `progress` as 1..5 of 5):
     *   1. fetch series data and initialise the epub
     *   2. list all novels of the series
     *   3. fetch every viewable novel
     *   4. load all chapters and add them in list order
     *   5. save the epub
     *
     * @returns {Promise<void>} rejects with the original error when any step fails
     */
    async function downloadSeries() {
        DoLog('Start download series');
        progress.start();

        try {
            progress.update(1, 5);
            const id = location.pathname.split('/').pop();
            const epub = new jEpub();

            // Get series data
            const series = (await PixivAPI.series(id)).body;
            await initEpub(epub, series);

            // List all novels, one request per page of PAGE_SIZE entries
            progress.update(2, 5);
            const PAGE_SIZE = 30;
            const pageRequests = [];
            for (let offset = 0; offset < series.total; offset += PAGE_SIZE) {
                pageRequests.push(PixivAPI.series_content(id, PAGE_SIZE, offset));
            }
            const list = (await Promise.all(pageRequests)).flatMap(json => json.body.page.seriesContents);
            DoLog(list);

            // Fetch viewable novels; the rest become placeholders carrying their series info
            progress.update(3, 5);
            const novel_datas = await Promise.all(list.map(
                async novel => (
                    // Loose comparison kept from the original so that both 0 and '0' count as viewable
                    novel.series.viewableType == 0 ? (
                        (await PixivAPI.novel(novel.id)).body
                    ) : {
                        unviewable: CONST.Symbol.CHAPTER_NOT_VIEWABLE,
                        series: novel.series
                    }
                )
            ));
            DoLog(novel_datas);

            // Load all chapters concurrently, then add them one after another:
            // Promise.all keeps the input order, so the chapter order is preserved
            progress.update(4, 5);
            const chapters = await Promise.all(novel_datas.map(data => loadChapter(epub, data)));
            chapters.forEach(chapter => addLoaded(epub, chapter));
            DoLog(chapters);
            DoLog('Saving Epub');

            progress.update(5, 5);
            await saveEpub(epub, series.title + '.epub', () => progress.finish());
        } catch (err) {
            // Without this, a failed request would leave `progress` unfinished forever,
            // and callers that gate on progress.finished could never start another download
            if (!progress.finished) { progress.finish(); }
            throw err;
        }
    }

    /**
     * Download the novel of the current page (/novel/show.php?id=<id>) as an Epub.
     * Progress is reported through `progress` as steps 1..2 of 2.
     *
     * @returns {Promise<void>} rejects with the original error when any step fails
     */
    async function downloadNovel() {
        DoLog('Start download novel');
        progress.start();

        try {
            progress.update(1, 2);

            const id = getUrlArgv('id');
            const json = await PixivAPI.novel(id);
            const data = json.body;

            // Epub metadata and the single chapter are independent, so prepare them concurrently
            const epub = new jEpub();
            await Promise.all([initEpub(epub, data), addChapter(epub, data)]);
            progress.update(2, 2);

            await saveEpub(epub, data.title + '.epub', () => progress.finish());
        } catch (err) {
            // Without this, a failed request would leave `progress` unfinished forever,
            // and callers that gate on progress.finished could never start another download
            if (!progress.finished) { progress.finish(); }
            throw err;
        }
    }

    /**
     * Fill in the epub's metadata, date, notes page and cover.
     * Accepts the `body` of either PixivAPI.novel or PixivAPI.series: for every field that
     * differs between the two shapes, the first available variant is used.
     *
     * @param {jEpub} epub - epub instance to initialise
     * @param {Object} data - novel or series data
     * @returns {Promise<jEpub>} the same epub instance
     */
    async function initEpub(epub, data) {
        // Tags come either as a plain array or as { tags: [{ tag }, ...] }
        const tagList = Array.isArray(data.tags) ? data.tags : data.tags.tags.map(entry => entry.tag);
        const summary = data.description || data.caption || '';
        epub.init({
            i18n: 'en',
            title: data.title,
            author: data.userName,
            publisher: '',
            description: summary,
            tags: tagList
        });

        // NOTE(review): lastPublishedContentTimestamp is handed to Date unchanged - confirm its unit is milliseconds
        const publishedAt = new Date(data.uploadDate || data.lastPublishedContentTimestamp);
        epub.date(publishedAt);

        // Notes page: source link, script credits and a copyright reminder
        const pageUrl = htmlEncode(location.href);
        const pageTitle = htmlEncode(data.extraData.meta.title);
        const scriptUrl = htmlEncode(CONST.GFURL);
        const scriptName = htmlEncode(GM_info.script.name);
        const authorUrl = htmlEncode(CONST.GFAuthorURL);
        const authorName = htmlEncode(GM_info.script.author);
        epub.notes(`EPUB generated from: <a href="${pageUrl}" title="${pageTitle}">${pageUrl}</a></br>By <a href="${scriptUrl}">${scriptName}</a> author <a href="${authorUrl}">${authorName}</a></br></br>Copyright belongs to the article author. Please comply with relevant legal requirements while reading and distributing this file.`);

        // Cover image
        const coverSrc = data.coverUrl || data.cover.urls.original;
        const coverBlob = await PixivAPI.safeGet(coverSrc, null, 'blob');
        epub.cover(coverBlob);

        return epub;
    }

    /**
     * Load a chapter's assets and build a 'chapter': a `{ title, content }` pair ready for epub.add.
     *
     * For placeholder data (`data.unviewable === CONST.Symbol.CHAPTER_NOT_VIEWABLE`) a short
     * notice chapter is returned. Otherwise the novel text is converted to HTML: images are
     * downloaded and registered on `epub`, pixiv markup ([[rb:..]], [chapter:..], [[jumpuri:..]],
     * [jump:N], [newpage]) is translated, and the description and cover are prepended.
     *
     * @param {jEpub} epub - epub instance that receives the images
     * @param {Object} data - novel data (PixivAPI.novel body) or an unviewable placeholder
     * @returns {Promise<{title: string, content: string}>}
     */
    async function loadChapter(epub, data) {
        if (data?.unviewable === CONST.Symbol.CHAPTER_NOT_VIEWABLE) {
            const texthint = replaceText(CONST.Text.RestrictInfo[CONST.Text.RestrictData[data.series.viewableType]], { '%(order)': data.series.contentOrder });
            return {
                title: texthint,
                // replaceAll: every line break of the notice becomes a <br>, not only the first one
                content: `<p>${htmlEncode(CONST.Text.UnvieableContent).replaceAll('\n', '<br>')}</p>`
            };
        }

        let content = data.content;

        // Load images
        const imagePromises = [];
        const loadImage = (url, id) => {
            imagePromises.push(PixivAPI.safeGet(url, null, 'blob').then(blob => epub.image(blob, id)));
        };

        // '[uploadedimage:ID]': images uploaded with the novel
        const uploadedIds = new Set();
        content = content.replace(/\[uploadedimage:([\d\-]+)\]/g, (match_str, id) => {
            const url = data.textEmbeddedImages?.[id]?.urls?.original;
            // Unknown image: keep the marker, it is reported by the unhandled-marker check below
            if (!url) { return match_str; }
            // Download each image once, even when it is referenced several times
            if (!uploadedIds.has(id)) {
                uploadedIds.add(id);
                loadImage(url, id);
            }
            // Quote the id like the other image placeholders do, so ids containing '-' stay intact
            return `\n<%= image[${escJsStr(id)}] %>\n`;
        });

        // '[pixivimage:ID]': pixiv illusts referenced by the novel.
        // De-duplicate by id string (match arrays are distinct objects, a Set of them never dedupes)
        const illustIds = [...new Set(Array.from(content.matchAll(/\[pixivimage:([\d\-]+)\]/g), match => match[1]))];
        if (illustIds.length) {
            const illustsJson = await PixivAPI.insert_illusts(data.id, illustIds);
            for (const id of illustIds) {
                const illust = illustsJson.body?.[id];
                // Invisible or missing illusts keep their marker
                if (!illust?.visible) { continue; }
                loadImage(illust.illust.images.original, id);
                content = content.replaceAll(`[pixivimage:${id}]`, `\n<%= image[${escJsStr(id)}] %>\n`);
            }
        }
        await Promise.all(imagePromises);

        // Parse '[[rb:久世彩葉 > くぜ いろは]]' // 10618179
        content = content.replace(/\[\[rb:([^\[\]]+) *> *([^\[\]]+)\]\]/g, (match_str, main, desc) => {
            return `<ruby>${htmlEncode(main)}<rp>(</rp><rt>${htmlEncode(desc)}</rt><rp>)</rp></ruby>`;
        });

        // Parse '[chapter:【プロローグ】]' // 21893883
        content = content.replace(/\[chapter: *([^\]]+)\]/g, (match_str, chapterName) => {
            return `<h2>${chapterName}</h2>`;
        });

        // Parse '[[jumpuri:捕虜の待遇に関する千九百四十九年八月十二日のジュネーヴ条約(第三条約)【日本国防衛省ホームページより】 > https://www.mod.go.jp/j/presiding/treaty/geneva/geneva3.html]]' // 19912145#12
        content = content.replace(/\[\[jumpuri:([^\[\]]+) *> *([^\[\]]+)\]\]/g, (match_str, text, url) => {
            return `<a href=${escJsStr(url)}>${htmlEncode(text)}</a>`;
        });

        // Parse '[jump:2]' // 22003928
        content = content.replace(/\[jump:(\d+)\]/g, (match_str, page) => {
            return `<a href=${escJsStr(`#ChapterPage-${page}`)}>Jump to page ${htmlEncode(page)}</a>`;
        });

        // Report markers that were not handled above
        let markers = Array.from(content.matchAll(/\[+[^\[\]]+\]+/g));
        markers = markers.filter(match => {
            // Ignore the '[...]' part of image placeholders: '<%= image' is 9 chars, ' %>' is 3
            const pattern = match.input.substring(match.index-9, match.index + match[0].length+3);
            const isImagePattern = pattern.startsWith('<%= image[') && pattern.endsWith('] %>');

            // Ignore [newpage]s
            const isNewpagePattern = match[0].includes('[newpage]'); // Why .include: for matches like '[xxx[[newpage]]]blabla]]'
            return !isImagePattern && !isNewpagePattern;
        });
        markers.length && DoLog(LogLevel.Warning, {
            message: 'Undealed markers found',
            chapter: data,
            markers
        });

        // Up to 4 connected newlines (3 empty lines between paragraphs) at once
        content = content.replaceAll(/\n{4,}/g, '\n'.repeat(4));

        // Parse '[newpage]' & convert into html
        content = content.split('[newpage]').map(subContent => {
            // Wrap each non-empty line of the page into <p>, empty lines become <br>
            return subContent.split('\n').map(line => line.trim() ? `<p>${line}</p>` : '<br>').join('\n');
        }).map((pageHTML, index) => {
            // Pages are numbered from 1
            const page = index + 1;
            const page_id = `ChapterPage-${page}`;

            // Remove <br>s at beginning and ending of each page
            pageHTML = pageHTML.replaceAll(/^(<br>|\s)+/g, '').replaceAll(/(<br>|\s)+$/g, '');

            // Add page number to start and end of each page
            const pageNum = `<div class="ChapterBlockMarker">Page ${page}</div>`;
            pageHTML = `${pageNum}\n${pageHTML}\n${pageNum}`;

            // Wrap each page's html in <div id=pageID>
            return `<div id=${escJsStr(page_id)} class="ChapterContentBlock">\n${pageHTML}\n</div>`;
        }).join('\n');

        // Add description to chapter beginning (one <p> per non-empty line)
        let description = (data.description ?? '')
            .replace(/(<br \/>)+/g, '<br>').split('<br>')
            .filter(line => line.trim().length)
            .map(line => `<p>${line}</p>`)
            .join('\n');
        description = `<div id="ChapterDescription" class="ChapterContentBlock">${description}</div>\n`;
        content = description + content;

        // Add cover image to chapter beginning
        const cover = await PixivAPI.safeGet(data.coverUrl, null, 'blob');
        const coverId = `ChapterCover-${data.id}`;
        epub.image(cover, coverId);
        content = `\n<%= image[${escJsStr(coverId)}] %>\n` + content;

        // Add style
        content = '<style>.ChapterContentBlock { border-bottom: solid; padding: 1em 0; } .ChapterBlockMarker { font-size: 1em; text-align: right; }</style>' + content;

        return {
            title: data.title,
            content
        };
    }

    /**
     * Append an already loaded chapter (see loadChapter) to the epub.
     */
    function addLoaded(epub, chapter) {
        const { title, content } = chapter;
        epub.add(title, content);
    }

    /**
     * Load one chapter from novel data and append it to the epub right away.
     */
    async function addChapter(epub, data) {
        addLoaded(epub, await loadChapter(epub, data));
    }

    /**
     * Generate the epub file and hand it to the browser as a download.
     *
     * @param {jEpub} epub - epub to generate
     * @param {string} filename - name of the downloaded file
     * @param {Function} [callback] - called, on a later timer tick, after the download was triggered
     */
    async function saveEpub(epub, filename, callback=function() {}) {
        const file = await epub.generate('blob');
        const objectUrl = URL.createObjectURL(file);
        dl_browser(objectUrl, filename);

        // Release the object URL and report completion asynchronously
        const cleanup = () => {
            URL.revokeObjectURL(objectUrl);
            callback();
        };
        setTimeout(cleanup);
    }

    /**
     * Replace characters of `text` with numeric HTML entities (`&#N;`).
     *
     * @param {string} text - text to encode
     * @param {string} [encodes] - characters to encode; a falsy value (e.g. '') encodes every character
     * @returns {string} encoded text
     */
    function htmlEncode(text, encodes = '<>\'";&#') {
        // Array.from splits by code point, so codePointAt(0) yields the full code point;
        // charCodeAt(0) would emit only the high surrogate for characters outside the BMP
        return Array.from(text, char => (!encodes || encodes.includes(char)) ? `&#${char.codePointAt(0)};` : char).join('');
    }

    /**
     * addEventListener replacement.
     * Pixiv's js hooked the original EventTarget.prototype.addEventListener; to bypass the hook,
     * the method is taken from the window of a hidden iframe and applied to `elm`.
     * The iframe is created on first use and the method is cached on `$AEL.addEventListener`.
     *
     * @param {EventTarget} elm - target to listen on
     * @param {...*} args - arguments forwarded to addEventListener
     */
    function $AEL(elm, ...args) {
        let nativeAdd = $AEL.addEventListener;

        if (!nativeAdd) {
            const frame = $$CrE({
                tagName: 'iframe',
                styles: {
                    border: 'none',
                    padding: 'none',
                    width: '0',
                    height: '0',
                    'z-index': '-9999999',
                },
                props: {
                    'srcdoc': '<html></html>'
                }
            });
            // The frame is attached to the document before its contentWindow is read
            document.body.appendChild(frame);
            nativeAdd = frame.contentWindow.EventTarget.prototype.addEventListener;
            $AEL.addEventListener = nativeAdd;
        }

        return nativeAdd.apply(elm, args);
    }
})();