Pixiv novel to Epub

Download pixiv novels in Epub format

/* eslint-disable no-multi-spaces */
/* eslint-disable no-return-assign */

// ==UserScript==
// @name               Pixiv novel to Epub
// @name:zh-CN         Pixiv小说Epub合成器
// @name:en            Pixiv novel to Epub
// @namespace          PY-DNG userscripts
// @version            0.1.5
// @description        Download pixiv novels in Epub format
// @description:zh-CN  以Epub格式下载Pixiv小说
// @description:en     Download pixiv novels in Epub format
// @author             PY-DNG
// @license            GPL-3.0-or-later
// @match              *://www.pixiv.net/*
// @match              *://pixiv.net/*
// @connect            pximg.net
// @require            https://update.greasyfork.org/scripts/456034/1303041/Basic%20Functions%20%28For%20userscripts%29.js
// @require            data:application/javascript,window.setImmediate%20%3D%20window.setImmediate%20%7C%7C%20((f%2C%20...args)%20%3D%3E%20window.setTimeout(()%20%3D%3E%20f(args)%2C%200))%3B
// @require            https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js
// @require            https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js
// @require            https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js
// @icon               
// @grant              GM_xmlhttpRequest
// @grant              GM_registerMenuCommand
// @run-at             document-start
// ==/UserScript==

// @require            https://fastly.jsdelivr.net/npm/setimmediate@1.0.5/setImmediate.min.js
// @require            https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js
// @require            https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js
// @require            https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js

/* global LogLevel DoLog Err $ $All $CrE $AEL $$CrE addStyle detectDom destroyEvent copyProp copyProps parseArgs escJsStr replaceText getUrlArgv dl_browser dl_GM AsyncManager */
/* global jEpub, JSZip, ejs */

let PixivAPI = (function() {
	queueTask.sleep = 200;
	queueTask.max = 10;

	return {
		get, safeGet, utils: { toAbsURL, toSearch, queueTask },

		// https://www.pixiv.net/ajax/novel/18673574
		novel: id => safeGet(`/ajax/novel/${id}`),

		// https://www.pixiv.net/ajax/novel/7522350/insert_illusts?id%5B%5D=60139778-1&lang=zh&version=1efff679631a40a674235820806f7431d67065d9
		insert_illusts: (novel_id, illust_ids, lang='zh') => {
			const url = `/ajax/novel/${novel_id}/insert_illusts`;
			const query = { lang };
			if (Array.isArray(illust_ids)) {
				for (let i = 0; i < illust_ids.length; i++) {
					const id = illust_ids[i];
					query[`id[${i}]`] = id;
				}
			} else {
				query[`id[]`] = illust_ids;
			}
			return safeGet(url, query);
		},

		// https://www.pixiv.net/ajax/novel/series/9649276?lang=zh&version=a48f2f681629909b885608393916b81989accf5b
		// 'version' removed due to unspecified meaning
		series: (id, lang='zh') => safeGet(`/ajax/novel/series/${id}`, { id, lang }),

		// https://www.pixiv.net/ajax/novel/series_content/9649276?limit=30&last_order=0&order_by=asc
		series_content: (id, limit=30, last_order=0, order_by='asc') => safeGet(`/ajax/novel/series_content/${id}`, { limit, last_order, order_by }),
	};

	/**
	 * Throttled variant of get(): the request is handed to queueTask() instead of
	 * being sent immediately. Accepts exactly the same arguments as get() and
	 * returns whatever queueTask() returns for the wrapped call.
	 */
	function safeGet(...requestArgs) {
		const sendRequest = () => get.apply(this, requestArgs);
		return queueTask(sendRequest);
	}

	/**
	 * Send a GET request through GM_xmlhttpRequest, retrying on failure.
	 *
	 * A response counts as successful when its status is 200 and, for the
	 * 'json' response type, the parsed body has `error === false`.
	 *
	 * @param {string} url - absolute URL, or a path resolved by toAbsURL()
	 * @param {Object} [params] - query parameters, passed on to toAbsURL()
	 * @param {string} [responseType='json'] - GM_xmlhttpRequest responseType
	 * @param {number} [retry=2] - how many additional attempts are allowed after a failure
	 * @returns {Promise<*>} resolves with `res.response`; rejects with the last
	 *     failing response/error object once the retries are used up
	 */
	function get(url, params, responseType='json', retry=2) {
		return new Promise((resolve, reject) => {
			GM_xmlhttpRequest({
				method: 'GET', responseType,
				headers: {
					// Use the current page as Referer only when actually running on pixiv.net
					Referer: /^(www\.)?pixiv\.net$/.test(location.host) ? location.href : 'https://www.pixiv.net/'
				},
				url: toAbsURL(url, params),
				onload: res => isSuccess(res) ? resolve(res.response) : checkRetry(res),
				onerror: checkRetry,
				ontimeout: checkRetry
			});

			function isSuccess(res) {
				return res.status === 200 && (responseType !== 'json' || res.response?.error === false);
			}

			function checkRetry(err) {
				// Resolve WITH the retry promise so that its eventual rejection
				// propagates to our caller. Awaiting it here instead would throw
				// inside this helper and leave the outer promise pending forever.
				retry > 0 ? resolve(get(url, params, responseType, retry - 1)) : reject(err);
			}
		});
	}

	/**
	 * Resolve a path (or an already absolute URL) against https://www.pixiv.net/
	 * and merge the given query parameters into it.
	 *
	 * Parameters are appended through URL.searchParams, so a query string that is
	 * already part of `pathname` is kept (joined with '&') and a '#fragment'
	 * stays at the end of the result.
	 *
	 * @param {string} pathname - path or absolute URL
	 * @param {Object|string|URLSearchParams} [searchOptions] - anything the
	 *     URLSearchParams constructor accepts; falsy means "no parameters"
	 * @returns {string} the absolute URL
	 */
	function toAbsURL(pathname, searchOptions) {
		const url = new URL(pathname, 'https://www.pixiv.net/');
		if (searchOptions) {
			for (const [key, value] of new URLSearchParams(searchOptions)) {
				url.searchParams.append(key, value);
			}
		}
		return url.href;
	}

	/**
	 * Serialize query parameters into a URL-encoded query string (without the
	 * leading '?'). `options` is passed straight to the URLSearchParams constructor.
	 */
	function toSearch(options) {
		const searchParams = new URLSearchParams(options);
		return searchParams.toString();
	}

	/**
	 * Run `task` through a shared queue with limited concurrency.
	 *
	 * Configuration and state live on the function object itself:
	 *   queueTask.max    - maximum number of tasks running at once (default 3)
	 *   queueTask.sleep  - delay in ms before a dequeued task is started (default 500)
	 *   queueTask.tasks / queueTask.ongoing - pending entries / running count
	 *
	 * @param {Function} task - called without arguments; may return a promise or a plain value
	 * @returns {Promise<*>} settles like the task's result; a task that throws
	 *     synchronously produces a rejection
	 */
	function queueTask(task) {
		init();

		return new Promise((resolve, reject) => {
			queueTask.tasks.push({ task, resolve, reject });
			checkTask();
		});

		// Start the next pending task if a concurrency slot is free
		function checkTask() {
			setTimeout(() => {
				if (queueTask.ongoing >= queueTask.max || !queueTask.tasks.length) {
					return;
				}
				const job = queueTask.tasks.shift();
				queueTask.ongoing++;
				setTimeout(() => {
					// Calling the task inside a promise executor turns a synchronous throw
					// into a rejection and accepts non-promise return values, so the slot
					// is always released. The two-argument then() guarantees settle()
					// runs exactly once per job.
					new Promise(done => done(job.task())).then(
						value => settle(job.resolve, value),
						error => settle(job.reject, error)
					);
				}, queueTask.sleep);
			});
		}

		// Release the slot, report the outcome to the caller, then look for more work
		function settle(finish, outcome) {
			queueTask.ongoing--;
			finish(outcome);
			checkTask();
		}

		// Lazily create the queue state; user-assigned max/sleep take precedence over defaults
		function init() {
			if (queueTask.initialized) {
				return;
			}
			const defaults = {
				max: 3,
				sleep: 500,
			};
			for (const [prop, value] of Object.entries(defaults)) {
				if (!Object.prototype.hasOwnProperty.call(queueTask, prop)) {
					queueTask[prop] = value;
				}
			}

			queueTask.tasks = [];
			queueTask.ongoing = 0;
			queueTask.initialized = true;
		}
	}
}) ();

(async function __MAIN__() {
    'use strict';

	// Script-wide constants: UI texts per language and the links embedded in the
	// generated EPUB's notes page.
	const CONST = {
		// UI strings keyed by language tag; DEFAULT names the language to fall back to
		TextAllLang: {
			DEFAULT: 'zh-CN',
			'zh-CN': {
				DownloadEpub: '下载当前小说Epub'
			}
		},
		GFURL: 'https://greasyfork.org/scripts/483999',
		GFAuthorURL: 'https://greasyfork.org/users/667968'
	};

	// Init language: use navigator.language when TextAllLang has an entry for it,
	// otherwise fall back to TextAllLang.DEFAULT. The chosen table is exposed as CONST.Text.
	const i18n = Object.keys(CONST.TextAllLang).includes(navigator.language) ? navigator.language : CONST.TextAllLang.DEFAULT;
	CONST.Text = CONST.TextAllLang[i18n];

	// @require fallbacks: for every library whose global is still undefined,
	// inject a <script> tag and wait until it has loaded (a load error rejects
	// and therefore aborts __MAIN__).
	await Promise.all([
		{ missing: typeof setImmediate === 'undefined', src: 'https://fastly.jsdelivr.net/npm/setimmediate@1.0.5/setImmediate.min.js' },
		{ missing: typeof JSZip === 'undefined', src: 'https://fastly.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js' },
		{ missing: typeof ejs === 'undefined', src: 'https://fastly.jsdelivr.net/npm/ejs@3.1.9/ejs.min.js' },
		{ missing: typeof jEpub === 'undefined', src: 'https://fastly.jsdelivr.net/npm/jepub@2.1.4/dist/jepub.min.js' }
	// Destructure the URL out of each descriptor: passing the whole descriptor
	// object as `src` would make the script element request "[object Object]".
	// document.head may not exist yet because the script runs at document-start.
	].filter(script => script.missing).map(({ src }) => new Promise((resolve, reject) => (document.head ?? document.documentElement).appendChild($$CrE({
		tagName: 'script',
		props: { src },
		listeners: [
			['load', resolve],
			['error', reject]
		]
	})))));

	// User Interface: the only entry point is a userscript-manager menu command,
	// labelled with the localized DownloadEpub text, that runs downloadEpub().
	GM_registerMenuCommand(CONST.Text.DownloadEpub, downloadEpub);

	/**
	 * Menu command handler: looks at the current page's pathname and starts the
	 * matching download. Pages that are neither a series nor a novel are ignored.
	 */
	function downloadEpub() {
		const { pathname } = location;

		// Novel series page, e.g. https://www.pixiv.net/novel/series/9649276
		const onSeriesPage = /^\/novel\/series\/\d+$/.test(pathname);
		if (onSeriesPage) {
			downloadSeries();
		}

		// Single novel page, e.g. https://www.pixiv.net/novel/show.php?id=18673574
		const onNovelPage = /^\/novel\/show\.php$/.test(pathname);
		if (onNovelPage) {
			downloadNovel();
		}
	}

	/**
	 * Download every novel of the series shown on the current page and save them
	 * as one EPUB named after the series title.
	 *
	 * The series id is the last segment of location.pathname. The returned
	 * promise settles only after saveEpub() has finished, so failures while
	 * generating the file reach the caller instead of being lost.
	 *
	 * @returns {Promise<void>}
	 */
	async function downloadSeries() {
		DoLog('Start download series');

		// Number of entries requested per series_content call
		const PAGE_SIZE = 30;

		const id = location.pathname.split('/').pop();
		const epub = new jEpub();

		// Get series data
		const series = (await PixivAPI.series(id)).body;
		await initEpub(epub, series);

		// List all novels: request every page of the table of contents at once
		const pageRequests = [];
		for (let index = 0; index < series.total; index += PAGE_SIZE) {
			pageRequests.push(PixivAPI.series_content(id, PAGE_SIZE, index));
		}
		const list = (await Promise.all(pageRequests)).flatMap(json => json.body.page.seriesContents);
		DoLog(list);

		const novel_datas = await Promise.all(list.map(async novel => (await PixivAPI.novel(novel.id)).body));
		DoLog(novel_datas);

		// Loading a chapter and adding it to the book are separate steps: chapters
		// are loaded concurrently, then added in list order. Promise.all keeps the
		// input order, so the chapter order is not affected by download timing.
		const chapters = await Promise.all(novel_datas.map(data => loadChapter(epub, data)));
		chapters.forEach(chapter => addLoaded(epub, chapter));
		DoLog(chapters);
		DoLog('Saving Epub');

		await saveEpub(epub, series.title + '.epub');
	}

	/**
	 * Download the novel shown on the current page (its id is read from the
	 * `id` URL argument) and save it as an EPUB named after the novel title.
	 *
	 * The returned promise settles only after saveEpub() has finished, so
	 * failures while generating the file reach the caller instead of being lost.
	 *
	 * @returns {Promise<void>}
	 */
	async function downloadNovel() {
		DoLog('Start download novel');

		const id = getUrlArgv('id');
		const json = await PixivAPI.novel(id);
		const data = json.body;

		const epub = new jEpub();
		// Book metadata and the single chapter do not depend on each other
		await Promise.all([initEpub(epub, data), addChapter(epub, data)]);

		await saveEpub(epub, data.title + '.epub');
	}

	/**
	 * Fill in book-level metadata, the notes page and the cover image.
	 * Compatible with the bodies returned by PixivAPI.novel / PixivAPI.series:
	 * `tags` may be a plain array or an object holding a `tags` array of
	 * `{ tag }` entries, and the cover comes from `coverUrl` or `cover.urls.original`.
	 *
	 * @param {Object} epub - jEpub instance to initialize
	 * @param {Object} data - novel or series body
	 * @returns {Promise<Object>} the same epub instance
	 */
	async function initEpub(epub, data) {
		// A body without tag information yields an empty tag list instead of throwing
		const tags = Array.isArray(data.tags) ? data.tags : (data.tags?.tags ?? []).map(tag => tag.tag);

		epub.init({
			i18n: 'en',
			title: data.title,
			author: data.userName,
			publisher: '',
			description: data.description || data.caption || '',
			tags
		});
		// NOTE(review): lastPublishedContentTimestamp is passed to Date unchanged;
		// confirm whether the API reports it in seconds or milliseconds.
		epub.date(new Date(data.uploadDate || data.lastPublishedContentTimestamp));

		// Link title: prefer the page meta title, fall back to the plain title
		// when the body carries no extraData.
		const pageTitle = data.extraData?.meta?.title ?? data.title ?? '';
		const pageURL = htmlEncode(location.href);
		// Line breaks are written as <br/>; a lone closing </br> is not valid markup
		epub.notes(`EPUB generated from: <a href="${pageURL}" title="${htmlEncode(pageTitle)}">${pageURL}</a><br/>By <a href="${htmlEncode(CONST.GFURL)}">${htmlEncode(GM_info.script.name)}</a> author <a href="${htmlEncode(CONST.GFAuthorURL)}">${htmlEncode(GM_info.script.author)}</a><br/><br/>Copyright belongs to the article author. Please comply with relevant legal requirements while reading and distributing this file.`);

		const coverUrl = data.coverUrl || data.cover.urls.original;
		const cover = await PixivAPI.safeGet(coverUrl, null, 'blob');
		epub.cover(cover);

		return epub;
	}

	/**
	 * Load chapter assets and generate { title, content } (ready for epub.add),
	 * which is called a 'chapter'.
	 *
	 * Steps: register the chapter cover, download embedded images, convert the
	 * Pixiv markers ([uploadedimage], [pixivimage], [[rb]], [chapter],
	 * [[jumpuri]], [jump], [newpage]) into HTML / image placeholders, and log a
	 * warning listing any marker that was left unconverted.
	 *
	 * @param {Object} epub - jEpub instance the images are registered on
	 * @param {Object} data - novel body as returned by PixivAPI.novel
	 * @returns {Promise<{title: string, content: string}>}
	 */
	async function loadChapter(epub, data) {
		let content = data.content;

		// Add description and cover image to chapter beginning
		const cover = await PixivAPI.safeGet(data.coverUrl, null, 'blob');
		const coverId = `ChapterCover-${data.id}`;
		epub.image(cover, coverId);
		content = data.description + `\n<%= image[${escJsStr(coverId)}] %>\n` + content;

		// Load images
		const imagePromises = [];
		const queueImage = (url, id) => {
			imagePromises.push(PixivAPI.safeGet(url, null, 'blob').then(blob => epub.image(blob, id)));
		};

		// Images uploaded with the novel. Each id is downloaded once, however often
		// it is referenced. A marker without a known image URL is left untouched so
		// that it shows up in the warning below instead of aborting the chapter.
		const uploadedIds = new Set();
		content = content.replace(/\[uploadedimage:([\d\-]+)\]/g, (match_str, id) => {
			const url = data.textEmbeddedImages?.[id]?.urls?.original;
			if (!url) {
				return match_str;
			}
			if (!uploadedIds.has(id)) {
				uploadedIds.add(id);
				queueImage(url, id);
			}
			// Quote the id like every other image placeholder so it is looked up as a string key
			return `\n<%= image[${escJsStr(id)}] %>\n`;
		});

		// Pixiv illustrations, de-duplicated by id (match objects are all distinct,
		// so a Set of matches would never remove duplicates)
		const illusts = new Map();
		for (const match of content.matchAll(/\[pixivimage:([\d\-]+)\]/g)) {
			if (!illusts.has(match[1])) {
				illusts.set(match[1], match[0]);
			}
		}
		if (illusts.size) {
			const illustsJson = await PixivAPI.insert_illusts(data.id, [...illusts.keys()]);
			for (const [id, marker] of illusts) {
				const info = illustsJson.body?.[id];
				// Invisible or unknown illustrations keep their marker
				if (!info?.visible) {
					continue;
				}
				queueImage(info.illust.images.original, id);
				content = content.replaceAll(marker, `\n<%= image[${escJsStr(id)}] %>\n`);
			}
		}
		await Promise.all(imagePromises);

		// Parse '[[rb:久世彩葉 > くぜ いろは]]' // 10618179
		content = content.replace(/\[\[rb:([^\[\]]+) *> *([^\[\]]+)\]\]/g, (match_str, main, desc) => {
			// The greedy first group also captures the blanks in front of '>'
			return `<ruby>${htmlEncode(main.trim())}<rp>(</rp><rt>${htmlEncode(desc.trim())}</rt><rp>)</rp></ruby>`;
		});

		// Parse '[chapter:【プロローグ】]' // 21893883
		content = content.replace(/\[chapter: *([^\]]+)\]/g, (match_str, chapterName) => {
			return `<h2>${chapterName}</h2>`;
		});

		// Parse '[[jumpuri:捕虜の待遇に関する千九百四十九年八月十二日のジュネーヴ条約(第三条約)【日本国防衛省ホームページより】 > https://www.mod.go.jp/j/presiding/treaty/geneva/geneva3.html]]' // 19912145#12
		content = content.replace(/\[\[jumpuri:([^\[\]]+) *> *([^\[\]]+)\]\]/g, (match_str, text, url) => {
			return `<a href=${escJsStr(url.trim())}>${htmlEncode(text.trim())}</a>`;
		});

		// Parse '[jump:2]' // 22003928
		content = content.replace(/\[jump:(\d+)\]/g, (match_str, page) => {
			return `<a href=${escJsStr(`#ChapterPage-${page}`)}>Jump to page ${htmlEncode(page)}</a>`;
		});

		// Check undealed markers
		const markers = Array.from(content.matchAll(/\[+[^\[\]]+\]+/g)).filter(match => {
			// Ignore the '[id]' part of image placeholders generated above: look at the
			// match together with the 9 characters before it ('<%= image') and the
			// 3 characters after it (' %>').
			const pattern = match.input.substring(match.index - 9, match.index + match[0].length + 3);
			const isImagePattern = pattern.startsWith('<%= image[') && pattern.endsWith('] %>');
			// Ignore [newpage]s, they are handled below
			const isNewpagePattern = match[0].includes('[newpage]'); // Why .includes: for matches like '[xxx[[newpage]]]blabla]]'
			return !isImagePattern && !isNewpagePattern;
		});
		markers.length && DoLog(LogLevel.Warning, {
			message: 'Undealed markers found',
			chapter: data,
			markers
		});

		// Up to 4 connected newlines (3 empty lines between paragraphs) at once
		content = content.replaceAll(/\n{4,}/g, '\n'.repeat(4));

		// Parse '[newpage]' & convert into html: every page becomes a
		// <div id="ChapterPage-N"> (N starts at 1, the target of '[jump:N]'),
		// with each non-blank line wrapped in <p> and blank lines turned into <br>.
		let pageNumber = 0;
		content = content.split('[newpage]').map(subContent => {
			const pageHTML = subContent.split('\n').map(line => line.trim() ? `<p>${line}</p>` : '<br>').join('\n');
			pageNumber++;
			return `<div id=${escJsStr(`ChapterPage-${pageNumber}`)}>\n${pageHTML}\n</div>`;
		}).join('\n<br><br><br>\n');

		return {
			title: data.title,
			content
		};
	}

	/**
	 * Append an already loaded chapter (as produced by loadChapter) to the book.
	 */
	function addLoaded(epub, chapter) {
		const { title, content } = chapter;
		epub.add(title, content);
	}

	/**
	 * Load one chapter from a novel body and append it to the book right away.
	 */
	async function addChapter(epub, data) {
		addLoaded(epub, await loadChapter(epub, data));
	}


	/**
	 * Generate the EPUB file and hand it to the browser as a download.
	 *
	 * @param {Object} epub - fully populated jEpub instance
	 * @param {string} filename - suggested name of the downloaded file
	 * @returns {Promise<void>} resolves once the download has been triggered
	 */
	async function saveEpub(epub, filename) {
		// How long the object URL stays valid after the download was triggered
		const REVOKE_DELAY_MS = 40 * 1000;

		const blob = await epub.generate('blob');
		const url = URL.createObjectURL(blob);
		dl_browser(url, filename);
		// Release the blob later instead of on the very next tick: the browser may
		// still be reading from the URL while it starts saving the file.
		setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
	}

	/**
	 * Replace characters with numeric HTML character references (`&#NNN;`).
	 *
	 * @param {*} text - value to encode; converted to a string first, with
	 *     null/undefined treated as an empty string
	 * @param {string} [encodes] - the characters to replace; pass an empty
	 *     string to replace every character
	 * @returns {string}
	 */
	function htmlEncode(text, encodes = '<>\'";&#') {
		// Array.from iterates by code point, so codePointAt (not charCodeAt) is
		// needed to get the full value of characters outside the BMP.
		return Array.from(
			String(text ?? ''),
			char => !encodes || encodes.includes(char) ? `&#${char.codePointAt(0)};` : char
		).join('');
	}
})();