SingleFile - 单文件保存网页

保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像

Fra 01.05.2021. Se den seneste versjonen.

// ==UserScript==
// @name         SingleFile - 单文件保存网页
// @namespace    SingleFile
// @version      1.0.0
// @description  保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像
// @author       PY-DNG
// @include      *
// @connect      *
// @icon         data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @grant        GM_unregisterMenuCommand
// @grant        GM_info
// ==/UserScript==

// /*-pass*/ 标明待开发内容

(function () {
	'use strict';

	// Developer Mode
	const developer = true;

	// Inner consts
	const TEXT_SAVEPAGE = '保存此网页';
	const TEXT_SAVING = '保存中...';
	const TEXT_ABOUT = '<!-- Web Page Saved By {SCNM} Ver.{VRSN}, Author {ATNM} -->\n<!-- Page URL: {LINK} -->'
		.replaceAll('{SCNM}', GM_info.script.name)
		.replaceAll('{VRSN}', GM_info.script.version)
		.replaceAll('{ATNM}', GM_info.script.author)
		.replaceAll('{LINK}', location.href);

	// variants
	let i, j;

	let LogLevel = {
		None: 0,
		Error: 1,
		Success: 2,
		Warning: 3,
		Info: 4,
		Elements: 5,
	};
	let g_logCount = 0;
	let g_logLevel = LogLevel.Success;

	function DoLog(level = LogLevel.Info, msgOrElement, isElement = false) {
		if (level <= g_logLevel) {
			let prefix = '%c';
			let param = '';

			if (level == LogLevel.Error) {
				prefix += '[Error]';
				param = 'color:#ff0000';
			} else if (level == LogLevel.Success) {
				prefix += '[Success]';
				param = 'color:#00aa00';
			} else if (level == LogLevel.Warning) {
				prefix += '[Warning]';
				param = 'color:#ffa500';
			} else if (level == LogLevel.Info) {
				prefix += '[Info]';
				param = 'color:#888888';
			} else if (level == LogLevel.Elements) {
				prefix += 'Elements';
				param = 'color:#000000';
			}

			if (level != LogLevel.Elements && !isElement) {
				console.log(prefix + msgOrElement, param);
			} else {
				console.log(msgOrElement);
			}

			if (++g_logCount > 512) {
				console.clear();
				g_logCount = 0;
			}
		}
	}

	// Task list
	const taskList = [getDom, removeScripts, dealStyles, dealElements, output];
	let taskNow = null;

	let Dom;
	let saving = false, cmdID;

	GUI();

	function GUI() {
		cmdID = GM_registerMenuCommand(TEXT_SAVEPAGE, saveOnclick);
	}

	function saveOnclick() {
		if (saving) {return false;};
		switchStatus();
		DoLog(LogLevel.Success, 'SingleFile started.');
		nextTask();
	}

	function switchStatus() {
		saving = !saving;
		if (cmdID) {GM_unregisterMenuCommand(cmdID);};
		cmdID = GM_registerMenuCommand(saving ? TEXT_SAVING : TEXT_SAVEPAGE, saveOnclick);
	}

	function getDom() {
		DoLog(LogLevel.Info, 'Getting document...');
		const HTML_ORGINAL = document.querySelector('html').outerHTML;
		Dom = new DOMParser().parseFromString(HTML_ORGINAL, 'text/html');
		DoLog(LogLevel.Info, Dom, true);
		nextTask();
	}

	function removeScripts() {
		DoLog(LogLevel.Info, 'Removing scripts...');
		const scripts = Dom.querySelectorAll('script');
		for (i = 0; i < scripts.length; i++) {
			scripts[i].parentElement.removeChild(scripts[i]);
		}
		DoLog(Dom, true)
		DoLog(scripts, true);

		nextTask();
	}

	function dealStyles() {
		DoLog(LogLevel.Info, 'Dealing styles...');
		const CSSLinks = Dom.querySelectorAll('link[rel="stylesheet"]');
		let style = '', rest = CSSLinks.length;

		for (const cLink of CSSLinks) {
			DoLog(LogLevel.Info, 'Requesting style from ' + cLink.href);
			requestText(cLink.href, addToStyleText);
		}

		function addToStyleText(styleText) {
			style += styleText;
			rest--;
			DoLog(LogLevel.Info, 'Style got. Rest: ' + String(rest));
			if (rest === 0) {
				finish();
			}
		}

		function finish() {
			// Insert style element
			const styleEle = Dom.createElement('style');
			styleEle.innerHTML = style;
			const firstInnerStyle = document.querySelector('style');
			firstInnerStyle ?
				firstInnerStyle.parentElement.insertBefore(styleEle, firstInnerStyle) :
				Dom.head.appendChild(styleEle);

			// Remove link elements
			for (const link of CSSLinks) {
				link.parentElement.removeChild(link);
			}

			nextTask();
		}
	}

	function dealElements() {
		DoLog(LogLevel.Info, 'dealing elements...');
		const allEles = Dom.querySelectorAll('*');
		let restElesCount = allEles.length;
		for (const element of allEles) {
			dealElement(element);
		}

		function dealElement(element) {
			DoLog(LogLevel.Info, element, true);

			dealImg(element);
		}

		function dealImg(element) {
			const nextDealingTask = function() {dealBackgroundImg(element);};
			if (element.tagName === 'IMG') {
				if (element.src.substr(0,5) !== 'data:') {
					requestImageURL(element.src, function(dataURL) {
						element.src = dataURL;
						// 如何处理canvas? /*-pass*/
						// next dealing task
						nextDealingTask();
					})
				} else {nextDealingTask();}
			} else {nextDealingTask();}
		}

		function dealBackgroundImg(element) {
			// background-image to dataURL
			const cStyle = getComputedStyle(element);
			const backgroundImage = cStyle['background-image'];
			const httpUrlMatch = backgroundImage.match(/url\("(http.+)"\)/);
			if (httpUrlMatch) {
				const url = httpUrlMatch[1].replaceAll('\\\\', '\\');
				requestImageURL(url, function(dataURL) {
					const propValue = backgroundImage.replace(httpUrlMatch[1], dataURL);
					element.style['background-image'] = propValue;
					elementDealed();
				});
			} else {
				elementDealed();
			}
		}

		function elementDealed() {
			restElesCount--;
			DoLog(LogLevel.Info, 'element dealed, rest: ' + String(restElesCount) + ' elements')
			if (restElesCount === 0) {
				nextTask();
			}
		}
	}

	function output() {
		DoLog(LogLevel.Success, 'SingleFile finished.');
		DoLog(LogLevel.Success, Dom, true);

		const outputText = TEXT_ABOUT + '\n\n' + Dom.lastChild.outerHTML;
		saveTextToFile(outputText, 'SingleFile - ' + document.title + '.html');
		switchStatus();
	}

	function nextTask() {
		const funcIndex = taskNow ? taskList.indexOf(taskNow) : -1;
		if (funcIndex === taskList.length - 1) {
			taskNow = taskList[0];
			return true;
		}
		taskNow = taskList[funcIndex+1];
		taskNow();
	}

	function requestText(url, callback, args=[]) {
		GM_xmlhttpRequest({
            method:       'GET',
            url:          url,
            responseType: 'text',
            onload:       function(response) {
                const text = response.responseText;
				const argvs = [text].concat(args);
                callback.apply(null, argvs);
            }
        })
	}

	function requestImageURL(url, callback, args=[]) {
		GM_xmlhttpRequest({
            method:       'GET',
            url:          url,
            responseType: 'blob',
            onload:       function(response) {
                const blob = response.response;
				blobToDataURI(blob, function(url) {
					const argvs = [url].concat(args);
					callback.apply(null, argvs);
				})
            }
        })

		function blobToDataURI(blob, callback) {
			var reader = new FileReader();
			reader.onload = function (e) {
				callback(e.target.result);
			}
			reader.readAsDataURL(blob);
		}
	}

	function saveTextToFile(text, name) {
		const blob = new Blob([text],{type:"text/plain;charset=utf-8"});
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		a.href = url;
		a.download = name;
		a.click();
	}
})();