SingleFile Pure - Pure html downloader

将当前网页保存为一个纯文本的.html网页文件,不保存二进制

// ==UserScript==
// @name                    SingleFile Pure - Pure html downloader
// @name:zh                 SingleFile Pure - 保存纯HTML
// @namespace               https://gist.github.com/KnIfER
// @version                 3
// @description             将当前网页保存为一个纯文本的.html网页文件,不保存二进制
// @description:en          Save webpages into one pure html file, without binary data.
// @author                  PY-DNG
// @license                 MIT
// @grant                   GM_registerMenuCommand
// @grant                   GM_unregisterMenuCommand
// @grant                   unsafeWindow
// @icon                    
// @match                   *://*/*
// @include                 *
// @noframes
// ==/UserScript==

// based on @PY-DNG https://greasyfork.org/zh-CN/scripts/419798-singlefile-单文件保存网页

(function() { 
	'use strict';
	function debug(...args) {
		console.log("%c SingleFile: ", "color:#333!important;background:#0FF;", ...args);
	}

	var win = window.unsafeWindow || window, doc=document, d=doc
	, bank=win._sfpr_bank;
	var rM=debug, rMd=debug, err=console.error;
	if(!bank) {
		bank = win._sfpr_bank = {};
		rM = GM_registerMenuCommand;
		rMd = GM_unregisterMenuCommand;
	} else try{
		bank.unreg();
	} catch(e){debug(e)}
	bank.unreg = uninstall;var unregs = [];
	
	function uninstall() { // hot-reload
		for(var i=0;i<unregs.length;i++) {
			unregs[i]();
		}
		return 1;
	}
	
	function addEvent(a, b, c, d) {
		if(!d) d = win;
		((a, b, c, d)=>{
			d.addEventListener(a, b, c);
			unregs.push(function(){ d.removeEventListener(a, b, c)} );
		})(a, b, c, d);
	}
	
	const MSG = {
		'zh': {
			SavePage: '保存纯网页',
			Saving: '保存中……'
		},
		'en': {
			SavePage: 'Save pure webpage',
			Saving: 'Saving, please wait……'
		},
	}
	var btn, evtSt="single-file-on-before-capture-request", evtEd=evtSt.replace('before', 'after');
	addEvent(evtSt, (e) => {
		btn = doc.getElementById('sf-pure');
		if(btn) btn.remove();
	});
	addEvent(evtEd, (e) => {
		if(btn) {
			doc.body.append(btn);
			btn = 0;
		}
	});
	
	var t = navigator.language;
	if(t.includes('-')) t = t.slice(0, t.indexOf('-'));
	

	// GUI
	var GT=MSG[t]||MSG['en'], fnMenu, menu = rM(GT.SavePage, fnMenu = function() {
		Generate_Single_File({
			onfinish: (FinalHTML) => {
				var title = doc.title;
				saveTextToFile(FinalHTML, '{Title}.html'.replace('{Title}', title).replace('{Time}', getTime('-', '-')));
				dispatchEvent(new CustomEvent(evtEd));
				rMd(menu);
				menu = rM(GT.SavePage, fnMenu);
			}
		});
	});
	
	addEvent("single-file-pure-save", (e) => {
		fnMenu();
		stop(e);
	});
	
	if(!win.saveAsTaken)
		addEvent('keydown', (e) => {
			if(e.key=='s' && e.altKey && e.ctrlKey) {
				fnMenu();
				stop(e);
			}
		});
	
		
	function Generate_Single_File(details) {
		debug('Generate started...');
		if(!bank.init) {
			dispatchEvent(new CustomEvent("single-file-user-script-init"));
			bank.init = 1;
		}
		dispatchEvent(new CustomEvent(evtSt));
		// Init DOM
		var html, tmp, dom = doc;

		// Functions
		var _J = (args) => {const a = []; for (let i = 0; i < args.length; i++) {a.push(args[i]);}; return a;};
		var $ = function() {return dom.querySelector.apply(dom, _J(arguments))};
		var $_ = function() {return dom.querySelectorAll.apply(dom, _J(arguments))};
		var $C = function() {return dom.createElement.apply(dom, _J(arguments))};
		var $A = (a,b) => (a.appendChild(b));
		var $R = (e) => (e.parentElement ? e.parentElement.removeChild(e) : null);
		function ishttp(s) {
			// !/^[^\/:]*:/.test(s)
			if(s) return  s.startsWith('/') || s.startsWith('http')
		}
		dom = doc.cloneNode(1);
		
		const ElmProps = new (function() {
			const props = this.props = {};
			const cssMap = this.cssMap = new Map();
			this.getCssPath = function(elm) {
				return cssMap.get(elm) || (cssMap.set(elm, cssPath(elm)), cssMap.get(elm));
			}
			this.add = function(elm, type, value) {
				var path = cssPath(elm), store=props[path];
				if(!store) store = props[path] = [];
				store.push({type:type, value:value});
			}
		});
		
		 // Generate info button!
		function about() {
			var t=$C('A');
			t.id = 'sf-pure';
			t.style = 'position:fixed;right:16px;top:16px;width:24px;height:24px;color:#2d2d2d;background-color:#737373;border:2px solid;border-color:#eee;border-radius:16px;z-index:2147483647;opacity:0.7;display:flex;justify-content:center;align-items:center;';
			t.innerHTML = '<svg style=\'width:65%;height:65%;margin-left:1px;\' xmlns="http://www.w3.org/2000/svg"viewBox="0 0 64 64"width="64"height="64"><style>.p{fill:#f0f0f0}</style><path class="p"d="M30 3A3 3 0 1130 21 3 3 0 1130 3ZM16 25 23 29 23 58 16 63 46 63 39 58 39 25Z"/></svg>';
			t.name = Date.now()+'';
			t.title = doc.title;
			t.href = location.ohref || location.href;
			t.target = 'blank';
			return t;
		}

		const AM = new AsyncManager();
		AM.onfinish = function() {
			// Add applyProps script
			var script = $C('script');
			script.innerHTML = "window.addEventListener('load', function(){"+
				// show info button
				"setTimeout(function(){var btn=document.getElementById('sf-pure');btn.title=new Date(parseInt(btn.name))+'\\n\\n'+btn.title;location.ohref=btn.href;btn.oncontextmenu=function(e){btn.style.display='none';e.preventDefault()}}, 800);"
				+
				// {FUNC}
				"(function(c){var fs={Canvas_DataUrl:function(a,b){var e=new Image(),v=a.getContext('2d');e.onload=function(){v.drawImage(e,0,0)};e.src=b},Input_Value:function(a,b){a.value=b}};for(var i=0,arr=Object.entries(c),t,el;i<arr.length;i++){try{t=arr[i];if(el=document.querySelector(t[0]))for(var p of t[1])fs[p.type](el,p.value)}catch(e){console.error(e)}}}"
				+")("+ 
				// {PROPS}
				JSON.stringify(ElmProps.props)
				+")})";
			$A(dom.head, script);
			$A(dom.body, about());
			// Generate html
			var FinalHTML = dom.querySelector('html').outerHTML;
			debug('Generation Complete.', FinalHTML.length)
			details.onfinish(FinalHTML)
		};

		// debug('Setting charset');
		if (doc.characterSet !== 'UTF-8') {
			const meta = $('meta[http-equiv="Content-Type"][content*="charset"]');
			meta && (meta.content = meta.content.replace(/charset\s*=\s*[^;\s]*/i, 'charset=UTF-8'));
		}

		// debug('strip scripts');
		for (var tmp of $_('script')) {
			$R(tmp);
		}

		// debug('strip inline scripts');
		for (var tmp of $_('*')) {
			var ISKeys = ['onabort', 'onerror', 'onresize', 'onscroll', 'onunload', 'oncancel', 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose', 'oncuechange', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragexit', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange', 'onemptied', 'onended', 'onerror', 'onfocus', 'oninput', 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onpause', 'onplay', 'onplaying', 'onprogress', 'onratechange', 'onreset', 'onresize', 'onscroll', 'onseeked', 'onseeking', 'onselect', 'onshow', 'onstalled', 'onsubmit', 'onsuspend', 'ontimeupdate', 'ontoggle', 'onvolumechange', 'onwaiting', 'onbegin', 'onend', 'onrepeat'];
			for (var key of ISKeys) {
				tmp.removeAttribute(key);
				tmp[key] = undefined;
			}
		}

		// debug('strip preload scripts');
		for (var tmp of $_('link[rel*=modulepreload]')) {
			$R(tmp);
		}

		// debug('strip meta headers');
		for (var tmp of $_('meta[http-equiv="Content-Security-Policy"]')) {
			$R(tmp);
		}

		//debug('Resolve style urls');
		for (var tmp of $_('link[rel*=stylesheet][href]')) {
			resolveStyleLinked(tmp)
		}
		for (var elm of $_('style')) {
			//debug('style elm=', elm.id, elm)
			resolveStyle(elm.innerText, (style, elm) => (elm.innerHTML = style), elm);
		}

		//debug('Resolve links');
		for (const link of $_('link[href]')) {
			// Only for http[s] links
			if (!link.href) {continue;}
			if (!ishttp(link.href)) {continue;}

			// Only for links that rel includes one of the following:
			//   icon, apple-touch-icon, apple-touch-startup-image, prefetch, preload, prerender, manifest, stylesheet
			// And in the same time NOT includes any of the following:
			//   alternate
			var deal = false;
			const accepts = ['icon', 'apple-touch-icon', 'apple-touch-startup-image', 'prefetch', 'preload', 'prerender', 'manifest', 'stylesheet'];
			const excludes = ['alternate']
			const rels = link.rel.split(' ');
			for (const rel of rels) {
				deal = deal || (accepts.includes(rel) && !excludes.includes(rel));
			}
			if (!deal) {continue;}

			// Save original href to link.ohref
			link.ohref = link.href;

			AM.add();
			requestDataURL(link.href, function(durl, link) {
				link.href = durl;
				// Deal style if links to a stylesheet
				if (rels.includes('stylesheet')) {
					resolveStyleLinked(link);
				}
				AM.finish();
			}, link);
		}
		var arr = dom.links;
		for(var i=0;tmp=arr[i++];) {
			tmp.href = fullUrl(tmp.href);
		}

		//debug('Resolve image src');
		for (var img of $_('img[src], source[src]')) {
			// Get full src
			// if (img.src.length > 3999) {continue;}
			if (!img.src) {continue;}
			if (!ishttp(img.src)) {continue;}
			img.src = fullUrl(img.src);
		}

		//debug('Resolve image srcset');
		for (var img of $_('img[srcset], source[srcset]')) {
			if (img.srcset) {
				var list = img.srcset.split(',');
				for (let i = 0; i < list.length; i++) { // Get all srcs list
					var srcitem = list[i].trim();
					if (srcitem.length > 3999) {continue;}
					if (!srcitem) {continue}
					var parts = srcitem.replaceAll(/(\s){2,}/g, '$1').split(' ');
					if (!ishttp(parts[0])) {continue};
					var src = fullUrl(parts[0]);
					list[i] = {
						src: src,
						rest: parts.slice(1, parts.length).join(' '),
						parts: parts,
						dataurl: null,
						string: null
					};
				}
				img.srcset = list.join(',');
			}
		}

		//debug('Resolve canvas');
		for (var tmp of $_('canvas')) {
			try {
				var url = img2url(tmp);
				ElmProps.add(tmp, 'Canvas_DataUrl', url);
			} catch (e) {}
		}
		
		debug('Resolve styles', dom);
		for (var tmp of $_('style')) {
			try {
				if(!tmp.firstChild) {
					var oelm = doc.querySelector(ElmProps.getCssPath(tmp));
					debug('cssRulesX', tmp, oelm);
					if(oelm && oelm.sheet?.cssRules?.length) {
						var cssRules = oelm.sheet.cssRules; 
						var text = '';
						for (var i = 0; i < cssRules.length; i++) {
							var rule = cssRules[i];
							text += rule.cssText;
							text += '\n';
						}
						//debug('cssRules', text);
						tmp.innerHTML = text;
					}
				}
			} catch (e) {}
		}

		//debug('Resolve background-images');
		var urlReg = /^\s*url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)\s*$/;
		for (var elm of $_('*')) {
			var url = elm.style.backgroundImage;
			if(url && url.length < 3999 // CONST.Number.MaxUrlLength
				 && url.lastIndexOf('data:', 10)==-1) { // not /^data:/.test(url)
				url = url.match(urlReg);
				if (url) { // Get full image url
					url = fullUrl(url[1]);
					elm.style.backgroundImage = 'url('+url+')';
				}
			}
		}

		//debug('Resolve input/textarea/progress values');
		for (var tmp of $_('input,textarea,progress')) {
			// Query origin element's value
			var oelm = doc.querySelector(ElmProps.getCssPath(tmp));
			// Add to property map
			oelm.value && ElmProps.add(tmp, 'Input_Value', oelm.value);
		}

		// Get favicon.ico if no icon found
		debug('Resolve favicon.ico');
		if (!$('link[rel*=icon]')) {
			var icon = $C('link');
			icon.rel = 'icon';
			icon.href = getHost() + 'favicon.ico',
			$A(dom.head, icon);
		}

		// Start generating the finish event
		debug('Waiting for async tasks to be finished');
		AM.finishEvent = true;

		function resolveStyle(style, callback, args=[]) {
			const argvs = [style].concat(args);
			if(!style) {
				return callback.apply(null, argvs);
			}
			const re = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/;
			const rg = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/g;
			const replace = (durl, urlexp, arg1, arg2, arg3) => {
				// Replace style text
				const durlexp = 'url("'+durl+'")';
				style = style.replaceAll(urlexp, durlexp);
				// Get args
				argvs[0]=style;
				callback.apply(null, argvs);
				AM.finish();
			};

			const all = style.match(rg);
			if (!all) {return;}
			for (const urlexp of all) {
				// Check url
				if (urlexp.length > 3999) {continue;}
				const osrc = urlexp.match(re)[1];
				const baseurl = args instanceof HTMLLinkElement && args.ohref ? args.ohref : location.href;
				if (!ishttp(osrc)) {continue;}
				const src = fullUrl(osrc, baseurl);

				// Request
				AM.add();
				requestDataURL(src, replace, [urlexp].concat(args));
			}
		}
		function resolveStyleLinked(link) {
			const durl = link.href;
			if ((durl||'')[0]!=='d') {return;} // not /^data:/.test()
			const blob = dataURLToBlob(durl);
			const reader = new FileReader();
			reader.onload = () => {
				resolveStyle(reader.result, (style, link) => {
					const blob = new Blob([style],{type:"text/css"});
					AM.add();
					blobToDataURL(blob, function(durl, link) {
						//debug('style elm=', link.id, link)
						link.href = durl;
						AM.finish();
					}, link)
				}, link);
				AM.finish();
			}
			AM.add();
			reader.readAsText(blob);
		}
	}

	var t0 = doc.createElement('a');
	
	function fullUrl(url, baseurl) {
		if(url) {
			if (url.startsWith('//')) {url = location.protocol + url;}
			if (!url.startsWith('http')) {
				var base = (baseurl||location.href).replace(/(.+\/).*?$/, '$1');
				t0.href = base + url;
				url = t0.href;
			}
		}
		return url;
	}

	function cssPath(el) {
		if (!(el instanceof Element)) return;
		var path = [];
		while (el.nodeType === Node.ELEMENT_NODE) {
			var selector = el.nodeName.toLowerCase();
			if (el.id) {
				selector += '#' + el.id;
				path.unshift(selector);
				break;
			} else {
				var sib = el,
					nth = 1;
				while (sib = sib.previousElementSibling) {
					if (sib.nodeName.toLowerCase() == selector) nth++;
				}
				if (nth != 1) selector += ":nth-of-type(" + nth + ")";
			}
			path.unshift(selector);
			el = el.parentNode;
		}
		return path.join(" > ");
	}

	function requestDataURL(url, callback, args=[]) {
		try{
			//debug('requestDataURL::', url, args);
			const argvs = [url].concat(args);
			callback.apply(null, argvs);
		}catch(e){err(e)}
	}

	function blobToDataURL(blob, callback, args=[]) {
		const reader = new FileReader();
		reader.onload = function () {
			callback.apply(null, [reader.result].concat(args));
		}
		reader.readAsDataURL(blob);
	}

	function dataURLToBlob(dataurl) {
		let arr = dataurl.split(','),
			mime = arr[0].match(/:(.*?);/)[1],
			bstr = atob(arr[1]),
			n = bstr.length,
			u8arr = new Uint8Array(n)
		while (n--) {
			u8arr[n] = bstr.charCodeAt(n)
		}
		return new Blob([u8arr], { type: mime })
	}

	function AsyncManager() {
		const AM = this;

		// Ongoing xhr count
		this.taskCount = 0;

		// Whether generate finish events
		let finishEvent = false;
		Object.defineProperty(this, 'finishEvent', {
			configurable: true,
			enumerable: true,
			get: () => (finishEvent),
			set: (b) => {
				finishEvent = b;
				b && AM.taskCount === 0 && AM.onfinish && AM.onfinish();
			}
		});
		// Add one task
		this.add = () => (++AM.taskCount);
		// Finish one task
		this.finish = () => ((--AM.taskCount === 0 && AM.finishEvent && AM.onfinish && AM.onfinish(), AM.taskCount));
	}

	function img2url(img) {
		var cvs = doc.createElement('canvas');
		var v = cvs.getContext('2d');
		cvs.width = img.width;
		cvs.height = img.height;
		v.drawImage(img, 0, 0)
		return cvs.toDataURL();
	}

	// Format timecode like 1970-01-01 00:00:00
	// if data-sep provided false, there will be no data part.
    function getTime(dateSep='-', timeSep=':') {
        var d = new Date(), fulltime = ''
		fulltime += dateSep ? f0(d.getFullYear(), 4) + dateSep + f0((d.getMonth() + 1), 2) + dateSep + f0(d.getDate(), 2) : '';
		fulltime += dateSep && timeSep ? ' ' : '';
		fulltime += timeSep ? f0(d.getHours(), 2) + timeSep + f0(d.getMinutes(), 2) + timeSep + f0(d.getSeconds(), 2) : '';
        return fulltime;
    }
	
    function f0(number, ln) {
        var str = String(number);
        for (var i = str.length; i < ln; i++) {
            str = '0' + str;
        }
        return str;
    }

	function stop(e) {
		try{
			e.stopPropagation();
			e.preventDefault();
		} catch(e) {debug(e)}
	}

	function saveTextToFile(text, name) {
		const blob = new Blob([text],{type:"text/plain;charset=utf-8"});
		const url = URL.createObjectURL(blob);
		const a = doc.createElement('a');
		a.href = url;
		a.download = name;
		a.click();
	}

	// get host part from a url(includes '^https://', '/$')
	function getHost(url=location.href) {
		const match = location.href.match(/https?:\/\/[^\/]+\//);
		return match ? match[0] : match;
	}

})();