// ==UserScript==
// @name MHTML Reader
// @namespace https://greasyfork.org/en/users/85671-jcunews
// @version 1.0.1
// @license AGPLv3
// @author jcunews
// @description Script to read MHTML files. Use the "Open MHTML file" menu of Tampermonkey/Violentmonkey/Greasemonkey popup menu. The menu can also be made as a bookmarklet using this URL: javascript:mhtreader_ujs()
// @match *://*/*
// @grant GM_registerMenuCommand
// @grant unsafeWindow
// ==/UserScript==
((mht) => {
function mhtReader() {
var inp = document.createElement("INPUT");
inp.type = "file";
inp.onchange = function(ev) {
var ps, ls = {}, pt, qp;
function decodeQuotedPrintable(s) {
return s.replace(/(?:=[0-9A-Fa-f]{2})+/g, s => decodeURIComponent(s.replace(/=/g, "%")));
}
function addPart() {
if (!pt) return;
if (qp) pt.body = decodeQuotedPrintable(pt.body);
ls[pt.headers["Content-Location"]] = ps.length;
ps.push(pt);
pt = null;
}
function absUrl(u, m) {
if (!(/^[a-z]+:/).test(u)) { //relative
if ((/^\/\//).test(u)) { //domain
u = mht.url.match(/^[a-z]+:/)[0] + u;
} else if (u[0] === "/") { //root path
u = mht.url.match(/^[a-z]+:\/\/[^\/]+/)[0] + u;
} else { //relative path
if (mht.url[mht.url.length - 1] === "/") {
u = mht.url + u;
} else u = mht.url + "/" + u;
}
} //else: absolute
if ((/^[a-z]+:\/\/[^\/]+$/).test(u)) u += "/";
return u;
}
function fixCss(t) {
return t.replace(/\b(url\(["']?)((?!data:)[^"')]+)/g, (s, a, b, u) => {
if (ls[u = absUrl(b)] !== undefined) {
return "data:;base64," + ps[ls[u]].body;
} else return s;
});
}
function fixHtml(p, i, h, a) {
if ((h = p.body.match(/^(?:(?:.*|\s*)<(?:!doctype|\?xml)\b[^>]+>)*/i)) && h[0]) {
p.body = p.body.substr((h = h[0]).length);
} else h = "";
(m = document.createElement("DIV")).innerHTML = p.body.replace(/<(\/?html|\/?head|\/?body)\b([^>]*>)/gi, (s, a, b) => "<" + a + "Z" + b);
if (a = m.querySelector("title")) {
p.title = a.textContent;
} else p.title = "";
if (mht.mainPart < 0) {
if (mht.content.headers.Subject && (p.title === mht.content.headers.Subject)) {
mht.url = ps[mht.mainPart = i].headers["Content-Location"] = absUrl(p.headers["Content-Location"]);
}
}
[ ["link[href]", "href"], ["[src]", "src"] ].forEach(
a => m.querySelectorAll(a[0]).forEach((e, u, b, c) => {
if ((b = ls[u = absUrl(e.getAttribute(a[1]))]) !== undefined) {
c = (b = ps[b]).headers["Content-Type"].match(/^[^;]+/)[0];
if (b.headers["Content-Transfer-Encoding"] === "base64") {
e.setAttribute(a[1], "data:" + c + ";base64," + b.body);
} else {
e.setAttribute(a[1], "data:" + c + ";base64," + btoa(encodeURIComponent(b.body).replace(
/%([0-9A-F]{2})/g, (m, c) => String.fromCharCode('0x' + c)
)));
}
}
})
);
m.querySelectorAll("style").forEach(e => e.innerHTML = fixCss(e.innerHTML));
m.querySelectorAll("[style]").forEach(e => e.style.cssText = fixCss(e.style.cssText));
a = m.innerHTML;
while (n = (/<\/(?:doctypez|xmlz)>$/i).exec(a)) {
a = a.substr(0, n.index);
}
return h + a.replace(/<(\/?html|\/?head|\/?body)z\b([^>]*>)/gi, (s, a, b) => "<" + a + b);
}
if (inp.files.length) {
mht = inp.files[0];
mht = {
name: mht.name, type: mht.type, size: mht.size, lastModified: mht.lastModified, text: "", mainPart: -1, url: "", content: {}
};
fr = new FileReader();
fr.onload = function() {
var dat = mht.content, txt = mht.text = fr.result, rl = /(.*?)\r?\n/g, rh = /(^[^\t ][^:]+):\s*(.*)/,
rt = /^multipart\/related(?:;\s*?boundary="([^"]+)"|;\s*?[^;]+)+/, rc = /^text\/html(;\s*charset="utf-8")?/i,
m, s, sp = " ", hs, hn, bd, ds, dn, b64;
while (m = rl.exec(txt)) {
s = m[1].replace(/\s+$/, "");
if (s) { //non-blank line
if (bd) {
if (s === (bd + "--")) { //boundary terminator
addPart();
break;
} else if (s === bd) { //boundary
addPart();
ds = (pt = {}).headers = {};
continue;
}
}
if (pt) { //part
if (pt.body !== undefined) { //body
if (qp) {
if (s[s.length - 1] === "=") {
pt.body += s.substr(0, s.length - 1);
} else pt.body += s + "\n";
} else pt.body += s + "\n";
} else { //headers
if (m = s.match(rh)) {
if (s = m[2].match(/^"(.*)"$/)) m[2] = s[1];
ds[dn = m[1]] = m[2];
} else ds[dn] += " " + s.replace(/^\s+/, "");
}
} else if (dat.body !== undefined) { //body
dat.body += s + "\n"; //body
} else { //headers
if (!dat.headers) {
if (s.substr(0, 6) !== "From: ") break;
hs = dat.headers = {};
}
if (m = s.match(rh)) {
if (s = m[2].match(/^"(.*)"$/)) m[2] = s[1];
hs[hn = m[1]] = m[2];
} else hs[hn] += " " + s.replace(/^\s+/, "");
}
} else { //blank line
if (pt) { //at/after part
if (pt.body !== undefined) {
pt.body += "\n";
} else {
if (ds["Content-Transfer-Encoding"]) {
qp = ds["Content-Transfer-Encoding"] === "quoted-printable";
b64 = ds["Content-Transfer-Encoding"] === "base64";
} else {
qp = false;
b64 = false;
}
pt.body = "";
}
} else if (dat.body !== undefined) { //at/after body
dat.body += "\n";
} else { //after headers
if (hs["Content-Type"] && (m = hs["Content-Type"].match(rt))) {
bd = "--" + m[1];
ps = dat.parts = [];
}
dat.body = "";
}
}
sp = s;
}
if (pt && pt.body && b64) pt.body = pt.body.replace(/\r?\n/g, "");
if (mht.content.headers["Content-Transfer-Encoding"] === "quoted-printable") {
mht.content.body = decodeQuotedPrintable(mht.content.body.replace(/=\r?\n/g, ""));
}
if (dat.body) {
addPart();
if (mht.content.parts) {
mht.content.parts.forEach((p, i) => {
switch (
p.headers["Content-Type"].match(/^[^;]+/)[0].toLowerCase()
) {
case "text/html":
p.body = fixHtml(p, i);
break;
case "text/css":
p.body = fixCss(p.body);
}
});
if (mht.mainPart < 0) {
mht.mainPart = mht.content.parts.findIndex((p, i) => (/^text\/html(;|$)/).test(p.headers["Content-Type"]));
}
if (mht.mainPart >= 0) {
s = new Blob([mht.content.parts[mht.mainPart].body], {type: "text/html"});
} else {
s = new Blob([mht.content.body], {type: mht.content.headers["Content-Type"].match(/^[^;]+/)[0]});
}
} else {
s = new Blob([mht.content.body], {type: mht.content.headers["Content-Type"].match(/^[^;]+/)[0]});
}
(s = open(m = URL.createObjectURL(s))).addEventListener("load", (e, t) => {
function addRow(n, v, r, a) {
(r = t.insertRow()).insertCell().textContent = n;
if (n === "Content-Location") {
(a = r.insertCell().appendChild(document.createElement("A"))).textContent = v;
a.href = v;
} else r.insertCell().textContent = v;
}
function comma(n, i, f, s) {
n = n.toString();
s = [];
if ((i = n.lastIndexOf(".")) > 0) {
f = n.substr(i);
n = n.substr(0, i);
} else f = "";
i = n.length - 3;
while (i >= 0) {
s.unshift(n.substr(i, 3));
i -= 3;
}
if ((i > -3) && (i < 0)) s.unshift(n.substr(0, i + 3));
return s.join(",") + f;
}
function fmtSize(n) {
var us = [" Bytes", " KB", " MB"], u = 0, i;
if (n > 9999) {
while (true) {
i = Math.floor(n);
if ((i > 999) && (u < 2)) {
n = n /1024;
u++;
} else if (i > 99) {
if (u) {
return comma(n.toFixed(0)) + us[u];
} else return comma(n) + us[u];
} else if (i > 9) {
if (u) {
return parseFloat(n.toFixed(1)) + us[u];
} else return n + us[u];
} else if (u) {
return parseFloat(n.toFixed(2)) + us[u];
} else return n + us[u];
}
} else return comma(n) + us[u];
}
(e = document.createElement("DIV")).id = "mhtReaderInfo";
e.innerHTML = `
<style>
html{overflow:hidden}
#mhtReaderInfo,#mhtReaderInfo *{
visibility:visible;opacity:1;position:static;float:none;margin:0;border:none;border-radius:0;padding:0;width:auto;height:auto;overflow:auto;background:none;
text-align:left;text-indent:0;color:#000;font:normal normal normal 12pt/12pt sans-serif;cursor:auto;
}
#mhtReaderInfo{display:flex;position:fixed;z-index:999999999;left:0;top:0;right:0;bottom:0;background:rgb(0,0,0,0.5);cursor:pointer}
#mhtReaderInfo .box{margin:auto;border:.3rem solid #007;border-radius:.3rem;background:#fff}
#mhtReaderInfo .title{position:relative;padding:.1rem 0;background:#77d;color:#fff;font-weight:bold;text-indent:.4rem}
#mhtReaderInfo .close{
position:absolute;top:.1rem;right:.1rem;width:1.4rem;background:#b00;
text-align:center;color:#fff;font-size:9pt;font-weight:bold;cursor:pointer;
}
#mhtReaderInfo .content{margin:.4rem .5rem;max-width:60rem}
#mhtReaderInfo td{padding:.2rem 0}
#mhtReaderInfo td:first-child{vertical-align:top;white-space:nowrap}
#mhtReaderInfo td:last-child{padding-left:1rem}
#mhtReaderInfo a{cursor:pointer;color:#00b}
#mhtReaderInfo a:visited{color:#b0b}
</style>
<div class="box">
<div class="title">Document Information - MHTML Reader<span class="close">X</span></div>
<table class="content"></table>
</div>`;
t = e.querySelector(".content");
addRow("File Name", mht.name);
addRow("File Type", mht.type);
addRow("File Last Modified", new Date(mht.lastModified));
addRow("File Size", fmtSize(mht.size));
Object.keys(mht.content.headers).forEach(k => addRow(k, mht.content.headers[k]));
e.addEventListener("click", ev => {
if ((ev.target.id === "mhtReaderInfo") || (ev.target.className === "close")) e.remove();
})
s.document.documentElement.appendChild(e);
if (s.document.activeElement) s.document.activeElement.blur();
});
sp = setInterval(() => {
if (s.closed) {
URL.revokeObjectURL(m);
clearInterval(sp);
}
}, 1000);
} else alert("Invalid MHTML file.");
};
fr.readAsText(inp.files[0]);
} else alert("Please select a file.");
ev.preventDefault();
}
inp.click();
}
GM_registerMenuCommand("Open MHTML file", unsafeWindow.mhtreader_ujs = mhtReader.bind(unsafeWindow));
})();