MHTML Reader

Script to read MHTML files. Use the "Open MHTML file" menu of Tampermonkey/Violentmonkey/Greasemonkey popup menu. The menu can also be made as a bookmarklet using this URL: javascript:mhtreader_ujs()

As of 03-11-2019. See the latest version.

You will need to install an extension such as Tampermonkey, Greasemonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name         MHTML Reader
// @namespace    https://greasyfork.org/en/users/85671-jcunews
// @version      1.0.1
// @license      AGPLv3
// @author       jcunews
// @description  Script to read MHTML files. Use the "Open MHTML file" menu of Tampermonkey/Violentmonkey/Greasemonkey popup menu. The menu can also be made as a bookmarklet using this URL: javascript:mhtreader_ujs()
// @match        *://*/*
// @grant        GM_registerMenuCommand
// @grant        unsafeWindow
// ==/UserScript==

((mht) => {
  function mhtReader() {
    var inp = document.createElement("INPUT");
    inp.type = "file";
    inp.onchange = function(ev) {
      var ps, ls = {}, pt, qp;
      /**
       * Decodes the "=XX" escapes of a quoted-printable encoded string.
       * Soft line breaks ("=" at end of line) are expected to have been removed by the caller.
       *
       * @param {string} s Text containing "=XX" hexadecimal escapes.
       * @returns {string} The text with every run of escapes replaced by the characters it encodes.
       */
      function decodeQuotedPrintable(s) {
        return s.replace(/(?:=[0-9A-Fa-f]{2})+/g, run => {
          try {
            //common case: the escaped bytes form valid UTF-8
            return decodeURIComponent(run.replace(/=/g, "%"));
          } catch (err) {
            if (!(err instanceof URIError)) throw err;
            //decodeURIComponent rejects byte runs that are not valid UTF-8 (e.g. "=E9" from a
            //single-byte charset). Map each byte to the code point of the same value instead
            //of letting one bad run abort the whole file.
            return run.replace(/=([0-9A-Fa-f]{2})/g, (m, hex) => String.fromCharCode(parseInt(hex, 16)));
          }
        });
      }
      /**
       * Finishes the part currently being parsed (`pt`), if there is one:
       * decodes its body when the part is quoted-printable (`qp`), records its
       * index in the Content-Location lookup (`ls`), appends it to the part list
       * (`ps`) and clears `pt`.
       */
      function addPart() {
        if (pt) {
          const finished = pt;
          if (qp) {
            finished.body = decodeQuotedPrintable(finished.body);
          }
          //the part's future index in ps is the current length of the list
          ls[finished.headers["Content-Location"]] = ps.length;
          ps.push(finished);
          pt = null;
        }
      }
      /**
       * Turns a URL found in the document into an absolute URL, using the main
       * document URL (`mht.url`) as the base for relative references.
       *
       * URLs that already carry a scheme are returned as written (they are not
       * normalized), except that a bare origin such as "http://host" gets a
       * trailing "/".
       *
       * @param {string} u URL as written in the document or in a Content-Location header.
       * @param {*} [m] Unused; kept so the signature stays compatible.
       * @returns {string} The absolute URL, or `u` unchanged when it is relative and no usable base URL is known.
       */
      function absUrl(u, m) {
        //scheme names are case-insensitive and may contain digits, "+", "." and "-"
        if (!(/^[a-z][a-z0-9+.-]*:/i).test(u)) { //relative
          try {
            //covers protocol-relative ("//host/x"), root-relative ("/x") and path-relative
            //("x", "../x") references, resolving against the base document's directory
            u = new URL(u, mht.url).href;
          } catch (err) {
            //mht.url is still empty (main part not identified yet) or is not a hierarchical
            //URL: leave the reference unresolved instead of throwing
          }
        } //else: absolute
        if ((/^[a-z][a-z0-9+.-]*:\/\/[^\/]+$/i).test(u)) u += "/";
        return u;
      }
      /**
       * Rewrites `url(...)` references in a piece of CSS so that resources
       * contained in the MHTML file are embedded as `data:` URLs.
       *
       * References that are already `data:` URLs, or that do not correspond to
       * any part of the file, are left untouched.
       *
       * @param {string} t CSS text (style sheet, <style> content or inline style).
       * @returns {string} The CSS text with known resources inlined.
       */
      function fixCss(t) {
        return t.replace(/\b(url\(["']?)((?!data:)[^"')]+)/g, (s, a, b) => {
          var i = ls[absUrl(b)], p, c, d;
          if (i === undefined) return s; //not one of the archived parts
          p = ps[i];
          c = (p.headers["Content-Type"] || "").match(/^[^;]+/);
          c = c ? c[0] : "";
          if (p.headers["Content-Transfer-Encoding"] === "base64") {
            //the parser keeps the line breaks of the encoded body; they are not allowed inside url()
            d = p.body.replace(/\s+/g, "");
          } else {
            //text body: encode its UTF-8 bytes as base64
            d = btoa(encodeURIComponent(p.body).replace(
              /%([0-9A-F]{2})/g, (m, h) => String.fromCharCode("0x" + h)
            ));
          }
          //keep the matched `url(` prefix (and opening quote); only the address is replaced
          return a + "data:" + c + ";base64," + d;
        });
      }
      /**
       * Rewrites the body of a text/html part so it can be shown stand-alone:
       * resources referenced through `link[href]`, `[src]`, <style> elements and
       * style attributes are replaced by `data:` URLs built from the matching
       * parts of the MHTML file.
       *
       * Side effects: sets `p.title`, strips the leading doctype/XML prolog from
       * `p.body`, and - when no main part has been chosen yet and the part's
       * title equals the file's Subject header - records this part as the main
       * part (`mht.mainPart`, `mht.url`).
       *
       * @param {Object} p The part ({headers, body}) to rewrite.
       * @param {number} i Index of the part in the part list.
       * @param {*} [h] Scratch variable (leading doctype/XML prolog); not meant to be passed.
       * @param {*} [a] Scratch variable; not meant to be passed.
       * @returns {string} The rewritten HTML source, including the original prolog.
       */
      function fixHtml(p, i, h, a) {
        var m, n; //declared locally so they no longer leak as implicit globals
        if ((h = p.body.match(/^(?:(?:.*|\s*)<(?:!doctype|\?xml)\b[^>]+>)*/i)) && h[0]) {
          p.body = p.body.substr((h = h[0]).length);
        } else h = "";
        //Parse inside an inert document: markup from the file must not fetch resources or
        //run inline event handlers in the host page while it is being rewritten.
        m = document.implementation.createHTMLDocument("").createElement("DIV");
        //html/head/body tags would be dropped when parsed inside a DIV, so they are
        //temporarily renamed (htmlZ, headZ, bodyZ) and restored after serialization
        m.innerHTML = p.body.replace(/<(\/?html|\/?head|\/?body)\b([^>]*>)/gi, (s, a, b) => "<" + a + "Z" + b);
        if (a = m.querySelector("title")) {
          p.title = a.textContent;
        } else p.title = "";
        if (mht.mainPart < 0) {
          if (mht.content.headers.Subject && (p.title === mht.content.headers.Subject)) {
            mht.url = ps[mht.mainPart = i].headers["Content-Location"] = absUrl(p.headers["Content-Location"]);
          }
        }
        [ ["link[href]", "href"], ["[src]", "src"] ].forEach(
          a => m.querySelectorAll(a[0]).forEach((e, u, b, c) => {
            if ((b = ls[u = absUrl(e.getAttribute(a[1]))]) !== undefined) {
              //a part without a Content-Type header gets an empty MIME type instead of crashing
              c = ((b = ps[b]).headers["Content-Type"] || "").match(/^[^;]+/);
              c = c ? c[0] : "";
              if (b.headers["Content-Transfer-Encoding"] === "base64") {
                e.setAttribute(a[1], "data:" + c + ";base64," + b.body);
              } else {
                //text body: encode its UTF-8 bytes as base64
                e.setAttribute(a[1], "data:" + c + ";base64," + btoa(encodeURIComponent(b.body).replace(
                  /%([0-9A-F]{2})/g, (m, c) => String.fromCharCode('0x' + c)
                )));
              }
            }
          })
        );
        m.querySelectorAll("style").forEach(e => e.innerHTML = fixCss(e.innerHTML));
        m.querySelectorAll("[style]").forEach(e => e.style.cssText = fixCss(e.style.cssText));
        a = m.innerHTML;
        while (n = (/<\/(?:doctypez|xmlz)>$/i).exec(a)) {
          a = a.substr(0, n.index);
        }
        return h + a.replace(/<(\/?html|\/?head|\/?body)z\b([^>]*>)/gi, (s, a, b) => "<" + a + b);
      }
      if (inp.files.length) {
        mht = inp.files[0];
        mht = {
          name: mht.name, type: mht.type, size: mht.size, lastModified: mht.lastModified, text: "", mainPart: -1, url: "", content: {}
        };
        fr = new FileReader();
        fr.onload = function() {
          //Line-oriented MHTML parser. State:
          //  dat/hs/hn - top-level message, its header map and the name of the last header seen
          //  pt/ds/dn  - part being read, its header map and the name of the last header seen
          //  bd        - boundary delimiter line ("--" + boundary), set once the top-level headers end
          //  qp/b64    - transfer encoding of the part being read
          var dat = mht.content, txt = mht.text = fr.result, rl = /(.*?)\r?\n/g, rh = /(^[^\t ][^:]+):\s*(.*)/,
            rt = /^multipart\/related(?:;\s*?boundary="([^"]+)"|;\s*?[^;]+)+/, rc = /^text\/html(;\s*charset="utf-8")?/i,
            m, s, sp = " ", hs, hn, bd, ds, dn, b64;
          while (m = rl.exec(txt)) {
            s = m[1].replace(/\s+$/, "");
            if (s) { //non-blank line
              if (bd) {
                if (s === (bd + "--")) { //boundary terminator
                  addPart();
                  break;
                } else if (s === bd) { //boundary
                  addPart();
                  ds = (pt = {}).headers = {};
                  continue;
                }
              }
              if (pt) { //part
                if (pt.body !== undefined) { //body
                  if (qp) {
                    //a trailing "=" is a quoted-printable soft line break: join with the next line
                    if (s[s.length - 1] === "=") {
                      pt.body += s.substr(0, s.length - 1);
                    } else pt.body += s + "\n";
                  } else pt.body += s + "\n";
                } else { //headers
                  if (m = s.match(rh)) {
                    if (s = m[2].match(/^"(.*)"$/)) m[2] = s[1];
                    ds[dn = m[1]] = m[2];
                  } else ds[dn] += " " + s.replace(/^\s+/, ""); //folded continuation line
                }
              } else if (dat.body !== undefined) { //body
                dat.body += s + "\n"; //body
              } else { //headers
                if (!dat.headers) {
                  //the very first line must be the "From: " header, otherwise this is not MHTML
                  if (s.substr(0, 6) !== "From: ") break;
                  hs = dat.headers = {};
                }
                if (m = s.match(rh)) {
                  if (s = m[2].match(/^"(.*)"$/)) m[2] = s[1];
                  hs[hn = m[1]] = m[2];
                } else hs[hn] += " " + s.replace(/^\s+/, ""); //folded continuation line
              }
            } else { //blank line
              if (pt) { //at/after part
                if (pt.body !== undefined) {
                  pt.body += "\n";
                } else {
                  if (ds["Content-Transfer-Encoding"]) {
                    qp = ds["Content-Transfer-Encoding"] === "quoted-printable";
                    b64 = ds["Content-Transfer-Encoding"] === "base64";
                  } else {
                    qp = false;
                    b64 = false;
                  }
                  pt.body = "";
                }
              } else if (dat.body !== undefined) { //at/after body
                dat.body += "\n";
              } else { //after headers
                //a blank line before any header: not MHTML. Stop with dat.body unset so the
                //caller reports the file as invalid instead of failing on the missing header map.
                if (!hs) break;
                //The boundary is looked up on its own: a capture inside the repeated group of
                //`rt` is reset on every repetition, so it was lost whenever boundary= was not
                //the last parameter. Unquoted boundary values are accepted as well.
                if (hs["Content-Type"] && rt.test(hs["Content-Type"]) &&
                    (m = hs["Content-Type"].match(/;\s*boundary=(?:"([^"]+)"|([^;\s]+))/i))) {
                  bd = "--" + (m[1] || m[2]);
                  ps = dat.parts = [];
                }
                dat.body = "";
              }
            }
            sp = s;
          }
          //a part still open here was not closed by a terminator line; tidy its base64 body
          if (pt && pt.body && b64) pt.body = pt.body.replace(/\r?\n/g, "");
          //headers/body are missing when the file was rejected above; skip decoding in that case
          if (mht.content.headers && mht.content.body && (mht.content.headers["Content-Transfer-Encoding"] === "quoted-printable")) {
            mht.content.body = decodeQuotedPrintable(mht.content.body.replace(/=\r?\n/g, ""));
          }
          if (dat.body) {
            addPart();
            if (mht.content.parts) {
              mht.content.parts.forEach((p, i) => {
                switch (
                  p.headers["Content-Type"].match(/^[^;]+/)[0].toLowerCase()
                ) {
                  case "text/html":
                    p.body = fixHtml(p, i);
                    break;
                  case "text/css":
                    p.body = fixCss(p.body);
                }
              });
              if (mht.mainPart < 0) {
                mht.mainPart = mht.content.parts.findIndex((p, i) => (/^text\/html(;|$)/).test(p.headers["Content-Type"]));
              }
              if (mht.mainPart >= 0) {
                s = new Blob([mht.content.parts[mht.mainPart].body], {type: "text/html"});
              } else {
                s = new Blob([mht.content.body], {type: mht.content.headers["Content-Type"].match(/^[^;]+/)[0]});
              }
            } else {
              s = new Blob([mht.content.body], {type: mht.content.headers["Content-Type"].match(/^[^;]+/)[0]});
            }
            (s = open(m = URL.createObjectURL(s))).addEventListener("load", (e, t) => {
              /**
               * Appends a two-cell row (label, value) to the information table `t`.
               * The value of the "Content-Location" row is rendered as a link.
               *
               * @param {string} n Row label.
               * @param {*} v Row value.
               * @param {*} [r] Unused; kept for signature compatibility.
               * @param {*} [a] Unused; kept for signature compatibility.
               */
              function addRow(n, v, r, a) {
                const row = t.insertRow();
                row.insertCell().textContent = n;
                const valueCell = row.insertCell();
                if (n !== "Content-Location") {
                  valueCell.textContent = v;
                  return;
                }
                const link = valueCell.appendChild(document.createElement("A"));
                link.textContent = v;
                link.href = v;
              }
              /**
               * Formats a number with "," between groups of three digits of its
               * integer part; a fractional part (from the last ".") is kept as is.
               *
               * @param {number|string} n Value to format.
               * @param {*} [i] Unused; kept for signature compatibility.
               * @param {*} [f] Unused; kept for signature compatibility.
               * @param {*} [s] Unused; kept for signature compatibility.
               * @returns {string} The grouped representation, e.g. "1,234,567.8".
               */
              function comma(n, i, f, s) {
                const text = n.toString();
                const dot = text.lastIndexOf(".");
                const hasFraction = dot > 0;
                const whole = hasFraction ? text.slice(0, dot) : text;
                const fraction = hasFraction ? text.slice(dot) : "";
                const groups = [];
                //walk from the right in steps of three; the leftmost group may be shorter
                for (let end = whole.length; end > 0; end -= 3) {
                  groups.unshift(whole.slice(Math.max(end - 3, 0), end));
                }
                return groups.join(",") + fraction;
              }
              /**
               * Formats a byte count for display.
               * Values up to 9999 are shown as a grouped byte count; larger values
               * are scaled by 1024 to KB or MB (MB is the largest unit) and shown
               * with 2, 1 or 0 decimals depending on their magnitude.
               *
               * @param {number} n Size in bytes.
               * @returns {string} E.g. "9,999 Bytes", "9.77 KB", "5,120 MB".
               */
              function fmtSize(n) {
                const units = [" Bytes", " KB", " MB"];
                //anything that does not compare greater than 9999 is shown as a byte count
                if (!(n > 9999)) return comma(n) + units[0];
                let unit = 0;
                let whole = Math.floor(n);
                //scale down while there are four or more integer digits and a larger unit exists;
                //because n > 9999 here, this always runs at least once
                while (whole > 999 && unit < 2) {
                  n /= 1024;
                  unit += 1;
                  whole = Math.floor(n);
                }
                const suffix = units[unit];
                if (whole > 99) return comma(n.toFixed(0)) + suffix;
                //parseFloat drops trailing zeros left by toFixed ("20.0" -> 20)
                const digits = whole > 9 ? 1 : 2;
                return parseFloat(n.toFixed(digits)) + suffix;
              }
              //Build the "Document Information" overlay. The listener's parameters are reused as
              //locals: `e` holds the overlay element and `t` the table that addRow() fills.
              (e = document.createElement("DIV")).id = "mhtReaderInfo";
              //The overlay carries its own <style>, which resets the listed properties on
              //#mhtReaderInfo and its descendants and hides the page's scrollbars (html{overflow:hidden}).
              e.innerHTML = `
<style>
html{overflow:hidden}
#mhtReaderInfo,#mhtReaderInfo *{
  visibility:visible;opacity:1;position:static;float:none;margin:0;border:none;border-radius:0;padding:0;width:auto;height:auto;overflow:auto;background:none;
  text-align:left;text-indent:0;color:#000;font:normal normal normal 12pt/12pt sans-serif;cursor:auto;
}
#mhtReaderInfo{display:flex;position:fixed;z-index:999999999;left:0;top:0;right:0;bottom:0;background:rgb(0,0,0,0.5);cursor:pointer}
#mhtReaderInfo .box{margin:auto;border:.3rem solid #007;border-radius:.3rem;background:#fff}
#mhtReaderInfo .title{position:relative;padding:.1rem 0;background:#77d;color:#fff;font-weight:bold;text-indent:.4rem}
#mhtReaderInfo .close{
  position:absolute;top:.1rem;right:.1rem;width:1.4rem;background:#b00;
  text-align:center;color:#fff;font-size:9pt;font-weight:bold;cursor:pointer;
}
#mhtReaderInfo .content{margin:.4rem .5rem;max-width:60rem}
#mhtReaderInfo td{padding:.2rem 0}
#mhtReaderInfo td:first-child{vertical-align:top;white-space:nowrap}
#mhtReaderInfo td:last-child{padding-left:1rem}
#mhtReaderInfo a{cursor:pointer;color:#00b}
#mhtReaderInfo a:visited{color:#b0b}
</style>
<div class="box">
  <div class="title">Document Information - MHTML Reader<span class="close">X</span></div>
  <table class="content"></table>
</div>`;
              //One row per file property, followed by one row per top-level MHTML header.
              t = e.querySelector(".content");
              addRow("File Name", mht.name);
              addRow("File Type", mht.type);
              addRow("File Last Modified", new Date(mht.lastModified));
              addRow("File Size", fmtSize(mht.size));
              Object.keys(mht.content.headers).forEach(k => addRow(k, mht.content.headers[k]));
              //Clicking the dimmed backdrop (the overlay element itself) or the "X" removes the overlay.
              e.addEventListener("click", ev => {
                if ((ev.target.id === "mhtReaderInfo") || (ev.target.className === "close")) e.remove();
              })
              //Attach the overlay to the opened window's document and drop focus from whatever
              //element is active there.
              s.document.documentElement.appendChild(e);
              if (s.document.activeElement) s.document.activeElement.blur();
            });
            sp = setInterval(() => {
              if (s.closed) {
                URL.revokeObjectURL(m);
                clearInterval(sp);
              }
            }, 1000);
          } else alert("Invalid MHTML file.");
        };
        fr.readAsText(inp.files[0]);
      } else alert("Please select a file.");
      ev.preventDefault();
    }
    inp.click();
  }
  GM_registerMenuCommand("Open MHTML file", unsafeWindow.mhtreader_ujs = mhtReader.bind(unsafeWindow));
})();