HTML2FB2Lib

This is a library for converting HTML to FB2.

Version vom 20.06.2023. Aktuellste Version

Dieses Skript sollte nicht direkt installiert werden. Es handelt sich hier um eine Bibliothek für andere Skripte, welche über folgenden Befehl in den Metadaten eines Skriptes eingebunden wird: // @require https://update.greasyfork.org/scripts/468831/1208304/HTML2FB2Lib.js

// ==UserScript==
// @name           HTML2FB2Lib
// @namespace      90h.yy.zz
// @version        0.3.0
// @author         Ox90
// @match          http://*
// @match          https://*
// @description    This is a library for converting HTML to FB2.
// @run-at         document-start
// @license        MIT
// ==/UserScript==

/**
 * Depth-first walker over an HTML node tree that builds FB2 elements.
 *
 * Subclasses customise the walk through three hooks:
 *  - startNode(node, depth): return the node to process, or a falsy value to skip it;
 *  - processElement(fb2el, depth): return the element to keep, or a falsy value to
 *    drop it (its HTML children are then not visited);
 *  - endNode(node, depth): called after a node accepted by startNode was handled.
 * Setting `this._stop` to a truthy value ends the sibling loop that is currently running.
 */
class FB2Parser {
  constructor() {
    this._stop = null;
  }

  /**
   * Walks the children of `htmlNode`, optionally starting at `fromNode`.
   *
   * @param htmlNode  node whose children are walked
   * @param fromNode  optional first node to visit instead of `htmlNode.firstChild`
   * @returns promise resolving to the value of `this._stop` after the walk
   */
  async parse(htmlNode, fromNode) {
    const walk = async (parent, start, target, depth) => {
      for (let cur = start || parent.firstChild; cur; cur = cur.nextSibling) {
        const accepted = this.startNode(cur, depth);
        if (accepted) {
          const converted = this.processElement(FB2Element.fromHTML(accepted, false), depth);
          if (converted) {
            // Top-level elements have no FB2 parent to be attached to.
            if (target) target.children.push(converted);
            await walk(accepted, null, converted, depth + 1);
          }
          this.endNode(accepted, depth);
        }
        if (this._stop) break;
      }
    };
    await walk(htmlNode, fromNode, null, 0);
    return this._stop;
  }

  /** Hook: the default accepts every node unchanged. */
  startNode(node, depth) {
    return node;
  }

  /** Hook: the default keeps every converted element unchanged. */
  processElement(fb2el, depth) {
    return fb2el;
  }

  /** Hook: the default does nothing. */
  endNode(node, depth) {
  }
}

/**
 * In-memory model of an FB2 book that can be serialized to an XML string.
 *
 * Besides the fields initialised by the constructor, the serializer reads these
 * optional properties when they are set on the instance: `bookTitle`, `keywords`,
 * `bookDate`, `coverpage`, `sequence` ({ name, number }), `programName`, `sourceURL`.
 */
class FB2Document {
  constructor() {
    this.binaries = [];
    this.bookAuthors = [];
    this.annotation = null;
    this.genres = [];
    this.chapters = [];
    this.xmldoc = null;
  }

  /**
   * Serializes the whole book (description, body, binaries) to an XML string.
   * The internal XML document is discarded afterwards, even when serialization
   * fails, so that a later call never appends to a half-built tree.
   *
   * @returns {string} serialized FictionBook XML
   */
  toString() {
    this._ensureXMLDocument();
    try {
      const root = this.xmldoc.documentElement;
      this._markBinaries();
      root.appendChild(this._makeDescriptionElement());
      root.appendChild(this._makeBodyElement());
      this._makeBinaryElements().forEach(el => root.appendChild(el));
      return (new XMLSerializer()).serializeToString(this.xmldoc);
    } finally {
      this.xmldoc = null;
    }
  }

  /** Creates an element in the namespace of the document root. */
  createElement(name) {
    this._ensureXMLDocument();
    return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  }

  /** Creates a text node owned by the internal XML document. */
  createTextNode(value) {
    this._ensureXMLDocument();
    return this.xmldoc.createTextNode(value);
  }

  /** Creates a document fragment owned by the internal XML document. */
  createDocumentFragment() {
    this._ensureXMLDocument();
    return this.xmldoc.createDocumentFragment();
  }

  /** Lazily creates the empty <FictionBook> document with the `l` (xlink) prefix declared. */
  _ensureXMLDocument() {
    if (!this.xmldoc) {
      this.xmldoc = new DOMParser().parseFromString(
        '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
        "application/xml"
      );
      this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
    }
  }

  /**
   * Builds the <description> element with its <title-info> and <document-info> parts.
   * Optional parts (annotation, keywords, date, coverpage, sequence, program-used,
   * src-url) are emitted only when the corresponding property is set.
   */
  _makeDescriptionElement() {
    const desc = this.createElement("description");
    // title-info
    const t_info = this.createElement("title-info");
    desc.appendChild(t_info);
    this.genres.forEach(g => t_info.appendChild(g.xml(this)));
    // A placeholder author is used when the book has none.
    (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
      t_info.appendChild(a.xml(this));
    });
    t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
    // The constructor leaves annotation as null, so it must be treated as optional.
    if (this.annotation) t_info.appendChild(this.annotation.xml(this));
    if (this.keywords) t_info.appendChild(this.keywords.xml(this));
    if (this.bookDate) {
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
      el.textContent = this.bookDate.getFullYear();
      t_info.appendChild(el);
    }
    if (this.coverpage) {
      const el = this.createElement("coverpage");
      el.appendChild(this.coverpage.xml(this));
      t_info.appendChild(el);
    }
    const lang = this.createElement("lang");
    lang.textContent = "ru";
    t_info.appendChild(lang);
    if (this.sequence) {
      const el = this.createElement("sequence");
      el.setAttribute("name", this.sequence.name);
      if (this.sequence.number) el.setAttribute("number", this.sequence.number);
      t_info.appendChild(el);
    }
    // document-info
    const d_info = this.createElement("document-info");
    desc.appendChild(d_info);
    d_info.appendChild((new FB2Author("Ox90")).xml(this));
    if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
    // Creation time of the FB2 file itself.
    const f_time = new Date();
    const f_date = this.createElement("date");
    f_date.setAttribute("value", FB2Utils.dateToAtom(f_time));
    f_date.textContent = f_time.toUTCString();
    d_info.appendChild(f_date);
    if (this.sourceURL) {
      d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
    }
    d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
    d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
    return desc;
  }

  /** Builds the <body>: a title block (authors line, book title) followed by all chapters. */
  _makeBodyElement() {
    const body = this.createElement("body");
    const title = this.createElement("title");
    body.appendChild(title);
    if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
    title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
    this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
    return body;
  }

  /** Assigns an id ("image<N><suffix>") to every binary that does not have one yet. */
  _markBinaries() {
    let idx = 0;
    this.binaries.forEach(img => {
      if (!img.id) img.id = "image" + (++idx) + img.suffix();
    });
  }

  /** Returns <binary> elements for the binaries that actually carry data. */
  _makeBinaryElements() {
    return this.binaries.filter(img => img.value).map(img => img.xmlBinary(this));
  }

  /**
   * Derives a book id from the source URL (or, failing that, the title) using a
   * 32-bit rolling hash. An empty string is hashed when neither is available.
   *
   * @returns {string} id of the form "rbe_<digits>"
   */
  _genBookId() {
    const str = this.sourceURL || this.bookTitle || "";
    let hash = 0;
    for (let i = 0; i < str.length; ++i) {
      // "| 0" truncates to a signed 32-bit integer after every step.
      hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0;
    }
    // The trailing "1" marks hashes that were positive before Math.abs().
    return "rbe_" + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  }
}

/**
 * Base node of the FB2 element tree.
 *
 * An element has a tag `name`, an optional scalar `value` (null when absent)
 * and a list of child elements.
 */
class FB2Element {
  constructor(name, value) {
    this.name = name;
    this.value = value !== undefined ? value : null;
    this.children = [];
  }

  /**
   * Creates the FB2 element that corresponds to an HTML (or FB2-like) DOM node.
   *
   * @param node       DOM node; dispatch is done on `node.nodeName`
   * @param recursive  when truthy, child nodes are converted and appended as well
   *                   (only for node kinds that can carry content)
   * @returns the new element, or null for comment nodes
   * @throws {FB2UnknownNodeError} for node names that have no FB2 counterpart
   */
  static fromHTML(node, recursive) {
    let fb2el = null;
    switch (node.nodeName) {
      case "#comment":
        return null;
      case "#text":
        return new FB2Text(node.textContent);
      case "BR":
        return new FB2EmptyLine();
      case "HR":
        return new FB2Paragraph("---");
      case "IMG":
        return new FB2Image(node.src);
      case "U":
      case "EM":
      case "EMPHASIS":
      case "I":
        fb2el = new FB2Element("emphasis");
        break;
      case "S":
      case "DEL":
      case "STRIKE":
        fb2el = new FB2Element("strike");
        break;
      case "STRONG":
        fb2el = new FB2Element("strong");
        break;
      case "BLOCKQUOTE":
        fb2el = new FB2Element("cite");
        break;
      case "SPAN":
        fb2el = new FB2Text();
        break;
      case "P":
      case "LI":
        fb2el = new FB2Paragraph();
        break;
      case "SUBTITLE":
        fb2el = new FB2Subtitle();
        break;
      case "A":
        fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
        break;
      case "OL":
        fb2el = new FB2OrderedList();
        break;
      default:
        throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
    }
    if (recursive) fb2el.appendContentFromHTML(node);
    return fb2el;
  }

  /** True when the element has a non-null value or at least one child. */
  hasValue() {
    return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  }

  /**
   * Replaces the children with the converted child nodes of `data`.
   * `fb2doc` and `log` are only passed through to appendContentFromHTML.
   */
  setContentFromHTML(data, fb2doc, log) {
    this.children = [];
    this.appendContentFromHTML(data, fb2doc, log);
  }

  /**
   * Converts every child node of `data` recursively and appends the results.
   * `fb2doc` and `log` are accepted but not used by this implementation.
   */
  appendContentFromHTML(data, fb2doc, log) {
    for (const node of data.childNodes) {
      const fe = FB2Element.fromHTML(node, true);
      if (fe) this.children.push(fe);
    }
  }

  /**
   * Normalizes the subtree and splits this element at every direct child that
   * is an empty line or a subtitle.
   *
   * @returns {FB2Element[]} this element, followed by (separator, continuation)
   *          pairs for every split point; continuations may be empty
   */
  normalize() {
    const result = [ this ];
    let current = this;
    const children = this.children;
    this.children = [];
    children.forEach(child => {
      if (child instanceof FB2EmptyLine || child instanceof FB2Subtitle) {
        result.push(child);
        // The continuation must stay the same kind of element: constructors that
        // take the tag name (or other data such as a link target) as an argument
        // would otherwise produce a nameless element, so own fields are copied.
        current = new this.constructor();
        for (const key of Object.keys(this)) {
          if (key !== "value" && key !== "children") current[key] = this[key];
        }
        result.push(current);
      } else {
        child.normalize().forEach(part => {
          // Collapse a wrapper around a single child with the same tag name.
          if (!part.value && part.children.length === 1 && part.name === part.children[0].name) {
            part = part.children[0];
          }
          if (part.hasValue()) current.children.push(part);
        });
      }
    });
    return result;
  }

  /** Renders the element (value as text content, then children) using `doc` as node factory. */
  xml(doc) {
    const el = doc.createElement(this.name);
    if (this.value !== null) el.textContent = this.value;
    this.children.forEach(ch => el.appendChild(ch.xml(doc)));
    return el;
  }
}

/**
 * Block-level FB2 element: on normalization, empty content is stripped from
 * both ends of the block before the generic normalization runs.
 */
class FB2BlockElement extends FB2Element {
  normalize() {
    // Preliminary normalization: replace every child by its normalized pieces
    const flattened = [];
    for (const child of this.children) {
      for (const piece of child.normalize()) flattened.push(piece);
    }
    this.children = flattened;

    // Trims a plain text element on one side and reports whether nothing is left of it
    const isBlank = (el, trim) => {
      if (el.name === "text" && typeof(el.value) === "string") {
        el.value = trim(el.value) || null;
      }
      return !el.hasValue();
    };

    // Remove empty content at the end of the block
    while (this.children.length && isBlank(this.children[this.children.length - 1], s => s.trimEnd())) {
      this.children.pop();
    }
    // Remove empty content at the start of the block
    while (this.children.length && isBlank(this.children[0], s => s.trimStart())) {
      this.children.shift();
    }
    // Final normalization
    return super.normalize();
  }
}

/**
 * Top-level FB2 element "section" with an optional title.
 */
class FB2Chapter extends FB2Element {
  constructor(title) {
    super("section");
    this.title = title;
  }

  /**
   * Wraps bare text children in paragraphs, normalizes every child and keeps
   * only the resulting elements that have a value.
   *
   * @returns {FB2Element[]} always a single-item list holding this chapter
   */
  normalize() {
    const kept = [];
    for (let child of this.children) {
      if (child instanceof FB2Text) {
        const wrapper = new FB2Paragraph();
        wrapper.children.push(child);
        child = wrapper;
      }
      for (const piece of child.normalize()) {
        if (piece.hasValue()) kept.push(piece);
      }
    }
    this.children = kept;
    return [ this ];
  }

  /** Renders the section; a <title><p>…</p></title> is put first when a title is set. */
  xml(doc) {
    const sectionEl = super.xml(doc);
    if (!this.title) return sectionEl;
    const titleEl = doc.createElement("title");
    const paragraphEl = doc.createElement("p");
    paragraphEl.textContent = this.title;
    titleEl.appendChild(paragraphEl);
    sectionEl.prepend(titleEl);
    return sectionEl;
  }
}

/**
 * Top-level FB2 element "annotation".
 */
class FB2Annotation extends FB2Element {
  constructor() {
    super("annotation");
  }

  /**
   * Groups loose inline content into paragraphs (an empty line starts a new
   * paragraph; block elements are kept as they are) and then normalizes the
   * resulting children, dropping the ones without a value.
   *
   * @returns {FB2Element[]} a single-item list holding this annotation, in line
   *          with the other normalize() implementations of the file
   */
  normalize() {
    // Wrap unformatted text separated by <br> into paragraphs
    let lp = null;
    const newParagraph = list => {
      lp = new FB2Paragraph();
      list.push(lp);
    };
    this.children = this.children.reduce((list, el) => {
      if (el.name === "empty-line") {
        newParagraph(list);
      } else if (el instanceof FB2BlockElement) {
        list.push(el);
        lp = null;
      } else {
        if (!lp) newParagraph(list);
        lp.children.push(el);
      }
      return list;
    }, []);
    // Normalize the children here so that their split results
    // do not bubble up any further
    this.children = this.children.reduce((list, el) => {
      el.normalize().forEach(part => {
        if (part.hasValue()) list.push(part);
      });
      return list;
    }, []);
    return [ this ];
  }
}

/** Block element rendered with the FB2 tag "subtitle". */
class FB2Subtitle extends FB2BlockElement {
  constructor(value) {
    const tag = "subtitle";
    super(tag, value);
  }
}

/** Block element rendered with the FB2 tag "p". */
class FB2Paragraph extends FB2BlockElement {
  constructor(value) {
    const tag = "p";
    super(tag, value);
  }
}

/** FB2 "empty-line" element; it carries no content of its own. */
class FB2EmptyLine extends FB2Element {
  constructor() {
    const tag = "empty-line";
    super(tag);
  }

  /** Always true, so an empty line is never treated as a valueless element. */
  hasValue() {
    const alwaysPresent = true;
    return alwaysPresent;
  }
}

/**
 * Inline text. Either holds a string `value` (plain text) or acts as a
 * transparent container whose children are rendered without a wrapping tag.
 */
class FB2Text extends FB2Element {
  constructor(value) {
    super("text", value);
  }

  /**
   * Renders a document fragment with the children when there is no value but
   * there are children; otherwise renders a text node with the value.
   */
  xml(doc) {
    if (!this.value && this.children.length) {
      const fr = doc.createDocumentFragment();
      for (const ch of this.children) {
        fr.appendChild(ch.xml(doc));
      }
      return fr;
    }
    // A missing value must become an empty node: the DOM would otherwise
    // stringify null/undefined into the literal text "null"/"undefined".
    return doc.createTextNode(this.value ?? "");
  }
}

/** FB2 hyperlink element ("a") that remembers its target in `href`. */
class FB2Link extends FB2Element {
  constructor(href) {
    const tag = "a";
    super(tag);
    this.href = href;
  }

  /** Renders the generic element and adds the link target as the "l:href" attribute. */
  xml(doc) {
    const anchor = super.xml(doc);
    const target = this.href;
    anchor.setAttribute("l:href", target);
    return anchor;
  }
}

/**
 * Ordered list. FB2 output gets no list tag: the items are rendered one after
 * another in a document fragment, each numbered item prefixed with "<n>. ".
 */
class FB2OrderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * Renders all children into a fragment and numbers the ones that have a value.
   * A child is numbered only when its rendered node supports prepend(); plain
   * text nodes (for example whitespace between list items) do not, and are
   * appended unchanged without consuming a number.
   */
  xml(doc) {
    const fr = doc.createDocumentFragment();
    let pos = 0;
    for (const ch of this.children) {
      const ch_el = ch.xml(doc);
      if (ch.hasValue() && typeof(ch_el.prepend) === "function") {
        ++pos;
        ch_el.prepend(`${pos}. `);
      }
      fr.appendChild(ch_el);
    }
    return fr;
  }
}

/**
 * FB2 "author" element built from a display name.
 *
 * The name is split on whitespace: one word becomes the nickname, two words
 * become first and last name, and with more words everything between the first
 * and the last word becomes the middle name.
 */
class FB2Author extends FB2Element {
  /**
   * @param {string} s  author name; surrounding and repeated whitespace is ignored
   */
  constructor(s) {
    super("author");
    // Trim and split on whitespace runs so that stray spaces, tabs or
    // non-breaking spaces never produce empty name parts.
    const a = s.trim().split(/\s+/);
    switch (a.length) {
      case 1:
        this.nickName = a[0];
        break;
      case 2:
        this.firstName = a[0];
        this.lastName = a[1];
        break;
      default:
        this.firstName = a[0];
        this.middleName = a.slice(1, -1).join(" ");
        this.lastName = a[a.length - 1];
        break;
    }
    this.homePage = null;
  }

  /** True when at least one of first, middle or last name is set (the nickname is not considered). */
  hasValue() {
    return (!!this.firstName || !!this.lastName || !!this.middleName);
  }

  /** Returns the nickname when there is no first name, otherwise the name parts joined by spaces. */
  toString() {
    if (!this.firstName) return this.nickName;
    return [ this.firstName, this.middleName, this.lastName ].filter(name => name).join(" ");
  }

  /** Renders <author> with one child element per name part / home page that is set. */
  xml(doc) {
    const a_el = super.xml(doc);
    [
      [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
      [ "last-name", this.lastName ], [ "home-page", this.homePage ],
      [ "nickname", this.nickName ]
    ].forEach(([ tag, text ]) => {
      if (text) {
        const e = doc.createElement(tag);
        e.textContent = text;
        a_el.appendChild(e);
      }
    });
    return a_el;
  }
}

/**
 * FB2 image. Constructed either from a URL (data is fetched later by load())
 * or directly from the binary payload stored in `value`.
 *
 * Properties used: `url`, `value` (base64 payload), `type` (MIME type),
 * `size`, and `id` (assigned from outside before rendering).
 */
class FB2Image extends FB2Element {
  /**
   * @param value  a string is taken as the image URL; anything else is stored as `value`
   */
  constructor(value) {
    super("image");
    if (typeof(value) === "string") {
      this.url = value;
    } else {
      this.value = value;
    }
  }

  /**
   * Downloads the image (when it has a URL) and stores its base64 payload in
   * `value`, its MIME type in `type` and its byte size in `size`.
   *
   * @param onprogress  optional progress callback passed on to the loader
   * @throws {Error} when the downloaded data cannot be converted; download
   *         errors from the loader are propagated unchanged
   */
  async load(onprogress) {
    if (!this.url) return;
    const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
    this.type = bin.type;
    this.size = bin.size;
    try {
      const dataUrl = await new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.addEventListener("load", () => resolve(reader.result));
        reader.addEventListener("error", () => reject(reader.error));
        reader.addEventListener("abort", () => reject(new Error("aborted")));
        reader.readAsDataURL(bin);
      });
      // Keep only the payload after the "data:<type>;base64," prefix.
      this.value = dataUrl.slice(dataUrl.indexOf(",") + 1);
    } catch (err) {
      throw new Error("Ошибка загрузки изображения", { cause: err });
    }
  }

  /** Always true, so an image is never treated as a valueless element. */
  hasValue() {
    return true;
  }

  /**
   * Renders an <image> reference to the binary when data is present, otherwise
   * a text placeholder with the image id.
   */
  xml(doc) {
    if (this.value) {
      const el = doc.createElement(this.name);
      el.setAttribute("l:href", "#" + this.id);
      return el;
    }
    const id = this.id || "изображение";
    return doc.createTextNode(`[ ${id} ]`);
  }

  /** Renders the <binary> element that carries the base64 payload. */
  xmlBinary(doc) {
    const el = doc.createElement("binary");
    el.setAttribute("id", this.id);
    el.setAttribute("content-type", this.type);
    el.textContent = this.value;
    return el;
  }

  /**
   * Returns the file suffix for the image MIME type ("" when unknown).
   * Parameters after ";" and letter case in the type are ignored.
   */
  suffix() {
    const mime = typeof(this.type) === "string" ? this.type.split(";")[0].trim().toLowerCase() : "";
    switch (mime) {
      case "image/png":
        return ".png";
      case "image/jpeg":
        return ".jpg";
      case "image/webp":
        return ".webp";
    }
    return "";
  }

  /** Download hook; delegates to FB2Loader.addJob with the same arguments. */
  async _load(...args) {
    return FB2Loader.addJob(...args);
  }
}

/**
 * Small fetch-based downloader. All jobs share one AbortController so that
 * they can be cancelled together with abortAll().
 */
class FB2Loader {
  /**
   * Downloads `url` and returns its content.
   *
   * @param url     address to fetch
   * @param params  optional settings: `method` (default "GET"), `responseType`
   *                ("binary" yields a Blob, anything else UTF-8 decoded text) and
   *                `onprogress(loaded, total)`, called per chunk when the
   *                response announces a non-zero Content-Length
   * @returns promise resolving to a Blob or a string
   * @throws {Error} when the response status is not OK
   */
  static async addJob(url, params) {
    params ||= {};
    const request = {
      method: params.method || "GET",
      credentials: "same-origin",
      signal: this._getSignal()
    };
    const resp = await fetch(url, request);
    if (!resp.ok) throw new Error(`Сервер вернул ошибку (${resp.status})`);
    const reader = resp.body.getReader();
    const type = resp.headers.get("Content-Type");
    const total = +resp.headers.get("Content-Length");
    const notify = (total && typeof(params.onprogress) === "function") ? params.onprogress : null;

    // Read the body chunk by chunk so that progress can be reported.
    const chunks = [];
    let loaded = 0;
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      chunks.push(step.value);
      loaded += step.value.length;
      if (notify) notify(loaded, total);
    }

    if (params.responseType === "binary") {
      return new Blob(chunks, { type: type });
    }
    // Text: glue the chunks into one buffer and decode it as UTF-8.
    const data = new Uint8Array(loaded);
    chunks.reduce((pos, chunk) => {
      data.set(chunk, pos);
      return pos + chunk.length;
    }, 0);
    return (new TextDecoder("utf-8")).decode(data);
  }

  /** Aborts every job started with the current controller and forgets the controller. */
  static abortAll() {
    if (!this._controller) return;
    this._controller.abort();
    this._controller = null;
  }

  /** Returns the signal of the shared controller, creating the controller on first use. */
  static _getSignal() {
    this._controller ||= new AbortController();
    return this._controller.signal;
  }
}

/** Miscellaneous helpers. */
class FB2Utils {
  /**
   * Formats the local calendar date of `date` as "<year>-MM-DD"
   * (month and day are zero-padded to two digits).
   */
  static dateToAtom(date) {
    const pad2 = num => (num < 10 ? "0" : "") + num;
    return [ date.getFullYear(), pad2(date.getMonth() + 1), pad2(date.getDate()) ].join("-");
  }
}

/** Error raised for an HTML node that has no FB2 counterpart. */
class FB2UnknownNodeError extends Error {
  constructor(message) {
    super(message);
    const errorName = "UnknownNodeError";
    this.name = errorName;
  }
}