HTML2FB2Lib

This is a library for converting HTML to FB2.

As of 2023-06-23. See the latest version.

This script should not be installed directly. It is a library for other scripts to include with the meta directive // @require https://update.greasyfork.org/scripts/468831/1209826/HTML2FB2Lib.js

// ==UserScript==
// @name           HTML2FB2Lib
// @namespace      90h.yy.zz
// @version        0.4.1
// @author         Ox90
// @match          http://*
// @match          https://*
// @description    This is a library for converting HTML to FB2.
// @run-at         document-start
// @license        MIT
// ==/UserScript==

/**
 * Depth-first walker that turns an HTML node tree into FB2 elements.
 *
 * Subclasses customise the walk by overriding the three hooks
 * (startNode, processElement, endNode). Assigning a truthy value to
 * `this._stop` from inside a hook ends the walk; that value becomes the
 * result of parse().
 */
class FB2Parser {
  constructor() {
    // Stop marker; stays null for a walk that runs to completion.
    this._stop = null;
  }

  /**
   * Walks the children of `htmlNode`, optionally starting at `fromNode`
   * instead of the first child.
   *
   * @param {Node} htmlNode - Container whose children are converted.
   * @param {Node} [fromNode] - Child to start from; defaults to firstChild.
   * @returns {Promise<*>} The current value of `this._stop`.
   */
  async parse(htmlNode, fromNode) {
    const walk = async (parent, first, target, depth) => {
      for (let cur = first || parent.firstChild; cur; cur = cur.nextSibling) {
        const accepted = this.startNode(cur, depth);
        if (accepted) {
          const converted = this.processElement(FB2Element.fromHTML(accepted, false), depth);
          if (converted) {
            // Top-level results have no parent element to attach to.
            if (target) target.children.push(converted);
            await walk(accepted, null, converted, depth + 1);
          }
          this.endNode(accepted, depth);
        }
        if (this._stop) break;
      }
    };
    await walk(htmlNode, fromNode, null, 0);
    return this._stop;
  }

  /**
   * Hook called before a node is converted.
   * Return the node to process (possibly a replacement) or a falsy value to skip it.
   */
  startNode(node, depth) {
    return node;
  }

  /**
   * Hook called with the freshly converted FB2 element.
   * Return the element to keep (its HTML children are then walked) or a falsy value to drop it.
   */
  processElement(fb2el, depth) {
    return fb2el;
  }

  /**
   * Hook called after a node accepted by startNode has been handled.
   */
  endNode(node, depth) {
  }
}

/**
 * In-memory model of a FictionBook 2 document that can be serialised to XML.
 *
 * Besides the fields initialised in the constructor, serialisation reads the
 * following optional properties when the caller has set them: bookTitle,
 * keywords, bookDate, coverpage (one image or an array), sequence
 * ({ name, number }), programName, sourceURL and idPrefix.
 */
class FB2Document {
  constructor() {
    this.binaries = [];
    this.bookAuthors = [];
    this.annotation = null;
    this.genres = [];
    this.chapters = [];
    // Lazily created XML document used as the node factory; see _ensureXMLDocument().
    this.xmldoc = null;
  }

  /**
   * Serialises the document to an FB2 XML string.
   * The internal XML document is discarded afterwards, so every call starts
   * from a clean root element.
   *
   * @returns {string} The serialised FictionBook XML.
   */
  toString() {
    this._ensureXMLDocument();
    try {
      const root = this.xmldoc.documentElement;
      this._markBinaries();
      root.appendChild(this._makeDescriptionElement());
      root.appendChild(this._makeBodyElement());
      this._makeBinaryElements().forEach(el => root.appendChild(el));
      return (new XMLSerializer()).serializeToString(this.xmldoc);
    } finally {
      // Reset even on failure so a partially filled root is never reused by a later call.
      this.xmldoc = null;
    }
  }

  /**
   * Creates an element in the FictionBook namespace.
   *
   * @param {string} name - Element name.
   * @returns {Element}
   */
  createElement(name) {
    this._ensureXMLDocument();
    return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  }

  /**
   * Creates a text node owned by the internal XML document.
   *
   * @param {string} value - Text content.
   * @returns {Text}
   */
  createTextNode(value) {
    this._ensureXMLDocument();
    return this.xmldoc.createTextNode(value);
  }

  /**
   * Creates an empty document fragment owned by the internal XML document.
   *
   * @returns {DocumentFragment}
   */
  createDocumentFragment() {
    this._ensureXMLDocument();
    return this.xmldoc.createDocumentFragment();
  }

  /**
   * Creates the internal XML document (an empty FictionBook root with the
   * xlink prefix declared) if it does not exist yet.
   */
  _ensureXMLDocument() {
    if (!this.xmldoc) {
      this.xmldoc = new DOMParser().parseFromString(
        '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
        "application/xml"
      );
      this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
    }
  }

  /**
   * Builds the <description> element with its <title-info> and
   * <document-info> children.
   *
   * @returns {Element}
   */
  _makeDescriptionElement() {
    const desc = this.createElement("description");
    // title-info
    const t_info = this.createElement("title-info");
    desc.appendChild(t_info);
    this.genres.forEach(g => t_info.appendChild(g.xml(this)));
    // Fall back to a placeholder author when none has been set.
    (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
      t_info.appendChild(a.xml(this));
    });
    t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
    if (this.annotation) t_info.appendChild(this.annotation.xml(this));
    if (this.keywords) t_info.appendChild(this.keywords.xml(this));
    if (this.bookDate) {
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
      el.textContent = this.bookDate.getFullYear();
      t_info.appendChild(el);
    }
    if (this.coverpage) {
      const el = this.createElement("coverpage");
      (Array.isArray(this.coverpage) ? this.coverpage : [ this.coverpage ]).forEach(img => {
        el.appendChild(img.xml(this));
      });
      t_info.appendChild(el);
    }
    const lang = this.createElement("lang");
    lang.textContent = "ru";
    t_info.appendChild(lang);
    if (this.sequence) {
      const el = this.createElement("sequence");
      el.setAttribute("name", this.sequence.name);
      if (this.sequence.number) el.setAttribute("number", this.sequence.number);
      t_info.appendChild(el);
    }
    // document-info
    const d_info = this.createElement("document-info");
    desc.appendChild(d_info);
    d_info.appendChild((new FB2Author("Ox90")).xml(this));
    if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
    d_info.appendChild((() => {
      // Creation timestamp of the generated file.
      const f_time = new Date();
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(f_time));
      el.textContent = f_time.toUTCString();
      return el;
    })());
    if (this.sourceURL) {
      d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
    }
    d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
    d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
    return desc;
  }

  /**
   * Builds the <body> element: an optional title block (authors, book title)
   * followed by every chapter.
   *
   * @returns {Element}
   */
  _makeBodyElement() {
    const body = this.createElement("body");
    if (this.bookTitle || this.bookAuthors.length) {
      const title = this.createElement("title");
      body.appendChild(title);
      // join() relies on each author's toString().
      if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
      if (this.bookTitle) title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
    }
    this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
    return body;
  }

  /**
   * Assigns an id ("image<N><suffix>") to every binary that does not have one yet.
   */
  _markBinaries() {
    let idx = 0;
    this.binaries.forEach(img => {
      if (!img.id) img.id = "image" + (++idx) + img.suffix();
    });
  }

  /**
   * Builds <binary> elements for the binaries that actually carry data.
   *
   * @returns {Element[]}
   */
  _makeBinaryElements() {
    return this.binaries.reduce((list, img) => {
      if (img.value) list.push(img.xmlBinary(this));
      return list;
    }, []);
  }

  /**
   * Derives a book id from a 32-bit string hash of sourceURL (or bookTitle).
   * The id is `idPrefix` (default "h2f2l_") followed by the absolute hash
   * value and a trailing "1" for positive hashes.
   *
   * @returns {string}
   */
  _genBookId() {
    let str = this.sourceURL || this.bookTitle || "";
    let hash = 0;
    const slen = str.length;
    for (let i = 0; i < slen; ++i) {
      const ch = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + ch;
      hash = hash & hash; // Convert to 32bit integer
    }
    // Parenthesised: `+` binds tighter than `||`, so without the parentheses a
    // custom prefix would be returned on its own, without the hash.
    return (this.idPrefix || "h2f2l_") + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  }
}

/**
 * Generic FB2 tree node: an element name, an optional scalar value and a
 * list of child elements. Base class for all specialised FB2 elements.
 */
class FB2Element {
  /**
   * @param {string} name - FB2 element name.
   * @param {*} [value] - Text value; stored as null when omitted.
   */
  constructor(name, value) {
    this.name = name;
    this.value = value !== undefined ? value : null;
    this.children = [];
  }

  /**
   * Creates the FB2 element that corresponds to an HTML node.
   *
   * @param {Node} node - Source node; dispatch is done on node.nodeName.
   * @param {boolean} recursive - When true, the node's children are converted too.
   * @returns {FB2Element|null} The new element, or null for ignored nodes
   *   (scripts and comments).
   * @throws {FB2UnknownNodeError} For node names that have no mapping.
   */
  static fromHTML(node, recursive) {
    let fb2el = null;
    // Direct tag -> FB2 element name mapping; null marks nodes to ignore.
    const names = new Map([
      [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
      [ "S", "strikethrough" ], [ "DEL", "strikethrough" ], [ "STRIKE", "strikethrough" ],
      [ "STRONG", "strong" ], [ "BLOCKQUOTE", "cite" ],
      [ "SCRIPT", null ], [ "#comment", null ]
    ]);
    const node_name = node.nodeName;
    if (names.has(node_name)) {
      const name = names.get(node_name);
      if (!name) return null;
      fb2el = new FB2Element(name);
    } else {
      switch (node_name) {
        case "#text":
          return new FB2Text(node.textContent);
        case "SPAN":
          // A span carries no formatting of its own: it is a transparent text container.
          fb2el = new FB2Text();
          break;
        case "P":
        case "LI":
          fb2el = new FB2Paragraph();
          break;
        case "SUBTITLE":
          fb2el = new FB2Subtitle();
          break;
        case "A":
          fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
          break;
        case "OL":
          fb2el = new FB2OrderedList();
          break;
        case "UL":
          fb2el = new FB2UnorderedList();
          break;
        case "BR":
          return new FB2EmptyLine();
        case "HR":
          return new FB2Paragraph("---");
        case "IMG":
          return new FB2Image(node.src);
        default:
          throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
      }
    }
    if (recursive) fb2el.appendContentFromHTML(node);
    return fb2el;
  }

  /**
   * @returns {boolean} True when the element has a non-null value or at least one child.
   */
  hasValue() {
    return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  }

  /**
   * Replaces the children with the converted children of `data`.
   * `fb2doc` and `log` are only forwarded to appendContentFromHTML().
   */
  setContentFromHTML(data, fb2doc, log) {
    this.children = [];
    this.appendContentFromHTML(data, fb2doc, log);
  }

  /**
   * Converts every child node of `data` (recursively) and appends the results.
   * `fb2doc` and `log` are accepted but not used by this base implementation.
   */
  appendContentFromHTML(data, fb2doc, log) {
    for (const node of data.childNodes) {
      let fe = FB2Element.fromHTML(node, true);
      if (fe) this.children.push(fe);
    }
  }

  /**
   * Normalises the subtree and returns the list of elements that replace
   * this one.
   *
   * Empty-line and subtitle children cannot stay nested: the element is split
   * around each of them, so they surface as siblings in the returned list.
   * Children without a value are dropped, and an element that only wraps a
   * single child with the same name is replaced by that child.
   *
   * NOTE(review): the continuation element of a split is built with no
   * constructor arguments, so state a subclass takes through its constructor
   * is not carried over to it — confirm this is intended.
   *
   * @returns {FB2Element[]}
   */
  normalize() {
    const _normalize = function(list) {
      let done = true;
      let res_list = list.reduce((accum, cur_el) => {
        accum.push(cur_el);
        const tmp_ch = cur_el.children;
        cur_el.children = [];
        tmp_ch.forEach(el => {
          if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
            // Emit the separator as a sibling and continue in a fresh copy of the current element.
            accum.push(el);
            const nm = cur_el.name;
            cur_el = new cur_el.constructor();
            if (!cur_el.name) cur_el.name = nm;
            accum.push(cur_el);
            done = false;
          } else {
            el.normalize().forEach(e => {
              // Collapse <x><x>...</x></x> into the inner element.
              if (!e.value && e.children.length === 1 && e.name === e.children[0].name) {
                e = e.children[0];
              }
              if (e !== el) done = false;
              if (e.hasValue()) cur_el.children.push(e);
            });
          }
        });
        return accum;
      }, []);
      return { list: res_list, done: done };
    }
    // Repeat until a pass makes no structural change.
    let result = _normalize([ this ]);
    while (!result.done) {
      result = _normalize(result.list);
    }
    return result.list;
  }

  /**
   * Builds the XML node for this element and its children.
   *
   * @param {FB2Document} doc - Node factory (createElement etc.).
   * @returns {Node}
   */
  xml(doc) {
    const el = doc.createElement(this.name);
    if (this.value !== null) el.textContent = this.value;
    this.children.forEach(ch => el.appendChild(ch.xml(doc)));
    return el;
  }
}

/**
 * Base class for block-level FB2 elements. Its normalisation additionally
 * strips empty content from both ends of the block.
 */
class FB2BlockElement extends FB2Element {
  /**
   * Normalises the children, trims trailing and then leading emptiness, and
   * finishes with the generic FB2Element normalisation.
   *
   * @returns {FB2Element[]} Elements that replace this block.
   */
  normalize() {
    // Preliminary pass: replace every child with its normalised form.
    const flattened = [];
    for (const child of this.children) {
      for (const piece of child.normalize()) flattened.push(piece);
    }
    this.children = flattened;

    const kids = this.children;
    // Drop empty content on the right.
    while (kids.length) {
      const tail = kids[kids.length - 1];
      if (tail instanceof FB2Text) tail.trimRight();
      if (tail.hasValue()) break;
      kids.pop();
    }
    // Drop empty content on the left.
    while (kids.length) {
      const head = kids[0];
      if (head instanceof FB2Text) head.trimLeft();
      if (head.hasValue()) break;
      kids.shift();
    }
    // Final pass.
    return super.normalize();
  }
}

/**
 * Top-level FB2 element: section.
 * Carries an optional title that is rendered as the section's first child.
 */
class FB2Chapter extends FB2Element {
  /**
   * @param {string} [title] - Chapter title.
   */
  constructor(title) {
    super("section");
    this.title = title;
  }

  /**
   * Wraps bare text children in paragraphs, normalises every child and drops
   * the results that carry no value.
   *
   * @returns {FB2Element[]} Always a single-item list holding this chapter.
   */
  normalize() {
    const kept = [];
    for (const child of this.children) {
      let block = child;
      if (child instanceof FB2Text) {
        // Text is not allowed directly inside a section.
        block = new FB2Paragraph();
        block.children.push(child);
      }
      for (const piece of block.normalize()) {
        if (piece.hasValue()) kept.push(piece);
      }
    }
    this.children = kept;
    return [ this ];
  }

  /**
   * Builds the <section> node and, when a title is set, prepends
   * <title><p>title</p></title> to it.
   */
  xml(doc) {
    const section = super.xml(doc);
    if (!this.title) return section;
    const heading = doc.createElement("title");
    const line = doc.createElement("p");
    line.textContent = this.title;
    heading.appendChild(line);
    section.prepend(heading);
    return section;
  }
}

/**
 * Top-level FB2 element: annotation.
 */
class FB2Annotation extends FB2Element {
  constructor() {
    super("annotation");
  }

  /**
   * Groups loose inline content into paragraphs (an empty line starts a new
   * paragraph, block elements are kept as they are), then normalises every
   * child and drops the results that carry no value.
   *
   * @returns {FB2Element[]} A single-item list holding this annotation, in
   *   line with the other normalize() implementations.
   */
  normalize() {
    // Wrap unformatted text separated by <br> into paragraphs.
    let lp = null;
    const newParagraph = list => {
      lp = new FB2Paragraph();
      list.push(lp);
    };
    this.children = this.children.reduce((list, el) => {
      if (el.name === "empty-line") {
        newParagraph(list);
      } else if (el instanceof FB2BlockElement) {
        list.push(el);
        lp = null;
      } else {
        if (!lp) newParagraph(list);
        lp.children.push(el);
      }
      return list;
    }, []);
    // Run the children's own normalisation here so that nothing bubbles up
    // past the annotation.
    this.children = this.children.reduce((list, el) => {
      el.normalize().forEach(el => {
        if (el.hasValue()) list.push(el);
      });
      return list;
    }, []);
    return [ this ];
  }
}

/**
 * Block element named "subtitle".
 */
class FB2Subtitle extends FB2BlockElement {
  /**
   * @param {*} [value] - Optional text value, passed through to the base constructor.
   */
  constructor(value) {
    super("subtitle", value);
  }
}

/**
 * Block element named "p" (paragraph).
 */
class FB2Paragraph extends FB2BlockElement {
  /**
   * @param {*} [value] - Optional text value, passed through to the base constructor.
   */
  constructor(value) {
    super("p", value);
  }
}

/**
 * Element named "empty-line".
 */
class FB2EmptyLine extends FB2Element {
  constructor() {
    super("empty-line");
  }

  /**
   * Always reports true, although the element has neither value nor children.
   */
  hasValue() {
    return true;
  }
}

/**
 * Text content: either a string value of its own or, when it has no value,
 * a transparent container for child elements.
 */
class FB2Text extends FB2Element {
  /**
   * @param {string} [value] - Text content.
   */
  constructor(value) {
    super("text", value);
  }

  /**
   * Removes leading whitespace. A value that becomes empty is set to null;
   * when there is no value, empty leading children are trimmed and dropped.
   */
  trimLeft() {
    if (typeof(this.value) === "string") this.value = this.value.trimStart() || null;
    if (!this.value) {
      while (this.children.length) {
        const first_child = this.children[0];
        if (first_child instanceof FB2Text) first_child.trimLeft();
        if (first_child.hasValue()) break;
        this.children.shift();
      }
    }
  }

  /**
   * Removes trailing whitespace. Empty trailing children are trimmed and
   * dropped first; the own value is only trimmed when no children remain.
   */
  trimRight() {
    while (this.children.length) {
      const last_child = this.children[this.children.length - 1];
      if (last_child instanceof FB2Text) last_child.trimRight();
      if (last_child.hasValue()) break;
      this.children.pop();
    }
    if (!this.children.length && typeof(this.value) === "string") {
      this.value = this.value.trimEnd() || null;
    }
  }

  /**
   * Builds a text node from the value, or a fragment holding the children's
   * nodes when there is no value.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {Text|DocumentFragment}
   */
  xml(doc) {
    if (!this.value && this.children.length) {
      let fr = doc.createDocumentFragment();
      for (const ch of this.children) {
        fr.appendChild(ch.xml(doc));
      }
      return fr;
    }
    // createTextNode() stringifies its argument, so a null value would end up
    // as the literal text "null" in the book.
    return doc.createTextNode(this.value ?? "");
  }
}

/**
 * Hyperlink element ("a") that keeps its target in `href`.
 */
class FB2Link extends FB2Element {
  /**
   * @param {string} href - Link target.
   */
  constructor(href) {
    super("a");
    this.href = href;
  }

  /**
   * Builds the <a> node and stores the target in its l:href attribute.
   */
  xml(doc) {
    const anchor = super.xml(doc);
    anchor.setAttribute("l:href", this.href);
    return anchor;
  }
}

/**
 * Ordered list. It is not rendered as an element of its own: the items are
 * emitted in a fragment, each non-empty one prefixed with its number.
 */
class FB2OrderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * Builds a fragment with the items' nodes; items that have a value get an
   * "N. " prefix, counting only those items.
   *
   * NOTE(review): assumes the node returned by every item's xml() supports
   * prepend() — confirm that bare text children never reach this point.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {DocumentFragment}
   */
  xml(doc) {
    const fragment = doc.createDocumentFragment();
    let counter = 0;
    this.children.forEach(item => {
      const node = item.xml(doc);
      if (item.hasValue()) {
        counter += 1;
        node.prepend(`${counter}. `);
      }
      fragment.appendChild(node);
    });
    return fragment;
  }
}

/**
 * Unordered list. It is not rendered as an element of its own: the items are
 * emitted in a fragment, each non-empty one prefixed with a dash.
 */
class FB2UnorderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * Builds a fragment with the items' nodes; items that have a value get a
   * "- " prefix.
   *
   * NOTE(review): assumes the node returned by every item's xml() supports
   * prepend() — confirm that bare text children never reach this point.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {DocumentFragment}
   */
  xml(doc) {
    return this.children.reduce((fragment, item) => {
      const node = item.xml(doc);
      if (item.hasValue()) node.prepend("- ");
      fragment.appendChild(node);
      return fragment;
    }, doc.createDocumentFragment());
  }
}

/**
 * Author record built from a display name.
 */
class FB2Author extends FB2Element {
  /**
   * Splits the name on whitespace: one word becomes the nickname, two words
   * become first and last name, and with three or more everything between
   * the first and the last word becomes the middle name.
   *
   * @param {string} s - Author name as displayed.
   */
  constructor(s) {
    super("author");
    // Trim and split on whitespace runs so stray or repeated spaces do not
    // produce empty name parts.
    const full = s.trim();
    const a = full.split(/\s+/);
    switch (a.length) {
      case 1:
        this.nickName = full;
        break;
      case 2:
        this.firstName = a[0];
        this.lastName = a[1];
        break;
      default:
        this.firstName = a[0];
        this.middleName = a.slice(1, -1).join(" ");
        this.lastName = a[a.length - 1];
        break;
    }
    this.homePage = null;
  }

  /**
   * @returns {boolean} True when any of first, middle or last name is set.
   *   The nickname is not taken into account.
   */
  hasValue() {
    return (!!this.firstName || !!this.lastName || !!this.middleName);
  }

  /**
   * @returns {string} The nickname when there is no first name, otherwise the
   *   available name parts joined with spaces.
   */
  toString() {
    if (!this.firstName) return this.nickName;
    return [ this.firstName, this.middleName, this.lastName ].reduce((list, name) => {
      if (name) list.push(name);
      return list;
    }, []).join(" ");
  }

  /**
   * Builds the <author> node with one child per non-empty field.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {Element}
   */
  xml(doc) {
    let a_el = super.xml(doc);
    // Order follows the FB2 schema for authors: the nickname precedes the home page.
    [
      [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
      [ "last-name", this.lastName ], [ "nickname", this.nickName ],
      [ "home-page", this.homePage ]
    ].forEach(it => {
      if (it[1]) {
        const e = doc.createElement(it[0]);
        e.textContent = it[1];
        a_el.appendChild(e);
      }
    });
    return a_el;
  }
}

/**
 * Image reference. Holds either a URL to download or ready-made data; the
 * data ends up in a <binary> element of the document.
 */
class FB2Image extends FB2Element {
  /**
   * @param {*} value - A string is treated as the image URL; anything else is
   *   stored as the image data itself.
   */
  constructor(value) {
    super("image");
    if (typeof(value) === "string") {
      this.url = value;
    } else {
      this.value = value;
    }
  }

  /**
   * Downloads the image (when a URL is set) and stores its base64 payload in
   * `value`, together with `type` and `size` taken from the downloaded blob.
   *
   * @param {function} [onprogress] - Progress callback forwarded to the loader.
   * @returns {Promise<void>}
   * @throws {Error} When the downloaded data cannot be encoded; the
   *   underlying error is attached as `cause`. Download errors are passed
   *   through unchanged.
   */
  async load(onprogress) {
    if (!this.url) return;
    const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
    this.type = bin.type;
    this.size = bin.size;
    try {
      const dataUrl = await new Promise((resolve, reject) => {
        const reader = new FileReader();
        // loadend fires after success and failure alike, so the promise always settles.
        reader.addEventListener("loadend", () => {
          if (reader.error) {
            reject(reader.error);
          } else {
            resolve(reader.result);
          }
        });
        reader.readAsDataURL(bin);
      });
      // Keep only the payload that follows the "data:...;base64," header.
      this.value = dataUrl.slice(dataUrl.indexOf(",") + 1);
    } catch (err) {
      throw new Error("Ошибка загрузки изображения", { cause: err });
    }
  }

  /**
   * Always reports true, so an image is never dropped as empty.
   */
  hasValue() {
    return true;
  }

  /**
   * Builds an <image> node that references the binary by id, or a text
   * placeholder ("[ id ]") when no image data is available.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {Node}
   */
  xml(doc) {
    if (this.value) {
      const el = doc.createElement(this.name);
      el.setAttribute("l:href", "#" + this.id);
      return el;
    }
    const id = this.id || "изображение";
    return doc.createTextNode(`[ ${id} ]`);
  }

  /**
   * Builds the <binary> node that carries the image data.
   *
   * @param {FB2Document} doc - Node factory.
   * @returns {Element}
   */
  xmlBinary(doc) {
    const el = doc.createElement("binary");
    el.setAttribute("id", this.id);
    el.setAttribute("content-type", this.type);
    el.textContent = this.value;
    return el;
  }

  /**
   * @returns {string} File extension (with leading dot) for the known image
   *   types, or an empty string for anything else.
   */
  suffix() {
    switch (this.type) {
      case "image/png":
        return ".png";
      case "image/jpeg":
        return ".jpg";
      case "image/gif":
        return ".gif";
      case "image/webp":
        return ".webp";
    }
    return "";
  }

  /**
   * Download hook; delegates to FB2Loader.addJob with the same arguments.
   */
  async _load(...args) {
    return FB2Loader.addJob(...args);
  }
}

/**
 * Static download helper built on fetch(). All requests share one
 * AbortController so they can be cancelled together.
 */
class FB2Loader {
  /**
   * Downloads `url` and returns its body.
   *
   * @param {string} url - Resource to fetch.
   * @param {Object} [params] - Options: `method` (default "GET"),
   *   `responseType` ("binary" yields a Blob, anything else a UTF-8 decoded
   *   string) and `onprogress(loaded, total)`, which is only called when the
   *   response reports a Content-Length.
   * @returns {Promise<Blob|string>}
   * @throws {Error} When the response status is not OK.
   */
  static async addJob(url, params) {
    const options = params || {};
    const request = {
      method: options.method || "GET",
      credentials: "same-origin",
      signal: this._getSignal()
    };
    const response = await fetch(url, request);
    if (!response.ok) throw new Error(`Сервер вернул ошибку (${response.status})`);
    const stream = response.body.getReader();
    const contentType = response.headers.get("Content-Type");
    const expected = Number(response.headers.get("Content-Length"));
    // Without a known total there is nothing meaningful to report.
    const notify = (expected && typeof(options.onprogress) === "function") ? options.onprogress : null;

    const parts = [];
    let received = 0;
    for (let step = await stream.read(); !step.done; step = await stream.read()) {
      parts.push(step.value);
      received += step.value.length;
      if (notify) notify(received, expected);
    }

    if (options.responseType === "binary") {
      return new Blob(parts, { type: contentType });
    }
    // Text: concatenate the chunks and decode them in one go.
    const bytes = new Uint8Array(received);
    let offset = 0;
    for (const part of parts) {
      bytes.set(part, offset);
      offset += part.length;
    }
    return (new TextDecoder("utf-8")).decode(bytes);
  }

  /**
   * Aborts every request started with the current controller and forgets it,
   * so later jobs get a fresh one.
   */
  static abortAll() {
    const active = this._controller;
    if (!active) return;
    active.abort();
    this._controller = null;
  }

  /**
   * Returns the signal of the shared controller, creating the controller on
   * first use.
   */
  static _getSignal() {
    if (!this._controller) this._controller = new AbortController();
    return this._controller.signal;
  }
}

/**
 * Miscellaneous static helpers.
 */
class FB2Utils {
  /**
   * Formats a date as "YYYY-MM-DD" from its local-time components, padding
   * month and day to two digits.
   *
   * @param {Date} date - Date to format.
   * @returns {string}
   */
  static dateToAtom(date) {
    const twoDigits = num => String(num).padStart(2, "0");
    return `${date.getFullYear()}-${twoDigits(date.getMonth() + 1)}-${twoDigits(date.getDate())}`;
  }
}

/**
 * Error raised for an HTML node that has no FB2 counterpart.
 */
class FB2UnknownNodeError extends Error {
  /**
   * @param {string} message - Error description.
   * @param {Object} [options] - Standard Error options (e.g. `cause`),
   *   forwarded to the Error constructor.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "UnknownNodeError";
  }
}