HTML2FB2Lib

This is a library for converting HTML to FB2.

Verzia zo dňa 23.06.2023. Pozri najnovšiu verziu.

Tento skript by nemal byť nainštalovaný priamo. Je to knižnica pre ďalšie skripty, ktorú by mali používať cez meta príkaz // @require https://update.greasyfork.org/scripts/468831/1209826/HTML2FB2Lib.js

// ==UserScript==
// @name           HTML2FB2Lib
// @namespace      90h.yy.zz
// @version        0.4.1
// @author         Ox90
// @match          http://*
// @match          https://*
// @description    This is a library for converting HTML to FB2.
// @run-at         document-start
// @license        MIT
// ==/UserScript==

/**
 * Depth-first walker over an HTML node tree that assembles a matching tree
 * of FB2 elements.
 *
 * Subclasses steer the walk through three hooks (`startNode`,
 * `processElement`, `endNode`). Assigning a truthy value to `this._stop`
 * from any hook ends the walk; `parse()` resolves to that value.
 */
class FB2Parser {
  constructor() {
    this._stop = null;
  }

  /**
   * Walks the children of `htmlNode`, converting each accepted node with
   * `FB2Element.fromHTML(node, false)` and descending into it.
   *
   * @param {Node} htmlNode - container whose children are walked
   * @param {Node} [fromNode] - child to start from instead of the first child
   * @returns {Promise<*>} the value of `this._stop` once the walk has ended
   */
  async parse(htmlNode, fromNode) {
    const walk = async (parent, startAt, target, level) => {
      for (let cur = startAt || parent.firstChild; cur; cur = cur.nextSibling) {
        const accepted = this.startNode(cur, level);
        if (accepted) {
          const converted = this.processElement(FB2Element.fromHTML(accepted, false), level);
          if (converted) {
            // Top-level elements have no parent to attach to.
            if (target) target.children.push(converted);
            await walk(accepted, null, converted, level + 1);
          }
          this.endNode(accepted, level);
        }
        // Checked after every node so a stop requested deep in the tree unwinds all levels.
        if (this._stop) break;
      }
    };
    await walk(htmlNode, fromNode, null, 0);
    return this._stop;
  }

  /**
   * Hook called before a node is converted.
   *
   * @param {Node} node - node about to be processed
   * @param {number} depth - nesting level, 0 for direct children of the root
   * @returns {Node|null} the node to convert; a falsy value skips it
   */
  startNode(node, depth) {
    return node;
  }

  /**
   * Hook called with the freshly converted element.
   *
   * @param {FB2Element|null} fb2el - result of `FB2Element.fromHTML`
   * @param {number} depth - nesting level of the source node
   * @returns {FB2Element|null} element to keep; a falsy value skips the subtree
   */
  processElement(fb2el, depth) {
    return fb2el;
  }

  /**
   * Hook called after a node accepted by `startNode` has been handled.
   *
   * @param {Node} node - the node returned by `startNode`
   * @param {number} depth - nesting level of the node
   */
  endNode(node, depth) {
  }
}

/**
 * In-memory model of a FictionBook 2.0 document that serialises itself to XML.
 *
 * The constructor initialises `binaries`, `bookAuthors`, `annotation`,
 * `genres`, `chapters` and `xmldoc`. Serialisation additionally reads these
 * optional properties when the caller has set them: `bookTitle`, `keywords`,
 * `bookDate`, `coverpage`, `sequence`, `programName`, `sourceURL`, `idPrefix`.
 */
class FB2Document {
  constructor() {
    this.binaries = [];
    this.bookAuthors = [];
    this.annotation = null;
    this.genres = [];
    this.chapters = [];
    this.xmldoc = null;
  }

  /**
   * Serialises the whole book (description, body, binaries) to an XML string.
   *
   * @returns {string} the serialised FictionBook document
   */
  toString() {
    this._ensureXMLDocument();
    try {
      const root = this.xmldoc.documentElement;
      this._markBinaries();
      root.appendChild(this._makeDescriptionElement());
      root.appendChild(this._makeBodyElement());
      this._makeBinaryElements().forEach(el => root.appendChild(el));
      return (new XMLSerializer()).serializeToString(this.xmldoc);
    } finally {
      // Always drop the working document, even when serialisation throws,
      // so a later call starts from an empty <FictionBook> root instead of
      // appending a second description/body to a half-built one.
      this.xmldoc = null;
    }
  }

  /**
   * Creates an element in the namespace of the document root.
   *
   * @param {string} name - element name
   * @returns {Element} the new, unattached element
   */
  createElement(name) {
    this._ensureXMLDocument();
    return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  }

  /**
   * @param {string} value - text content
   * @returns {Text} a text node owned by the working XML document
   */
  createTextNode(value) {
    this._ensureXMLDocument();
    return this.xmldoc.createTextNode(value);
  }

  /**
   * @returns {DocumentFragment} an empty fragment owned by the working XML document
   */
  createDocumentFragment() {
    this._ensureXMLDocument();
    return this.xmldoc.createDocumentFragment();
  }

  /**
   * Lazily creates the working XML document with an empty <FictionBook> root
   * and the `l` (xlink) prefix declared on it.
   */
  _ensureXMLDocument() {
    if (!this.xmldoc) {
      this.xmldoc = new DOMParser().parseFromString(
        '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
        "application/xml"
      );
      this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
    }
  }

  /**
   * Builds the <description> element with its <title-info> and
   * <document-info> children.
   *
   * @returns {Element} the populated <description> element
   */
  _makeDescriptionElement() {
    const desc = this.createElement("description");
    // title-info
    const t_info = this.createElement("title-info");
    desc.appendChild(t_info);
    this.genres.forEach(g => t_info.appendChild(g.xml(this)));
    // A placeholder author is written when the caller supplied none.
    (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
      t_info.appendChild(a.xml(this));
    });
    t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
    if (this.annotation) t_info.appendChild(this.annotation.xml(this));
    if (this.keywords) t_info.appendChild(this.keywords.xml(this));
    if (this.bookDate) {
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
      el.textContent = this.bookDate.getFullYear();
      t_info.appendChild(el);
    }
    if (this.coverpage) {
      const el = this.createElement("coverpage");
      // `coverpage` may be a single image or an array of images.
      (Array.isArray(this.coverpage) ? this.coverpage : [ this.coverpage ]).forEach(img => {
        el.appendChild(img.xml(this));
      });
      t_info.appendChild(el);
    }
    const lang = this.createElement("lang");
    lang.textContent = "ru";
    t_info.appendChild(lang);
    if (this.sequence) {
      const el = this.createElement("sequence");
      el.setAttribute("name", this.sequence.name);
      if (this.sequence.number) el.setAttribute("number", this.sequence.number);
      t_info.appendChild(el);
    }
    // document-info
    const d_info = this.createElement("document-info");
    desc.appendChild(d_info);
    d_info.appendChild((new FB2Author("Ox90")).xml(this));
    if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
    // Creation timestamp of the file itself (not of the book).
    const f_time = new Date();
    const f_date = this.createElement("date");
    f_date.setAttribute("value", FB2Utils.dateToAtom(f_time));
    f_date.textContent = f_time.toUTCString();
    d_info.appendChild(f_date);
    if (this.sourceURL) {
      d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
    }
    d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
    d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
    return desc;
  }

  /**
   * Builds the <body> element: an optional <title> (authors line and book
   * title) followed by every chapter.
   *
   * @returns {Element} the populated <body> element
   */
  _makeBodyElement() {
    const body = this.createElement("body");
    if (this.bookTitle || this.bookAuthors.length) {
      const title = this.createElement("title");
      body.appendChild(title);
      // join() stringifies each author through its toString().
      if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
      if (this.bookTitle) title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
    }
    this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
    return body;
  }

  /**
   * Assigns an id of the form `image<N><suffix>` to every binary that does
   * not have one yet. Binaries that already carry an id are left alone and
   * do not consume a number.
   */
  _markBinaries() {
    let idx = 0;
    this.binaries.forEach(img => {
      if (!img.id) img.id = "image" + (++idx) + img.suffix();
    });
  }

  /**
   * @returns {Element[]} one <binary> element per binary that has data
   */
  _makeBinaryElements() {
    return this.binaries.reduce((list, img) => {
      if (img.value) list.push(img.xmlBinary(this));
      return list;
    }, []);
  }

  /**
   * Derives a book id from a 32-bit string hash of `sourceURL` (or
   * `bookTitle` when no URL is set).
   *
   * @returns {string} `idPrefix` (default "h2f2l_") followed by the hash digits
   */
  _genBookId() {
    const str = this.sourceURL || this.bookTitle || "";
    let hash = 0;
    const slen = str.length;
    for (let i = 0; i < slen; ++i) {
      const ch = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + ch;
      hash = hash & hash; // Convert to 32bit integer
    }
    // Parentheses matter: without them `+` binds tighter than `||`, and a
    // configured idPrefix would be returned alone, without the hash.
    // The trailing "1" keeps positive hashes distinct from negative ones
    // that share the same absolute value.
    return (this.idPrefix || "h2f2l_") + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  }
}

/**
 * Generic FB2 element: a name, an optional text value and a list of child
 * elements. Also the base class of the specialised FB2 element classes.
 */
class FB2Element {
  /**
   * @param {string} name - FB2 element name
   * @param {*} [value] - text value; stored as `null` when omitted
   */
  constructor(name, value) {
    this.name = name;
    this.value = value !== undefined ? value : null;
    this.children = [];
  }

  /**
   * Converts one HTML (or FB2-like) DOM node to an FB2 element.
   *
   * @param {Node} node - source node; dispatch is done on `node.nodeName`
   * @param {boolean} recursive - also convert the node's children
   * @returns {FB2Element|null} the element, or `null` for ignored nodes
   *   (scripts and comments)
   * @throws {FB2UnknownNodeError} when the node name is not supported
   */
  static fromHTML(node, recursive) {
    // Built once and reused: fromHTML runs for every node of the source tree.
    // A `null` target marks node types that are dropped silently.
    const names = (FB2Element._inlineTagNames ||= new Map([
      [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
      [ "S", "strikethrough" ], [ "DEL", "strikethrough" ], [ "STRIKE", "strikethrough" ],
      [ "STRONG", "strong" ], [ "B", "strong" ], [ "BLOCKQUOTE", "cite" ],
      [ "SCRIPT", null ], [ "#comment", null ]
    ]));
    const node_name = node.nodeName;
    let fb2el = null;
    if (names.has(node_name)) {
      const name = names.get(node_name);
      if (!name) return null;
      fb2el = new FB2Element(name);
    } else {
      switch (node_name) {
        case "#text":
          return new FB2Text(node.textContent);
        case "SPAN":
          // A span carries no formatting of its own; it becomes a text container.
          fb2el = new FB2Text();
          break;
        case "P":
        case "LI":
          fb2el = new FB2Paragraph();
          break;
        case "SUBTITLE":
          fb2el = new FB2Subtitle();
          break;
        case "A":
          fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
          break;
        case "OL":
          fb2el = new FB2OrderedList();
          break;
        case "UL":
          fb2el = new FB2UnorderedList();
          break;
        case "BR":
          return new FB2EmptyLine();
        case "HR":
          return new FB2Paragraph("---");
        case "IMG":
          return new FB2Image(node.src);
        default:
          throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
      }
    }
    if (recursive) fb2el.appendContentFromHTML(node);
    return fb2el;
  }

  /**
   * @returns {boolean} true when the element has a non-null value or at
   *   least one child
   */
  hasValue() {
    return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  }

  /**
   * Replaces the children with the converted child nodes of `data`.
   *
   * @param {Node} data - node whose `childNodes` are converted
   * @param {*} [fb2doc] - passed through to appendContentFromHTML
   * @param {*} [log] - passed through to appendContentFromHTML
   */
  setContentFromHTML(data, fb2doc, log) {
    this.children = [];
    this.appendContentFromHTML(data, fb2doc, log);
  }

  /**
   * Recursively converts the child nodes of `data` and appends the results.
   *
   * @param {Node} data - node whose `childNodes` are converted
   * @param {*} [fb2doc] - not used by this implementation
   * @param {*} [log] - not used by this implementation
   */
  appendContentFromHTML(data, fb2doc, log) {
    for (const node of data.childNodes) {
      let fe = FB2Element.fromHTML(node, true);
      if (fe) this.children.push(fe);
    }
  }

  /**
   * Normalises the subtree and returns the list of elements that replaces
   * this one in its parent.
   *
   * Empty lines and subtitles found among the children are lifted to the
   * level of this element, which is split around them. Children without a
   * value are dropped, and a valueless wrapper with a single same-named
   * child is replaced by that child.
   *
   * NOTE(review): the continuation element is created with
   * `new constructor()` and only `name` is restored, so constructor-specific
   * state (for example FB2Link's `href`) is not carried over — confirm
   * whether that matters for callers.
   *
   * @returns {FB2Element[]} replacement list; its first item is this element
   */
  normalize() {
    const _normalize = function(list) {
      let done = true;
      let res_list = list.reduce((accum, cur_el) => {
        accum.push(cur_el);
        const tmp_ch = cur_el.children;
        cur_el.children = [];
        tmp_ch.forEach(el => {
          if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
            // Lift the separator one level up and continue in a fresh sibling.
            accum.push(el);
            const nm = cur_el.name;
            cur_el = new cur_el.constructor();
            if (!cur_el.name) cur_el.name = nm;
            accum.push(cur_el);
            done = false;
          } else {
            el.normalize().forEach(e => {
              // Collapse <x><x>…</x></x> into a single <x>.
              if (!e.value && e.children.length === 1 && e.name === e.children[0].name) {
                e = e.children[0];
              }
              if (e !== el) done = false;
              if (e.hasValue()) cur_el.children.push(e);
            });
          }
        });
        return accum;
      }, []);
      return { list: res_list, done: done };
    }
    // Repeat until a pass changes nothing.
    let result = _normalize([ this ]);
    while (!result.done) {
      result = _normalize(result.list);
    }
    return result.list;
  }

  /**
   * Renders the element and its children.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element} element named `this.name`
   */
  xml(doc) {
    const el = doc.createElement(this.name);
    if (this.value !== null) el.textContent = this.value;
    this.children.forEach(ch => el.appendChild(ch.xml(doc)));
    return el;
  }
}

/**
 * Base class for block-level FB2 elements. Its normalisation additionally
 * strips empty content from both ends of the block.
 */
class FB2BlockElement extends FB2Element {
  /**
   * Normalises the children, trims blank content at the right and left
   * edges, then runs the generic FB2Element normalisation.
   *
   * @returns {FB2Element[]} the list produced by FB2Element.normalize()
   */
  normalize() {
    // Preliminary pass: replace every child with its normalised form.
    const flattened = [];
    for (const child of this.children) {
      for (const piece of child.normalize()) {
        flattened.push(piece);
      }
    }
    this.children = flattened;
    // Drop emptiness on the right.
    while (this.children.length > 0) {
      const tail = this.children[this.children.length - 1];
      if (tail instanceof FB2Text) tail.trimRight();
      if (tail.hasValue()) break;
      this.children.pop();
    }
    // Drop emptiness on the left.
    while (this.children.length > 0) {
      const head = this.children[0];
      if (head instanceof FB2Text) head.trimLeft();
      if (head.hasValue()) break;
      this.children.shift();
    }
    // Final pass.
    return super.normalize();
  }
}

/**
 * Top-level FB2 element: section
 */
class FB2Chapter extends FB2Element {
  /**
   * @param {string} [title] - chapter title rendered as the section <title>
   */
  constructor(title) {
    super("section");
    this.title = title;
  }

  /**
   * Wraps bare text children in paragraphs, normalises every child and
   * keeps only the results that have a value.
   *
   * @returns {FB2Element[]} a single-item list holding this chapter
   */
  normalize() {
    const kept = [];
    for (const child of this.children) {
      let block = child;
      if (child instanceof FB2Text) {
        // Text directly inside the section gets a paragraph of its own.
        block = new FB2Paragraph();
        block.children.push(child);
      }
      for (const part of block.normalize()) {
        if (part.hasValue()) kept.push(part);
      }
    }
    this.children = kept;
    return [ this ];
  }

  /**
   * Renders the section; when a title is set, a <title><p>…</p></title>
   * is inserted before the content.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element} the <section> element
   */
  xml(doc) {
    const section = super.xml(doc);
    if (!this.title) return section;
    const heading = doc.createElement("title");
    const line = doc.createElement("p");
    line.textContent = this.title;
    heading.appendChild(line);
    section.prepend(heading);
    return section;
  }
}

/**
 * Top-level FB2 element: annotation
 */
class FB2Annotation extends FB2Element {
  constructor() {
    super("annotation");
  }

  /**
   * Groups loose inline content into paragraphs (an empty line starts a new
   * paragraph, block elements are kept as they are), then normalises every
   * child and keeps only the results that have a value.
   *
   * @returns {FB2Element[]} a single-item list holding this annotation, so
   *   the result has the same array shape as the other normalize()
   *   implementations in this file
   */
  normalize() {
    // Wrap unformatted text separated by <br> into paragraphs.
    let lp = null;
    const newParagraph = list => {
      lp = new FB2Paragraph();
      list.push(lp);
    };
    this.children = this.children.reduce((list, el) => {
      if (el.name === "empty-line") {
        newParagraph(list);
      } else if (el instanceof FB2BlockElement) {
        list.push(el);
        // Inline content after a block must open a fresh paragraph.
        lp = null;
      } else {
        if (!lp) newParagraph(list);
        lp.children.push(el);
      }
      return list;
    }, []);
    // Run the children's own normalisation here
    // to keep them from bubbling further up.
    this.children = this.children.reduce((list, el) => {
      el.normalize().forEach(el => {
        if (el.hasValue()) list.push(el);
      });
      return list;
    }, []);
    return [ this ];
  }
}

/**
 * FB2 block element named "subtitle".
 */
class FB2Subtitle extends FB2BlockElement {
  /**
   * @param {*} [value] - text value forwarded to the base constructor
   */
  constructor(value) {
    super("subtitle", value);
  }
}

/**
 * FB2 block element named "p" (paragraph).
 */
class FB2Paragraph extends FB2BlockElement {
  /**
   * @param {*} [value] - text value forwarded to the base constructor
   */
  constructor(value) {
    super("p", value);
  }
}

/**
 * FB2 element named "empty-line".
 */
class FB2EmptyLine extends FB2Element {
  constructor() {
    super("empty-line");
  }

  /**
   * Always reports a value, although the element has neither text nor
   * children, so checks based on hasValue() never treat it as empty.
   *
   * @returns {boolean} always true
   */
  hasValue() {
    return true;
  }
}

/**
 * Text content: either a plain string in `value`, or a container of child
 * elements that renders without a wrapping tag.
 */
class FB2Text extends FB2Element {
  /**
   * @param {string} [value] - the text; omit for a container of children
   */
  constructor(value) {
    super("text", value);
  }

  /**
   * Strips leading whitespace. A string value that becomes empty is replaced
   * by `null`; when there is no value left, leading children without a value
   * are removed (text children are trimmed first).
   */
  trimLeft() {
    if (typeof(this.value) === "string") this.value = this.value.trimLeft() || null;
    if (!this.value) {
      while (this.children.length) {
        const first_child = this.children[0];
        if (first_child instanceof FB2Text) first_child.trimLeft();
        if (first_child.hasValue()) break;
        this.children.shift();
      }
    }
  }

  /**
   * Strips trailing whitespace. Trailing children without a value are
   * removed first (text children are trimmed); the string value itself is
   * trimmed only when no children remain.
   */
  trimRight() {
    while (this.children.length) {
      const last_child = this.children[this.children.length - 1];
      if (last_child instanceof FB2Text) last_child.trimRight();
      if (last_child.hasValue()) break;
      this.children.pop();
    }
    if (!this.children.length && typeof(this.value) === "string") {
      this.value = this.value.trimRight() || null;
    }
  }

  /**
   * Renders the text without a wrapping element.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {DocumentFragment|Text} a fragment of the rendered children
   *   when there is no value but there are children, otherwise a text node
   */
  xml(doc) {
    if (!this.value && this.children.length) {
      let fr = doc.createDocumentFragment();
      for (const ch of this.children) {
        fr.appendChild(ch.xml(doc));
      }
      return fr;
    }
    // createTextNode() stringifies its argument, so a null value would end
    // up in the book as the literal text "null".
    return doc.createTextNode(this.value ?? "");
  }
}

/**
 * FB2 hyperlink element ("a") carrying its target in `href`.
 */
class FB2Link extends FB2Element {
  /**
   * @param {string} href - link target
   */
  constructor(href) {
    super("a");
    this.href = href;
  }

  /**
   * Renders the link and sets its `l:href` attribute from `this.href`.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element} the <a> element
   */
  xml(doc) {
    const anchor = super.xml(doc);
    anchor.setAttribute("l:href", this.href);
    return anchor;
  }
}

/**
 * Ordered list. It renders no wrapping element: each child is emitted in
 * sequence, with a running "N. " marker prepended to items that have a value.
 */
class FB2OrderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {DocumentFragment} the rendered children
   */
  xml(doc) {
    const fr = doc.createDocumentFragment();
    let pos = 0;
    for (const ch of this.children) {
      const ch_el = ch.xml(doc);
      // Only elements and fragments implement prepend(). A child rendered as
      // a bare Text node (whitespace between items, an image placeholder)
      // is emitted unnumbered instead of raising a TypeError.
      if (ch.hasValue() && typeof(ch_el.prepend) === "function") {
        ++pos;
        ch_el.prepend(`${pos}. `);
      }
      fr.appendChild(ch_el);
    }
    return fr;
  }
}

/**
 * Unordered list. It renders no wrapping element: each child is emitted in
 * sequence, with a "- " marker prepended to items that have a value.
 */
class FB2UnorderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {DocumentFragment} the rendered children
   */
  xml(doc) {
    const fr = doc.createDocumentFragment();
    for (const ch of this.children) {
      const ch_el = ch.xml(doc);
      // Only elements and fragments implement prepend(). A child rendered as
      // a bare Text node (whitespace between items, an image placeholder)
      // is emitted without a marker instead of raising a TypeError.
      if (ch.hasValue() && typeof(ch_el.prepend) === "function") ch_el.prepend("- ");
      fr.appendChild(ch_el);
    }
    return fr;
  }
}

/**
 * FB2 author record built from a display name.
 */
class FB2Author extends FB2Element {
  /**
   * Splits the name on whitespace: one word becomes the nickname, two words
   * become first and last name, and with more words everything between the
   * first and the last one becomes the middle name.
   *
   * @param {string} s - author name as displayed
   */
  constructor(s) {
    super("author");
    // Trim and split on whitespace runs so stray spaces cannot produce empty
    // name parts (an empty first name would make toString() fall back to an
    // unset nickname).
    const a = s.trim().split(/\s+/);
    switch (a.length) {
      case 1:
        this.nickName = a[0];
        break;
      case 2:
        this.firstName = a[0];
        this.lastName = a[1];
        break;
      default:
        this.firstName = a[0];
        this.middleName = a.slice(1, -1).join(" ");
        this.lastName = a[a.length - 1];
        break;
    }
    this.homePage = null;
  }

  /**
   * @returns {boolean} true when a first, middle or last name is set; the
   *   nickname is not taken into account
   */
  hasValue() {
    return (!!this.firstName || !!this.lastName || !!this.middleName);
  }

  /**
   * @returns {string} the non-empty name parts joined with spaces, or the
   *   nickname when there is no first name
   */
  toString() {
    if (!this.firstName) return this.nickName || "";
    return [ this.firstName, this.middleName, this.lastName ].reduce((list, name) => {
      if (name) list.push(name);
      return list;
    }, []).join(" ");
  }

  /**
   * Renders the <author> element with one child per non-empty field.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element} the <author> element
   */
  xml(doc) {
    let a_el = super.xml(doc);
    // Child order follows the FictionBook authorType sequence, where
    // <nickname> precedes <home-page>.
    [
      [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
      [ "last-name", this.lastName ], [ "nickname", this.nickName ],
      [ "home-page", this.homePage ]
    ].forEach(it => {
      if (it[1]) {
        const e = doc.createElement(it[0]);
        e.textContent = it[1];
        a_el.appendChild(e);
      }
    });
    return a_el;
  }
}

/**
 * FB2 image. It is created either from a URL (loaded later with `load()`)
 * or directly from already available data stored in `value`.
 */
class FB2Image extends FB2Element {
  /**
   * @param {*} value - a string is taken as the image URL; anything else is
   *   stored as the image data
   */
  constructor(value) {
    super("image");
    if (typeof(value) === "string") {
      this.url = value;
    } else {
      this.value = value;
    }
  }

  /**
   * Downloads the image from `this.url` and stores its base64 payload in
   * `value`, together with `type` and `size` of the downloaded blob.
   * Does nothing when no URL is set.
   *
   * @param {Function} [onprogress] - progress callback forwarded to the loader
   * @returns {Promise<void>}
   * @throws {Error} when the downloaded data cannot be converted
   */
  async load(onprogress) {
    if (!this.url) return;
    const blob = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
    this.type = blob.type;
    this.size = blob.size;
    try {
      const dataUrl = await new Promise(resolve => {
        const reader = new FileReader();
        reader.addEventListener("loadend", event => resolve(event.target.result));
        reader.readAsDataURL(blob);
      });
      // Keep only the payload that follows the "data:...;base64," header.
      this.value = dataUrl.substr(dataUrl.indexOf(",") + 1);
    } catch (err) {
      throw new Error("Ошибка загрузки изображения");
    }
  }

  /**
   * @returns {boolean} always true
   */
  hasValue() {
    return true;
  }

  /**
   * Renders a reference to the binary when image data is present, otherwise
   * a bracketed text placeholder.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element|Text} <image l:href="#id"/> or the placeholder text
   */
  xml(doc) {
    if (!this.value) {
      const label = this.id || "изображение";
      return doc.createTextNode(`[ ${label} ]`);
    }
    const ref = doc.createElement(this.name);
    ref.setAttribute("l:href", "#" + this.id);
    return ref;
  }

  /**
   * Renders the <binary> element that carries the image data.
   *
   * @param {FB2Document} doc - factory used to create XML nodes
   * @returns {Element} the <binary> element
   */
  xmlBinary(doc) {
    const binary = doc.createElement("binary");
    binary.setAttribute("id", this.id);
    binary.setAttribute("content-type", this.type);
    binary.textContent = this.value;
    return binary;
  }

  /**
   * @returns {string} file extension (with dot) for the known MIME types,
   *   or an empty string
   */
  suffix() {
    const known = new Map([
      [ "image/png", ".png" ],
      [ "image/jpeg", ".jpg" ],
      [ "image/gif", ".gif" ],
      [ "image/webp", ".webp" ]
    ]);
    return known.get(this.type) || "";
  }

  /**
   * Thin indirection over FB2Loader.addJob.
   *
   * @param {...*} args - arguments forwarded unchanged
   * @returns {Promise<*>} whatever the loader resolves to
   */
  async _load(...args) {
    return FB2Loader.addJob(...args);
  }
}

/**
 * Static download helper built on fetch(). All jobs share one
 * AbortController so they can be cancelled together.
 */
class FB2Loader {
  /**
   * Downloads `url` and returns its content.
   *
   * @param {string} url - resource to fetch
   * @param {Object} [params] - options: `method` (default "GET"),
   *   `responseType` ("binary" yields a Blob, anything else UTF-8 text) and
   *   `onprogress(loaded, total)`, which is called only when the response
   *   reports a Content-Length
   * @returns {Promise<Blob|string>} the downloaded content
   * @throws {Error} when the response status is not ok
   */
  static async addJob(url, params) {
    const opts = params || {};
    const resp = await fetch(url, {
      method: opts.method || "GET",
      credentials: "same-origin",
      signal: this._getSignal()
    });
    if (!resp.ok) throw new Error(`Сервер вернул ошибку (${resp.status})`);
    const reader = resp.body.getReader();
    const type = resp.headers.get("Content-Type");
    const total = +resp.headers.get("Content-Length");
    const notify = (total && typeof(opts.onprogress) === "function") ? opts.onprogress : null;
    const chunks = [];
    let loaded = 0;
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      chunks.push(step.value);
      loaded += step.value.length;
      if (notify) notify(loaded, total);
    }
    if (opts.responseType === "binary") {
      return new Blob(chunks, { type: type });
    }
    // Join the chunks before decoding so multi-byte characters that were
    // split across chunk boundaries decode correctly.
    const bytes = new Uint8Array(loaded);
    let offset = 0;
    chunks.forEach(part => {
      bytes.set(part, offset);
      offset += part.length;
    });
    return (new TextDecoder("utf-8")).decode(bytes);
  }

  /**
   * Aborts every job started since the last abort and forgets the
   * controller, so later jobs get a fresh one.
   */
  static abortAll() {
    if (!this._controller) return;
    this._controller.abort();
    this._controller = null;
  }

  /**
   * @returns {AbortSignal} signal of the shared controller, created on demand
   */
  static _getSignal() {
    if (!this._controller) this._controller = new AbortController();
    return this._controller.signal;
  }
}

/**
 * Small formatting helpers.
 */
class FB2Utils {
  /**
   * Formats the local calendar date of `date` as YYYY-MM-DD.
   *
   * @param {Date} date - date to format
   * @returns {string} year, two-digit month and two-digit day joined by "-"
   */
  static dateToAtom(date) {
    const twoDigits = num => (num < 10 ? "0" : "") + num;
    const month = date.getMonth() + 1; // getMonth() is zero-based
    return `${date.getFullYear()}-${twoDigits(month)}-${twoDigits(date.getDate())}`;
  }
}

/**
 * Error thrown by FB2Element.fromHTML for a node name it does not support.
 * Its `name` is "UnknownNodeError".
 */
class FB2UnknownNodeError extends Error {
  /**
   * @param {string} message - human-readable description
   */
  constructor(message) {
    super(message);
    this.name = "UnknownNodeError";
  }
}