// This script should not be installed directly. It is a library for other scripts to include with the meta directive // @require https://update.greasyfork.org/scripts/468831/1209826/HTML2FB2Lib.js
// ==UserScript==
// @name HTML2FB2Lib
// @namespace 90h.yy.zz
// @version 0.4.1
// @author Ox90
// @match http://*
// @match https://*
// @description This is a library for converting HTML to FB2.
// @run-at document-start
// @license MIT
// ==/UserScript==
/**
 * Depth-first walker over an HTML node tree that builds a parallel tree of
 * FB2 elements via FB2Element.fromHTML.
 *
 * Subclasses steer the walk by overriding startNode, processElement and
 * endNode. Assigning a truthy value to `this._stop` ends the traversal.
 */
class FB2Parser {
  constructor() {
    // Traversal stop flag; also the value parse() resolves with.
    this._stop = null;
  }

  /**
   * Visits the child nodes of `htmlNode` and everything beneath them.
   *
   * @param {Node} htmlNode - Container whose children are visited.
   * @param {Node} [fromNode] - Child to start at; when falsy the walk starts
   *   at `htmlNode.firstChild`.
   * @returns {Promise<*>} Resolves with the current value of `this._stop`.
   */
  async parse(htmlNode, fromNode) {
    const walk = async (parent, start, target, level) => {
      for (let current = start || parent.firstChild; current; current = current.nextSibling) {
        const accepted = this.startNode(current, level);
        if (accepted) {
          const converted = this.processElement(FB2Element.fromHTML(accepted, false), level);
          if (converted) {
            if (target) target.children.push(converted);
            await walk(accepted, null, converted, level + 1);
          }
          // endNode runs for every accepted node, even when its element was rejected.
          this.endNode(accepted, level);
        }
        if (this._stop) break;
      }
    };
    await walk(htmlNode, fromNode, null, 0);
    return this._stop;
  }

  /**
   * Hook called before a node is converted.
   *
   * @param {Node} node - Node about to be visited.
   * @param {number} depth - Nesting level, 0 for the top level.
   * @returns {Node|null} Node to convert (may be a substitute); a falsy value skips it.
   */
  startNode(node, depth) {
    return node;
  }

  /**
   * Hook called with the freshly converted element.
   *
   * @param {FB2Element|null} fb2el - Result of FB2Element.fromHTML.
   * @param {number} depth - Nesting level of the source node.
   * @returns {FB2Element|null} Element to attach and descend into; a falsy value drops it.
   */
  processElement(fb2el, depth) {
    return fb2el;
  }

  /**
   * Hook called after a node and its subtree were handled. No-op by default.
   *
   * @param {Node} node - Node returned by startNode.
   * @param {number} depth - Nesting level of the node.
   */
  endNode(node, depth) {
  }
}
/**
 * In-memory model of an FB2 (FictionBook 2.0) book that serializes itself
 * to an XML string.
 *
 * Besides the fields initialised in the constructor, serialization reads these
 * optional properties when they are set: bookTitle, keywords, bookDate,
 * coverpage, sequence, programName, sourceURL and idPrefix.
 */
class FB2Document {
  constructor() {
    this.binaries = [];
    this.bookAuthors = [];
    this.annotation = null;
    this.genres = [];
    this.chapters = [];
    // Working XML document; created lazily and dropped after toString().
    this.xmldoc = null;
  }

  /**
   * Serializes the whole book (description, body, binaries) to FB2 XML.
   *
   * @returns {string} The serialized XML document.
   */
  toString() {
    this._ensureXMLDocument();
    const root = this.xmldoc.documentElement;
    // Ids must be assigned before any element references them.
    this._markBinaries();
    root.appendChild(this._makeDescriptionElement());
    root.appendChild(this._makeBodyElement());
    this._makeBinaryElements().forEach(el => root.appendChild(el));
    const res = (new XMLSerializer()).serializeToString(this.xmldoc);
    // Reset so the next toString() starts from an empty document.
    this.xmldoc = null;
    return res;
  }

  /**
   * Creates an element in the namespace of the document root.
   *
   * @param {string} name - Element name.
   * @returns {Element} The new, unattached element.
   */
  createElement(name) {
    this._ensureXMLDocument();
    return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  }

  /**
   * Creates a text node owned by the working XML document.
   *
   * @param {string} value - Text content.
   * @returns {Text} The new text node.
   */
  createTextNode(value) {
    this._ensureXMLDocument();
    return this.xmldoc.createTextNode(value);
  }

  /**
   * Creates an empty document fragment owned by the working XML document.
   *
   * @returns {DocumentFragment} The new fragment.
   */
  createDocumentFragment() {
    this._ensureXMLDocument();
    return this.xmldoc.createDocumentFragment();
  }

  /** Lazily creates the working XML document with an empty FictionBook root. */
  _ensureXMLDocument() {
    if (!this.xmldoc) {
      this.xmldoc = new DOMParser().parseFromString(
        '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
        "application/xml"
      );
      this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
    }
  }

  /**
   * Builds the <description> element with its title-info and document-info parts.
   *
   * @returns {Element} The populated description element.
   */
  _makeDescriptionElement() {
    const desc = this.createElement("description");
    // title-info
    const t_info = this.createElement("title-info");
    desc.appendChild(t_info);
    this.genres.forEach(g => t_info.appendChild(g.xml(this)));
    // Fall back to a placeholder author when none was provided.
    (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
      t_info.appendChild(a.xml(this));
    });
    t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
    if (this.annotation) t_info.appendChild(this.annotation.xml(this));
    if (this.keywords) t_info.appendChild(this.keywords.xml(this));
    if (this.bookDate) {
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
      el.textContent = this.bookDate.getFullYear();
      t_info.appendChild(el);
    }
    if (this.coverpage) {
      const el = this.createElement("coverpage");
      // coverpage may be a single image or an array of images.
      (Array.isArray(this.coverpage) ? this.coverpage : [ this.coverpage ]).forEach(img => {
        el.appendChild(img.xml(this));
      });
      t_info.appendChild(el);
    }
    const lang = this.createElement("lang");
    lang.textContent = "ru";
    t_info.appendChild(lang);
    if (this.sequence) {
      const el = this.createElement("sequence");
      el.setAttribute("name", this.sequence.name);
      if (this.sequence.number) el.setAttribute("number", this.sequence.number);
      t_info.appendChild(el);
    }
    // document-info
    const d_info = this.createElement("document-info");
    desc.appendChild(d_info);
    d_info.appendChild((new FB2Author("Ox90")).xml(this));
    if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
    d_info.appendChild((() => {
      // Creation time of the file itself, not of the book.
      const f_time = new Date();
      const el = this.createElement("date");
      el.setAttribute("value", FB2Utils.dateToAtom(f_time));
      el.textContent = f_time.toUTCString();
      return el;
    })());
    if (this.sourceURL) {
      d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
    }
    d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
    d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
    return desc;
  }

  /**
   * Builds the <body> element: an optional title block followed by the chapters.
   *
   * @returns {Element} The populated body element.
   */
  _makeBodyElement() {
    const body = this.createElement("body");
    if (this.bookTitle || this.bookAuthors.length) {
      const title = this.createElement("title");
      body.appendChild(title);
      // join() stringifies each author via its toString().
      if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
      if (this.bookTitle) title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
    }
    this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
    return body;
  }

  /** Assigns sequential ids ("image<N><suffix>") to binaries that have none yet. */
  _markBinaries() {
    let idx = 0;
    this.binaries.forEach(img => {
      if (!img.id) img.id = "image" + (++idx) + img.suffix();
    });
  }

  /**
   * Builds <binary> elements for every binary that has data.
   *
   * @returns {Element[]} Binary elements, in the order of `this.binaries`.
   */
  _makeBinaryElements() {
    return this.binaries.reduce((list, img) => {
      if (img.value) list.push(img.xmlBinary(this));
      return list;
    }, []);
  }

  /**
   * Derives a book id from the source URL (or the title) with a 32-bit string hash.
   *
   * @returns {string} `idPrefix` (default "h2f2l_") followed by the hash digits.
   */
  _genBookId() {
    let str = this.sourceURL || this.bookTitle || "";
    let hash = 0;
    const slen = str.length;
    for (let i = 0; i < slen; ++i) {
      const ch = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + ch;
      hash = hash & hash; // Convert to 32bit integer
    }
    // Parenthesised on purpose: `+` binds tighter than `||`, so without the
    // parentheses a custom idPrefix would replace the whole id.
    return (this.idPrefix || "h2f2l_") + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  }
}
/**
 * Generic node of the FB2 element tree: a tag name, an optional scalar value
 * and an ordered list of child elements.
 */
class FB2Element {
  /**
   * @param {string} name - FB2 tag name.
   * @param {*} [value] - Scalar content; stored as null when omitted.
   */
  constructor(name, value) {
    this.name = name;
    this.value = value === undefined ? null : value;
    this.children = [];
  }

  /**
   * Converts a single HTML node to the matching FB2 element.
   *
   * @param {Node} node - Source node; dispatch is by `node.nodeName`.
   * @param {boolean} recursive - When truthy, container elements are filled
   *   from the node's children as well.
   * @returns {FB2Element|null} The converted element, or null for ignored
   *   nodes (scripts and comments).
   * @throws {FB2UnknownNodeError} When the node name is not recognised.
   */
  static fromHTML(node, recursive) {
    const tag = node.nodeName;

    // Plain inline tags map straight to an FB2 tag name; null means "ignore".
    const inlineNames = new Map([
      [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
      [ "S", "strikethrough" ], [ "DEL", "strikethrough" ], [ "STRIKE", "strikethrough" ],
      [ "STRONG", "strong" ], [ "BLOCKQUOTE", "cite" ],
      [ "SCRIPT", null ], [ "#comment", null ]
    ]);
    if (inlineNames.has(tag)) {
      const mapped = inlineNames.get(tag);
      if (!mapped) return null;
      const inline = new FB2Element(mapped);
      if (recursive) inline.appendContentFromHTML(node);
      return inline;
    }

    // Leaf nodes are returned as-is and never receive child content.
    const leafMakers = new Map([
      [ "#text", () => new FB2Text(node.textContent) ],
      [ "BR", () => new FB2EmptyLine() ],
      [ "HR", () => new FB2Paragraph("---") ],
      [ "IMG", () => new FB2Image(node.src) ]
    ]);
    if (leafMakers.has(tag)) return leafMakers.get(tag)();

    // Containers may be filled recursively from the node's children.
    const containerMakers = new Map([
      [ "SPAN", () => new FB2Text() ],
      [ "P", () => new FB2Paragraph() ],
      [ "LI", () => new FB2Paragraph() ],
      [ "SUBTITLE", () => new FB2Subtitle() ],
      [ "A", () => new FB2Link(node.href || node.getAttribute("l:href")) ],
      [ "OL", () => new FB2OrderedList() ],
      [ "UL", () => new FB2UnorderedList() ]
    ]);
    if (!containerMakers.has(tag)) {
      throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
    }
    const container = containerMakers.get(tag)();
    if (recursive) container.appendContentFromHTML(node);
    return container;
  }

  /**
   * Tells whether the element carries any content.
   *
   * @returns {boolean} True when value is neither null nor undefined, or when
   *   there is at least one child.
   */
  hasValue() {
    if (this.value !== undefined && this.value !== null) return true;
    return this.children.length > 0;
  }

  /**
   * Replaces the current children with the converted children of `data`.
   *
   * @param {Node} data - Node whose childNodes are converted.
   * @param {*} [fb2doc] - Passed through to appendContentFromHTML.
   * @param {*} [log] - Passed through to appendContentFromHTML.
   */
  setContentFromHTML(data, fb2doc, log) {
    this.children = [];
    this.appendContentFromHTML(data, fb2doc, log);
  }

  /**
   * Appends the recursively converted children of `data`.
   *
   * @param {Node} data - Node whose childNodes are converted.
   * @param {*} [fb2doc] - Not used by this implementation.
   * @param {*} [log] - Not used by this implementation.
   */
  appendContentFromHTML(data, fb2doc, log) {
    for (const child of data.childNodes) {
      const converted = FB2Element.fromHTML(child, true);
      if (converted) this.children.push(converted);
    }
  }

  /**
   * Normalizes the subtree. Empty lines and subtitles found among the children
   * are lifted to the level of this element, splitting it in two around them;
   * a wrapper whose only child has the same tag name is collapsed; children
   * without content are dropped.
   *
   * @returns {FB2Element[]} The elements that replace this one in its parent.
   */
  normalize() {
    // One pass over `items`; `done` is false when the pass changed anything.
    const runPass = items => {
      let settled = true;
      const output = [];
      for (const item of items) {
        let holder = item;
        output.push(holder);
        const pending = holder.children;
        holder.children = [];
        for (const child of pending) {
          if (child instanceof FB2EmptyLine || child instanceof FB2Subtitle) {
            // Lift the break out and continue in a fresh sibling of the same kind.
            output.push(child);
            const inheritedName = holder.name;
            holder = new holder.constructor();
            if (!holder.name) holder.name = inheritedName;
            output.push(holder);
            settled = false;
            continue;
          }
          for (let piece of child.normalize()) {
            // Collapse <x><x>...</x></x> into the inner element.
            if (!piece.value && piece.children.length === 1 && piece.name === piece.children[0].name) {
              piece = piece.children[0];
            }
            if (piece !== child) settled = false;
            if (piece.hasValue()) holder.children.push(piece);
          }
        }
      }
      return { list: output, done: settled };
    };

    // Repeat until a pass leaves the structure untouched.
    let pass = runPass([ this ]);
    while (!pass.done) {
      pass = runPass(pass.list);
    }
    return pass.list;
  }

  /**
   * Renders the element and its children as an XML node.
   *
   * @param {object} doc - Node factory providing createElement (an FB2Document).
   * @returns {Node} The created XML element.
   */
  xml(doc) {
    const node = doc.createElement(this.name);
    if (this.value !== null) node.textContent = this.value;
    for (const child of this.children) {
      node.appendChild(child.xml(doc));
    }
    return node;
  }
}
/**
 * Base class for block-level FB2 elements. Its normalization additionally
 * strips empty content from both ends of the block.
 */
class FB2BlockElement extends FB2Element {
  /**
   * Normalizes the children, trims empty content at the right and left edges,
   * then applies the generic FB2Element normalization.
   *
   * @returns {FB2Element[]} Result of FB2Element's normalize().
   */
  normalize() {
    // Preliminary normalization: replace each child with its normalized form.
    const flattened = [];
    for (const child of this.children) {
      for (const part of child.normalize()) {
        flattened.push(part);
      }
    }
    this.children = flattened;

    // Strip empty content on the right.
    for (let last = this.children.length - 1; last >= 0; --last) {
      const tail = this.children[last];
      if (tail instanceof FB2Text) tail.trimRight();
      if (tail.hasValue()) break;
      this.children.pop();
    }

    // Strip empty content on the left.
    while (this.children.length > 0) {
      const head = this.children[0];
      if (head instanceof FB2Text) head.trimLeft();
      if (head.hasValue()) break;
      this.children.shift();
    }

    // Final normalization.
    return super.normalize();
  }
}
/**
 * Top-level FB2 element: a <section> with an optional title.
 */
class FB2Chapter extends FB2Element {
  /**
   * @param {string} [title] - Chapter title rendered as <title><p>…</p></title>.
   */
  constructor(title) {
    super("section");
    this.title = title;
  }

  /**
   * Wraps bare text children in paragraphs, normalizes every child and keeps
   * only those that still have content.
   *
   * @returns {FB2Element[]} Always a one-element array holding this chapter.
   */
  normalize() {
    const kept = [];
    for (const child of this.children) {
      let block = child;
      if (child instanceof FB2Text) {
        // Bare text at section level has to live inside a paragraph.
        block = new FB2Paragraph();
        block.children.push(child);
      }
      for (const part of block.normalize()) {
        if (part.hasValue()) kept.push(part);
      }
    }
    this.children = kept;
    return [ this ];
  }

  /**
   * Renders the section, placing the title (when set) before all content.
   *
   * @param {object} doc - Node factory providing createElement (an FB2Document).
   * @returns {Node} The section element.
   */
  xml(doc) {
    const section = super.xml(doc);
    if (!this.title) return section;
    const titleEl = doc.createElement("title");
    const para = doc.createElement("p");
    para.textContent = this.title;
    titleEl.appendChild(para);
    section.prepend(titleEl);
    return section;
  }
}
/**
 * Top-level FB2 element: <annotation>.
 */
class FB2Annotation extends FB2Element {
  constructor() {
    super("annotation");
  }

  /**
   * Groups loose inline content into paragraphs and normalizes the result.
   *
   * An "empty-line" child starts a new paragraph, block elements are kept as
   * they are, and any other child is appended to the current paragraph.
   *
   * @returns {FB2Element[]} A one-element array holding this annotation, in
   *   line with the other normalize() implementations, whose callers iterate
   *   over the result.
   */
  normalize() {
    // Wrap unformatted text separated by <br> into paragraphs.
    let lp = null;
    const newParagraph = list => {
      lp = new FB2Paragraph();
      list.push(lp);
    };
    this.children = this.children.reduce((list, el) => {
      if (el.name === "empty-line") {
        newParagraph(list);
      } else if (el instanceof FB2BlockElement) {
        list.push(el);
        // Content following a block must start a fresh paragraph.
        lp = null;
      } else {
        if (!lp) newParagraph(list);
        lp.children.push(el);
      }
      return list;
    }, []);
    // Normalize the children here so that nothing bubbles up past the annotation.
    this.children = this.children.reduce((list, el) => {
      el.normalize().forEach(el => {
        if (el.hasValue()) list.push(el);
      });
      return list;
    }, []);
    return [ this ];
  }
}
/**
 * Block element rendered with the FB2 tag name "subtitle".
 */
class FB2Subtitle extends FB2BlockElement {
/**
 * @param {*} [value] - Optional scalar content passed to the base constructor.
 */
constructor(value) {
super("subtitle", value);
}
}
/**
 * Block element rendered with the FB2 tag name "p".
 */
class FB2Paragraph extends FB2BlockElement {
/**
 * @param {*} [value] - Optional scalar content passed to the base constructor.
 */
constructor(value) {
super("p", value);
}
}
/**
 * Element rendered with the FB2 tag name "empty-line".
 */
class FB2EmptyLine extends FB2Element {
constructor() {
super("empty-line");
}
/**
 * Always reports content, so hasValue()-based filtering never drops an empty line.
 * @returns {boolean} Always true.
 */
hasValue() {
return true;
}
}
/**
 * Text content. Holds either a string value (created from a text node) or a
 * list of children (created from a SPAN); it is rendered without a wrapping tag.
 */
class FB2Text extends FB2Element {
  /**
   * @param {string} [value] - Text content.
   */
  constructor(value) {
    super("text", value);
  }

  /**
   * Removes leading whitespace. A value that becomes empty is replaced by
   * null; when there is no value, leading children are trimmed and dropped
   * until one with content is found.
   */
  trimLeft() {
    if (typeof this.value === "string") this.value = this.value.trimStart() || null;
    if (!this.value) {
      while (this.children.length) {
        const first_child = this.children[0];
        if (first_child instanceof FB2Text) first_child.trimLeft();
        if (first_child.hasValue()) break;
        this.children.shift();
      }
    }
  }

  /**
   * Removes trailing whitespace. Trailing children are trimmed and dropped
   * until one with content is found; the own value is trimmed only when no
   * children remain.
   */
  trimRight() {
    while (this.children.length) {
      const last_child = this.children[this.children.length - 1];
      if (last_child instanceof FB2Text) last_child.trimRight();
      if (last_child.hasValue()) break;
      this.children.pop();
    }
    if (!this.children.length && typeof this.value === "string") {
      this.value = this.value.trimEnd() || null;
    }
  }

  /**
   * Renders the text as a text node, or as a fragment of the rendered
   * children when there is no own value.
   *
   * @param {object} doc - Node factory (an FB2Document).
   * @returns {Node} A Text node or a DocumentFragment.
   */
  xml(doc) {
    if (!this.value && this.children.length) {
      const fr = doc.createDocumentFragment();
      for (const ch of this.children) {
        fr.appendChild(ch.xml(doc));
      }
      return fr;
    }
    // createTextNode stringifies its argument, so a null value would be
    // emitted as the literal text "null"; emit an empty text node instead.
    return doc.createTextNode(this.value ?? "");
  }
}
/**
 * Element rendered with the FB2 tag name "a"; the target is kept in `href`.
 */
class FB2Link extends FB2Element {
/**
 * @param {string} href - Link target, written to the "l:href" attribute by xml().
 */
constructor(href) {
super("a");
this.href = href;
}
/**
 * Renders the link element and sets its "l:href" attribute.
 * @param {object} doc - Node factory providing createElement (an FB2Document).
 * @returns {Node} The created element.
 */
xml(doc) {
const el = super.xml(doc);
el.setAttribute("l:href", this.href);
return el;
}
}
/**
 * Ordered list. It is not rendered as an element of its own: xml() returns a
 * fragment of the rendered children, each non-empty one prefixed with its number.
 */
class FB2OrderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * Renders the items into a fragment, numbering those that have content.
   * NOTE(review): assumes every rendered child supports prepend() — confirm
   * for children that render as plain text nodes.
   *
   * @param {object} doc - Node factory (an FB2Document).
   * @returns {DocumentFragment} Fragment holding the rendered items.
   */
  xml(doc) {
    const fragment = doc.createDocumentFragment();
    let counter = 0;
    this.children.forEach(item => {
      const itemNode = item.xml(doc);
      if (item.hasValue()) {
        counter += 1;
        itemNode.prepend(`${counter}. `);
      }
      fragment.appendChild(itemNode);
    });
    return fragment;
  }
}
/**
 * Unordered list. It is not rendered as an element of its own: xml() returns
 * a fragment of the rendered children, each non-empty one prefixed with "- ".
 */
class FB2UnorderedList extends FB2Element {
  constructor() {
    super("list");
  }

  /**
   * Renders the items into a fragment, marking those that have content.
   * NOTE(review): assumes every rendered child supports prepend() — confirm
   * for children that render as plain text nodes.
   *
   * @param {object} doc - Node factory (an FB2Document).
   * @returns {DocumentFragment} Fragment holding the rendered items.
   */
  xml(doc) {
    return this.children.reduce((fragment, item) => {
      const itemNode = item.xml(doc);
      if (item.hasValue()) itemNode.prepend("- ");
      fragment.appendChild(itemNode);
      return fragment;
    }, doc.createDocumentFragment());
  }
}
/**
 * FB2 <author> element built from a free-form name string.
 */
class FB2Author extends FB2Element {
  /**
   * Splits the name on whitespace: one word becomes the nickname, two words
   * become first and last name, and with more words everything between the
   * first and the last one becomes the middle name.
   *
   * @param {string} s - Author name.
   */
  constructor(s) {
    super("author");
    // Trim and split on whitespace runs so that stray or repeated spaces do
    // not produce empty name parts.
    const a = s.trim().split(/\s+/);
    switch (a.length) {
      case 1:
        this.nickName = a[0];
        break;
      case 2:
        this.firstName = a[0];
        this.lastName = a[1];
        break;
      default:
        this.firstName = a[0];
        this.middleName = a.slice(1, -1).join(" ");
        this.lastName = a[a.length - 1];
        break;
    }
    this.homePage = null;
  }

  /**
   * @returns {boolean} True when a first, middle or last name is set; the
   *   nickname is not taken into account.
   */
  hasValue() {
    return (!!this.firstName || !!this.lastName || !!this.middleName);
  }

  /**
   * @returns {string} The nickname when there is no first name, otherwise the
   *   non-empty name parts joined with spaces.
   */
  toString() {
    if (!this.firstName) return this.nickName;
    return [ this.firstName, this.middleName, this.lastName ].filter(name => !!name).join(" ");
  }

  /**
   * Renders the author with one child element per non-empty field.
   *
   * @param {object} doc - Node factory providing createElement (an FB2Document).
   * @returns {Node} The author element.
   */
  xml(doc) {
    const a_el = super.xml(doc);
    // Order follows the FictionBook authorType sequence, in which nickname
    // comes before home-page.
    [
      [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
      [ "last-name", this.lastName ], [ "nickname", this.nickName ],
      [ "home-page", this.homePage ]
    ].forEach(it => {
      if (it[1]) {
        const e = doc.createElement(it[0]);
        e.textContent = it[1];
        a_el.appendChild(e);
      }
    });
    return a_el;
  }
}
/**
 * FB2 image. Holds either a URL to download or the image data itself; the
 * data (base64 text) is kept in `value`.
 */
class FB2Image extends FB2Element {
  /**
   * @param {*} value - A string is stored as the image URL; anything else is
   *   stored as the image data.
   */
  constructor(value) {
    super("image");
    if (typeof value === "string") {
      this.url = value;
    } else {
      this.value = value;
    }
  }

  /**
   * Downloads the image from `this.url` and stores its base64 payload in
   * `this.value`, also setting `type` and `size`. Does nothing when no URL is set.
   *
   * @param {Function} [onprogress] - Progress callback passed on to the loader.
   * @returns {Promise<void>}
   * @throws {Error} When the downloaded data cannot be converted to base64;
   *   the underlying failure is attached as `cause`. Download errors are
   *   propagated unchanged.
   */
  async load(onprogress) {
    if (!this.url) return;
    const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
    this.type = bin.type;
    this.size = bin.size;
    try {
      const dataUrl = await new Promise((resolve, reject) => {
        const reader = new FileReader();
        // "loadend" fires after success, error and abort alike, so the
        // outcome has to be read from the reader itself.
        reader.addEventListener("loadend", () => {
          if (typeof reader.result === "string") {
            resolve(reader.result);
          } else {
            reject(reader.error || new Error("FileReader returned no data"));
          }
        });
        reader.readAsDataURL(bin);
      });
      // Keep only the payload after the "data:<type>;base64," prefix.
      this.value = dataUrl.slice(dataUrl.indexOf(",") + 1);
    } catch (err) {
      throw new Error("Ошибка загрузки изображения", { cause: err });
    }
  }

  /**
   * @returns {boolean} Always true, so an image is never treated as empty.
   */
  hasValue() {
    return true;
  }

  /**
   * Renders a reference to the binary when image data is present, otherwise
   * a text placeholder.
   *
   * @param {object} doc - Node factory (an FB2Document).
   * @returns {Node} An image element or a text node.
   */
  xml(doc) {
    if (this.value) {
      const el = doc.createElement(this.name);
      el.setAttribute("l:href", "#" + this.id);
      return el;
    }
    const id = this.id || "изображение";
    return doc.createTextNode(`[ ${id} ]`);
  }

  /**
   * Renders the <binary> element carrying the image data.
   *
   * @param {object} doc - Node factory providing createElement (an FB2Document).
   * @returns {Node} The binary element.
   */
  xmlBinary(doc) {
    const el = doc.createElement("binary");
    el.setAttribute("id", this.id);
    el.setAttribute("content-type", this.type);
    el.textContent = this.value;
    return el;
  }

  /**
   * @returns {string} File extension (with the dot) for the known image
   *   types, or an empty string for any other type.
   */
  suffix() {
    switch (this.type) {
      case "image/png":
        return ".png";
      case "image/jpeg":
        return ".jpg";
      case "image/gif":
        return ".gif";
      case "image/webp":
        return ".webp";
    }
    return "";
  }

  /**
   * Download hook; delegates to FB2Loader.addJob with the same arguments.
   *
   * @returns {Promise<*>} Whatever FB2Loader.addJob resolves with.
   */
  async _load(...args) {
    return FB2Loader.addJob(...args);
  }
}
/**
 * Static download helper built on fetch(). All jobs share one
 * AbortController so they can be cancelled together.
 */
class FB2Loader {
  /**
   * Downloads a resource, reading the response body chunk by chunk.
   *
   * @param {string} url - Resource address.
   * @param {object} [params] - Options: `method` (default "GET"),
   *   `responseType` ("binary" for a Blob, anything else for text) and
   *   `onprogress(loaded, total)`, which is only called when the response
   *   reports a non-zero Content-Length.
   * @returns {Promise<Blob|string>} A Blob typed by the Content-Type header,
   *   or the body decoded as UTF-8 text.
   * @throws {Error} When the response status is not OK.
   */
  static async addJob(url, params) {
    const opts = params || {};
    const response = await fetch(url, {
      method: opts.method || "GET",
      credentials: "same-origin",
      signal: this._getSignal()
    });
    if (!response.ok) throw new Error(`Сервер вернул ошибку (${response.status})`);

    const stream = response.body.getReader();
    const contentType = response.headers.get("Content-Type");
    const expected = Number(response.headers.get("Content-Length"));
    // Progress is reported only when the total size is known.
    const notify = (expected && typeof opts.onprogress === "function") ? opts.onprogress : null;

    const pieces = [];
    let received = 0;
    for (;;) {
      const step = await stream.read();
      if (step.done) break;
      pieces.push(step.value);
      received += step.value.length;
      if (notify) notify(received, expected);
    }

    if (opts.responseType === "binary") {
      return new Blob(pieces, { type: contentType });
    }
    // Text: join the chunks into one buffer before decoding.
    const merged = new Uint8Array(received);
    let offset = 0;
    for (const piece of pieces) {
      merged.set(piece, offset);
      offset += piece.length;
    }
    return new TextDecoder("utf-8").decode(merged);
  }

  /** Aborts every job started since the last abort and drops the controller. */
  static abortAll() {
    const active = this._controller;
    if (!active) return;
    active.abort();
    this._controller = null;
  }

  /**
   * @returns {AbortSignal} Signal of the shared controller, which is created on demand.
   */
  static _getSignal() {
    if (!this._controller) this._controller = new AbortController();
    return this._controller.signal;
  }
}
/**
 * Small static helpers.
 */
class FB2Utils {
  /**
   * Formats the date part of `date`, taken from its local-time getters.
   *
   * @param {Date} date - Date to format.
   * @returns {string} "<year>-MM-DD" with month and day zero-padded to two digits.
   */
  static dateToAtom(date) {
    const twoDigits = num => (num < 10 ? "0" : "") + num;
    const month = twoDigits(date.getMonth() + 1);
    const day = twoDigits(date.getDate());
    return [ "" + date.getFullYear(), month, day ].join("-");
  }
}
/**
 * Error type thrown by FB2Element.fromHTML for a node name it does not recognise.
 */
class FB2UnknownNodeError extends Error {
/**
 * @param {string} message - Human-readable description of the unknown node.
 */
constructor(message) {
super(message);
// Lets callers tell this error apart by its name.
this.name = "UnknownNodeError";
}
}