HTML2FB2Lib

This is a library for converting HTML to FB2.

当前为 2023-06-23 提交的版本,查看 最新版本

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.cn-greasyfork.org/scripts/468831/1209826/HTML2FB2Lib.js

  1. // ==UserScript==
  2. // @name HTML2FB2Lib
  3. // @namespace 90h.yy.zz
  4. // @version 0.4.1
  5. // @author Ox90
  6. // @match http://*
  7. // @match https://*
  8. // @description This is a library for converting HTML to FB2.
  9. // @run-at document-start
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  13. class FB2Parser {
  14. constructor() {
  15. this._stop = null;
  16. }
  17.  
  18. async parse(htmlNode, fromNode) {
  19. const that = this;
  20. async function _parse(node, from, fb2el, depth) {
  21. let n = from || node.firstChild;
  22. while (n) {
  23. const nn = that.startNode(n, depth);
  24. if (nn) {
  25. const f = that.processElement(FB2Element.fromHTML(nn, false), depth);
  26. if (f) {
  27. if (fb2el) fb2el.children.push(f);
  28. await _parse(nn, null, f, depth + 1);
  29. }
  30. that.endNode(nn, depth);
  31. }
  32. if (that._stop) break;
  33. n = n.nextSibling;
  34. }
  35. }
  36. await _parse(htmlNode, fromNode, null, 0);
  37. return this._stop;
  38. }
  39.  
  40. startNode(node, depth) {
  41. return node;
  42. }
  43.  
  44. processElement(fb2el, depth) {
  45. return fb2el;
  46. }
  47.  
  48. endNode(node, depth) {
  49. }
  50. }
  51.  
  52. class FB2Document {
  53. constructor() {
  54. this.binaries = [];
  55. this.bookAuthors = [];
  56. this.annotation = null;
  57. this.genres = [];
  58. this.chapters = [];
  59. this.xmldoc = null;
  60. }
  61.  
  62. toString() {
  63. this._ensureXMLDocument();
  64. const root = this.xmldoc.documentElement;
  65. this._markBinaries();
  66. root.appendChild(this._makeDescriptionElement());
  67. root.appendChild(this._makeBodyElement());
  68. this._makeBinaryElements().forEach(el => root.appendChild(el));
  69. const res = (new XMLSerializer()).serializeToString(this.xmldoc);
  70. this.xmldoc = null;
  71. return res;
  72. }
  73.  
  74. createElement(name) {
  75. this._ensureXMLDocument();
  76. return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  77. }
  78.  
  79. createTextNode(value) {
  80. this._ensureXMLDocument();
  81. return this.xmldoc.createTextNode(value);
  82. }
  83.  
  84. createDocumentFragment() {
  85. this._ensureXMLDocument();
  86. return this.xmldoc.createDocumentFragment();
  87. }
  88.  
  89. _ensureXMLDocument() {
  90. if (!this.xmldoc) {
  91. this.xmldoc = new DOMParser().parseFromString(
  92. '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
  93. "application/xml"
  94. );
  95. this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
  96. }
  97. }
  98.  
  99. _makeDescriptionElement() {
  100. const desc = this.createElement("description");
  101. // title-info
  102. const t_info = this.createElement("title-info");
  103. desc.appendChild(t_info);
  104. this.genres.forEach(g => t_info.appendChild(g.xml(this)));
  105. (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
  106. t_info.appendChild(a.xml(this));
  107. });
  108. t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
  109. if (this.annotation) t_info.appendChild(this.annotation.xml(this));
  110. if (this.keywords) t_info.appendChild(this.keywords.xml(this));
  111. if (this.bookDate) {
  112. const el = this.createElement("date");
  113. el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
  114. el.textContent = this.bookDate.getFullYear();
  115. t_info.appendChild(el);
  116. }
  117. if (this.coverpage) {
  118. const el = this.createElement("coverpage");
  119. (Array.isArray(this.coverpage) ? this.coverpage : [ this.coverpage ]).forEach(img => {
  120. el.appendChild(img.xml(this));
  121. });
  122. t_info.appendChild(el);
  123. }
  124. const lang = this.createElement("lang");
  125. lang.textContent = "ru";
  126. t_info.appendChild(lang);
  127. if (this.sequence) {
  128. const el = this.createElement("sequence");
  129. el.setAttribute("name", this.sequence.name);
  130. if (this.sequence.number) el.setAttribute("number", this.sequence.number);
  131. t_info.appendChild(el);
  132. }
  133. // document-info
  134. const d_info = this.createElement("document-info");
  135. desc.appendChild(d_info);
  136. d_info.appendChild((new FB2Author("Ox90")).xml(this));
  137. if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
  138. d_info.appendChild((() => {
  139. const f_time = new Date();
  140. const el = this.createElement("date");
  141. el.setAttribute("value", FB2Utils.dateToAtom(f_time));
  142. el.textContent = f_time.toUTCString();
  143. return el;
  144. })());
  145. if (this.sourceURL) {
  146. d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
  147. }
  148. d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
  149. d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
  150. return desc;
  151. }
  152.  
  153. _makeBodyElement() {
  154. const body = this.createElement("body");
  155. if (this.bookTitle || this.bookAuthors.length) {
  156. const title = this.createElement("title");
  157. body.appendChild(title);
  158. if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
  159. if (this.bookTitle) title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
  160. }
  161. this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
  162. return body;
  163. }
  164.  
  165. _markBinaries() {
  166. let idx = 0;
  167. this.binaries.forEach(img => {
  168. if (!img.id) img.id = "image" + (++idx) + img.suffix();
  169. });
  170. }
  171.  
  172. _makeBinaryElements() {
  173. return this.binaries.reduce((list, img) => {
  174. if (img.value) list.push(img.xmlBinary(this));
  175. return list;
  176. }, []);
  177. }
  178.  
  179. _genBookId() {
  180. let str = this.sourceURL || this.bookTitle || "";
  181. let hash = 0;
  182. const slen = str.length;
  183. for (let i = 0; i < slen; ++i) {
  184. const ch = str.charCodeAt(i);
  185. hash = ((hash << 5) - hash) + ch;
  186. hash = hash & hash; // Convert to 32bit integer
  187. }
  188. return this.idPrefix || "h2f2l_" + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  189. }
  190. }
  191.  
  192. class FB2Element {
  193. constructor(name, value) {
  194. this.name = name;
  195. this.value = value !== undefined ? value : null;
  196. this.children = [];
  197. }
  198.  
  199. static fromHTML(node, recursive) {
  200. let fb2el = null;
  201. const names = new Map([
  202. [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
  203. [ "S", "strikethrough" ], [ "DEL", "strikethrough" ], [ "STRIKE", "strikethrough" ],
  204. [ "STRONG", "strong" ], [ "BLOCKQUOTE", "cite" ],
  205. [ "SCRIPT", null ], [ "#comment", null ]
  206. ]);
  207. const node_name = node.nodeName;
  208. if (names.has(node_name)) {
  209. const name = names.get(node_name);
  210. if (!name) return null;
  211. fb2el = new FB2Element(names.get(node_name));
  212. } else {
  213. switch (node_name) {
  214. case "#text":
  215. return new FB2Text(node.textContent);
  216. case "SPAN":
  217. fb2el = new FB2Text();
  218. break;
  219. case "P":
  220. case "LI":
  221. fb2el = new FB2Paragraph();
  222. break;
  223. case "SUBTITLE":
  224. fb2el = new FB2Subtitle();
  225. break;
  226. case "A":
  227. fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
  228. break;
  229. case "OL":
  230. fb2el = new FB2OrderedList();
  231. break;
  232. case "UL":
  233. fb2el = new FB2UnorderedList();
  234. break;
  235. case "BR":
  236. return new FB2EmptyLine();
  237. case "HR":
  238. return new FB2Paragraph("---");
  239. case "IMG":
  240. return new FB2Image(node.src);
  241. default:
  242. throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
  243. }
  244. }
  245. if (recursive) fb2el.appendContentFromHTML(node);
  246. return fb2el;
  247. }
  248.  
  249. hasValue() {
  250. return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  251. }
  252.  
  253. setContentFromHTML(data, fb2doc, log) {
  254. this.children = [];
  255. this.appendContentFromHTML(data, fb2doc, log);
  256. }
  257.  
  258. appendContentFromHTML(data, fb2doc, log) {
  259. for (const node of data.childNodes) {
  260. let fe = FB2Element.fromHTML(node, true);
  261. if (fe) this.children.push(fe);
  262. }
  263. }
  264.  
  265. normalize() {
  266. const _normalize = function(list) {
  267. let done = true;
  268. let res_list = list.reduce((accum, cur_el) => {
  269. accum.push(cur_el);
  270. const tmp_ch = cur_el.children;
  271. cur_el.children = [];
  272. tmp_ch.forEach(el => {
  273. if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
  274. accum.push(el);
  275. const nm = cur_el.name;
  276. cur_el = new cur_el.constructor();
  277. if (!cur_el.name) cur_el.name = nm;
  278. accum.push(cur_el);
  279. done = false;
  280. } else {
  281. let cnt = 0;
  282. el.normalize().forEach(e => {
  283. if (!e.value && e.children.length === 1 && e.name === e.children[0].name) {
  284. e = e.children[0];
  285. }
  286. if (e !== el) done = false;
  287. if (e.hasValue()) cur_el.children.push(e);
  288. });
  289. }
  290. });
  291. return accum;
  292. }, []);
  293. return { list: res_list, done: done };
  294. }
  295. //--
  296. let result = _normalize([ this ]);
  297. while (!result.done) {
  298. result = _normalize(result.list);
  299. }
  300. return result.list;
  301. }
  302.  
  303. xml(doc) {
  304. const el = doc.createElement(this.name);
  305. if (this.value !== null) el.textContent = this.value;
  306. this.children.forEach(ch => el.appendChild(ch.xml(doc)));
  307. return el;
  308. }
  309. }
  310.  
  311. class FB2BlockElement extends FB2Element {
  312. normalize() {
  313. // Предварительная нормализация
  314. this.children = this.children.reduce((list, ch) => {
  315. ch.normalize().forEach(cc => list.push(cc));
  316. return list;
  317. }, []);
  318. // Удалить пустоты справа
  319. while (this.children.length) {
  320. const el = this.children[this.children.length - 1];
  321. if (el instanceof FB2Text) el.trimRight();
  322. if (!el.hasValue()) {
  323. this.children.pop();
  324. continue;
  325. }
  326. break;
  327. }
  328. // Удалить пустоты слева
  329. while (this.children.length) {
  330. const el = this.children[0];
  331. if (el instanceof FB2Text) el.trimLeft();
  332. if (!el.hasValue()) {
  333. this.children.shift();
  334. continue;
  335. }
  336. break;
  337. }
  338. // Окончательная нормализация
  339. return super.normalize();
  340. }
  341. }
  342.  
  343. /**
  344. * FB2 элемент верхнего уровня section
  345. */
  346. class FB2Chapter extends FB2Element {
  347. constructor(title) {
  348. super("section");
  349. this.title = title;
  350. }
  351.  
  352. normalize() {
  353. // Обернуть текстовые ноды в параграфы и удалить пустые элементы
  354. this.children = this.children.reduce((list, el) => {
  355. if (el instanceof FB2Text) {
  356. const pe = new FB2Paragraph();
  357. pe.children.push(el);
  358. el = pe;
  359. }
  360. el.normalize().forEach(el => {
  361. if (el.hasValue()) list.push(el);
  362. });
  363. return list;
  364. }, []);
  365. return [ this ];
  366. }
  367.  
  368. xml(doc) {
  369. const el = super.xml(doc);
  370. if (this.title) {
  371. const t_el = doc.createElement("title");
  372. const p_el = doc.createElement("p");
  373. p_el.textContent = this.title;
  374. t_el.appendChild(p_el);
  375. el.prepend(t_el);
  376. }
  377. return el;
  378. }
  379. }
  380.  
  381. /**
  382. * FB2 элемент верхнего уровня annotation
  383. */
  384. class FB2Annotation extends FB2Element {
  385. constructor() {
  386. super("annotation");
  387. }
  388.  
  389. normalize() {
  390. // Обернуть неформатированный текст, разделенный <br> в параграфы
  391. let lp = null;
  392. const newParagraph = list => {
  393. lp = new FB2Paragraph();
  394. list.push(lp);
  395. };
  396. this.children = this.children.reduce((list, el) => {
  397. if (el.name === "empty-line") {
  398. newParagraph(list);
  399. } else if (el instanceof FB2BlockElement) {
  400. list.push(el);
  401. lp = null;
  402. } else {
  403. if (!lp) newParagraph(list);
  404. lp.children.push(el);
  405. }
  406. return list;
  407. }, []);
  408. // Запустить собственную нормализацию дочерних элементов
  409. // чтобы предотвратить их дальнейшее всплытие
  410. this.children = this.children.reduce((list, el) => {
  411. el.normalize().forEach(el => {
  412. if (el.hasValue()) list.push(el);
  413. });
  414. return list;
  415. }, []);
  416. }
  417. }
  418.  
  419. class FB2Subtitle extends FB2BlockElement {
  420. constructor(value) {
  421. super("subtitle", value);
  422. }
  423. }
  424.  
  425. class FB2Paragraph extends FB2BlockElement {
  426. constructor(value) {
  427. super("p", value);
  428. }
  429. }
  430.  
  431. class FB2EmptyLine extends FB2Element {
  432. constructor() {
  433. super("empty-line");
  434. }
  435.  
  436. hasValue() {
  437. return true;
  438. }
  439. }
  440.  
  441. class FB2Text extends FB2Element {
  442. constructor(value) {
  443. super("text", value);
  444. }
  445.  
  446. trimLeft() {
  447. if (typeof(this.value) === "string") this.value = this.value.trimLeft() || null;
  448. if (!this.value) {
  449. while (this.children.length) {
  450. const first_child = this.children[0];
  451. if (first_child instanceof FB2Text) first_child.trimLeft();
  452. if (first_child.hasValue()) break;
  453. this.children.shift();
  454. }
  455. }
  456. }
  457.  
  458. trimRight() {
  459. while (this.children.length) {
  460. const last_child = this.children[this.children.length - 1];
  461. if (last_child instanceof FB2Text) last_child.trimRight();
  462. if (last_child.hasValue()) break;
  463. this.children.pop();
  464. }
  465. if (!this.children.length && typeof(this.value) === "string") {
  466. this.value = this.value.trimRight() || null;
  467. }
  468. }
  469.  
  470. xml(doc) {
  471. if (!this.value && this.children.length) {
  472. let fr = doc.createDocumentFragment();
  473. for (const ch of this.children) {
  474. fr.appendChild(ch.xml(doc));
  475. }
  476. return fr;
  477. }
  478. return doc.createTextNode(this.value);
  479. }
  480. }
  481.  
  482. class FB2Link extends FB2Element {
  483. constructor(href) {
  484. super("a");
  485. this.href = href;
  486. }
  487.  
  488. xml(doc) {
  489. const el = super.xml(doc);
  490. el.setAttribute("l:href", this.href);
  491. return el;
  492. }
  493. }
  494.  
  495. class FB2OrderedList extends FB2Element {
  496. constructor() {
  497. super("list");
  498. }
  499.  
  500. xml(doc) {
  501. const fr = doc.createDocumentFragment();
  502. let pos = 0;
  503. for (const ch of this.children) {
  504. const ch_el = ch.xml(doc);
  505. if (ch.hasValue()) {
  506. ++pos;
  507. ch_el.prepend(`${pos}. `);
  508. }
  509. fr.appendChild(ch_el);
  510. }
  511. return fr;
  512. }
  513. }
  514.  
  515. class FB2UnorderedList extends FB2Element {
  516. constructor() {
  517. super("list");
  518. }
  519.  
  520. xml(doc) {
  521. const fr = doc.createDocumentFragment();
  522. for (const ch of this.children) {
  523. const ch_el = ch.xml(doc);
  524. if (ch.hasValue()) ch_el.prepend("- ");
  525. fr.appendChild(ch_el);
  526. }
  527. return fr;
  528. }
  529. }
  530.  
  531. class FB2Author extends FB2Element {
  532. constructor(s) {
  533. super("author");
  534. const a = s.split(" ");
  535. switch (a.length) {
  536. case 1:
  537. this.nickName = s;
  538. break;
  539. case 2:
  540. this.firstName = a[0];
  541. this.lastName = a[1];
  542. break;
  543. default:
  544. this.firstName = a[0];
  545. this.middleName = a.slice(1, -1).join(" ");
  546. this.lastName = a[a.length - 1];
  547. break;
  548. }
  549. this.homePage = null;
  550. }
  551.  
  552. hasValue() {
  553. return (!!this.firstName || !!this.lastName || !!this.middleName);
  554. }
  555.  
  556. toString() {
  557. if (!this.firstName) return this.nickName;
  558. return [ this.firstName, this.middleName, this.lastName ].reduce((list, name) => {
  559. if (name) list.push(name);
  560. return list;
  561. }, []).join(" ");
  562. }
  563.  
  564. xml(doc) {
  565. let a_el = super.xml(doc);
  566. [
  567. [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
  568. [ "last-name", this.lastName ], [ "home-page", this.homePage ],
  569. [ "nickname", this.nickName ]
  570. ].forEach(it => {
  571. if (it[1]) {
  572. const e = doc.createElement(it[0]);
  573. e.textContent = it[1];
  574. a_el.appendChild(e);
  575. }
  576. });
  577. return a_el;
  578. }
  579. }
  580.  
  581. class FB2Image extends FB2Element {
  582. constructor(value) {
  583. super("image");
  584. if (typeof(value) === "string") {
  585. this.url = value;
  586. } else {
  587. this.value = value;
  588. }
  589. }
  590.  
  591. async load(onprogress) {
  592. if (this.url) {
  593. const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
  594. this.type = bin.type;
  595. this.size = bin.size;
  596. return new Promise((resolve, reject) => {
  597. const reader = new FileReader();
  598. reader.addEventListener("loadend", (event) => resolve(event.target.result));
  599. reader.readAsDataURL(bin);
  600. }).then(base64str => {
  601. this.value = base64str.substr(base64str.indexOf(",") + 1);
  602. }).catch(err => {
  603. throw new Error("Ошибка загрузки изображения");
  604. });
  605. }
  606. }
  607.  
  608. hasValue() {
  609. return true;
  610. }
  611.  
  612. xml(doc) {
  613. if (this.value) {
  614. const el = doc.createElement(this.name);
  615. el.setAttribute("l:href", "#" + this.id);
  616. return el
  617. }
  618. const id = this.id || "изображение";
  619. return doc.createTextNode(`[ ${id} ]`);
  620. }
  621.  
  622. xmlBinary(doc) {
  623. const el = doc.createElement("binary");
  624. el.setAttribute("id", this.id);
  625. el.setAttribute("content-type", this.type);
  626. el.textContent = this.value
  627. return el;
  628. }
  629.  
  630. suffix() {
  631. switch (this.type) {
  632. case "image/png":
  633. return ".png";
  634. case "image/jpeg":
  635. return ".jpg";
  636. case "image/gif":
  637. return ".gif";
  638. case "image/webp":
  639. return ".webp";
  640. }
  641. return "";
  642. }
  643.  
  644. async _load(...args) {
  645. return FB2Loader.addJob(...args);
  646. }
  647. }
  648.  
  649. class FB2Loader {
  650. static async addJob(url, params) {
  651. params ||= {};
  652. const fp = {};
  653. fp.method = params.method || "GET";
  654. fp.credentials = "same-origin";
  655. fp.signal = this._getSignal();
  656. const resp = await fetch(url, fp);
  657. if (!resp.ok) throw new Error(`Сервер вернул ошибку (${resp.status})`);
  658. const reader = resp.body.getReader();
  659. const type = resp.headers.get("Content-Type");
  660. const total = +resp.headers.get("Content-Length");
  661. let loaded = 0;
  662. const chunks = [];
  663. const onprogress = (total && typeof(params.onprogress) === "function") ? params.onprogress : null;
  664. while (true) {
  665. const { done, value } = await reader.read();
  666. if (done) break;
  667. chunks.push(value);
  668. loaded += value.length;
  669. if (onprogress) onprogress(loaded, total);
  670. }
  671. switch (params.responseType) {
  672. case "binary":
  673. return new Blob(chunks, { type: type });
  674. default:
  675. {
  676. let pos = 0;
  677. const data = new Uint8Array(loaded);
  678. for (let ch of chunks) {
  679. data.set(ch, pos);
  680. pos += ch.length;
  681. }
  682. return (new TextDecoder("utf-8")).decode(data);
  683. }
  684. }
  685. }
  686.  
  687. static abortAll() {
  688. if (this._controller) {
  689. this._controller.abort();
  690. this._controller = null;
  691. }
  692. }
  693.  
  694. static _getSignal() {
  695. let controller = this._controller;
  696. if (!controller) this._controller = controller = new AbortController();
  697. return controller.signal;
  698. }
  699. }
  700.  
  701. class FB2Utils {
  702. static dateToAtom(date) {
  703. const m = date.getMonth() + 1;
  704. const d = date.getDate();
  705. return "" + date.getFullYear() + '-' + (m < 10 ? "0" : "") + m + "-" + (d < 10 ? "0" : "") + d;
  706. }
  707. }
  708.  
  709. class FB2UnknownNodeError extends Error {
  710. constructor(message) {
  711. super(message);
  712. this.name = "UnknownNodeError";
  713. }
  714. }
  715.