HTML2FB2Lib

This is a library for converting HTML to FB2.

当前为 2023-06-23 提交的版本,查看 最新版本

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.cn-greasyfork.org/scripts/468831/1209382/HTML2FB2Lib.js

  1. // ==UserScript==
  2. // @name HTML2FB2Lib
  3. // @namespace 90h.yy.zz
  4. // @version 0.4.0
  5. // @author Ox90
  6. // @match http://*
  7. // @match https://*
  8. // @description This is a library for converting HTML to FB2.
  9. // @run-at document-start
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  /**
   * Depth-first walker that turns an HTML node tree into FB2 elements.
   *
   * Subclasses customise the walk by overriding the three hooks
   * (startNode, processElement, endNode) and may set `this._stop`
   * to a truthy value to interrupt the traversal.
   */
  class FB2Parser {
    constructor() {
      // Traversal stops as soon as this becomes truthy; parse() returns it.
      this._stop = null;
    }

    /**
     * Walks the children of `htmlNode`, optionally starting from `fromNode`.
     *
     * @param {Node} htmlNode - container whose children are visited
     * @param {Node} [fromNode] - child to start from instead of the first child
     * @returns {Promise<*>} the value of `this._stop` once the walk has ended
     */
    async parse(htmlNode, fromNode) {
      const walk = async (parent, start, target, depth) => {
        for (let cur = start || parent.firstChild; cur; cur = cur.nextSibling) {
          const accepted = this.startNode(cur, depth);
          if (accepted) {
            const converted = this.processElement(FB2Element.fromHTML(accepted, false), depth);
            if (converted) {
              // Top-level elements have no parent to attach to (target is null).
              if (target) target.children.push(converted);
              await walk(accepted, null, converted, depth + 1);
            }
            this.endNode(accepted, depth);
          }
          if (this._stop) break;
        }
      };
      await walk(htmlNode, fromNode, null, 0);
      return this._stop;
    }

    /**
     * Hook called before a node is converted.
     * Return the node to process (possibly a replacement) or a falsy value to skip it.
     */
    startNode(node, depth) {
      return node;
    }

    /**
     * Hook called with the freshly created FB2 element.
     * Return the element to keep (its children are then walked) or a falsy value to drop it.
     */
    processElement(fb2el, depth) {
      return fb2el;
    }

    /**
     * Hook called after a node accepted by startNode has been handled.
     */
    endNode(node, depth) {
    }
  }
  51.  
  /**
   * In-memory model of a FictionBook 2 document and its XML serializer.
   *
   * Besides the fields initialised in the constructor, the serializer reads
   * these optional properties when they are set by the caller:
   * `bookTitle`, `keywords`, `bookDate`, `coverpage` (one image or an array),
   * `sequence` ({ name, number }), `programName`, `sourceURL` and `idPrefix`.
   */
  class FB2Document {
    constructor() {
      this.binaries = [];
      this.bookAuthors = [];
      this.annotation = null;
      this.genres = [];
      this.chapters = [];
      this.xmldoc = null;
    }

    /**
     * Serializes the whole document to an FB2 XML string.
     * The internal XML document is dropped afterwards, so every call
     * builds the output from scratch.
     *
     * @returns {string}
     */
    toString() {
      this._ensureXMLDocument();
      const root = this.xmldoc.documentElement;
      // Binaries need ids before the body is built: images reference them.
      this._markBinaries();
      root.appendChild(this._makeDescriptionElement());
      root.appendChild(this._makeBodyElement());
      this._makeBinaryElements().forEach(el => root.appendChild(el));
      const res = (new XMLSerializer()).serializeToString(this.xmldoc);
      this.xmldoc = null;
      return res;
    }

    /**
     * Creates an element in the FictionBook namespace.
     *
     * @param {string} name - element tag name
     * @returns {Element}
     */
    createElement(name) {
      this._ensureXMLDocument();
      return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
    }

    /**
     * @param {string} value - text content
     * @returns {Text}
     */
    createTextNode(value) {
      this._ensureXMLDocument();
      return this.xmldoc.createTextNode(value);
    }

    /**
     * @returns {DocumentFragment}
     */
    createDocumentFragment() {
      this._ensureXMLDocument();
      return this.xmldoc.createDocumentFragment();
    }

    /**
     * Lazily creates the backing XML document with an empty <FictionBook> root
     * and the `l` prefix used for link attributes.
     */
    _ensureXMLDocument() {
      if (!this.xmldoc) {
        this.xmldoc = new DOMParser().parseFromString(
          '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
          "application/xml"
        );
        this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
      }
    }

    /**
     * Builds the <description> element with its <title-info> and <document-info> parts.
     *
     * @returns {Element}
     */
    _makeDescriptionElement() {
      const desc = this.createElement("description");
      // title-info
      const t_info = this.createElement("title-info");
      desc.appendChild(t_info);
      this.genres.forEach(g => t_info.appendChild(g.xml(this)));
      // Fall back to a placeholder author when none was provided.
      (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
        t_info.appendChild(a.xml(this));
      });
      t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
      if (this.annotation) t_info.appendChild(this.annotation.xml(this));
      if (this.keywords) t_info.appendChild(this.keywords.xml(this));
      if (this.bookDate) {
        const el = this.createElement("date");
        el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
        el.textContent = this.bookDate.getFullYear();
        t_info.appendChild(el);
      }
      if (this.coverpage) {
        const el = this.createElement("coverpage");
        (Array.isArray(this.coverpage) ? this.coverpage : [ this.coverpage ]).forEach(img => {
          el.appendChild(img.xml(this));
        });
        t_info.appendChild(el);
      }
      const lang = this.createElement("lang");
      lang.textContent = "ru";
      t_info.appendChild(lang);
      if (this.sequence) {
        const el = this.createElement("sequence");
        el.setAttribute("name", this.sequence.name);
        if (this.sequence.number) el.setAttribute("number", this.sequence.number);
        t_info.appendChild(el);
      }
      // document-info
      const d_info = this.createElement("document-info");
      desc.appendChild(d_info);
      d_info.appendChild((new FB2Author("Ox90")).xml(this));
      if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
      d_info.appendChild((() => {
        // Creation time of the generated file.
        const f_time = new Date();
        const el = this.createElement("date");
        el.setAttribute("value", FB2Utils.dateToAtom(f_time));
        el.textContent = f_time.toUTCString();
        return el;
      })());
      if (this.sourceURL) {
        d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
      }
      d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
      d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
      return desc;
    }

    /**
     * Builds the <body> element: an optional title (authors and book title)
     * followed by every chapter.
     *
     * @returns {Element}
     */
    _makeBodyElement() {
      const body = this.createElement("body");
      if (this.bookTitle || this.bookAuthors.length) {
        const title = this.createElement("title");
        body.appendChild(title);
        // join() stringifies each author through its toString().
        if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
        if (this.bookTitle) title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
      }
      this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
      return body;
    }

    /**
     * Assigns sequential ids ("image1.png", "image2.jpg", ...) to binaries
     * that do not have one yet.
     */
    _markBinaries() {
      let idx = 0;
      this.binaries.forEach(img => {
        if (!img.id) img.id = "image" + (++idx) + img.suffix();
      });
    }

    /**
     * Creates <binary> elements for every binary that actually has data.
     *
     * @returns {Element[]}
     */
    _makeBinaryElements() {
      return this.binaries.reduce((list, img) => {
        if (img.value) list.push(img.xmlBinary(this));
        return list;
      }, []);
    }

    /**
     * Derives a stable book id from the source URL (or the title) using a
     * 32-bit string hash, prefixed with `idPrefix` or "h2f2l_" by default.
     *
     * @returns {string}
     */
    _genBookId() {
      const str = this.sourceURL || this.bookTitle || "";
      let hash = 0;
      for (let i = 0; i < str.length; ++i) {
        hash = ((hash << 5) - hash) + str.charCodeAt(i);
        hash = hash & hash; // Convert to 32bit integer
      }
      // The parentheses matter: `+` binds tighter than `||`, so without them
      // a configured idPrefix would replace the whole id instead of prefixing it.
      return (this.idPrefix || "h2f2l_") + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
    }
  }
  191.  
  /**
   * Generic FB2 element: a tag name, an optional text value and child elements.
   * Base class for all specialised FB2 nodes.
   */
  class FB2Element {
    /**
     * @param {string} name - FB2 tag name
     * @param {*} [value] - text value; stored as null when omitted
     */
    constructor(name, value) {
      this.name = name;
      this.value = value !== undefined ? value : null;
      this.children = [];
    }

    /**
     * Creates the FB2 element that corresponds to an HTML node.
     *
     * @param {Node} node - source HTML node
     * @param {boolean} recursive - also convert the node's descendants
     * @returns {FB2Element|null} null for nodes that are deliberately ignored (comments)
     * @throws {FB2UnknownNodeError} when the node type has no FB2 counterpart
     */
    static fromHTML(node, recursive) {
      let fb2el = null;
      // Inline tags with a direct FB2 equivalent; a null target means "ignore the node".
      const names = new Map([
        [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
        [ "S", "strikethrough" ], [ "DEL", "strikethrough" ], [ "STRIKE", "strikethrough" ],
        [ "STRONG", "strong" ], [ "B", "strong" ], [ "BLOCKQUOTE", "cite" ],
        [ "#comment", null ]
      ]);
      const node_name = node.nodeName;
      if (names.has(node_name)) {
        const name = names.get(node_name);
        if (!name) return null;
        fb2el = new FB2Element(name);
      } else {
        switch (node_name) {
          case "#text":
            return new FB2Text(node.textContent);
          case "SPAN":
            fb2el = new FB2Text();
            break;
          case "P":
          case "LI":
            fb2el = new FB2Paragraph();
            break;
          case "SUBTITLE":
            fb2el = new FB2Subtitle();
            break;
          case "A":
            fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
            break;
          case "OL":
            fb2el = new FB2OrderedList();
            break;
          case "UL":
            fb2el = new FB2UnorderedList();
            break;
          case "BR":
            return new FB2EmptyLine();
          case "HR":
            return new FB2Paragraph("---");
          case "IMG":
            return new FB2Image(node.src);
          default:
            throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
        }
      }
      if (recursive) fb2el.appendContentFromHTML(node);
      return fb2el;
    }

    /**
     * @returns {boolean} true when the element has a non-null value or at least one child
     */
    hasValue() {
      return ((this.value !== undefined && this.value !== null) || !!this.children.length);
    }

    /**
     * Replaces the current children with the converted children of `data`.
     * `fb2doc` and `log` are only forwarded; this base implementation does not use them.
     */
    setContentFromHTML(data, fb2doc, log) {
      this.children = [];
      this.appendContentFromHTML(data, fb2doc, log);
    }

    /**
     * Converts every child node of `data` (recursively) and appends the results.
     * `fb2doc` and `log` are accepted but not used by this base implementation.
     */
    appendContentFromHTML(data, fb2doc, log) {
      for (const node of data.childNodes) {
        let fe = FB2Element.fromHTML(node, true);
        if (fe) this.children.push(fe);
      }
    }

    /**
     * Normalizes the element tree and returns the list of elements that should
     * replace this one in its parent.
     *
     * Empty lines and subtitles found among the children are hoisted out as
     * siblings, splitting the current element in two; a value-less wrapper
     * whose only child has the same tag name is replaced by that child;
     * children without a value are dropped. Passes repeat until one of them
     * changes nothing.
     *
     * @returns {FB2Element[]}
     */
    normalize() {
      const _normalize = function(list) {
        let done = true;
        let res_list = list.reduce((accum, cur_el) => {
          accum.push(cur_el);
          // Detach the children and re-attach only what survives normalization.
          const tmp_ch = cur_el.children;
          cur_el.children = [];
          tmp_ch.forEach(el => {
            if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
              // Hoist the separator and continue in a fresh element of the same class.
              accum.push(el);
              const nm = cur_el.name;
              cur_el = new cur_el.constructor();
              // The base constructor called without arguments leaves the name unset.
              if (!cur_el.name) cur_el.name = nm;
              accum.push(cur_el);
              done = false;
            } else {
              el.normalize().forEach(e => {
                // Collapse a value-less wrapper around a single child with the same tag.
                if (!e.value && e.children.length === 1 && e.name === e.children[0].name) {
                  e = e.children[0];
                }
                if (e !== el) done = false;
                if (e.hasValue()) cur_el.children.push(e);
              });
            }
          });
          return accum;
        }, []);
        return { list: res_list, done: done };
      }
      //--
      let result = _normalize([ this ]);
      while (!result.done) {
        result = _normalize(result.list);
      }
      return result.list;
    }

    /**
     * Renders the element and its children with the given document factory.
     *
     * @param {FB2Document} doc - provides createElement()
     * @returns {Element}
     */
    xml(doc) {
      const el = doc.createElement(this.name);
      if (this.value !== null) el.textContent = this.value;
      this.children.forEach(ch => el.appendChild(ch.xml(doc)));
      return el;
    }
  }
  310.  
  /**
   * Block-level FB2 element. Its normalization additionally strips empty
   * content from both ends of the child list.
   */
  class FB2BlockElement extends FB2Element {
    /**
     * @returns {FB2Element[]} result of the base normalization after trimming
     */
    normalize() {
      // Preliminary normalization: replace every child by its normalized form.
      const flattened = [];
      for (const child of this.children) {
        for (const piece of child.normalize()) flattened.push(piece);
      }
      this.children = flattened;

      // Strip empty content on the right.
      while (this.children.length > 0) {
        const tail = this.children[this.children.length - 1];
        if (tail instanceof FB2Text) tail.trimRight();
        if (tail.hasValue()) break;
        this.children.pop();
      }

      // Strip empty content on the left.
      while (this.children.length > 0) {
        const head = this.children[0];
        if (head instanceof FB2Text) head.trimLeft();
        if (head.hasValue()) break;
        this.children.shift();
      }

      // Final normalization.
      return super.normalize();
    }
  }
  342.  
  /**
   * Top-level FB2 element: section.
   */
  class FB2Chapter extends FB2Element {
    /**
     * @param {string} [title] - chapter title rendered as <title><p>…</p></title>
     */
    constructor(title) {
      super("section");
      this.title = title;
    }

    /**
     * Wraps bare text nodes in paragraphs, normalizes every child and drops
     * the ones that end up without a value.
     *
     * @returns {FB2Element[]} always a single-item list containing this chapter
     */
    normalize() {
      const kept = [];
      for (const child of this.children) {
        let block = child;
        if (child instanceof FB2Text) {
          block = new FB2Paragraph();
          block.children.push(child);
        }
        for (const piece of block.normalize()) {
          if (piece.hasValue()) kept.push(piece);
        }
      }
      this.children = kept;
      return [ this ];
    }

    /**
     * Renders the section, putting the title (if any) before the content.
     *
     * @param {FB2Document} doc - provides createElement()
     * @returns {Element}
     */
    xml(doc) {
      const section = super.xml(doc);
      if (!this.title) return section;
      const heading = doc.createElement("title");
      const para = doc.createElement("p");
      para.textContent = this.title;
      heading.appendChild(para);
      section.prepend(heading);
      return section;
    }
  }
  380.  
  /**
   * Top-level FB2 element: annotation.
   */
  class FB2Annotation extends FB2Element {
    constructor() {
      super("annotation");
    }

    /**
     * Groups loose inline content into paragraphs and normalizes the result.
     *
     * @returns {FB2Element[]} always a single-item list containing this annotation,
     *   matching the contract of the other normalize() implementations
     */
    normalize() {
      // Wrap unformatted text separated by <br> into paragraphs.
      let lp = null;
      const newParagraph = list => {
        lp = new FB2Paragraph();
        list.push(lp);
      };
      this.children = this.children.reduce((list, el) => {
        if (el.name === "empty-line") {
          // A line break starts a new paragraph.
          newParagraph(list);
        } else if (el instanceof FB2BlockElement) {
          // Block elements are kept as they are and close the current paragraph.
          list.push(el);
          lp = null;
        } else {
          if (!lp) newParagraph(list);
          lp.children.push(el);
        }
        return list;
      }, []);
      // Normalize the children here so that nothing bubbles up
      // out of the annotation later on.
      this.children = this.children.reduce((list, el) => {
        el.normalize().forEach(el => {
          if (el.hasValue()) list.push(el);
        });
        return list;
      }, []);
      // Callers iterate the result of normalize(); returning nothing would break them.
      return [ this ];
    }
  }
  418.  
  /**
   * FB2 <subtitle> block element.
   */
  class FB2Subtitle extends FB2BlockElement {
    /**
     * @param {string} [value] - subtitle text
     */
    constructor(value) {
      super("subtitle", value);
    }
  }
  424.  
  /**
   * FB2 <p> block element.
   */
  class FB2Paragraph extends FB2BlockElement {
    /**
     * @param {string} [value] - paragraph text
     */
    constructor(value) {
      super("p", value);
    }
  }
  430.  
  /**
   * FB2 <empty-line> element.
   */
  class FB2EmptyLine extends FB2Element {
    constructor() {
      super("empty-line");
    }

    /**
     * An empty line carries no value or children but must never be
     * discarded as empty, so it always reports itself as having a value.
     *
     * @returns {boolean} always true
     */
    hasValue() {
      return true;
    }
  }
  440.  
  /**
   * Text container: either a plain string value or a list of inline children
   * (the form used for converted <span> elements).
   */
  class FB2Text extends FB2Element {
    /**
     * @param {string} [value] - text content
     */
    constructor(value) {
      super("text", value);
    }

    /**
     * Removes leading whitespace. A value that becomes empty is reset to null,
     * after which leading children without a value are dropped as well.
     */
    trimLeft() {
      if (typeof(this.value) === "string") this.value = this.value.trimStart() || null;
      if (!this.value) {
        while (this.children.length) {
          const first_child = this.children[0];
          if (first_child instanceof FB2Text) first_child.trimLeft();
          if (first_child.hasValue()) break;
          this.children.shift();
        }
      }
    }

    /**
     * Removes trailing whitespace: trailing children without a value are
     * dropped first; the own value is trimmed only when no children remain.
     */
    trimRight() {
      while (this.children.length) {
        const last_child = this.children[this.children.length - 1];
        if (last_child instanceof FB2Text) last_child.trimRight();
        if (last_child.hasValue()) break;
        this.children.pop();
      }
      if (!this.children.length && typeof(this.value) === "string") {
        this.value = this.value.trimEnd() || null;
      }
    }

    /**
     * Renders a text node, or a fragment with the rendered children when
     * there is no own value.
     *
     * @param {FB2Document} doc - provides createTextNode() / createDocumentFragment()
     * @returns {Text|DocumentFragment}
     */
    xml(doc) {
      if (!this.value && this.children.length) {
        let fr = doc.createDocumentFragment();
        for (const ch of this.children) {
          fr.appendChild(ch.xml(doc));
        }
        return fr;
      }
      // createTextNode(null) would stringify to the literal text "null".
      return doc.createTextNode(this.value ?? "");
    }
  }
  481.  
  /**
   * FB2 <a> element carrying its target in the `l:href` attribute.
   */
  class FB2Link extends FB2Element {
    /**
     * @param {string} href - link target
     */
    constructor(href) {
      super("a");
      this.href = href;
    }

    /**
     * @param {FB2Document} doc - provides createElement()
     * @returns {Element}
     */
    xml(doc) {
      const anchor = super.xml(doc);
      anchor.setAttribute("l:href", this.href);
      return anchor;
    }
  }
  494.  
  /**
   * Ordered list. It is rendered as a flat fragment of its children
   * (no wrapping element), each non-empty child prefixed with "N. ".
   */
  class FB2OrderedList extends FB2Element {
    constructor() {
      super("list");
    }

    /**
     * @param {FB2Document} doc - provides createDocumentFragment()
     * @returns {DocumentFragment}
     */
    xml(doc) {
      const fragment = doc.createDocumentFragment();
      let counter = 0;
      this.children.forEach(item => {
        const node = item.xml(doc);
        // Only items with content are numbered; the others are still emitted.
        if (item.hasValue()) {
          counter += 1;
          node.prepend(`${counter}. `);
        }
        fragment.appendChild(node);
      });
      return fragment;
    }
  }
  514.  
  /**
   * Unordered list. It is rendered as a flat fragment that contains only
   * the children which have a value (no wrapping element, no bullets).
   */
  class FB2UnorderedList extends FB2Element {
    constructor() {
      super("list");
    }

    /**
     * @param {FB2Document} doc - provides createDocumentFragment()
     * @returns {DocumentFragment}
     */
    xml(doc) {
      const fragment = doc.createDocumentFragment();
      this.children.forEach(item => {
        if (!item.hasValue()) return;
        fragment.appendChild(item.xml(doc));
      });
      return fragment;
    }
  }
  528.  
  /**
   * FB2 <author> element built from a display name.
   */
  class FB2Author extends FB2Element {
    /**
     * Splits the name on whitespace: one word becomes the nickname, two words
     * become first and last name, and with more words everything between the
     * first and the last one is the middle name.
     *
     * @param {string} s - author name as displayed
     */
    constructor(s) {
      super("author");
      // Trim and split on whitespace runs so that stray or repeated spaces
      // do not produce empty name parts.
      const trimmed = s.trim();
      const a = trimmed.split(/\s+/);
      switch (a.length) {
        case 1:
          this.nickName = trimmed;
          break;
        case 2:
          this.firstName = a[0];
          this.lastName = a[1];
          break;
        default:
          this.firstName = a[0];
          this.middleName = a.slice(1, -1).join(" ");
          this.lastName = a[a.length - 1];
          break;
      }
      this.homePage = null;
    }

    /**
     * @returns {boolean} true when any of first, middle or last name is set
     */
    hasValue() {
      return (!!this.firstName || !!this.lastName || !!this.middleName);
    }

    /**
     * @returns {string} the nickname when there is no first name,
     *   otherwise the non-empty name parts joined with spaces
     */
    toString() {
      if (!this.firstName) return this.nickName;
      return [ this.firstName, this.middleName, this.lastName ].filter(Boolean).join(" ");
    }

    /**
     * Renders the author with one child element per non-empty name part.
     *
     * @param {FB2Document} doc - provides createElement()
     * @returns {Element}
     */
    xml(doc) {
      let a_el = super.xml(doc);
      // Order follows the FB2 authorType sequence: nickname precedes home-page.
      [
        [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
        [ "last-name", this.lastName ], [ "nickname", this.nickName ],
        [ "home-page", this.homePage ]
      ].forEach(it => {
        if (it[1]) {
          const e = doc.createElement(it[0]);
          e.textContent = it[1];
          a_el.appendChild(e);
        }
      });
      return a_el;
    }
  }
  578.  
  /**
   * FB2 <image> element together with its binary payload.
   */
  class FB2Image extends FB2Element {
    /**
     * @param {*} value - a string is treated as the image URL to load later;
     *   anything else is stored directly as the image data
     */
    constructor(value) {
      super("image");
      if (typeof(value) === "string") {
        this.url = value;
      } else {
        this.value = value;
      }
    }

    /**
     * Downloads the image from `this.url` (if set) and stores its base64
     * payload in `this.value`, along with `this.type` and `this.size`.
     *
     * @param {Function} [onprogress] - forwarded to the loader
     * @returns {Promise<void>}
     * @throws {Error} loader errors are propagated as they are; a failure while
     *   encoding the data is reported as a generic image loading error
     */
    async load(onprogress) {
      if (!this.url) return;
      const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
      this.type = bin.type;
      this.size = bin.size;
      try {
        const dataUrl = await new Promise((resolve, reject) => {
          const reader = new FileReader();
          // "loadend" fires after both success and failure, so check the error state.
          reader.addEventListener("loadend", () => {
            if (reader.error) {
              reject(reader.error);
            } else {
              resolve(reader.result);
            }
          });
          reader.readAsDataURL(bin);
        });
        // Keep only the payload that follows the "data:...;base64," prefix.
        this.value = dataUrl.slice(dataUrl.indexOf(",") + 1);
      } catch (err) {
        throw new Error("Ошибка загрузки изображения", { cause: err });
      }
    }

    /**
     * An image is never dropped as empty during normalization.
     *
     * @returns {boolean} always true
     */
    hasValue() {
      return true;
    }

    /**
     * Renders a reference to the binary, or a "[ id ]" text placeholder
     * when no image data is available.
     *
     * @param {FB2Document} doc - provides createElement() / createTextNode()
     * @returns {Element|Text}
     */
    xml(doc) {
      if (this.value) {
        const el = doc.createElement(this.name);
        el.setAttribute("l:href", "#" + this.id);
        return el;
      }
      const id = this.id || "изображение";
      return doc.createTextNode(`[ ${id} ]`);
    }

    /**
     * Renders the <binary> element that holds the base64 image data.
     *
     * @param {FB2Document} doc - provides createElement()
     * @returns {Element}
     */
    xmlBinary(doc) {
      const el = doc.createElement("binary");
      el.setAttribute("id", this.id);
      el.setAttribute("content-type", this.type);
      el.textContent = this.value;
      return el;
    }

    /**
     * @returns {string} file extension (with the dot) for known image types, "" otherwise
     */
    suffix() {
      switch (this.type) {
        case "image/png":
          return ".png";
        case "image/jpeg":
          return ".jpg";
        case "image/gif":
          return ".gif";
        case "image/webp":
          return ".webp";
      }
      return "";
    }

    /**
     * Download hook; delegates to FB2Loader.addJob. Override to change the transport.
     */
    async _load(...args) {
      return FB2Loader.addJob(...args);
    }
  }
  646.  
  /**
   * Fetch-based downloader. All jobs share one AbortController so that
   * they can be cancelled together.
   */
  class FB2Loader {
    /**
     * Downloads `url` and returns its content.
     *
     * @param {string} url - resource to fetch
     * @param {Object} [params]
     * @param {string} [params.method] - HTTP method, "GET" by default
     * @param {string} [params.responseType] - "binary" to get a Blob; otherwise UTF-8 text
     * @param {Function} [params.onprogress] - called with (loaded, total) after each chunk,
     *   but only when the response announces a Content-Length
     * @returns {Promise<Blob|string>}
     * @throws {Error} when the server answers with a non-OK status
     */
    static async addJob(url, params) {
      const options = params || {};
      const response = await fetch(url, {
        method: options.method || "GET",
        credentials: "same-origin",
        signal: this._getSignal()
      });
      if (!response.ok) throw new Error(`Сервер вернул ошибку (${response.status})`);

      const stream = response.body.getReader();
      const contentType = response.headers.get("Content-Type");
      const expected = Number(response.headers.get("Content-Length"));
      const notify = (expected && typeof options.onprogress === "function") ? options.onprogress : null;

      // Read the body chunk by chunk to be able to report progress.
      const parts = [];
      let received = 0;
      for (let step = await stream.read(); !step.done; step = await stream.read()) {
        parts.push(step.value);
        received += step.value.length;
        if (notify) notify(received, expected);
      }

      if (options.responseType === "binary") {
        return new Blob(parts, { type: contentType });
      }

      // Text response: glue the chunks together and decode them as UTF-8.
      const merged = new Uint8Array(received);
      let offset = 0;
      for (const part of parts) {
        merged.set(part, offset);
        offset += part.length;
      }
      return (new TextDecoder("utf-8")).decode(merged);
    }

    /**
     * Aborts every job started with the current controller and forgets it,
     * so that later jobs get a fresh one.
     */
    static abortAll() {
      const active = this._controller;
      if (!active) return;
      active.abort();
      this._controller = null;
    }

    /**
     * @returns {AbortSignal} signal of the shared controller, created on first use
     */
    static _getSignal() {
      if (!this._controller) this._controller = new AbortController();
      return this._controller.signal;
    }
  }
  698.  
  /**
   * Miscellaneous helpers.
   */
  class FB2Utils {
    /**
     * Formats the local date part of `date` as "YYYY-MM-DD"
     * (month and day are zero-padded to two digits).
     *
     * @param {Date} date
     * @returns {string}
     */
    static dateToAtom(date) {
      const month = String(date.getMonth() + 1).padStart(2, "0");
      const day = String(date.getDate()).padStart(2, "0");
      return `${date.getFullYear()}-${month}-${day}`;
    }
  }
  706.  
  /**
   * Raised when an HTML node has no FB2 counterpart.
   * Instances report "UnknownNodeError" as their `name`.
   */
  class FB2UnknownNodeError extends Error {
    /**
     * @param {string} message - description of the offending node
     */
    constructor(message) {
      super(message);
      this.name = "UnknownNodeError";
    }
  }
  713.