HTML2FB2Lib

This is a library for converting HTML to FB2.

当前为 2023-06-20 提交的版本,查看 最新版本

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.cn-greasyfork.org/scripts/468831/1208304/HTML2FB2Lib.js

  1. // ==UserScript==
  2. // @name HTML2FB2Lib
  3. // @namespace 90h.yy.zz
  4. // @version 0.3.0
  5. // @author Ox90
  6. // @match http://*
  7. // @match https://*
  8. // @description This is a library for converting HTML to FB2.
  9. // @run-at document-start
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  13. class FB2Parser {
  14. constructor() {
  15. this._stop = null;
  16. }
  17.  
  18. async parse(htmlNode, fromNode) {
  19. const that = this;
  20. async function _parse(node, from, fb2el, depth) {
  21. let n = from || node.firstChild;
  22. while (n) {
  23. const nn = that.startNode(n, depth);
  24. if (nn) {
  25. const f = that.processElement(FB2Element.fromHTML(nn, false), depth);
  26. if (f) {
  27. if (fb2el) fb2el.children.push(f);
  28. await _parse(nn, null, f, depth + 1);
  29. }
  30. that.endNode(nn, depth);
  31. }
  32. if (that._stop) break;
  33. n = n.nextSibling;
  34. }
  35. }
  36. await _parse(htmlNode, fromNode, null, 0);
  37. return this._stop;
  38. }
  39.  
  40. startNode(node, depth) {
  41. return node;
  42. }
  43.  
  44. processElement(fb2el, depth) {
  45. return fb2el;
  46. }
  47.  
  48. endNode(node, depth) {
  49. }
  50. }
  51.  
  52. class FB2Document {
  53. constructor() {
  54. this.binaries = [];
  55. this.bookAuthors = [];
  56. this.annotation = null;
  57. this.genres = [];
  58. this.chapters = [];
  59. this.xmldoc = null;
  60. }
  61.  
  62. toString() {
  63. this._ensureXMLDocument();
  64. const root = this.xmldoc.documentElement;
  65. this._markBinaries();
  66. root.appendChild(this._makeDescriptionElement());
  67. root.appendChild(this._makeBodyElement());
  68. this._makeBinaryElements().forEach(el => root.appendChild(el));
  69. const res = (new XMLSerializer()).serializeToString(this.xmldoc);
  70. this.xmldoc = null;
  71. return res;
  72. }
  73.  
  74. createElement(name) {
  75. this._ensureXMLDocument();
  76. return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  77. }
  78.  
  79. createTextNode(value) {
  80. this._ensureXMLDocument();
  81. return this.xmldoc.createTextNode(value);
  82. }
  83.  
  84. createDocumentFragment() {
  85. this._ensureXMLDocument();
  86. return this.xmldoc.createDocumentFragment();
  87. }
  88.  
  89. _ensureXMLDocument() {
  90. if (!this.xmldoc) {
  91. this.xmldoc = new DOMParser().parseFromString(
  92. '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
  93. "application/xml"
  94. );
  95. this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
  96. }
  97. }
  98.  
  99. _makeDescriptionElement() {
  100. const desc = this.createElement("description");
  101. // title-info
  102. const t_info = this.createElement("title-info");
  103. desc.appendChild(t_info);
  104. this.genres.forEach(g => t_info.appendChild(g.xml(this)));
  105. (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
  106. t_info.appendChild(a.xml(this));
  107. });
  108. t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
  109. t_info.appendChild(this.annotation.xml(this));
  110. if (this.keywords) t_info.appendChild(this.keywords.xml(this));
  111. if (this.bookDate) {
  112. const el = this.createElement("date");
  113. el.setAttribute("value", FB2Utils.dateToAtom(this.bookDate));
  114. el.textContent = this.bookDate.getFullYear();
  115. t_info.appendChild(el);
  116. }
  117. if (this.coverpage) {
  118. const el = this.createElement("coverpage");
  119. el.appendChild(this.coverpage.xml(this));
  120. t_info.appendChild(el);
  121. }
  122. const lang = this.createElement("lang");
  123. lang.textContent = "ru";
  124. t_info.appendChild(lang);
  125. if (this.sequence) {
  126. const el = this.createElement("sequence");
  127. el.setAttribute("name", this.sequence.name);
  128. if (this.sequence.number) el.setAttribute("number", this.sequence.number);
  129. t_info.appendChild(el);
  130. }
  131. // document-info
  132. const d_info = this.createElement("document-info");
  133. desc.appendChild(d_info);
  134. d_info.appendChild((new FB2Author("Ox90")).xml(this));
  135. if (this.programName) d_info.appendChild((new FB2Element("program-used", this.programName)).xml(this));
  136. d_info.appendChild((() => {
  137. const f_time = new Date();
  138. const el = this.createElement("date");
  139. el.setAttribute("value", FB2Utils.dateToAtom(f_time));
  140. el.textContent = f_time.toUTCString();
  141. return el;
  142. })());
  143. if (this.sourceURL) {
  144. d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
  145. }
  146. d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
  147. d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
  148. return desc;
  149. }
  150.  
  151. _makeBodyElement() {
  152. const body = this.createElement("body");
  153. const title = this.createElement("title");
  154. body.appendChild(title);
  155. if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
  156. title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
  157. this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
  158. return body;
  159. }
  160.  
  161. _markBinaries() {
  162. let idx = 0;
  163. this.binaries.forEach(img => {
  164. if (!img.id) img.id = "image" + (++idx) + img.suffix();
  165. });
  166. }
  167.  
  168. _makeBinaryElements() {
  169. return this.binaries.reduce((list, img) => {
  170. if (img.value) list.push(img.xmlBinary(this));
  171. return list;
  172. }, []);
  173. }
  174.  
  175. _genBookId() {
  176. let str = this.sourceURL || this.bookTitle;
  177. let hash = 0;
  178. const slen = str.length;
  179. for (let i = 0; i < slen; ++i) {
  180. const ch = str.charCodeAt(i);
  181. hash = ((hash << 5) - hash) + ch;
  182. hash = hash & hash; // Convert to 32bit integer
  183. }
  184. return "rbe_" + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  185. }
  186. }
  187.  
  188. class FB2Element {
  189. constructor(name, value) {
  190. this.name = name;
  191. this.value = value !== undefined ? value : null;
  192. this.children = [];
  193. }
  194.  
  195. static fromHTML(node, recursive) {
  196. let fb2el = null;
  197. const names = new Map([
  198. [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
  199. [ "S", "strike" ], [ "DEL", "strike" ], [ "STRIKE", "strike" ],
  200. [ "STRONG", "strong" ], [ "BLOCKQUOTE", "cite" ],
  201. [ "#comment", null ]
  202. ]);
  203. const node_name = node.nodeName;
  204. if (names.has(node_name)) {
  205. const name = names.get(node_name);
  206. if (!name) return null;
  207. fb2el = new FB2Element(names.get(node_name));
  208. } else {
  209. switch (node_name) {
  210. case "#text":
  211. return new FB2Text(node.textContent);
  212. case "SPAN":
  213. fb2el = new FB2Text();
  214. break;
  215. case "P":
  216. case "LI":
  217. fb2el = new FB2Paragraph();
  218. break;
  219. case "SUBTITLE":
  220. fb2el = new FB2Subtitle();
  221. break;
  222. case "A":
  223. fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
  224. break;
  225. case "OL":
  226. fb2el = new FB2OrderedList();
  227. break;
  228. case "BR":
  229. return new FB2EmptyLine();
  230. case "HR":
  231. return new FB2Paragraph("---");
  232. case "IMG":
  233. return new FB2Image(node.src);
  234. default:
  235. throw new FB2UnknownNodeError("Неизвестный HTML блок: " + node.nodeName);
  236. }
  237. }
  238. if (recursive) fb2el.appendContentFromHTML(node);
  239. return fb2el;
  240. }
  241.  
  242. hasValue() {
  243. return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  244. }
  245.  
  246. setContentFromHTML(data, fb2doc, log) {
  247. this.children = [];
  248. this.appendContentFromHTML(data, fb2doc, log);
  249. }
  250.  
  251. appendContentFromHTML(data, fb2doc, log) {
  252. for (const node of data.childNodes) {
  253. let fe = FB2Element.fromHTML(node, true);
  254. if (fe) this.children.push(fe);
  255. }
  256. }
  257.  
  258. normalize() {
  259. const res_list = [ this ];
  260. let cur_el = this;
  261. const children = this.children;
  262. this.children = [];
  263. children.forEach(el => {
  264. if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
  265. res_list.push(el);
  266. cur_el = new this.constructor();
  267. res_list.push(cur_el);
  268. } else {
  269. el.normalize().forEach(el => {
  270. if (!el.value && el.children.length === 1 && el.name === el.children[0].name) {
  271. el = el.children[0];
  272. }
  273. if (el.hasValue()) cur_el.children.push(el);
  274. });
  275. }
  276. });
  277. return res_list;
  278. }
  279.  
  280. xml(doc) {
  281. const el = doc.createElement(this.name);
  282. if (this.value !== null) el.textContent = this.value;
  283. this.children.forEach(ch => el.appendChild(ch.xml(doc)));
  284. return el;
  285. }
  286. }
  287.  
  288. class FB2BlockElement extends FB2Element {
  289. normalize() {
  290. // Предварительная нормализация
  291. this.children = this.children.reduce((list, ch) => {
  292. ch.normalize().forEach(cc => list.push(cc));
  293. return list;
  294. }, []);
  295. // Удалить пустоты в конце блока
  296. while (this.children.length) {
  297. const el = this.children[this.children.length - 1];
  298. if (el.name === "text" && typeof(el.value) === "string") {
  299. el.value = el.value.trimEnd() || null;
  300. }
  301. if (!el.hasValue()) {
  302. this.children.pop();
  303. continue;
  304. }
  305. break;
  306. }
  307. // Удалить пустоты в начале блока
  308. while (this.children.length) {
  309. const el = this.children[0];
  310. if (el.name === "text" && typeof(el.value) === "string") {
  311. el.value = el.value.trimStart() || null;
  312. }
  313. if (!el.hasValue()) {
  314. this.children.shift();
  315. continue;
  316. }
  317. break;
  318. }
  319. // Окончательная нормализация
  320. return super.normalize();
  321. }
  322. }
  323.  
  324. /**
  325. * FB2 элемент верхнего уровня section
  326. */
  327. class FB2Chapter extends FB2Element {
  328. constructor(title) {
  329. super("section");
  330. this.title = title;
  331. }
  332.  
  333. normalize() {
  334. // Обернуть текстовые ноды в параграфы и удалить пустые элементы
  335. this.children = this.children.reduce((list, el) => {
  336. if (el instanceof FB2Text) {
  337. const pe = new FB2Paragraph();
  338. pe.children.push(el);
  339. el = pe;
  340. }
  341. el.normalize().forEach(el => {
  342. if (el.hasValue()) list.push(el);
  343. });
  344. return list;
  345. }, []);
  346. return [ this ];
  347. }
  348.  
  349. xml(doc) {
  350. const el = super.xml(doc);
  351. if (this.title) {
  352. const t_el = doc.createElement("title");
  353. const p_el = doc.createElement("p");
  354. p_el.textContent = this.title;
  355. t_el.appendChild(p_el);
  356. el.prepend(t_el);
  357. }
  358. return el;
  359. }
  360. }
  361.  
  362. /**
  363. * FB2 элемент верхнего уровня annotation
  364. */
  365. class FB2Annotation extends FB2Element {
  366. constructor() {
  367. super("annotation");
  368. }
  369.  
  370. normalize() {
  371. // Обернуть неформатированный текст, разделенный <br> в параграфы
  372. let lp = null;
  373. const newParagraph = list => {
  374. lp = new FB2Paragraph();
  375. list.push(lp);
  376. };
  377. this.children = this.children.reduce((list, el) => {
  378. if (el.name === "empty-line") {
  379. newParagraph(list);
  380. } else if (el instanceof FB2BlockElement) {
  381. list.push(el);
  382. lp = null;
  383. } else {
  384. if (!lp) newParagraph(list);
  385. lp.children.push(el);
  386. }
  387. return list;
  388. }, []);
  389. // Запустить собственную нормализацию дочерних элементов
  390. // чтобы предотвратить их дальнейшее всплытие
  391. this.children = this.children.reduce((list, el) => {
  392. el.normalize().forEach(el => {
  393. if (el.hasValue()) list.push(el);
  394. });
  395. return list;
  396. }, []);
  397. }
  398. }
  399.  
  400. class FB2Subtitle extends FB2BlockElement {
  401. constructor(value) {
  402. super("subtitle", value);
  403. }
  404. }
  405.  
  406. class FB2Paragraph extends FB2BlockElement {
  407. constructor(value) {
  408. super("p", value);
  409. }
  410. }
  411.  
  412. class FB2EmptyLine extends FB2Element {
  413. constructor() {
  414. super("empty-line");
  415. }
  416.  
  417. hasValue() {
  418. return true;
  419. }
  420. }
  421.  
  422. class FB2Text extends FB2Element {
  423. constructor(value) {
  424. super("text", value);
  425. }
  426.  
  427. xml(doc) {
  428. if (!this.value && this.children.length) {
  429. let fr = doc.createDocumentFragment();
  430. for (const ch of this.children) {
  431. fr.appendChild(ch.xml(doc));
  432. }
  433. return fr;
  434. }
  435. return doc.createTextNode(this.value);
  436. }
  437. }
  438.  
  439. class FB2Link extends FB2Element {
  440. constructor(href) {
  441. super("a");
  442. this.href = href;
  443. }
  444.  
  445. xml(doc) {
  446. const el = super.xml(doc);
  447. el.setAttribute("l:href", this.href);
  448. return el;
  449. }
  450. }
  451.  
  452. class FB2OrderedList extends FB2Element {
  453. constructor() {
  454. super("list");
  455. }
  456.  
  457. xml(doc) {
  458. const fr = doc.createDocumentFragment();
  459. let pos = 0;
  460. for (const ch of this.children) {
  461. const ch_el = ch.xml(doc);
  462. if (ch.hasValue()) {
  463. ++pos;
  464. ch_el.prepend(`${pos}. `);
  465. }
  466. fr.appendChild(ch_el);
  467. }
  468. return fr;
  469. }
  470. }
  471.  
  472. class FB2Author extends FB2Element {
  473. constructor(s) {
  474. super("author");
  475. const a = s.split(" ");
  476. switch (a.length) {
  477. case 1:
  478. this.nickName = s;
  479. break;
  480. case 2:
  481. this.firstName = a[0];
  482. this.lastName = a[1];
  483. break;
  484. default:
  485. this.firstName = a[0];
  486. this.middleName = a.slice(1, -1).join(" ");
  487. this.lastName = a[a.length - 1];
  488. break;
  489. }
  490. this.homePage = null;
  491. }
  492.  
  493. hasValue() {
  494. return (!!this.firstName || !!this.lastName || !!this.middleName);
  495. }
  496.  
  497. toString() {
  498. if (!this.firstName) return this.nickName;
  499. return [ this.firstName, this.middleName, this.lastName ].reduce((list, name) => {
  500. if (name) list.push(name);
  501. return list;
  502. }, []).join(" ");
  503. }
  504.  
  505. xml(doc) {
  506. let a_el = super.xml(doc);
  507. [
  508. [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
  509. [ "last-name", this.lastName ], [ "home-page", this.homePage ],
  510. [ "nickname", this.nickName ]
  511. ].forEach(it => {
  512. if (it[1]) {
  513. const e = doc.createElement(it[0]);
  514. e.textContent = it[1];
  515. a_el.appendChild(e);
  516. }
  517. });
  518. return a_el;
  519. }
  520. }
  521.  
  522. class FB2Image extends FB2Element {
  523. constructor(value) {
  524. super("image");
  525. if (typeof(value) === "string") {
  526. this.url = value;
  527. } else {
  528. this.value = value;
  529. }
  530. }
  531.  
  532. async load(onprogress) {
  533. if (this.url) {
  534. const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
  535. this.type = bin.type;
  536. this.size = bin.size;
  537. return new Promise((resolve, reject) => {
  538. const reader = new FileReader();
  539. reader.addEventListener("loadend", (event) => resolve(event.target.result));
  540. reader.readAsDataURL(bin);
  541. }).then(base64str => {
  542. this.value = base64str.substr(base64str.indexOf(",") + 1);
  543. }).catch(err => {
  544. throw new Error("Ошибка загрузки изображения");
  545. });
  546. }
  547. }
  548.  
  549. hasValue() {
  550. return true;
  551. }
  552.  
  553. xml(doc) {
  554. if (this.value) {
  555. const el = doc.createElement(this.name);
  556. el.setAttribute("l:href", "#" + this.id);
  557. return el
  558. }
  559. const id = this.id || "изображение";
  560. return doc.createTextNode(`[ ${id} ]`);
  561. }
  562.  
  563. xmlBinary(doc) {
  564. const el = doc.createElement("binary");
  565. el.setAttribute("id", this.id);
  566. el.setAttribute("content-type", this.type);
  567. el.textContent = this.value
  568. return el;
  569. }
  570.  
  571. suffix() {
  572. switch (this.type) {
  573. case "image/png":
  574. return ".png";
  575. case "image/jpeg":
  576. return ".jpg";
  577. case "image/webp":
  578. return ".webp";
  579. }
  580. return "";
  581. }
  582.  
  583. async _load(...args) {
  584. return FB2Loader.addJob(...args);
  585. }
  586. }
  587.  
  588. class FB2Loader {
  589. static async addJob(url, params) {
  590. params ||= {};
  591. const fp = {};
  592. fp.method = params.method || "GET";
  593. fp.credentials = "same-origin";
  594. fp.signal = this._getSignal();
  595. const resp = await fetch(url, fp);
  596. if (!resp.ok) throw new Error(`Сервер вернул ошибку (${resp.status})`);
  597. const reader = resp.body.getReader();
  598. const type = resp.headers.get("Content-Type");
  599. const total = +resp.headers.get("Content-Length");
  600. let loaded = 0;
  601. const chunks = [];
  602. const onprogress = (total && typeof(params.onprogress) === "function") ? params.onprogress : null;
  603. while (true) {
  604. const { done, value } = await reader.read();
  605. if (done) break;
  606. chunks.push(value);
  607. loaded += value.length;
  608. if (onprogress) onprogress(loaded, total);
  609. }
  610. switch (params.responseType) {
  611. case "binary":
  612. return new Blob(chunks, { type: type });
  613. default:
  614. {
  615. let pos = 0;
  616. const data = new Uint8Array(loaded);
  617. for (let ch of chunks) {
  618. data.set(ch, pos);
  619. pos += ch.length;
  620. }
  621. return (new TextDecoder("utf-8")).decode(data);
  622. }
  623. }
  624. }
  625.  
  626. static abortAll() {
  627. if (this._controller) {
  628. this._controller.abort();
  629. this._controller = null;
  630. }
  631. }
  632.  
  633. static _getSignal() {
  634. let controller = this._controller;
  635. if (!controller) this._controller = controller = new AbortController();
  636. return controller.signal;
  637. }
  638. }
  639.  
  640. class FB2Utils {
  641. static dateToAtom(date) {
  642. const m = date.getMonth() + 1;
  643. const d = date.getDate();
  644. return "" + date.getFullYear() + '-' + (m < 10 ? "0" : "") + m + "-" + (d < 10 ? "0" : "") + d;
  645. }
  646. }
  647.  
  648. class FB2UnknownNodeError extends Error {
  649. constructor(message) {
  650. super(message);
  651. this.name = "UnknownNodeError";
  652. }
  653. }
  654.