HTML2FB2Lib

This is a library for converting HTML to FB2.

当前为 2023-06-16 提交的版本,查看最新版本。

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库,请加入元指令 // @require https://update.cn-greasyfork.org/scripts/468831/1206302/HTML2FB2Lib.js

  1. // ==UserScript==
  2. // @name HTML2FB2Lib
  3. // @namespace 90h.yy.zz
  4. // @version 0.1.0
  5. // @author Ox90
  6. // @match http://*
  7. // @match https://*
  8. // @description This is a library for converting HTML to FB2.
  9. // @run-at document-start
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  13. class FB2Parser {
  14. constructor() {
  15. this._stop = null;
  16. }
  17.  
  18. async parse(htmlNode, fromNode) {
  19. const that = this;
  20. async function _parse(node, from, fb2el, depth) {
  21. let n = from || node.firstChild;
  22. while (n) {
  23. if (that.startNode(n, depth)) {
  24. const f = that.processElement(FB2Element.fromHTML(n, false), depth);
  25. if (f) {
  26. if (fb2el) fb2el.children.push(f);
  27. _parse(n, null, f, depth + 1);
  28. }
  29. that.endNode(n, depth);
  30. }
  31. if (that._stop) break;
  32. n = n.nextSibling;
  33. }
  34. }
  35. await _parse(htmlNode, fromNode, null, 0);
  36. return this._stop;
  37. }
  38.  
  39. startNode(node, depth) {
  40. return true;
  41. }
  42.  
  43. processElement(fb2el, depth) {
  44. return fb2el;
  45. }
  46.  
  47. endNode(node, depth) {
  48. }
  49. }
  50.  
  51. class FB2Document {
  52. constructor() {
  53. this.binaries = [];
  54. this.bookAuthors = [];
  55. this.annotation = null;
  56. this.genres = [];
  57. this.chapters = [];
  58. this.xmldoc = null;
  59. }
  60.  
  61. toString() {
  62. this._ensureXMLDocument();
  63. const root = this.xmldoc.documentElement;
  64. this._markBinaries();
  65. root.appendChild(this._makeDescriptionElement());
  66. root.appendChild(this._makeBodyElement());
  67. this._makeBinaryElements().forEach(el => root.appendChild(el));
  68. const res = (new XMLSerializer()).serializeToString(this.xmldoc);
  69. this.xmldoc = null;
  70. return res;
  71. }
  72.  
  73. createElement(name) {
  74. this._ensureXMLDocument();
  75. return this.xmldoc.createElementNS(this.xmldoc.documentElement.namespaceURI, name);
  76. }
  77.  
  78. createTextNode(value) {
  79. this._ensureXMLDocument();
  80. return this.xmldoc.createTextNode(value);
  81. }
  82.  
  83. _ensureXMLDocument() {
  84. if (!this.xmldoc) {
  85. this.xmldoc = new DOMParser().parseFromString(
  86. '<?xml version="1.0" encoding="UTF-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"/>',
  87. "application/xml"
  88. );
  89. this.xmldoc.documentElement.setAttribute("xmlns:l", "http://www.w3.org/1999/xlink");
  90. }
  91. }
  92.  
  93. _makeDescriptionElement() {
  94. const desc = this.createElement("description");
  95. // title-info
  96. const t_info = this.createElement("title-info");
  97. desc.appendChild(t_info);
  98. this.genres.forEach(g => t_info.appendChild(g.xml(this)));
  99. (this.bookAuthors.length ? this.bookAuthors : [ new FB2Author("Неизвестный автор") ]).forEach(a => {
  100. t_info.appendChild(a.xml(this));
  101. });
  102. t_info.appendChild((new FB2Element("book-title", this.bookTitle)).xml(this));
  103. t_info.appendChild(this.annotation.xml(this));
  104. if (this.keywords) t_info.appendChild(this.keywords.xml(this));
  105. if (this.bookDate) {
  106. const el = this.createElement("date");
  107. el.setAttribute("value", this.bookDate.toAtomDate());
  108. el.textContent = this.bookDate.getFullYear();
  109. t_info.appendChild(el);
  110. }
  111. if (this.coverpage) {
  112. const el = this.createElement("coverpage");
  113. el.appendChild(this.coverpage.xml(this));
  114. t_info.appendChild(el);
  115. }
  116. const lang = this.createElement("lang");
  117. lang.textContent = "ru";
  118. t_info.appendChild(lang);
  119. if (this.sequence) {
  120. const el = this.createElement("sequence");
  121. el.setAttribute("name", this.sequence.name);
  122. if (this.sequence.number) el.setAttribute("number", this.sequence.number);
  123. t_info.appendChild(el);
  124. }
  125. // document-info
  126. const d_info = this.createElement("document-info");
  127. desc.appendChild(d_info);
  128. d_info.appendChild((new FB2Author("Ox90")).xml(this));
  129. d_info.appendChild((new FB2Element("program-used", PROGRAM_NAME + " v" + GM_info.script.version)).xml(this));
  130. d_info.appendChild((() => {
  131. const f_time = new Date();
  132. const el = this.createElement("date");
  133. el.setAttribute("value", f_time.toAtomDate());
  134. el.textContent = f_time.toUTCString();
  135. return el;
  136. })());
  137. if (this.sourceURL) {
  138. d_info.appendChild((new FB2Element("src-url", this.sourceURL)).xml(this));
  139. }
  140. d_info.appendChild((new FB2Element("id", this._genBookId())).xml(this));
  141. d_info.appendChild((new FB2Element("version", "1.0")).xml(this));
  142. return desc;
  143. }
  144.  
  145. _makeBodyElement() {
  146. const body = this.createElement("body");
  147. const title = this.createElement("title");
  148. body.appendChild(title);
  149. if (this.bookAuthors.length) title.appendChild((new FB2Paragraph(this.bookAuthors.join(", "))).xml(this));
  150. title.appendChild((new FB2Paragraph(this.bookTitle)).xml(this));
  151. this.chapters.forEach(ch => body.appendChild(ch.xml(this)));
  152. return body;
  153. }
  154.  
  155. _markBinaries() {
  156. let idx = 0;
  157. this.binaries.forEach(img => {
  158. if (!img.id) img.id = "image" + (++idx) + img.suffix();
  159. });
  160. }
  161.  
  162. _makeBinaryElements() {
  163. return this.binaries.reduce((list, img) => {
  164. if (img.value) list.push(img.xmlBinary(this));
  165. return list;
  166. }, []);
  167. }
  168.  
  169. _genBookId() {
  170. let str = this.sourceURL;
  171. let hash = 0;
  172. const slen = str.length;
  173. for (let i = 0; i < slen; ++i) {
  174. const ch = str.charCodeAt(i);
  175. hash = ((hash << 5) - hash) + ch;
  176. hash = hash & hash; // Convert to 32bit integer
  177. }
  178. return "rbe_" + Math.abs(hash).toString() + (hash > 0 ? "1" : "");
  179. }
  180. }
  181.  
  182. class FB2Element {
  183. constructor(name, value) {
  184. this.name = name;
  185. this.value = value !== undefined ? value : null;
  186. this.children = [];
  187. }
  188.  
  189. static fromHTML(node, recursive) {
  190. let fb2el = null;
  191. const names = new Map([
  192. [ "U", "emphasis" ], [ "EM", "emphasis" ], [ "EMPHASIS", "emphasis" ], [ "I", "emphasis" ],
  193. [ "S", "strike" ], [ "DEL", "strike" ], [ "STRIKE", "strike" ],
  194. [ "STRONG", "strong" ], [ "BLOCKQUOTE", "cite" ],
  195. [ "#comment", null ]
  196. ]);
  197. const node_name = node.nodeName;
  198. if (names.has(node_name)) {
  199. const name = names.get(node_name);
  200. if (!name) return null;
  201. fb2el = new FB2Element(names.get(node_name));
  202. } else {
  203. switch (node_name) {
  204. case "#text":
  205. return new FB2Text(node.textContent);
  206. case "P":
  207. fb2el = new FB2Paragraph();
  208. break;
  209. case "SUBTITLE":
  210. fb2el = new FB2Subtitle();
  211. break;
  212. case "A":
  213. fb2el = new FB2Link(node.href || node.getAttribute("l:href"));
  214. break;
  215. case "BR":
  216. return new FB2EmptyLine();
  217. case "HR":
  218. return new FB2Paragraph("---");
  219. case "IMG":
  220. return new FB2Image(node.src);
  221. default:
  222. throw new Error("Неизвестный HTML блок: " + node.nodeName);
  223. }
  224. }
  225. if (recursive) fb2el.appendContentFromHTML(node);
  226. return fb2el;
  227. }
  228.  
  229. hasValue() {
  230. return ((this.value !== undefined && this.value !== null) || !!this.children.length);
  231. }
  232.  
  233. setContentFromHTML(data, fb2doc, log) {
  234. this.children = [];
  235. this.appendContentFromHTML(data, fb2doc, log);
  236. }
  237.  
  238. appendContentFromHTML(data, fb2doc, log) {
  239. for (const node of data.childNodes) {
  240. let fe = FB2Element.fromHTML(node, true);
  241. if (fe) this.children.push(fe);
  242. }
  243. }
  244.  
  245. normalize() {
  246. const res_list = [ this ];
  247. let cur_el = this;
  248. const children = this.children;
  249. this.children = [];
  250. children.forEach(el => {
  251. if (el instanceof FB2EmptyLine || el instanceof FB2Subtitle) {
  252. res_list.push(el);
  253. cur_el = new this.constructor();
  254. res_list.push(cur_el);
  255. } else {
  256. el.normalize().forEach(el => {
  257. cur_el.children.push(el);
  258. });
  259. }
  260. });
  261. return res_list;
  262. }
  263.  
  264. xml(doc) {
  265. const el = doc.createElement(this.name);
  266. if (this.value !== null) el.textContent = this.value;
  267. this.children.forEach(ch => el.appendChild(ch.xml(doc)));
  268. return el;
  269. }
  270. }
  271.  
  272. class FB2BlockElement extends FB2Element {
  273. normalize() {
  274. // Удалить пробельные символы в конце блока
  275. while (this.children.length) {
  276. const el = this.children[this.children.length - 1];
  277. if (el.name === "text" && typeof(el.value) === "string") {
  278. el.value = el.value.trimEnd();
  279. if (!el.value) {
  280. this.children.pop();
  281. continue;
  282. }
  283. }
  284. break;
  285. }
  286. // Удалить пробельные символы в начале блока
  287. while (this.children.length) {
  288. const el = this.children[0];
  289. if (el.name === "text" && typeof(el.value) === "string") {
  290. el.value = el.value.trimStart();
  291. if (!el.value) {
  292. this.children.shift();
  293. continue;
  294. }
  295. }
  296. break;
  297. }
  298. //--
  299. return super.normalize();
  300. }
  301. }
  302.  
  303. /**
  304. * FB2 элемент верхнего уровня section
  305. */
  306. class FB2Chapter extends FB2Element {
  307. constructor(title) {
  308. super("section");
  309. this.title = title;
  310. }
  311.  
  312. normalize() {
  313. // Обернуть текстовые ноды в параграфы и удалить пустые элементы
  314. this.children = this.children.reduce((list, el) => {
  315. if (el instanceof FB2Text) {
  316. const pe = new FB2Paragraph();
  317. pe.children.push(el);
  318. el = pe;
  319. }
  320. el.normalize().forEach(el => {
  321. if (el.hasValue()) list.push(el);
  322. });
  323. return list;
  324. }, []);
  325. return [ this ];
  326. }
  327.  
  328. xml(doc) {
  329. const el = super.xml(doc);
  330. if (this.title) {
  331. const t_el = doc.createElement("title");
  332. const p_el = doc.createElement("p");
  333. p_el.textContent = this.title;
  334. t_el.appendChild(p_el);
  335. el.prepend(t_el);
  336. }
  337. return el;
  338. }
  339. }
  340.  
  341. /**
  342. * FB2 элемент верхнего уровня annotation
  343. */
  344. class FB2Annotation extends FB2Element {
  345. constructor() {
  346. super("annotation");
  347. }
  348.  
  349. normalize() {
  350. // Обернуть неформатированный текст, разделенный <br> в параграфы
  351. let lp = null;
  352. const newParagraph = list => {
  353. lp = new FB2Paragraph();
  354. list.push(lp);
  355. };
  356. this.children = this.children.reduce((list, el) => {
  357. if (el.name === "empty-line") {
  358. newParagraph(list);
  359. } else if (el instanceof FB2BlockElement) {
  360. list.push(el);
  361. lp = null;
  362. } else {
  363. if (!lp) newParagraph(list);
  364. lp.children.push(el);
  365. }
  366. return list;
  367. }, []);
  368. // Запустить собственную нормализацию дочерних элементов
  369. // чтобы предотвратить их дальнейшее всплытие
  370. this.children = this.children.reduce((list, el) => {
  371. el.normalize().forEach(el => {
  372. if (el.hasValue()) list.push(el);
  373. });
  374. return list;
  375. }, []);
  376. }
  377. }
  378.  
  379. class FB2Subtitle extends FB2BlockElement {
  380. constructor(value) {
  381. super("subtitle", value);
  382. }
  383. }
  384.  
  385. class FB2Paragraph extends FB2BlockElement {
  386. constructor(value) {
  387. super("p", value);
  388. }
  389. }
  390.  
  391. class FB2EmptyLine extends FB2Element {
  392. constructor() {
  393. super("empty-line");
  394. }
  395.  
  396. hasValue() {
  397. return true;
  398. }
  399. }
  400.  
  401. class FB2Text extends FB2Element {
  402. constructor(value) {
  403. super("text", value);
  404. }
  405.  
  406. xml(doc) {
  407. return doc.createTextNode(this.value);
  408. }
  409. }
  410.  
  411. class FB2Link extends FB2Element {
  412. constructor(href) {
  413. super("a");
  414. this.href = href;
  415. }
  416.  
  417. xml(doc) {
  418. const el = super.xml(doc);
  419. el.setAttribute("l:href", this.href);
  420. return el;
  421. }
  422. }
  423.  
  424. class FB2Author extends FB2Element {
  425. constructor(s) {
  426. super("author");
  427. const a = s.split(" ");
  428. switch (a.length) {
  429. case 1:
  430. this.nickName = s;
  431. break;
  432. case 2:
  433. this.firstName = a[0];
  434. this.lastName = a[1];
  435. break;
  436. default:
  437. this.firstName = a[0];
  438. this.middleName = a.slice(1, -1).join(" ");
  439. this.lastName = a[a.length - 1];
  440. break;
  441. }
  442. this.homePage = null;
  443. }
  444.  
  445. hasValue() {
  446. return (!!this.firstName || !!this.lastName || !!this.middleName);
  447. }
  448.  
  449. toString() {
  450. if (!this.firstName) return this.nickName;
  451. return [ this.firstName, this.middleName, this.lastName ].reduce((list, name) => {
  452. if (name) list.push(name);
  453. return list;
  454. }, []).join(" ");
  455. }
  456.  
  457. xml(doc) {
  458. let a_el = super.xml(doc);
  459. [
  460. [ "first-name", this.firstName ], [ "middle-name", this.middleName ],
  461. [ "last-name", this.lastName ], [ "home-page", this.homePage ],
  462. [ "nickname", this.nickName ]
  463. ].forEach(it => {
  464. if (it[1]) {
  465. const e = doc.createElement(it[0]);
  466. e.textContent = it[1];
  467. a_el.appendChild(e);
  468. }
  469. });
  470. return a_el;
  471. }
  472. }
  473.  
  474. class FB2Image extends FB2Element {
  475. constructor(value) {
  476. super("image");
  477. if (typeof(value) === "string") {
  478. this.url = value;
  479. } else {
  480. this.value = value;
  481. }
  482. }
  483.  
  484. async load(onprogress) {
  485. if (this.url) {
  486. const bin = await this._load(this.url, { responseType: "binary", onprogress: onprogress });
  487. this.type = bin.type;
  488. this.size = bin.size;
  489. return new Promise((resolve, reject) => {
  490. const reader = new FileReader();
  491. reader.addEventListener("loadend", (event) => resolve(event.target.result));
  492. reader.readAsDataURL(bin);
  493. }).then(base64str => {
  494. this.value = base64str.substr(base64str.indexOf(",") + 1);
  495. }).catch(err => {
  496. throw new Error("Ошибка загрузки изображения");
  497. });
  498. }
  499. }
  500.  
  501. xml(doc) {
  502. if (this.value) {
  503. const el = doc.createElement(this.name);
  504. el.setAttribute("l:href", "#" + this.id);
  505. return el
  506. }
  507. const id = this.id || "изображение";
  508. return doc.createTextNode(`[ ${id} ]`);
  509. }
  510.  
  511. xmlBinary(doc) {
  512. const el = doc.createElement("binary");
  513. el.setAttribute("id", this.id);
  514. el.setAttribute("content-type", this.type);
  515. el.textContent = this.value
  516. return el;
  517. }
  518.  
  519. suffix() {
  520. switch (this.type) {
  521. case "image/png":
  522. return ".png";
  523. case "image/jpeg":
  524. return ".jpg";
  525. case "image/webp":
  526. return ".webp";
  527. }
  528. return "";
  529. }
  530.  
  531. async _load(...args) {
  532. return FB2Loader.addJob(...args);
  533. }
  534. }
  535.  
  536. class FB2Loader {
  537. static async addJob(url, params) {
  538. params ||= {};
  539. const fp = {};
  540. fp.method = params.method || "GET";
  541. fp.credentials = "same-origin";
  542. fp.signal = this._getSignal();
  543. const resp = await fetch(url, fp);
  544. if (!resp.ok) throw new Error(`Сервер вернул ошибку (${resp.status})`);
  545. const reader = resp.body.getReader();
  546. const type = resp.headers.get("Content-Type");
  547. const total = +resp.headers.get("Content-Length");
  548. let loaded = 0;
  549. const chunks = [];
  550. const onprogress = (total && typeof(params.onprogress) === "function") ? params.onprogress : null;
  551. while (true) {
  552. const { done, value } = await reader.read();
  553. if (done) break;
  554. chunks.push(value);
  555. loaded += value.length;
  556. if (onprogress) onprogress(loaded, total);
  557. }
  558. switch (params.responseType) {
  559. case "binary":
  560. return new Blob(chunks, { type: type });
  561. default:
  562. {
  563. let pos = 0;
  564. const data = new Uint8Array(loaded);
  565. for (let ch of chunks) {
  566. data.set(ch, pos);
  567. pos += ch.length;
  568. }
  569. return (new TextDecoder("utf-8")).decode(data);
  570. }
  571. }
  572. }
  573.  
  574. static abortAll() {
  575. if (this._controller) {
  576. this._controller.abort();
  577. this._controller = null;
  578. }
  579. }
  580.  
  581. static _getSignal() {
  582. let controller = this._controller;
  583. if (!controller) this._controller = controller = new AbortController();
  584. return controller.signal;
  585. }
  586. }
  587.