OverDrive Transcriber

Transcribes books you read on OverDrive for offline reading

目前为 2018-04-25 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name OverDrive Transcriber
  3. // @description Transcribes books you read on OverDrive for offline reading
  4. // @namespace Violentmonkey Scripts
  5. // @match *://*.overdrive.com/*
  6. // @match *://*.greasyfork.org/*
  7. // @grant GM_setValue
  8. // @grant GM_getValue
  9. // @grant GM_listValues
  10. // @grant GM_deleteValue
  11. // @grant GM_xmlhttpRequest
  12. // @run-at document-start
  13. // @version 0.1.3
  14. // @author qsniyg
  15. // ==/UserScript==
  16.  
  17. (function() {
  // Tests whether a URL points at a content document: a .htm/.html/.xhtm/.xhtml
  // file requested with a "cmpt" query parameter.
  var content_html_regex = /\.x?html?\?cmpt=/;
  // Captures (group 1) the file name of such a document, without its directory
  // or extension.
  var content_html_match = /^(?:.*\/)?([^/.]*)\.x?html?/;

  // Number of image downloads that have been started and have not finished yet.
  var img_requests = 0;

  // Every user-configurable setting: display name and the value used when the
  // user has never stored one.
  var default_options = {
    images: {
      name: "Include images",
      default: true
    }
  };

  // Effective settings: the stored value (kept as JSON under "SETTINGS:<name>")
  // when there is one, the default otherwise.
  var options = {};
  for (var option in default_options) {
    var value = GM_getValue("SETTINGS:" + option);
    options[option] = (value === undefined)
      ? default_options[option].default
      : JSON.parse(value);
  }
  /**
   * Runs `f` as soon as the DOM is available.
   *
   * When the document is already "interactive" or "complete" the callback is
   * invoked immediately; otherwise it is registered for DOMContentLoaded.
   *
   * @param {Function} f  Callback to run.
   */
  function onload(f) {
    var dom_ready = document.readyState === "interactive" ||
                    document.readyState === "complete";
    if (!dom_ready) {
      document.addEventListener("DOMContentLoaded", f, false);
      return;
    }
    f();
  }
  /**
   * Applies the script's button look to an element via inline styles.
   *
   * @param {Element} el  Element to style (the callers pass <a> elements).
   * @param {string}  bg  CSS background value for the button.
   */
  function makebutton(el, bg) {
    var css = el.style;
    // Property/value pairs, applied in this order.
    var rules = [
      ["padding", ".5em 1em"],
      ["background", bg],
      ["color", "white"],
      ["textDecoration", "none"],
      ["display", "inline-block"],
      ["margin", ".3em .5em"]
    ];
    for (var r = 0; r < rules.length; r++) {
      css[rules[r][0]] = rules[r][1];
    }
  }
  /**
   * Resolves the reference `x` against the base URL `url`.
   * Thin wrapper that delegates to urljoin().
   *
   * @param {string} url  Base URL.
   * @param {string} x    URL reference to resolve.
   * @returns {string} Whatever urljoin(url, x) returns.
   */
  var fullurl = function(url, x) {
    var resolved = urljoin(url, x);
    return resolved;
  };
  /**
   * Resolves the URL reference `b` against the base URL `a`.
   *
   * Handled forms of `b`:
   *   - empty / null / undefined   -> `a` unchanged
   *   - has a scheme ("http://..", "data:..") -> `b` unchanged
   *   - "#fragment"                -> `a` with its fragment replaced
   *   - "?query"                   -> `a` with its query and fragment replaced
   *   - "//host/path"              -> scheme of `a` + `b`
   *   - "/path"                    -> origin of `a` + `b`
   *   - anything else              -> relative to the directory of `a`
   *
   * The query string and fragment of `a` are ignored when locating its
   * directory, so a "/" inside them cannot be mistaken for a path separator.
   * "." and ".." segments are not collapsed.
   *
   * @param {string} a  Base URL. A base without "scheme://" is joined as a
   *                    plain path instead of throwing.
   * @param {string} b  Reference to resolve.
   * @returns {string} The joined URL.
   */
  function urljoin(a, b) {
    if (b === undefined || b === null || b.length === 0)
      return a;

    // Anchored scheme test: only a scheme at the very start makes `b`
    // absolute, so "page?u=http://x" stays relative while "data:..." is
    // returned untouched.
    if (/^[a-z][a-z0-9+.-]*:/i.test(b))
      return b;

    if (b.charAt(0) === "#")
      return a.replace(/#[\s\S]*$/, "") + b;

    // Base without query string and fragment.
    var base = a.replace(/[?#][\s\S]*$/, "");
    if (b.charAt(0) === "?")
      return base + b;

    var origin_match = base.match(/^([a-z][a-z0-9+.-]*):\/\/[^\/]*/i);
    if (!origin_match) {
      // Base has no "scheme://host" part: fall back to a plain path join.
      if (b.charAt(0) === "/")
        return b;
      return base.replace(/[^\/]*$/, "") + b;
    }

    var protocol = origin_match[1];
    var start = origin_match[0];

    if (b.slice(0, 2) === "//")
      return protocol + ":" + b;
    if (b.charAt(0) === "/")
      return start + b;

    // Base such as "http://example.com" has no path at all; without this
    // check the directory regex below would eat the host name.
    if (base.length === start.length)
      return start + "/" + b;

    // Drop the last path segment (a no-op when the base already ends in "/").
    return base.replace(/\/[^\/]*$/, "/") + b;
  }
  73.  
  74. // OverDrive section
  75. function overdrive() {
  // Set to true once every chapter request of a transcription run has finished.
  var transcribed = false;
  // The "Transcribe" button element; assigned when the button is created.
  var transcribe_btn;
  /**
   * Extracts one chapter from a fetched content document and stores it.
   *
   * Stored values (all through GM_setValue):
   *   "TITLE:<book>"              the document's <title> markup
   *   "INFO:<book>"               JSON of the page's bData object
   *   "CONTENTS:<book>:<chapter>" the chapter markup
   *   "IMAGE:<book>:<path>"       raw bytes of each image (when the "images"
   *                               option is on), as a one-char-per-byte string
   *
   * The chapter markup is taken from the base64 string literal passed to a
   * `parent.<something>(...)` call inside a <script>; when no script matches,
   * the document's <body> is stored instead.
   *
   * @param {string} url      URL of the content document; supplies the chapter
   *                          id and the base for relative image URLs.
   * @param {string} content  HTML source of that document.
   */
  function parse(url, content) {
    var html = document.createElement("html");
    html.innerHTML = content;

    var url_match = url.match(content_html_match);
    if (!url_match)
      // shouldn't happen
      return;

    // bData is a global of the page, so read it through unsafeWindow, exactly
    // like the INFO value written below.
    var info = unsafeWindow.bData;
    if (!info)
      return;
    var book_id = info["-odread-buid"];
    var var_id = url_match[1];

    var titleel = html.getElementsByTagName("title")[0];
    if (!titleel)
      return;

    var title = titleel.innerHTML;

    GM_setValue("TITLE:" + book_id, title.toString());
    GM_setValue("INFO:" + book_id, JSON.stringify(info));

    var _contents;
    var set = false;
    var setcontents = function(contents) {
      GM_setValue("CONTENTS:" + book_id + ":" + var_id, contents);
      _contents = contents;
      set = true;
    };

    var scripts = html.getElementsByTagName("script");
    var regex = /^ *parent\.[^;(]*\(.*?['"](.*?)['"]/;
    for (var i = 0; i < scripts.length; i++) {
      var matchobj = scripts[i].innerHTML.match(regex);
      if (!matchobj)
        continue;
      try {
        // When several scripts match, the last one wins.
        setcontents(unsafeWindow.atob(matchobj[1]));
      } catch (e) {
        // Not valid base64: skip this script instead of aborting the chapter.
        console.log("Unable to decode contents of " + url);
        console.log(e);
      }
    }

    if (!set) {
      // No embedded payload: store the document body itself.
      var bodyel = html.getElementsByTagName("body")[0];
      if (bodyel) {
        var body = bodyel.cloneNode(true);
        body.removeAttribute("xmlns");
        body.removeAttribute("onload");
        setcontents(body.outerHTML);
      }
    }

    if (!set || !options.images)
      return;

    // Parse the chapter inside a separate document to enumerate its images.
    var doc = document.implementation.createHTMLDocument("preview");
    var newhtml = doc.createElement("html");
    newhtml.innerHTML = _contents;
    var images = newhtml.getElementsByTagName("img");

    // Identical for every image of this chapter, so computed once.
    var full_url = fullurl(document.location.href, url);
    var origin = document.location.href.replace(/^([a-z]+:\/\/[^/]*).*?$/, "$1");

    // Bookkeeping shared by every way an image request can end, so the
    // pending counter always returns to zero and "Done" can be shown.
    var image_finished = function() {
      img_requests--;
      if (img_requests === 0 && transcribed) {
        transcribe_btn.innerHTML = "Done";
      }
    };

    var image_failed = function(resp) {
      console.dir(resp);
      image_finished();
    };

    var fetch_image = function(src) {
      img_requests++;
      GM_xmlhttpRequest({
        method: 'GET',
        url: fullurl(full_url, src),
        // Keeps the body as one char per byte instead of decoding it as text.
        overrideMimeType: 'text/plain; charset=x-user-defined',
        headers: {
          "Origin": origin,
          "Referer": full_url
        },
        onload: function(resp) {
          if (resp.status !== 200 && resp.status !== 304) {
            image_failed(resp);
            return;
          }

          // Reduce every char to its low byte to recover the raw bytes.
          var retval = "";
          for (var k = 0; k < resp.responseText.length; k++) {
            retval += String.fromCharCode(resp.responseText.charCodeAt(k) & 0xff);
          }
          GM_setValue("IMAGE:" + book_id + ":" + src.replace(/.*?:\/\/[^/]*\/*/, ""), retval);
          image_finished();
        },
        onerror: image_failed,
        ontimeout: image_failed,
        onabort: image_failed
      });
    };

    for (var j = 0; j < images.length; j++) {
      var src = images[j].getAttribute("src");
      // <img> without a usable src: nothing to download.
      if (!src)
        continue;
      fetch_image(src);
    }
  }
  172.  
  // Hook XMLHttpRequest.open so that every content document requested through
  // XMLHttpRequest is handed to parse() once it has finished loading.
  // The hook stays transparent for all other requests: the original open()
  // is always called with the caller's arguments and its result is returned.
  var original_open = window.XMLHttpRequest.prototype.open;
  window.XMLHttpRequest.prototype.open = function(method, url) {
    // `url` may be a non-string (e.g. a URL object) or missing; normalise it
    // before matching instead of throwing or swallowing the call.
    var url_str = (url === undefined || url === null) ? "" : String(url);

    if (url_str.match(content_html_regex)) {
      this.addEventListener("readystatechange", function() {
        if (this.readyState !== 4)
          return;
        // Reading responseText throws for non-text response types.
        if (this.responseType !== "" && this.responseType !== "text")
          return;
        parse(url_str, this.responseText);
      });
    }
    return original_open.apply(this, arguments);
  };
  187.  
  /**
   * Feeds the current markup of an iframe's document to parse(), using the
   * iframe's `src` as the document URL.
   *
   * @param {HTMLIFrameElement} iframe  Frame whose document should be parsed.
   */
  function run_iframe(iframe) {
    var ifdocument = iframe.contentDocument;
    if (!ifdocument) {
      ifdocument = iframe.contentWindow.document;
    }
    var markup = ifdocument.documentElement.innerHTML;
    parse(iframe.src, markup);
  }
  192.  
  /**
   * Looks for iframes showing content documents and parses them, both now and
   * whenever they (re)load.
   *
   * The scan is currently switched off: the function returns before touching
   * the page. NOTE(review): presumably disabled on purpose because chapters
   * are captured through the XMLHttpRequest hook - confirm before enabling.
   */
  function find_iframes() {
    var scanning_enabled = false;
    if (!scanning_enabled)
      return;

    var frames = document.getElementsByTagName("iframe");
    for (var n = 0; n < frames.length; n++) {
      if (!frames[n].src.match(content_html_regex))
        continue;
      (function(iframes, i) {
        iframes[i].onload = function() {
          run_iframe(iframes[i]);
        };

        run_iframe(iframes[i]);
      })(frames, n);
    }
  }
  /**
   * Requests every content document listed in the book's spine, one after
   * another. The responses are picked up by the XMLHttpRequest.open hook,
   * which passes them to parse().
   *
   * While running, `el` shows "Transcribing..."; it switches to "Done" once
   * the last chapter request has finished and no image download is pending
   * (when images are still pending, the image handlers set "Done" later).
   *
   * @param {Element} el  Element used to display progress (the button).
   */
  function transcribe(el) {
    el.innerHTML = "Transcribing...";
    // A previous run may have left the flag set; clear it so pending image
    // downloads of this run cannot report "Done" too early.
    transcribed = false;

    var info = unsafeWindow.bData;
    var spine = (info && info.spine) || [];
    var i = 0;

    function do_request() {
      // Loop (rather than recurse) past entries that are not content documents.
      while (i < spine.length) {
        var path = spine[i].path;
        i++;
        console.log(path);
        if (typeof path !== "string" || !path.match(content_html_regex)) {
          console.log("Skipping: " + path);
          continue;
        }

        // A failed chapter must not stall the whole run: log it and move on.
        var on_failure = function() {
          console.log("Failed: " + path);
          do_request();
        };

        var oReq = new XMLHttpRequest();
        oReq.addEventListener("load", do_request);
        oReq.addEventListener("error", on_failure);
        oReq.addEventListener("abort", on_failure);
        oReq.addEventListener("timeout", on_failure);
        oReq.open("GET", path);
        oReq.send();
        // The next chapter is requested when this one has finished.
        return;
      }

      console.log("Done text");
      transcribed = true;
      if (img_requests === 0) {
        el.innerHTML = "Done";
      }
    }
    do_request();
  }
  237.  
  /**
   * Page set-up for the OverDrive reader: registers the iframe scan for DOM
   * mutations and, when the page exposes `bData`, adds the "Transcribe"
   * button to the end of the document body.
   */
  function start() {
    var observer = new MutationObserver(find_iframes);
    observer.observe(document.documentElement, {
      attributes: true,
      childList: true
    });
    find_iframes();

    // Without book data there is nothing to transcribe, hence no button.
    if (!unsafeWindow.bData)
      return;

    transcribe_btn = document.createElement("a");
    transcribe_btn.innerHTML = "Transcribe";
    transcribe_btn.onclick = function() {
      transcribe(transcribe_btn);
    };
    transcribe_btn.href = "javascript:void(0)";
    // Absolutely positioned with a very high z-index.
    transcribe_btn.style.zIndex = 999999999;
    transcribe_btn.style.position = "absolute";
    makebutton(transcribe_btn, "#1070a0");

    var outer_div = document.createElement("div");
    outer_div.style.width = "100%";
    outer_div.appendChild(transcribe_btn);
    document.body.appendChild(outer_div);
  }

  // Run the set-up once the DOM is available.
  onload(start);
  264. }
  265. // GreasyFork section
  /**
   * GreasyFork section: renders the library of transcribed books and the
   * script settings at the top of the page's ".script-author-description"
   * element.
   *
   * Every stored book gets a row with its title, the number of stored
   * chapters out of the chapters listed in its spine, and Preview / Download /
   * Delete actions. Below the books, one checkbox row per option is shown.
   */
  function greasyfork() {
    var TITLE_PREFIX = "TITLE:";

    // True when `obj` has `key` as an own property (ignores Object.prototype).
    function has_own(obj, key) {
      return Object.prototype.hasOwnProperty.call(obj, key);
    }

    // Converts HTML-escaped text ("Tom &amp; Jerry") into plain text.
    // A <textarea> is used because its content is parsed as text only, so no
    // element is ever created from the input.
    function decode_entities(markup) {
      var scratch = document.createElement("textarea");
      scratch.innerHTML = markup;
      return scratch.value;
    }

    // Returns the spine array stored for a book, or an empty array when the
    // stored INFO value is missing or unreadable, so the book can still be
    // listed (and deleted).
    function load_spine(id) {
      var raw = GM_getValue("INFO:" + id);
      if (raw === undefined)
        return [];
      try {
        var info = JSON.parse(raw);
        return (info && info.spine) || [];
      } catch (e) {
        console.log("Unreadable INFO for " + id);
        console.log(e);
        return [];
      }
    }

    // Picks the MIME type for an image from the extension of its path.
    function image_mime(src) {
      var ext_match = src.replace(/[?#][\s\S]*$/, "").match(/\.([a-zA-Z0-9]+)$/);
      var ext = ext_match ? ext_match[1].toLowerCase() : "";
      if (ext === "jpg")
        return "image/jpeg";
      if (ext === "svg")
        return "image/svg+xml";
      return "image/" + ext;
    }

    function start() {
      var insert = document.getElementById("overdrive-insert");
      if (!insert) {
        var addto = document.querySelector(".script-author-description");
        if (!addto) {
          console.log("OverDrive Transcriber: .script-author-description not found");
          return;
        }
        // Insert as a node so the existing description is not re-parsed.
        insert = document.createElement("div");
        insert.id = "overdrive-insert";
        addto.insertBefore(insert, addto.firstChild);
      }
      insert.innerHTML = "";

      var table = document.createElement("table");
      table.style.border = "1px solid black";
      table.style.background = "white";
      table.style.width = "100%";

      // First row: preview iframe, hidden until a "Preview" button is used.
      var preview_iframe = document.createElement("iframe");
      preview_iframe.style.width = "100%";
      preview_iframe.style.display = "block";
      preview_iframe.style.border = "0";
      var preview_tr = document.createElement("tr");
      preview_tr.style.padding = 0;
      preview_tr.style.margin = 0;
      var preview_td = document.createElement("td");
      preview_td.style.padding = 0;
      preview_td.style.margin = 0;
      preview_td.style.borderBottom = "1px solid black";
      preview_td.style.display = "none";
      preview_td.setAttribute("colspan", 10);
      preview_td.appendChild(preview_iframe);
      preview_tr.appendChild(preview_td);
      table.appendChild(preview_tr);

      var keys = GM_listValues();

      // Builds the table row for the book stored under `key` ("TITLE:<id>").
      function add_book_row(key) {
        // Stored as the serialized markup of the book's <title>, i.e. already
        // HTML-escaped: use it as-is in markup, decoded everywhere else.
        var title = String(GM_getValue(key));
        var title_text = decode_entities(title);
        var id = key.slice(TITLE_PREFIX.length);
        var spine = load_spine(id);
        var contents_prefix = "CONTENTS:" + id + ":";
        var image_prefix = "IMAGE:" + id + ":";

        var tr = document.createElement("tr");
        tr.style.border = "1px solid black";

        var name_td = document.createElement("td");
        name_td.innerHTML = "<b>" + title + "</b>";

        var html = '<html><head><title>' + title + '</title><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /></head><body>';
        var size = 0;
        var items = {};   // chapter id -> chapter body markup
        var images = {};  // image path -> raw image bytes
        var i;

        for (i = 0; i < keys.length; i++) {
          var is_image = keys[i].indexOf(image_prefix) === 0;
          if (!is_image && keys[i].indexOf(contents_prefix) !== 0)
            continue;

          var contents = GM_getValue(keys[i]);
          if (typeof contents !== "string")
            continue;
          size += contents.length;

          if (is_image) {
            images[keys[i].slice(image_prefix.length)] = contents;
            continue;
          }

          // Let the HTML parser normalise the stored chapter and keep only
          // what ends up inside <body>.
          var element = document.createElement("html");
          element.innerHTML = '<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /></head>';
          element.innerHTML += contents;
          items[keys[i].slice(contents_prefix.length)] = element.getElementsByTagName("body")[0].innerHTML;
        }

        // Concatenate the chapters in spine order.
        var have = 0;
        var total = 0;
        for (i = 0; i < spine.length; i++) {
          var path = spine[i].path;
          if (typeof path !== "string" || !path.match(content_html_regex))
            continue;
          total++;
          var spinematch = path.match(content_html_match);
          if (!spinematch)
            continue;
          var spineid = spinematch[1];
          if (has_own(items, spineid)) {
            html += items[spineid];
            have++;
          } else {
            console.log("Missing " + spineid);
          }
        }
        html += "</body></html>";

        if (Object.keys(images).length > 0) {
          // Replace every stored image reference by an inline data: URL.
          var doc = document.implementation.createHTMLDocument("preview");
          var htmlel = doc.createElement("html");
          htmlel.innerHTML = html;
          var img_els = htmlel.getElementsByTagName("img");
          for (i = 0; i < img_els.length; i++) {
            var src = img_els[i].getAttribute("src");
            if (!src)
              continue;
            // Images are stored with "scheme://host/" removed from their src.
            var image_key = src.replace(/.*?:\/\/[^/]*\/*/, "");
            if (!has_own(images, image_key))
              continue;
            img_els[i].setAttribute("src", "data:" + image_mime(src) + ";base64," + btoa(images[image_key]));
          }
          html = htmlel.innerHTML;
        }

        var link = "data:text/html," + encodeURIComponent(html);
        name_td.innerHTML += " (" + have + "/" + total + ")";

        var preview = document.createElement("a");
        preview.onclick = function() {
          preview_iframe.src = link;
          preview_td.style.display = "table-cell";
        };
        preview.href = "javascript:void(0)";
        preview.innerHTML = "Preview";
        makebutton(preview, "#105210");

        var download = document.createElement("a");
        download.href = link;
        download.setAttribute("download", title_text + ".html");
        download.innerHTML = "Download (" + (html.length / 1024).toFixed(1) + "KB)";
        makebutton(download, "#1070a0");

        var del = document.createElement("a");
        del.href = "javascript:void(0)";
        del.onclick = function() {
          if (!confirm("Delete '" + title_text + "'?"))
            return;
          for (var d = 0; d < keys.length; d++) {
            if (keys[d] === (TITLE_PREFIX + id) ||
                keys[d] === ("INFO:" + id) ||
                keys[d].indexOf(contents_prefix) === 0 ||
                keys[d].indexOf(image_prefix) === 0) {
              console.log(keys[d]);
              GM_deleteValue(keys[d]);
            }
          }
          // Re-render the table from what is left in storage.
          start();
        };
        del.innerHTML = "Delete (" + (size / 1024).toFixed(1) + "KB)";
        del.style.float = "right";
        makebutton(del, "#a02010");

        var actions_td = document.createElement("td");
        actions_td.appendChild(preview);
        actions_td.appendChild(download);
        actions_td.appendChild(del);
        tr.appendChild(name_td);
        tr.appendChild(actions_td);
        table.appendChild(tr);
      }

      // Builds the checkbox row for one setting.
      function add_option_row(option) {
        var tr = document.createElement("tr");
        var name_td = document.createElement("td");
        name_td.innerHTML = default_options[option].name;
        tr.appendChild(name_td);

        var value_td = document.createElement("td");
        var value_input = document.createElement("input");
        value_input.type = "checkbox";
        if (options[option])
          value_input.setAttribute("checked", "");
        value_input.onclick = function() {
          var val = !!value_input.checked;
          // Keep the in-memory settings in step with what is stored.
          options[option] = val;
          GM_setValue("SETTINGS:" + option, JSON.stringify(val));
        };
        value_td.appendChild(value_input);
        tr.appendChild(value_td);
        table.appendChild(tr);
      }

      // Only "TITLE:" keys stand for books; stored settings and chapter data
      // must not hide the "no books" hint.
      var book_count = 0;
      for (var b = 0; b < keys.length; b++) {
        if (keys[b].indexOf(TITLE_PREFIX) !== 0)
          continue;
        book_count++;
        add_book_row(keys[b]);
      }
      if (book_count === 0) {
        var empty_tr = document.createElement("tr");
        var empty_td = document.createElement("td");
        empty_td.innerHTML = "<i>No books yet</i>";
        empty_tr.appendChild(empty_td);
        table.appendChild(empty_tr);
      }

      // Separator between the books and the settings.
      var hr_tr = document.createElement("tr");
      var hr_td = document.createElement("td");
      hr_td.setAttribute("colspan", 10);
      hr_td.style.borderBottom = "1px solid black";
      hr_tr.appendChild(hr_td);
      table.appendChild(hr_tr);

      for (var option in default_options) {
        add_option_row(option);
      }
      insert.appendChild(table);
    }

    onload(start);
  }
  // Choose the section for the current page: the reader on any
  // *.overdrive.com host, otherwise the library view when the URL ends in the
  // script's "/41000-overdrive-transcriber" path (optionally followed by a
  // query string or fragment).
  if (/:\/\/[^/]*\.overdrive\.com\//.test(document.location.href)) {
    overdrive();
  } else if (/\/41000-overdrive-transcriber(?:\/?[?#].*)?$/.test(document.location.href)) {
    greasyfork();
  }
  457. })();