懒人小说下载器

通用网站内容抓取工具,可抓取小说,论坛内容等

当前为 2016-11-23 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name DownloadAllContent
  3. // @name:zh-CN 懒人小说下载器
  4. // @name:zh-TW 懶人小説下載器
  5. // @name:ja 怠惰者小説ダウンロードツール
  6. // @namespace hoothin
  7. // @version 0.3
  8. // @description Fetch and download main content on current page
  9. // @description:zh-CN 通用网站内容抓取工具,可抓取小说,论坛内容等
  10. // @description:zh-TW 通用網站內容抓取工具,可抓取小說,論壇內容等
  11. // @description:ja 汎用サイトコンテンツ取得ツール。小説やフォーラムの内容などを取得できます
  12. // @author hoothin
  13. // @include *
  14. // @grant GM_xmlhttpRequest
  15. // @grant GM_registerMenuCommand
  16. // @require https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.3.3/FileSaver.min.js
  17. // @license MIT License
  18. // @compatible chrome
  19. // @compatible firefox
  20. // @compatible opera 未测试
  21. // @compatible safari 未测试
  22. // @contributionURL https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=rixixi@sina.com&item_name=Greasy+Fork+donation
  23. // @contributionAmount 1
  24. // ==/UserScript==
  25.  
  26. (function() {
  27. 'use strict';
  // Browser locale: legacy IE exposes it as userLanguage, everything that
  // reports itself as "Netscape" exposes it as language.
  var lang;
  if(navigator.appName=="Netscape"){
    lang=navigator.language;
  }else{
    lang=navigator.userLanguage;
  }

  // UI strings. Only Simplified Chinese has a dedicated table; every other
  // locale falls back to English.
  var i18n;
  if(lang==="zh-CN"){
    i18n={
      fetch:"开始下载小说或其他",
      info:"本文使用DownloadAllContent脚本下载"
    };
  }else{
    i18n={
      fetch:"Download All Content",
      info:"The TXT is downloaded with 'DownloadAllContent'"
    };
  }

  // Placeholder element that is later swapped for the progress panel markup.
  var rocketContent=document.createElement("div");
  /**
   * Downloads every linked chapter page and saves the combined text as a
   * single TXT file named after the current document title.
   *
   * A small fixed progress panel is shown while requests are in flight. Each
   * chapter is stored at the index of its link, so the output keeps the link
   * order regardless of the order in which responses arrive. A chapter whose
   * request or parsing fails is recorded with its title only, so one bad page
   * cannot prevent the file from being saved.
   *
   * @param {ArrayLike<HTMLAnchorElement>} aEles Chapter links, in reading order.
   */
  function indexDownload(aEles){
    // A panel from an earlier run may still be in the page; remove it so the
    // id lookups below resolve to the panel created by this run.
    var stalePanel=document.querySelector("#txtDownContent");
    if(stalePanel && stalePanel.parentNode){
      stalePanel.parentNode.removeChild(stalePanel);
    }
    document.body.appendChild(rocketContent);
    // Assigning outerHTML swaps the placeholder div for the panel markup.
    rocketContent.outerHTML=`
    <div id="txtDownContent" style="display: none;">
      <div style="width:300px;height:50px;position:fixed;left:50%;top:50%;margin-top:-25px;margin-left:-150px;z-index:100000;background-color:#ffffff;border:1px solid #afb3b6;border-radius:10px;opacity:0.95;filter:alpha(opacity=95);box-shadow:5px 5px 20px 0px #000;">
        <div id="txtDownWords" style="position:absolute;left:20px;top:10px;width:260px;">
        </div>
        <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAeCAMAAAAM7l6QAAAA5FBMVEUAAAD+/v7////9/f7////////+/v7+/v7////+/v7+/v7////+/v7+/v7////+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7+/v7////////////+/v7+/v7+/v7+/v7+/v4uje3///82ke7s9P3N5PtQoPDI4fqCu/Tu9v5Im+/6/P+VxfZgqPFNnvDp8/3f7fq42Pmnz/d1tPNvsfNkq/JCmO/4+/7X6fz19/rn8PqYx/aNwfV8uPRqrvJZpfFUovAzkO3Q5vu92/mr0fieyva92fWx0vQ6lO5pygFTAAAAJHRSTlMAmfD+RMGwgj2mknlIKR/36+XGnIyHfnJfVDk2My8S4E1CJBvTatKDAAABY0lEQVQoz4WSZ1fCMBRA05ahLPfemkspBUFwgGz3+P//x/hK6ZBzvB/ak96+kZeoGCtFK5eziitqCSfWOnM2CuWUrOyQYPc0bjM2htHXh+/ffQ4lQ6zEPoZ6TwdUp10M56EtAe5MR7y0HGAeX7Ghe6MTzIZwFtTfBrepUzw4UFSGY8CUTTMGLo3eglv58By21pRnB7aNBqTwPfWq/OVey9sH22wZXiWo6Yr3GtRl/f0IRyoPYy14v97YWjVY12BPrcK9XvhaZPUdWCoLbR35yOoW5P7R8eQ3JrmbSp6HVmSrXuTrmNYyUAs2JkL6D6YjG1PgNGXK87F4nWAsUxmL2gyH6oVD7cuhdqFg9CE4T/oPE6CsgvBOP21715AP7ugaDFK+3YD1KyUcAG47bn0TSylxFd8WDTwMMByoBSUHw+jd9/3JbQODnVExygUS7FRUksPVtdDZW8dqCZm8lc1auxcq4gc02GVGTUchmgAAAABJRU5ErkJggg==" id="txtDownQuit" style="position:absolute;right:0px;top:0px;cursor: pointer;" />
      </div>
    </div>`;
    var txtDownContent=document.querySelector("#txtDownContent");
    var txtDownWords=document.querySelector("#txtDownWords");
    var txtDownQuit=document.querySelector("#txtDownQuit");
    // The close icon hides the panel and removes it from the page.
    txtDownQuit.onclick=function(){
      txtDownContent.style.display="none";
      txtDownContent.parentNode.removeChild(txtDownContent);
    };

    var total=aEles.length,finished=0,rCats=[];

    // Records one chapter result (success or failure), refreshes the progress
    // text, and saves the file once every chapter has been accounted for.
    function settle(index,text){
      rCats[index]=text;
      finished++;
      txtDownContent.style.display="block";
      txtDownWords.textContent="已下载完成 "+finished+" 段,剩余 "+(total-finished)+" 段";
      if(finished==total){
        var blob = new Blob([i18n.info+"\r\n"+document.title+"\r\n\r\n"+rCats.join("\r\n\r\n")], {type: "text/plain;charset=utf-8"});
        saveAs(blob, document.title+".txt");
      }
    }

    for(let i=0;i<total;i++){
      let aTag=aEles[i];
      // Keeps the chapter title in the output so the gap is visible, and
      // still counts the chapter so the download can finish.
      let fail=function(reason){
        console.log('chapter download failed ('+reason+'): '+aTag.href);
        settle(i,aTag.textContent+"\r\n");
      };
      GM_xmlhttpRequest({
        method: 'GET',
        url: aTag.href,
        // Decode the response with the charset of the current page; chapter
        // pages are assumed to share the index page's encoding (GBK/Big5
        // sites would otherwise be decoded as UTF-8 and come out garbled).
        overrideMimeType: "text/html;charset="+document.characterSet,
        onload: function(result) {
          var text=null;
          try {
            var doc = document.implementation.createHTMLDocument('');
            doc.documentElement.innerHTML = result.responseText;
            text = aTag.textContent+"\r\n"+getPageContent(doc);
          }
          catch (e) {
            console.log('parse error');
          }
          if (text===null) {
            fail('parse error');
            return;
          }
          settle(i,text);
        },
        onerror: function() { fail('error'); },
        ontimeout: function() { fail('timeout'); },
        onabort: function() { fail('abort'); }
      });
    }
  }
  92.  
  /**
   * Tells whether an element should be ignored because it does not start with
   * real text.
   *
   * An element is ignored when its first child is a non-text node other than
   * <i>/<a>, or when its first child is whitespace-only text that is not
   * followed by an <i>/<a> element. Elements without children are kept.
   *
   * @param {Element} el Element to inspect.
   * @returns {boolean} true when the element should be skipped.
   */
  function lacksLeadingText(el){
    var first=el.firstChild;
    if(!first)return false;
    if(first.nodeType!=3)return !/^[IA]$/.test(first.tagName);
    if(!/^\s*$/.test(first.data))return false;
    var second=el.childNodes[1];
    return !second || !/^[IA]$/.test(second.tagName);
  }

  /**
   * Extracts the main text of a page.
   *
   * The element (span/div/article/p/td) holding the most text is taken as the
   * reference. The text of every element with the same tag name at the same
   * DOM depth is then collected, turning <br> into line breaks and skipping
   * script/style children. Elements with no non-blank direct text node
   * contribute nothing.
   *
   * @param {Document} pageData Live document or a parsed, detached document.
   * @returns {string} Extracted text, or "" when no candidate element exists.
   */
  function getPageContent(pageData){
    var i,rStr="";
    // Firefox is measured via textContent, other browsers via innerText.
    var useTextContent=navigator.userAgent.toLowerCase().indexOf('firefox')!=-1;
    var largestContent,largestLength=0;
    var contents=pageData.querySelectorAll("span,div,article,p,td");
    for(i=0;i<contents.length;i++){
      var content=contents[i];
      if(lacksLeadingText(content))continue;
      // Only the live page has layout information; skip invisible elements there.
      if(pageData==document && content.offsetWidth <= 0 && content.offsetHeight <= 0)
        continue;
      var length=(useTextContent?content.textContent:content.innerText).length;
      // Strict comparison: on ties the earliest candidate wins.
      if(!largestContent || largestLength<length){
        largestContent=content;
        largestLength=length;
      }
    }
    // No usable element at all (empty or purely structural page).
    if(!largestContent)return rStr;

    var targetDepth=getDepth(largestContent);
    var childlist=pageData.querySelectorAll(largestContent.tagName);
    for(i=0;i<childlist.length;i++){
      var child=childlist[i];
      // Elements sharing the reference element's class are always accepted;
      // the others must start with text like the candidates above.
      var sameClass=largestContent.className && largestContent.className==child.className;
      if(!sameClass && lacksLeadingText(child))continue;
      if(getDepth(child)!=targetDepth)continue;
      let childNodes=child.childNodes,cStr="\r\n",hasText=false;
      for(let j=0;j<childNodes.length;j++){
        var childNode=childNodes[j];
        if(childNode.nodeType==3 && childNode.data && !/^\s*$/.test(childNode.data))hasText=true;
        if(childNode.tagName=="BR")cStr+="\r\n";
        // Leading whitespace of each piece is collapsed to a single space.
        else if(!/SCRIPT|STYLE/.test(childNode.tagName) && childNode.textContent)cStr+=childNode.textContent.replace(/\s*/," ");
      }
      if(hasText)rStr+=cStr;
    }
    return rStr;
  }
  134.  
  /**
   * Counts how many ancestors a node has by following parentNode links.
   *
   * @param {Node} dom Node to measure.
   * @returns {number} Number of parentNode hops until a node without a parent
   *     is reached; 0 when the node itself has no parent.
   */
  function getDepth(dom){
    let depth=0;
    for(let node=dom.parentNode;node;node=node.parentNode){
      depth++;
    }
    return depth;
  }
  143.  
  /**
   * Entry point of the download.
   *
   * Collects the links on the current page whose markup looks like a chapter
   * title ("第…章", "序", "前言", "后记", …). When more than two such links
   * exist the page is treated as an index and every chapter is downloaded;
   * otherwise the text of the current page itself is saved.
   */
  function fetch(){
    // "|" is not an alternation operator inside a character class, so the
    // classes list the characters directly. A bare "序" already covers
    // "序言" and "序章".
    var chapterPattern=/第[\d〇零一二三四五六七八九十百千万萬]+[章节回卷折篇幕集]|序|前\s*言|引\s*言|引\s*子|摘\s*要|楔\s*子|后\s*记|附\s*言|结\s*语/;
    var aEles=document.querySelectorAll("a"),list=[];
    for(var i=0;i<aEles.length;i++){
      var aEle=aEles[i];
      // Anchors without a target cannot be downloaded.
      if(!aEle.href)continue;
      if(chapterPattern.test(aEle.innerHTML)){
        list.push(aEle);
      }
    }
    if(list.length>2){
      indexDownload(list);
    }else{
      var blob = new Blob([i18n.info+"\r\n"+document.title+"\r\n\r\n"+getPageContent(document)], {type: "text/plain;charset=utf-8"});
      saveAs(blob, document.title+".txt");
    }
  }
  159.  
  // Keyboard shortcut: Ctrl+F9 (keyCode 120) starts the download.
  document.addEventListener("keydown", function(event) {
    if(!event.ctrlKey || event.keyCode != 120) {
      return;
    }
    fetch();
  });
  // The same action is exposed as a userscript-manager menu entry.
  GM_registerMenuCommand(i18n.fetch, fetch);
  166. })();