怠惰小说下载器

通用网站内容抓取工具,可批量抓取任意站点的小说、论坛内容等并保存为TXT文档

目前为 2023-01-27 提交的版本。查看 最新版本

  1. // ==UserScript==
  2. // @name DownloadAllContent
  3. // @name:zh-CN 怠惰小说下载器
  4. // @name:zh-TW 怠惰小説下載器
  5. // @name:ja 怠惰者小説ダウンロードツール
  6. // @namespace hoothin
  7. // @version 2.7.3.18
  8. // @description Fetch and download main content on current page, provide special support for chinese novel
  9. // @description:zh-CN 通用网站内容抓取工具,可批量抓取任意站点的小说、论坛内容等并保存为TXT文档
  10. // @description:zh-TW 通用網站內容抓取工具,可批量抓取任意站點的小說、論壇內容等並保存為TXT文檔
  11. // @description:ja ユニバーサルサイトコンテンツクロールツール、クロール、フォーラム内容など
  12. // @author hoothin
  13. // @match http://*/*
  14. // @match https://*/*
  15. // @match ftp://*/*
  16. // @grant GM_xmlhttpRequest
  17. // @grant GM_registerMenuCommand
  18. // @grant GM_setValue
  19. // @grant GM_getValue
  20. // @grant unsafeWindow
  21. // @license MIT License
  22. // @compatible chrome
  23. // @compatible firefox
  24. // @compatible opera 未测试
  25. // @compatible safari 未测试
  26. // @contributionURL https://buymeacoffee.com/hoothin
  27. // @contributionAmount 1
  28. // ==/UserScript==
  29. (function (global, factory) {
  30. if (typeof define === "function" && define.amd) {
  31. define([], factory);
  32. } else if (typeof exports !== "undefined") {
  33. factory();
  34. } else {
  35. var mod = {
  36. exports: {}
  37. };
  38. factory();
  39. global.FileSaver = mod.exports;
  40. }
  41. })(this, function () {
  42. "use strict";
  43.  
  44. /*
  45. * FileSaver.js
  46. * A saveAs() FileSaver implementation.
  47. *
  48. * By Eli Grey, http://eligrey.com
  49. *
  50. * License : https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md (MIT)
  51. * source : http://purl.eligrey.com/github/FileSaver.js
  52. */
  // Resolve the host's global object: a browser window first, then a
  // worker-style `self`, then a Node-style `global`; undefined if none fits.
  var _global = (function () {
    if (typeof window === 'object' && window.window === window) {
      return window;
    }
    if (typeof self === 'object' && self.self === self) {
      return self;
    }
    if (typeof global === 'object' && global.global === global) {
      return global;
    }
    return void 0;
  })();
  54.  
  /**
   * Optionally prepends a UTF-8 byte-order mark to a blob.
   *
   * @param {Blob} blob - Blob about to be saved.
   * @param {Object|boolean} [opts] - `{ autoBom: boolean }`. A missing or null
   *   value means "no BOM". A non-object value is the deprecated form and is
   *   interpreted as `autoBom = !opts` after logging a warning.
   * @returns {Blob} A new blob starting with U+FEFF when `autoBom` is set and
   *   the blob's type is a text/XML type declared as UTF-8; otherwise the
   *   original blob, untouched.
   */
  function bom(blob, opts) {
    if (opts === undefined || opts === null) {
      // `typeof null` is 'object', so null must be handled here or the
      // `opts.autoBom` access below would throw.
      opts = {
        autoBom: false
      };
    } else if (typeof opts !== 'object') {
      console.warn('Deprecated: Expected third argument to be a object');
      opts = {
        autoBom: !opts
      };
    }

    if (opts.autoBom && /^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) {
      return new Blob([String.fromCharCode(0xFEFF), blob], {
        type: blob.type
      });
    }

    return blob;
  }
  73.  
  /**
   * Fetches `url` as a blob with an asynchronous GET request and passes the
   * response to saveAs() together with `name` and `opts`.
   * A transport error is only logged to the console.
   */
  function download(url, name, opts) {
    var request = new XMLHttpRequest();

    function handleLoaded() {
      saveAs(request.response, name, opts);
    }

    function handleFailed() {
      console.error('could not download file');
    }

    request.open('GET', url);
    request.responseType = 'blob';
    request.onload = handleLoaded;
    request.onerror = handleFailed;
    request.send();
  }
  89.  
  /**
   * Probes `url` with a synchronous HEAD request.
   * Returns true only when the request ends with a 2xx status; a send() that
   * throws is deliberately ignored and simply leaves the status outside 2xx.
   */
  function corsEnabled(url) {
    var probe = new XMLHttpRequest();
    probe.open('HEAD', url, false); // third argument false = synchronous

    try {
      probe.send();
    } catch (ignored) {
      // best effort: a blocked request is reported as "not enabled" below
    }

    if (probe.status >= 200 && probe.status <= 299) {
      return true;
    }
    return false;
  }
  101.  
  102.  
  /**
   * Dispatches a synthetic click on `node`.
   * Uses the MouseEvent constructor; if that attempt throws, falls back to
   * the legacy createEvent/initMouseEvent API.
   */
  function click(node) {
    function legacyClick() {
      var legacyEvent = document.createEvent('MouseEvents');
      legacyEvent.initMouseEvent('click', true, true, window, 0, 0, 0, 80, 20, false, false, false, false, 0, null);
      node.dispatchEvent(legacyEvent);
    }

    try {
      node.dispatchEvent(new MouseEvent('click'));
    } catch (err) {
      legacyClick();
    }
  }
  112.  
  113.  
  // Truthy when the user agent mentions Macintosh and AppleWebKit but not Safari.
  var isMacOSWebView = _global.navigator &&
    /Macintosh/.test(navigator.userAgent) &&
    /AppleWebKit/.test(navigator.userAgent) &&
    !/Safari/.test(navigator.userAgent);

  /*
   * saveAs(blobOrUrl, name, opts[, popup])
   *
   * An already-present _global.saveAs wins. Otherwise one implementation is
   * picked once, in this order:
   *   1. no-op when `window` is not the resolved global object;
   *   2. <a download> when the attribute is supported (and not a macOS WebView);
   *   3. navigator.msSaveOrOpenBlob when available;
   *   4. popup / FileReader fallback.
   */
  var saveAs = _global.saveAs || (function chooseSaveAs() {
    if (typeof window !== 'object' || window !== _global) {
      return function saveAs() {};
    }

    if ('download' in HTMLAnchorElement.prototype && !isMacOSWebView) {
      return function saveAs(blob, name, opts) {
        var urlApi = _global.URL || _global.webkitURL;
        var anchor = document.createElement('a');
        name = name || blob.name || 'download';
        anchor.download = name;
        anchor.rel = 'noopener';

        if (typeof blob !== 'string') {
          // Blob input: expose it through an object URL, released after 40 s.
          anchor.href = urlApi.createObjectURL(blob);
          setTimeout(function () {
            urlApi.revokeObjectURL(anchor.href);
          }, 4E4);
          setTimeout(function () {
            click(anchor);
          }, 0);
          return;
        }

        // String input is treated as a URL.
        anchor.href = blob;
        if (anchor.origin === location.origin) {
          click(anchor);
        } else if (corsEnabled(anchor.href)) {
          download(blob, name, opts);
        } else {
          anchor.target = '_blank';
          click(anchor);
        }
      };
    }

    if ('msSaveOrOpenBlob' in navigator) {
      return function saveAs(blob, name, opts) {
        name = name || blob.name || 'download';

        if (typeof blob !== 'string') {
          navigator.msSaveOrOpenBlob(bom(blob, opts), name);
          return;
        }

        if (corsEnabled(blob)) {
          download(blob, name, opts);
          return;
        }

        var anchor = document.createElement('a');
        anchor.href = blob;
        anchor.target = '_blank';
        setTimeout(function () {
          click(anchor);
        });
      };
    }

    return function saveAs(blob, name, opts, popup) {
      popup = popup || open('', '_blank');

      if (popup) {
        popup.document.title = popup.document.body.innerText = 'downloading...';
      }

      if (typeof blob === 'string') return download(blob, name, opts);

      var force = blob.type === 'application/octet-stream';
      var isSafari = /constructor/i.test(_global.HTMLElement) || _global.safari;
      var isChromeIOS = /CriOS\/[\d]+/.test(navigator.userAgent);
      var needsDataUrl = (isChromeIOS || force && isSafari || isMacOSWebView) && typeof FileReader !== 'undefined';

      if (needsDataUrl) {
        var reader = new FileReader();

        reader.onloadend = function () {
          var dataUrl = reader.result;
          if (!isChromeIOS) {
            dataUrl = dataUrl.replace(/^data:[^;]*;/, 'data:attachment/file;');
          }
          if (popup) popup.location.href = dataUrl; else location = dataUrl;
          popup = null;
        };

        reader.readAsDataURL(blob);
      } else {
        var urlApi = _global.URL || _global.webkitURL;
        var objectUrl = urlApi.createObjectURL(blob);
        if (popup) popup.location = objectUrl; else location.href = objectUrl;
        popup = null;

        setTimeout(function () {
          urlApi.revokeObjectURL(objectUrl);
        }, 4E4);
      }
    };
  })();

  // Publish on the global object and as a self-reference.
  _global.saveAs = saveAs.saveAs = saveAs;

  if (typeof module !== 'undefined') {
    module.exports = saveAs;
  }
  203. });
  204.  
  205. (function() {
  206. 'use strict';
  // Link texts that look like a chapter / section heading (prologue, "Chapter 12",
  // "第十二章", "12、title", ...). Used to recognise table-of-contents links.
  var indexReg=/PART\b|^Prologue|Chapter\s*[\-_]?\d+|分卷|^序$|^序\s*言|^序\s*章|^前\s*言|^附\s*[录錄]|^引\s*[言子]|^摘\s*要|^[楔契]\s*子|^后\s*记|^後\s*記|^附\s*言|^结\s*语|^結\s*語|^尾\s*[声聲]|^最終話|^最终话|^番\s*外|^\d+[\s\.、,,)\-_::][^\d#\.]+$|^[第(]?[\d〇零一二三四五六七八九十百千万萬-]+\s*[、)章节節回卷折篇幕集话話]/i;
  // Link texts that mean "next page" inside a multi-page chapter.
  var innerNextPage=/下一[页頁张張]|next\s*page|次のページ/i;
  // `navigator` is looked up defensively so the block does not throw where it is absent.
  var nav = typeof navigator !== "undefined" ? navigator : null;
  // UI language tag; `userLanguage` is kept as a fallback for legacy engines.
  var lang = nav ? (nav.language || nav.userLanguage || "") : "";
  var i18n={};
  // Downloaded chapter texts, indexed by chapter position.
  var rCats=[];
  // Optional user-supplied content extractor; set by customDown().
  var processFunc;
  // Any Chinese tag selects a Chinese string table; TW / HK / MO regions and the
  // Hant script select Traditional, everything else Simplified.
  var isChinese = /^zh(?:[-_]|$)/i.test(lang);
  var isTraditional = isChinese && /[-_](?:TW|HK|MO|Hant)(?:[-_]|$)/i.test(lang);
  if (isChinese && !isTraditional) {
    i18n={
      fetch:"开始下载小说【Ctrl+F9】",
      info:"本文是使用怠惰小说下载器(DownloadAllContent)下载的",
      error:"该段内容获取失败",
      downloading:"已下载完成 %s 段,剩余 %s 段<br>正在下载 %s",
      complete:"已全部下载完成,共 %s 段",
      del:"设置文本干扰码的CSS选择器",
      custom:"自定义下载",
      customInfo:"输入网址或者章节CSS选择器",
      reSort:"按标题名重新排序",
      reSortUrl:"按网址重新排序",
      setting:"选项设置",
      abort:"跳过此章",
      save:"保存当前",
      saveAsMd:"存为 Markdown",
      downThreadNum:"设置同时下载的线程数"
    };
  } else if (isTraditional) {
    i18n={
      fetch:"開始下載小說【Ctrl+F9】",
      info:"本文是使用怠惰小說下載器(DownloadAllContent)下載的",
      error:"該段內容獲取失敗",
      downloading:"已下載完成 %s 段,剩餘 %s 段<br>正在下載 %s",
      complete:"已全部下載完成,共 %s 段",
      del:"設置文本干擾碼的CSS選擇器",
      custom:"自定義下載",
      customInfo:"輸入網址或者章節CSS選擇器",
      reSort:"按標題名重新排序",
      reSortUrl:"按網址重新排序",
      setting:"選項設置",
      abort:"跳過此章",
      save:"保存當前",
      saveAsMd:"存爲 Markdown",
      downThreadNum:"設置同時下載的綫程數"
    };
  } else {
    i18n={
      fetch:"Download [Ctrl+F9]",
      info:"The TXT is downloaded by 'DownloadAllContent'",
      error:"Failed in downloading current chapter",
      downloading:"%s pages are downloaded, there are still %s pages left<br>Downloading %s ......",
      complete:"Completed! Get %s pages in total",
      del:"Set css selectors for ignore",
      custom:"Custom to download",
      customInfo:"Input urls OR css selectors for chapter links",
      reSort:"ReSort by title",
      reSortUrl:"Resort by URLs",
      setting:"Open Setting",
      abort:"Abort",
      save:"Save",
      saveAsMd:"Save as Markdown",
      downThreadNum:"Set threadNum for download"
    };
  }
  // `firefox` switches text measuring from innerText to textContent in getPageContent().
  var firefox=!!nav && String(nav.userAgent).toLowerCase().indexOf('firefox')!=-1,curRequests=[];
  // Progress panel state, filled in by initTxtDownDiv().
  var rocketContent,txtDownContent,txtDownWords,txtDownQuit,txtDownDivInited=false;
  276.  
  /**
   * Builds the floating progress panel once and wires its buttons.
   *
   * Fills the shared txtDownContent / txtDownWords / txtDownQuit references.
   * Calling it again while the panel exists is a no-op. Closing the panel
   * removes it and clears the "initialised" flag, so the next download can
   * build a fresh panel (previously the flag stayed set and later downloads
   * ran without any visible UI).
   */
  function initTxtDownDiv(){
    if(txtDownDivInited)return;
    txtDownDivInited=true;
    // A placeholder is appended first because outerHTML can only be assigned
    // on an element that has a parent.
    rocketContent=document.createElement("div");
    document.body.appendChild(rocketContent);
    rocketContent.outerHTML=`
<div id="txtDownContent">
<div style="font-size:16px;color:#333333;width:362px;height:110px;position:fixed;left:50%;top:50%;margin-top:-25px;margin-left:-150px;z-index:100000;background-color:#ffffff;border:1px solid #afb3b6;border-radius:10px;opacity:0.95;filter:alpha(opacity=95);box-shadow:5px 5px 20px 0px #000;">
<div id="txtDownWords" style="position:absolute;width:275px;height: 90px;max-height: 90%;border: 1px solid #f3f1f1;padding: 8px;border-radius: 10px;overflow: auto;">
Analysing......
</div>
<div id="txtDownQuit" style="width: 30px;height: 30px;border-radius: 30px;position:absolute;right:2px;top:2px;cursor: pointer;background-color:#ff5a5a;">
<span style="height: 30px;line-height: 30px;display:block;color:#FFF;text-align:center;font-size: 12px;font-weight: bold;">╳</span>
</div>
<div style="position:absolute;right:0px;bottom:2px;cursor: pointer;max-width:85px">
<button id="abortRequest" style="background: #008aff;border: 0;padding: 5px;border-radius: 6px;color: white;float: right;margin: 1px;height: 25px;display:none;line-height: 16px;">${getI18n('abort')}</button>
<button id="tempSaveTxt" style="background: #008aff;border: 0;padding: 5px;border-radius: 6px;color: white;float: right;margin: 1px;height: 25px;line-height: 16px;cursor: pointer;">${getI18n('save')}</button>
<button id="saveAsMd" style="background: #008aff;border: 0;padding: 5px;border-radius: 6px;color: white;float: right;margin: 1px;height: 25px;line-height: 16px;cursor: pointer;" title="${getI18n('saveAsMd')}">Markdown</button>
</div>
</div>
</div>`;
    txtDownContent=document.querySelector("#txtDownContent");
    txtDownWords=document.querySelector("#txtDownWords");
    txtDownQuit=document.querySelector("#txtDownQuit");
    txtDownQuit.onclick=function(){
      txtDownContent.style.display="none";
      if(txtDownContent.parentNode){
        txtDownContent.parentNode.removeChild(txtDownContent);
      }
      // Allow the panel to be rebuilt by the next download.
      txtDownDivInited=false;
    };
    initTempSave();
  }
  307.  
  /**
   * Wires the three buttons of the progress panel:
   *  - #tempSaveTxt  saves what has been collected so far as "<title>.txt";
   *  - #abortRequest aborts the most recently registered request, if any;
   *  - #saveAsMd     saves the collected chapters as "<title>.md".
   */
  function initTempSave(){
    var txtButton = document.getElementById('tempSaveTxt');
    var abortButton = document.getElementById('abortRequest');
    var mdButton = document.getElementById('saveAsMd');

    // Wraps text in a UTF-8 plain-text blob and hands it to saveAs().
    function deliver(text, extension){
      var blob = new Blob([text], {type: "text/plain;charset=utf-8"});
      saveAs(blob, document.title+extension);
    }

    // First CRLF (end of the chapter title) becomes a rule; remaining line
    // break runs become blank lines; tabs that start a line are dropped.
    function toMarkdown(cat){
      return cat.replace("\r\n", "\n---").replace(/(\r\n|\n\r)+/g, "\n\n").replace(/[\n\r]\t+/g, "\n");
    }

    txtButton.onclick = function(){
      deliver(i18n.info+"\r\n\r\n"+document.title+"\r\n\r\n"+rCats.join("\r\n\r\n"), ".txt");
      console.log(curRequests);
    };
    abortButton.onclick = function(){
      let latest = curRequests.pop();
      if(latest)latest[1].abort();
    };
    mdButton.onclick = function(){
      let header = i18n.info+"\n\n---\n"+document.title+"\n===\n";
      // reduce() skips unfilled slots, like the chapters not downloaded yet
      let markdown = rCats.reduce(function(acc, cat){
        return acc + '\n' + toMarkdown(cat);
      }, header);
      deliver(markdown, ".md");
    };
  }
  331.  
  /**
   * Downloads every chapter link in `aEles` and saves the combined text.
   *
   * Several requests run concurrently (the "downThreadNum" setting, default
   * 20). Each finished request stores its text in rCats at the chapter's
   * index and starts the next one. "Next page" links found inside a chapter
   * are appended to `aEles` and later merged right after their parent
   * chapter. Once every link is done, the result is saved as "<title>.txt"
   * after a 3 second grace period.
   *
   * @param {Array} aEles - Anchor-like objects (`href`, `innerText`). The
   *   array is sorted and extended in place.
   */
  function indexDownload(aEles){
    if(aEles.length<1)return;
    initTxtDownDiv();

    // Numeric value of all digits found in `text` (NaN when there are none).
    function digitsAsNumber(text){
      return parseInt(text.replace(/[^0-9]/g,""),10);
    }
    // The link title ends up in innerHTML, so neutralise markup characters.
    function escapeHtml(text){
      return String(text).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;");
    }

    if(GM_getValue("contentSort")){
      aEles.sort(function(a,b){
        return digitsAsNumber(a.innerText) - digitsAsNumber(b.innerText);
      });
    }
    if(GM_getValue("contentSortUrl")){
      aEles.sort(function(a,b){
        return digitsAsNumber(a.href) - digitsAsNumber(b.href);
      });
    }
    rCats=[];
    // insertSigns[p] lists the indexes of follow-up pages belonging to chapter p.
    var insertSigns=[];
    // downIndex: next index to request; downNum: number of finished requests.
    var downIndex=0,downNum=0;
    var waitForComplete;

    // Starts the next download (if any) and registers it so it can be aborted.
    function startNext(){
      let next=downOnce();
      if(next)curRequests.push(next);
    }

    // Remembers a newly discovered "next page" link of the chapter at curIndex.
    function registerInnerPage(nextPage, aTag, curIndex){
      var known=aEles.some(function(ele){return ele.href==nextPage.href;});
      if(known)return;
      nextPage.innerText=aTag.innerText+"\t>>";
      aEles.push(nextPage);
      // If curIndex is itself a follow-up page, attach to its root chapter.
      let owner=insertSigns.findIndex(function(signs){
        return signs && signs.includes(curIndex);
      });
      let targetIndex=owner==-1?curIndex:owner;
      if(!insertSigns[targetIndex])insertSigns[targetIndex]=[];
      insertSigns[targetIndex].push(aEles.length-1);
    }

    var downOnce=function(){
      if(downNum>=aEles.length)return;
      let curIndex=downIndex;
      let aTag=aEles[curIndex];
      let request=(aTag, curIndex)=>{
        let tryTimes=0;
        let requestBody={
          method: 'GET',
          url: aTag.href,
          headers:{
            referer:aTag.href,
            "Content-Type":"text/html;charset="+document.charset
          },
          timeout:15000,
          overrideMimeType:"text/html;charset="+document.charset,
          onload: function(result) {
            downIndex++;
            downNum++;
            var doc = getDocEle(result.responseText);
            // getDocEle() yields null when no document could be created
            let nextPage=doc?checkNextPage(doc):null;
            if(nextPage)registerInnerPage(nextPage, aTag, curIndex);
            processDoc(curIndex, aTag, doc, (result.status>=400?` status: ${result.status} `:""));
            startNext();
          },
          onerror: function(e) {
            console.warn("error:");
            console.log(e);
            downIndex++;
            downNum++;
            processDoc(curIndex, aTag, null, ` NETWORK ERROR: ${(e.response||e.responseText)} `);
            startNext();
          },
          ontimeout: function(e) {
            console.warn("timeout: times="+tryTimes+" url="+aTag.href);
            // retry the same request up to two more times
            if(++tryTimes<3){
              return GM_xmlhttpRequest(requestBody);
            }
            downIndex++;
            downNum++;
            processDoc(curIndex, aTag, null, ` TIMEOUT: ${aTag.href} `);
            startNext();
          }
        };
        return [curIndex,GM_xmlhttpRequest(requestBody),aTag.href];
      };
      if(!aTag){
        // The slot may be filled later by a discovered "next page" link.
        let waitAtagReadyInterval=setInterval(function(){
          if(downNum>=aEles.length){
            clearInterval(waitAtagReadyInterval);
            return;
          }
          aTag=aEles[curIndex];
          if(aTag){
            clearInterval(waitAtagReadyInterval);
            let pending=request(aTag, curIndex);
            if(pending)curRequests.push(pending);
          }
        },1000);
        return null;
      }
      return request(aTag, curIndex);
    };

    // Parses an HTML string into a detached document; null when that fails.
    function getDocEle(str){
      var doc = null;
      try {
        doc = document.implementation.createHTMLDocument('');
        doc.documentElement.innerHTML = str;
      }
      catch (e) {
        console.log('parse error');
      }
      return doc;
    }

    // Moves follow-up pages from the tail of rCats to just after their chapter.
    function sortInnerPage(){
      var pageArrs=[],i,j;
      for(i=0;i<insertSigns.length;i++){
        var signs=insertSigns[i];
        if(signs){
          for(j=0;j<signs.length;j++){
            var sign=signs[j];
            var cat=rCats[sign];
            rCats[sign]=null;
            if(!pageArrs[i])pageArrs[i]=[];
            pageArrs[i].push(cat);
          }
        }
      }
      // Back to front, so earlier insert positions are not shifted.
      for(i=pageArrs.length-1;i>=0;i--){
        let pageArr=pageArrs[i];
        if(pageArr){
          for(j=pageArr.length-1;j>=0;j--){
            rCats.splice(i+1, 0, pageArr[j]);
          }
        }
      }
      rCats = rCats.filter(function(e){return e!=null});
    }

    // Extracts the text of one fetched page, stores it and updates the panel.
    function processDoc(i, aTag, doc, cause){
      let cbFunc=content=>{
        rCats[i]=(aTag.innerText.replace(/[\r\n\t]/g, "") + "\r\n" + (cause || '') + content);
        curRequests = curRequests.filter(function(e){return e[0]!=i});
        txtDownContent.style.display="block";
        txtDownWords.innerHTML=getI18n("downloading",[downNum,(aEles.length-downNum),escapeHtml(aTag.innerText)]);
        if(downNum==aEles.length){
          if(waitForComplete) clearTimeout(waitForComplete);
          // Grace period: a late "next page" link may still extend aEles.
          waitForComplete=setTimeout(()=>{
            if(downNum==aEles.length){
              txtDownWords.innerHTML=getI18n("complete",[downNum]);
              sortInnerPage();
              var blob = new Blob([i18n.info+"\r\n\r\n"+document.title+"\r\n\r\n"+rCats.join("\r\n\r\n")], {type: "text/plain;charset=utf-8"});
              saveAs(blob, document.title+".txt");
            }
          },3000);
        }
      };
      // getPageContent() either returns the text, or returns false and
      // delivers it later through the callback.
      let contentResult=getPageContent(doc, content=>{
        cbFunc(content);
      });
      if(contentResult!==false){
        cbFunc(contentResult);
      }
    }

    var downThreadNum = parseInt(GM_getValue("downThreadNum"),10);
    downThreadNum=downThreadNum>0?downThreadNum:20;
    for(var i=0;i<downThreadNum;i++){
      startNext();
      if(downIndex>=aEles.length-1 || downIndex>=downThreadNum-1)break;
      else downIndex++;
    }
  }
  523.  
  /**
   * Looks for a "next page" link inside a fetched chapter document.
   *
   * @param {Document|null} doc - Parsed chapter page.
   * @returns {Element|null|false} The first anchor whose text matches
   *   innerNextPage and whose href does not contain "javascript"; null when
   *   there is none (or no document); false when a custom processFunc is
   *   active, because paging is then left to that function.
   */
  function checkNextPage(doc){
    if (processFunc) return false;
    // A page that failed to parse has no links to inspect.
    if (!doc) return null;
    let aTags=doc.querySelectorAll("a");
    for(let i=0;i<aTags.length;i++){
      let aTag=aTags[i];
      if(innerNextPage.test(aTag.innerText) && aTag.href.indexOf("javascript")==-1){
        return aTag;
      }
    }
    return null;
  }
  536.  
  /**
   * Collects the text nodes below `el`, in the order the tree walker yields them.
   */
  function textNodesUnder(el){
    var walker=document.createTreeWalker(el,NodeFilter.SHOW_TEXT,null,false);
    var found=[];
    for(var node=walker.nextNode(); node; node=walker.nextNode()){
      found.push(node);
    }
    return found;
  }
  542.  
  /**
   * Extracts the main text of a page.
   *
   * A custom processFunc, when set, does all the work. Otherwise the element
   * (span/div/article/p/td) holding the most text is located, and the text
   * of every same-tag, same-depth, same-class element is concatenated.
   *
   * Note: hidden span/div/ul elements of `doc` itself are emptied before the
   * body is cloned, so passing the live document alters the page.
   *
   * @param {Document|null} doc - Page to read.
   * @param {Function} [cb] - Passed through to processFunc for asynchronous
   *   delivery; unused by the built-in extraction.
   * @returns {string|*} The extracted text with CRLF line breaks, an error
   *   message when there is nothing to extract, or whatever processFunc returns.
   */
  function getPageContent(doc, cb){
    if(!doc)return i18n.error;
    if(processFunc){
      return processFunc(doc, cb);
    }
    var inlineTags=/^(I|A|STRONG|B|FONT|BR)$/;
    var textTags=/^(I|A|STRONG|B|FONT|P|DL|DD|H\d)$/;
    var fillerOnly=/^[\s\-\_\?\>\|]*$/;

    function blank(item){
      item.innerHTML="";
    }
    function textLength(node){
      return firefox?node.textContent.length:node.innerText.length;
    }
    // A non-text child counts as text when it is a text-ish tag itself or
    // wraps exactly one such tag.
    function carriesText(item){
      return textTags.test(item.tagName) ||
        (item.nodeType==1&&item.children.length==1&&textTags.test(item.children[0].tagName));
    }

    // Drop content that is hidden from the reader (typical anti-scraping noise).
    [].forEach.call(doc.querySelectorAll("span,div,ul"),function(item){
      var thisStyle=doc.defaultView?doc.defaultView.getComputedStyle(item):item.style;
      if(thisStyle && (thisStyle.display=="none" || (item.tagName=="SPAN" && thisStyle.fontSize=="0px"))){
        blank(item);
      }
    });

    var rStr="";
    var pageData=(doc.body?doc.body:doc).cloneNode(true);
    // A body-less document clones to a Document, which has no innerHTML.
    if(typeof pageData.innerHTML=="string"){
      pageData.innerHTML=pageData.innerHTML.replace(/<!--[\s\S]*?-->/g,"");
    }
    [].forEach.call(pageData.querySelectorAll("font.jammer"),blank);
    var selectors=GM_getValue("selectors");
    if(selectors){
      try{
        [].forEach.call(pageData.querySelectorAll(selectors),blank);
      }catch(e){
        // An invalid user selector must not abort the whole extraction.
        console.warn("DownloadAllContent: invalid ignore selector", selectors, e);
      }
    }
    [].forEach.call(pageData.querySelectorAll("script,style,link,img,noscript,iframe"),blank);

    var largestContent,largestNum=0;
    var contents=pageData.querySelectorAll("span,div,article,p,td");
    for(var i=0;i<contents.length;i++){
      let content=contents[i],hasText=false,allSingle=true,curNum=0,item,j,k;
      for(j=content.childNodes.length-1;j>=0;j--){
        item=content.childNodes[j];
        if(item.nodeType==3){
          if(!/^\s*$/.test(item.data))hasText=true;
        }else if(carriesText(item)){
          hasText=true;
        }
      }
      // Empty block children that hold only separators such as "---" or ">>".
      for(j=content.childNodes.length-1;j>=0;j--){
        item=content.childNodes[j];
        if(item.nodeType==1 && !inlineTags.test(item.tagName) && fillerOnly.test(item.innerHTML)){
          blank(item);
        }
      }
      if(content.childNodes.length>1){
        let indexItem=0;
        for(j=0;j<content.childNodes.length;j++){
          item=content.childNodes[j];
          if(item.nodeType!=1)continue;
          if(item.innerText && item.innerText.length<50 && indexReg.test(item.innerText))indexItem++;
          for(k=0;k<item.childNodes.length;k++){
            var childNode=item.childNodes[k];
            if(childNode.nodeType!=3 && !inlineTags.test(childNode.tagName)){
              allSingle=false;
              break;
            }
          }
          if(!allSingle)break;
        }
        // Five or more chapter-like children: this is an index, not content.
        if(indexItem>=5)continue;
      }else{
        allSingle=false;
      }
      if(allSingle){
        curNum=textLength(content);
      }else {
        if(!hasText)continue;
        [].forEach.call(content.childNodes,function(item){
          if(item.nodeType==3)curNum+=item.data.length;
          else if(carriesText(item))curNum+=textLength(item);
        });
      }
      if(curNum>largestNum){
        largestNum=curNum;
        largestContent=content;
      }
    }
    if(!largestContent)return i18n.error+" : NO TEXT CONTENT";

    var childlist=pageData.querySelectorAll(largestContent.tagName);
    // Appends the text of `ele` to rStr, one CRLF-terminated line per block child.
    function getRightStr(ele, noTextEnable){
      let childNodes=ele.childNodes,cStr="\r\n",hasText=false;
      for(let j=0;j<childNodes.length;j++){
        let childNode=childNodes[j];
        if(childNode.nodeType==3 && childNode.data && !fillerOnly.test(childNode.data))hasText=true;
        if(childNode.innerHTML){
          childNode.innerHTML=childNode.innerHTML.replace(/\<\s*br\s*\>/gi,"\r\n").replace(/\n+/gi,"\n").replace(/\r+/gi,"\r");
        }
        if(childNode.textContent){
          cStr+=childNode.textContent.replace(/ +/g," ").replace(/([^\r]|^)\n([^\r]|$)/gi,"$1\r\n$2");
        }
        if(childNode.nodeType!=3 && !/^(I|A|STRONG|B|FONT)$/.test(childNode.tagName))cStr+="\r\n";
      }
      if(hasText || noTextEnable || ele==largestContent)rStr+=cStr+"\r\n";
    }
    var targetDepth=getDepth(largestContent);
    for(i=0;i<childlist.length;i++){
      var child=childlist[i];
      if(getDepth(child)!=targetDepth)continue;
      // Only siblings-in-spirit: the class attribute must be identical.
      if(largestContent.className!=child.className)continue;
      var sameGroup=(largestContent.className && largestContent.className==child.className)||largestContent.parentNode==child.parentNode;
      getRightStr(child, !!sameGroup);
    }
    // Collapse every run of line breaks into a single CRLF.
    return rStr.replace(/[\n\r]+/g,"\r\n");
  }
  656.  
  /**
   * Returns the localized string for `key`, with each "%s" placeholder
   * replaced, in order, by the corresponding entry of `args`.
   *
   * Arguments are inserted literally: "$&", "$1" and "%s" inside an argument
   * are not interpreted. Placeholders without a matching argument are left
   * as they are. An unknown key yields the key itself.
   *
   * @param {string} key - Property name in the i18n table.
   * @param {Array} [args] - Values for the placeholders.
   * @returns {string}
   */
  function getI18n(key, args){
    var template=i18n[key];
    if(typeof template!=="string")return String(key);
    if(!args || args.length<1)return template;
    var next=0;
    // Single pass with a replacer function, so inserted text is never rescanned.
    return template.replace(/%s/g,function(placeholder){
      return next<args.length?String(args[next++]):placeholder;
    });
  }
  666.  
  /**
   * Counts how many parentNode links lie above `dom`
   * (0 for a node without a parent).
   */
  function getDepth(dom){
    var depth=0;
    for(var node=dom; node.parentNode; node=node.parentNode){
      depth++;
    }
    return depth;
  }
  675.  
  /**
   * Entry point of the automatic download.
   *
   * Collects the page's chapter links: http(s) anchors whose text matches
   * indexReg or whose URL contains "chapter<digit>". With more than two such
   * links they are downloaded as a book; otherwise, or when `forceSingle` is
   * exactly true, only the current page's text is saved.
   *
   * Links are de-duplicated by URL. A repeated URL moves the first anchor
   * seen for it to the end of the list, which is the order the original
   * list scan produced; the Map does it without the quadratic rescans.
   *
   * @param {*} forceSingle - Only the boolean `true` forces single-page mode
   *   (menu commands may pass other values).
   */
  function fetch(forceSingle){
    forceSingle=forceSingle===true;
    processFunc=null;
    var aEles=document.body.querySelectorAll("a");
    var byHref=new Map(); // href -> anchor, in download order
    for(var i=0;i<aEles.length;i++){
      var aEle=aEles[i];
      // Some sites keep the real target in data-href behind a javascript: link.
      if((!aEle.href || aEle.href.indexOf("javascript")!=-1) && aEle.dataset.href){
        aEle.href=aEle.dataset.href;
      }
      var href=aEle.href;
      if(byHref.has(href)){
        var known=byHref.get(href);
        byHref.delete(href);
        byHref.set(href, known);
        continue;
      }
      if(href && /^http/i.test(href) && ((aEle.innerText.trim()!="" && indexReg.test(aEle.innerText.trim())) || /chapter[\-_]?\d/.test(href))){
        byHref.set(href, aEle);
      }
    }
    var list=Array.from(byHref.values());
    if(list.length>2 && !forceSingle){
      indexDownload(list);
    }else{
      var blob = new Blob([i18n.info+"\r\n\r\n"+document.title+"\r\n\r\n"+getPageContent(document)], {type: "text/plain;charset=utf-8"});
      saveAs(blob, document.title+".txt");
    }
  }
  705.  
  // Hotkey: Ctrl+F9 (keyCode 120) starts the download; the Shift state is
  // passed on, so Ctrl+Shift+F9 forces single-page mode.
  document.addEventListener("keydown", function onHotkey(evt) {
    var isF9 = evt.keyCode == 120;
    if (!isF9 || !evt.ctrlKey) {
      return;
    }
    fetch(evt.shiftKey);
  });
  /**
   * Settings dialog: asks for the ignore selectors, the number of parallel
   * downloads and the sort mode, and stores the answers.
   *
   * Cancelling a prompt keeps the stored value (prompt() returns null on
   * cancel; storing that null used to wipe the setting). The title sort
   * question is only asked when URL sorting was declined.
   */
  function setDel(){
    var selValue=GM_getValue("selectors");
    var selectors=prompt(i18n.del,selValue?selValue:"");
    if(selectors!==null){
      GM_setValue("selectors",selectors);
    }
    selValue=GM_getValue("downThreadNum");
    var downThreadNum=prompt(i18n.downThreadNum,selValue?selValue:"20");
    if(downThreadNum!==null){
      GM_setValue("downThreadNum",downThreadNum);
    }
    var sortByUrl=window.confirm(i18n.reSortUrl);
    GM_setValue("contentSortUrl",sortByUrl);
    if(!sortByUrl)GM_setValue("contentSort",window.confirm(i18n.reSort));
  }
  722. function customDown(){
  723. processFunc=null;
  724. var customRules=GM_getValue("DACrules_"+document.domain);
  725. var urls=window.prompt(i18n.customInfo,customRules?customRules:"https://xxx.xxx/book-[20-99].html, https://xxx.xxx/book-[01-10].html");
  726. if(urls){
  727. urls=decodeURIComponent(urls.replace(/%/g,'%25'));
  728. GM_setValue("DACrules_"+document.domain, urls);
  729. var processEles=[];
  730. let urlsArr=urls.split("@@"),eles=[];
  731. if(/^http|^ftp/.test(urlsArr[0])){
  732. [].forEach.call(urlsArr[0].split(","),function(i){
  733. var curEle;
  734. var varNum=/\[\d+\-\d+\]/.exec(i);
  735. if(varNum){
  736. varNum=varNum[0].trim();
  737. }else{
  738. curEle=document.createElement("a");
  739. curEle.href=i;
  740. processEles.push(curEle);
  741. return;
  742. }
  743. var num1=/\[(\d+)/.exec(varNum)[1].trim();
  744. var num2=/(\d+)\]/.exec(varNum)[1].trim();
  745. var num1Int=parseInt(num1);
  746. var num2Int=parseInt(num2);
  747. var numLen=num1.length;
  748. var needAdd=num1.charAt(0)=="0";
  749. if(num1Int>=num2Int)return;
  750. for(var j=num1Int;j<=num2Int;j++){
  751. var urlIndex=j.toString();
  752. if(needAdd){
  753. while(urlIndex.length<numLen)urlIndex="0"+urlIndex;
  754. }
  755. var curUrl=i.replace(/\[\d+\-\d+\]/,urlIndex).trim();
  756. curEle=document.createElement("a");
  757. curEle.href=curUrl;
  758. curEle.innerText=processEles.length.toString();
  759. processEles.push(curEle);
  760. }
  761. });
  762. }else{
  763. let urlSel=urlsArr[0].split(">>");
  764. try{
  765. eles=document.querySelectorAll(urlSel[0]);
  766. eles=[].filter.call(eles, ele=>{
  767. return ele.tagName=='BODY'||(!!ele.offsetParent&&getComputedStyle(ele).display!=='none');
  768. })
  769. }catch(e){}
  770. if(eles.length==0){
  771. eles=[];
  772. var eleTxts=urlsArr[0].split(/(?<=[^\\])[,,]/),exmpEles=[],excludeTxts={};
  773. [].forEach.call(document.querySelectorAll("a"),function(item){
  774. eleTxts.forEach(txt=>{
  775. var txtArr=txt.split("!");
  776. if(item.innerText.indexOf(txtArr[0])!=-1){
  777. exmpEles.push(item);
  778. excludeTxts[item]=txtArr.splice(1);
  779. }
  780. });
  781. })
  782. exmpEles.forEach(e=>{
  783. var cssSelStr="a",pa=e.parentNode,excludeTxt=excludeTxts[e];
  784. if(e.className)cssSelStr+="."+CSS.escape(e.className);
  785. while(pa && pa.nodeName!="BODY"){
  786. cssSelStr=pa.nodeName+">"+cssSelStr;
  787. pa=pa.parentNode;
  788. }
  789. [].forEach.call(document.querySelectorAll(cssSelStr),function(item){
  790. var isExclude=false;
  791. for(var t in excludeTxt){
  792. if(item.innerText.indexOf(excludeTxt[t])!=-1){
  793. isExclude=true;
  794. break;
  795. }
  796. }
  797. if(!isExclude && eles.indexOf(item)==-1){
  798. eles.push(item);
  799. }
  800. });
  801. });
  802. }
  803. function addItem(item) {
  804. let has=false;
  805. for(var j=0;j<processEles.length;j++){
  806. if(processEles[j].href==item.href){
  807. processEles.splice(j,1);
  808. processEles.push(item);
  809. has=true;
  810. break;
  811. }
  812. }
  813. if((!item.href || item.href.indexOf("javascript")!=-1) && item.dataset.href){
  814. item.href=item.dataset.href;
  815. }
  816. if(!has && item.href && /^http/i.test(item.href)){
  817. processEles.push(item.cloneNode(1));
  818. }
  819. }
  820. [].forEach.call(eles,function(item){
  821. if(urlSel[1]){
  822. item=Function("item",urlSel[1])(item);
  823. let items;
  824. if (Array.isArray(item)) {
  825. items = item;
  826. } else items = [item];
  827. items.forEach(item => {
  828. if(!item || !item.href)return;
  829. if(!item.tagName || item.tagName!="A"){
  830. let href=item.href;
  831. let innerText=item.innerText;
  832. item=document.createElement("a");
  833. item.href=href;
  834. item.innerText=innerText;
  835. }
  836. addItem(item);
  837. });
  838. } else {
  839. addItem(item);
  840. }
  841. });
  842. }
  843. if(urlsArr[1]){
  844. processEles.forEach(ele=>{
  845. ele.href=ele.href.replace(new RegExp(urlsArr[1]), urlsArr[2]);
  846. });
  847. }
  848. if(urlsArr[3]){
  849. processFunc=(data, cb)=>{
  850. if(urlsArr[3].indexOf("return ")==-1){
  851. return eval(urlsArr[3])
  852. }else{
  853. return Function("data","cb",urlsArr[3])(data, cb);
  854. }
  855. };
  856. }else{
  857. var win=(typeof unsafeWindow=='undefined'? window : unsafeWindow);
  858. if(win.dacProcess){
  859. processFunc=win.dacProcess;
  860. }
  861. }
  862. indexDownload(processEles);
  863. }
  864. }
  // Userscript menu entries: automatic download, custom download, settings.
  var menuCommands = [
    [i18n.fetch, fetch],
    [i18n.custom, customDown],
    [i18n.setting, setDel]
  ];
  for (var menuIndex = 0; menuIndex < menuCommands.length; menuIndex++) {
    GM_registerMenuCommand(menuCommands[menuIndex][0], menuCommands[menuIndex][1]);
  }
  868. })();