Bachngocsach Vip scraper

Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop.

  1. // ==UserScript==
  2. // @name Bachngocsach Vip scraper
  3. // @name:en Bachngocsach Vip scraper
  4. // @name:vi Tải truyện vip bachngocsach
  5. // @namespace Violentmonkey Scripts
  6. // @match *://bachngocsach.info/*
  7. // @grant GM_setValue
  8. // @grant GM_getValue
  9. // @grant GM_deleteValue
  10. // @grant GM_listValues
  11. // @version 1.1.0
  12. // @author Tác giả = tác = làm, giả là giả (dối) = làm giả
  13. // @description Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop.
  14. // @description:vi Tải truyện từ bachngocsach.net.vn. Ctrl+Alt+S để bắt đầu, Ctrl+Alt+C để ngừng
  15. // @description:en Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop
  16. // @license MIT2
  17. // ==/UserScript==
  // Shortcut that starts scraping (Ctrl+Alt+S). The `key` value is compared
  // against both KeyboardEvent.key and KeyboardEvent.code by the keydown handler.
  const startKey = {
    key: 'KeyS',
    ctrlKey: true,
    altKey: true,
    shiftKey: false,
  };
  // Shortcut that stops scraping (Ctrl+Alt+C); same matching rules as startKey.
  const stopKey = {
    key: 'KeyC',
    ctrlKey: true,
    altKey: true,
    shiftKey: false,
  };

  // true: react to a synthetic 'urlchange' event; false: watch the DOM with a
  // MutationObserver instead.
  const useEvent = false;

  /**
   * Pause for a while.
   * @param {number} ms - Delay handed to setTimeout, in milliseconds.
   * @returns {Promise<void>} Resolves once the timer fires.
   */
  const sleep = (ms) =>
    new Promise((resolve) => {
      setTimeout(resolve, ms);
    });

  /**
   * Dispatch a synthetic, bubbling `keydown` on `window`.
   * @param {string} key - Used for both the `key` and `code` fields of the event.
   * @returns {boolean} Whatever `window.dispatchEvent` returns.
   */
  const pressKey = (key) => {
    const keydown = new KeyboardEvent('keydown', { key, code: key, bubbles: true });
    return window.dispatchEvent(keydown);
  };
  26.  
  /**
   * Make URL changes observable on `window`.
   *
   * Wraps `history.pushState` and listens for `popstate`. On either, it calls
   * `window.onurlchange()` when that is a function; otherwise it dispatches a
   * 'urlchange' event on `window`.
   *
   * Safe to call more than once: later calls are no-ops.
   */
  function addUrlChangeEvent() {
    // A second installation would make the wrapper reach itself through
    // `history._pushState` (infinite recursion) and register a duplicate
    // popstate listener, so bail out if the wrapper is already in place.
    if (history.pushState?.isUrlChangeWrapper) return;

    const notifyUrlChange = () => {
      if (typeof window.onurlchange === 'function') {
        window.onurlchange();
      } else {
        // A fresh Event per notification: re-dispatching one shared instance
        // throws if pushState is called from inside a 'urlchange' listener.
        window.dispatchEvent(new Event('urlchange'));
      }
    };

    const nativePushState = history.pushState;
    // Alias kept because the original implementation exposed it.
    history._pushState = nativePushState;

    const wrappedPushState = (...args) => {
      const result = nativePushState.apply(history, args);
      notifyUrlChange();
      return result;
    };
    wrappedPushState.isUrlChangeWrapper = true;
    history.pushState = wrappedPushState;

    window.addEventListener('popstate', notifyUrlChange);
  }
  41.  
  /**
   * Replace `console.log` with another logging function.
   *
   * @param {number} c - 1: console.dir, 2: console.info, 3: console.debug,
   *   4: console.warn. Any other value takes `console.log` from a newly created
   *   hidden iframe appended to `document.body`.
   */
  function reEnableConsoleLog(c) {
    // Numeric keys only: a Map lookup keeps the strict matching of the cases.
    const substitutes = new Map([
      [1, 'dir'],
      [2, 'info'],
      [3, 'debug'],
      [4, 'warn'],
    ]);
    const methodName = substitutes.get(c);
    if (methodName !== undefined) {
      console.log = console[methodName];
      return;
    }

    const frame = document.createElement('iframe');
    document.body.appendChild(frame);
    frame.style.display = 'none';
    window.console.log = frame.contentWindow.console.log;
  }
  56.  
  /**
   * Read the `order` values declared in the page's `style.dynamic-styles` element.
   *
   * The stylesheet text is expected to be a run of rules shaped like
   * `.name{order:N}` or `name{order:N}`.
   *
   * @param {boolean} [useRegex=true] - Parse with a regular expression (default)
   *   or by splitting the text on `}` and `{order:`.
   * @returns {Object<string, number>} Map from selector name (leading dot
   *   removed) to its integer order. Empty when the style element is missing or
   *   has no text.
   */
  function getStyles(useRegex = true) {
    const style = {};
    // The element may be absent; treat that the same as empty text.
    const styleStr = document.querySelector('style.dynamic-styles')?.textContent;
    if (!styleStr) return style;

    if (useRegex) {
      const reg = /\.?([0-9a-zA-Z]+?)\{order:([0-9]+?)\}/g;
      // matchAll returns an iterator; for...of works on every engine, whereas
      // calling .forEach on it needs iterator-helper support.
      for (const match of styleStr.matchAll(reg)) {
        style[match[1]] = Number.parseInt(match[2], 10);
      }
    } else {
      for (const rule of styleStr.split('}')) {
        const [selector, order] = rule.split('{order:');
        // Skip empty or unrelated segments instead of recording NaN for them.
        if (order === undefined) continue;
        const name = selector.startsWith('.') ? selector.slice(1) : selector;
        style[name] = Number.parseInt(order, 10);
      }
    }
    return style;
  }
  73.  
  /**
   * Concatenate the text nodes found under `el`, in TreeWalker order.
   *
   * Text nodes whose direct parent is one of the listed non-content tags are
   * rejected. Original author's note: this currently runs incorrectly.
   *
   * @param {Node} [el=document.body] - Root of the walk.
   * @returns {string} The accepted text nodes' values joined without a separator.
   */
  function chapterContentByTreeWalker(el = document.body) {
    const ignoredParents = new Set([
      'META', 'SCRIPT', 'NOSCRIPT', 'STYLE', 'AREA', 'BASE', 'CANVAS', 'CODE', 'EMBED',
      'LINK', 'MAP', 'PARAM', 'SOURCE', 'VIDEO', 'IMG', 'PICTURE', 'INPUT', 'TEXTAREA',
    ]);
    const acceptText = (node) => (
      ignoredParents.has(node.parentNode?.tagName)
        ? NodeFilter.FILTER_REJECT
        : NodeFilter.FILTER_ACCEPT
    );
    const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, acceptText);

    let text = '';
    for (let current = walker.nextNode(); current; current = walker.nextNode()) {
      text += current.nodeValue;
    }
    return text;
  }
  88.  
  /**
   * Extract the current chapter by undoing the CSS `order` shuffling of
   * `div.published-content`.
   *
   * Each child of `div.published-content` is treated as one line, positioned by
   * the `order` of its class name. Each grandchild is a fragment of that line,
   * positioned by the `order` of its tag name. Orders come from getStyles(),
   * falling back to getComputedStyle when no truthy value is cached. The
   * innerText of the element just before `div.published-content` is prepended.
   *
   * @returns {{storyName: string, chapterName: string, chapterNumber: number,
   *   chapterContent: string}} Chapter data. Missing title elements yield '',
   *   and a URL without a `chuong-<n>` part yields NaN for `chapterNumber`.
   */
  function chapterContentByStyle() {
    console.log('Get chapter content');
    const result = {};
    const titles = document.querySelectorAll('main>div>.container>div>.line-clamp-1');
    // Either title element may be missing on pages that are not chapters.
    result.storyName = titles[0]?.textContent ?? '';
    result.chapterName = titles[1]?.textContent ?? '';
    const chapterMatch = location.href.match(/.\/chuong-(\d+)/);
    result.chapterNumber = chapterMatch ? Number.parseInt(chapterMatch[1], 10) : NaN;

    const badText = document.querySelector('div.published-content');
    const goodText = badText?.previousElementSibling?.innerText || '';
    if (!badText) {
      result.chapterContent = goodText;
      return result;
    }

    const style = getStyles();
    // Class names and tag names share one cache. Falsy entries (missing, 0, NaN)
    // are re-read from the computed style.
    const orderOf = (key, element) => {
      if (!style[key]) style[key] = Number.parseInt(getComputedStyle(element).order, 10);
      return style[key];
    };

    const badLines = Array(badText.children.length);
    for (const line of badText.children) {
      const lineKey = line.className.toLowerCase();
      orderOf(lineKey, line);
      const fragments = Array(line.children.length);
      for (const fragment of line.children) {
        fragments[orderOf(fragment.tagName.toLowerCase(), fragment)] = fragment.textContent;
      }
      badLines[style[lineKey]] = fragments.join('');
    }

    // NOTE(review): goodText is glued directly to the first reordered line; a
    // blank line only separates them when order index 0 is unused. Confirm this
    // is intended.
    result.chapterContent = (goodText.trim() + badLines.join('\n\n'))
      .replaceAll(/\n{3,}/g, '\n\n')
      .replaceAll('·', '');
    return result;
  }
  118.  
  // Extraction strategy used by the rest of the script. chapterContentByTreeWalker
  // is the alternative, but it returns a plain string whereas callers read
  // `.chapterContent` (and other fields) from the result.
  const getChapterContent = chapterContentByStyle;
  121.  
  /**
   * Begin a scraping run.
   *
   * Sets the persistent `downloading` flag. From a story index page
   * (`/truyen/<slug>`) it navigates to that story's first chapter; from any other
   * page it reloads the current one.
   *
   * @param {string} [url=window.location.href] - Page the run starts from.
   */
  function startDownload(url = window.location.href) {
    console.log('Start downloading');
    GM_setValue('downloading', true);

    const target = new URL(url, window.location.href);
    // Match on the path only, so the check works on whichever host the script is
    // running on rather than on one hard-coded domain.
    const isStoryIndex = /^\/truyen\/[a-z\-\d]+\/?$/.test(target.pathname);
    if (!isStoryIndex) {
      window.location.reload();
      return;
    }
    // Drop a trailing slash first so the result never contains `//chuong-1`.
    target.pathname = `${target.pathname.replace(/\/$/, '')}/chuong-1`;
    window.location.assign(target.href);
  }
  128.  
  /**
   * Finish a scraping run and save the collected chapters as one text file.
   *
   * Clears the `storyName` and `downloading` values, reads every stored chapter
   * (keys that are chapter numbers) in ascending numeric order, deletes all
   * stored values, and triggers a download named
   * `<storyName>(c<first>-c<last>).txt`. Does nothing further when no chapter
   * has been stored.
   *
   * @returns {Promise<void>}
   */
  async function stopDownload() {
    console.log('Stop downloading');
    const storyName = GM_getValue('storyName');
    GM_deleteValue('storyName');
    GM_deleteValue('downloading');

    const storedKeys = GM_listValues();
    // Only purely numeric keys are chapters. The comparator must return its
    // difference, otherwise the keys stay in storage order (e.g. 10 before 2).
    const chapters = storedKeys
      .filter((key) => /^\d+$/.test(String(key)))
      .sort((a, b) => Number.parseInt(a, 10) - Number.parseInt(b, 10));
    const parts = await Promise.all(chapters.map((chapter) => GM_getValue(`${chapter}`)));
    storedKeys.forEach((key) => GM_deleteValue(key));

    if (chapters.length === 0) {
      // Avoid saving an empty "undefined(cundefined-cundefined).txt".
      console.log('No chapters stored; nothing to save');
      return;
    }

    // Normalise every run of newlines to exactly one blank line.
    const content = parts.join('\n\n').replace(/\n+/g, '\n\n');

    const link = document.createElement('a');
    // encodeURIComponent (not encodeURI) so '#', '?' and '&' in the text are
    // escaped; an unescaped '#' would cut the data URI short.
    link.href = 'data:attachment/text,' + encodeURIComponent(content);
    link.target = '_blank';
    link.download = `${storyName}(c${chapters[0]}-c${chapters.at(-1)}).txt`;
    link.click();
  }
  147.  
  /**
   * Move to the following chapter.
   *
   * Picks at random between two ways of navigating: when Math.random() exceeds
   * 0.4 it sends an ArrowRight key press, otherwise it scrolls to the
   * navigation link and clicks it.
   */
  function nextChapter() {
    console.log('Go to next chapter');
    const useKeyboard = Math.random() > 0.4;
    if (useKeyboard) {
      pressKey('ArrowRight');
      return;
    }
    const nextLink = document.querySelector('.container>div:nth-last-child(2)>div:first-child>a:last-of-type');
    nextLink.scrollIntoView({ behavior: 'smooth' });
    nextLink.click();
  }
  157.  
  /**
   * Tell whether the current chapter is the last one.
   *
   * @returns {boolean} true when the navigation link's href ends with '#'.
   */
  function isLastChapter() {
    console.log('Is last chapter?');
    const nextLink = document.querySelector('.container>div:nth-last-child(2)>div:first-child>a:last-of-type');
    const { href } = nextLink;
    return href.endsWith('#');
  }
  162.  
  /**
   * Scrape the chapter currently shown, store it, then either continue to the
   * next chapter or finish the run.
   *
   * The page is scrolled down and back up with randomised pauses in between.
   * The chapter is stored under its chapter number; the story name is stored
   * under 'storyName'.
   *
   * @returns {Promise<void>}
   */
  async function download() {
    console.log('Downloading...');
    scrollTo({ left: 0, top: 10000, behavior: 'smooth' });
    // Original author's note: 350 ms is the time the chapter content needs to load.
    const loadDelay = 500 + Math.random() * 600;
    await sleep(loadDelay);

    const { storyName, chapterNumber, chapterName, chapterContent } = getChapterContent();
    GM_setValue('storyName', storyName);
    GM_setValue(chapterNumber, `${chapterName}\n\n${chapterContent}`);

    scrollTo({ left: 0, top: 0, behavior: 'smooth' });
    const settleDelay = 100 + Math.random() * 300;
    await sleep(settleDelay);

    if (!isLastChapter()) {
      nextChapter();
      return;
    }
    await stopDownload();
  }
  175.  
  // Entry point: wires up chapter-change detection and the start/stop shortcuts.
  (async function () {
    // Run in the top-level document only, never inside frames.
    if (window !== window.top) return;

    const reportError = (err) => console.error('Bachngocsach scraper:', err);

    // While a run is active, scrape and continue; otherwise just log the text.
    const processChapter = async () => {
      if (GM_getValue('downloading', false)) await download();
      else console.log(getChapterContent().chapterContent);
    };

    if (useEvent) {
      addUrlChangeEvent();
      console.log(getChapterContent().chapterContent);
      window.addEventListener('urlchange', () => {
        processChapter().catch(reportError);
      });
    } else {
      // Remember the last handled URL so each chapter is processed once, however
      // many mutations touch its content.
      let oldURL = '';
      const observer = new MutationObserver((mutations) => {
        for (const mutation of mutations) {
          // className is not a string on SVG elements; calling .includes on it
          // would throw, so check the type first.
          const { className } = mutation.target;
          const touchesContent = typeof className === 'string' && className.includes('published-content');
          if (!touchesContent || window.location.href === oldURL) continue;
          oldURL = window.location.href;
          processChapter().catch(reportError);
        }
      });
      observer.observe(document.querySelector('body'), { childList: true, subtree: true });
    }

    // A shortcut matches when the modifiers agree and its `key` value equals
    // either the event's key or its code.
    const matchesShortcut = (e, shortcut) => (
      e.ctrlKey === shortcut.ctrlKey
      && e.altKey === shortcut.altKey
      && e.shiftKey === shortcut.shiftKey
      && (e.key === shortcut.key || e.code === shortcut.key)
    );

    window.addEventListener('keydown', async (e) => {
      if (matchesShortcut(e, startKey)) startDownload(location.href);
      if (matchesShortcut(e, stopKey)) {
        try {
          await stopDownload();
        } catch (err) {
          reportError(err);
        }
      }
    });
  })();