Voz save thread to file

Save your favorite thread to an HTML file

// ==UserScript==
// @name          Voz save thread to file
// @description   Save your favorite thread into a html file
// @namespace     Violentmonkey Scripts
// @match         *://*.voz.vn/*
// @grant         GM_xmlhttpRequest
// @version       0.5
// @run-at        document-idle
// @license       MIT
// ==/UserScript==
// --- User-tunable settings ---

// When true, the images found in each page are downloaded and stored with the saved thread.
const saveWithImages = true;

// false: download the pages one after another; true: download many (15) pages at the same time.
const agressiveFetch = true;

// Pause between requests, in ms. Batch mode waits an extra 300 ms between batches.
let wT = 20 + (agressiveFetch ? 300 : 0);

/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

/**
 * Gzip-compresses the given data.
 *
 * @param {BlobPart} data - Anything accepted as a Blob part (string, Blob, buffer).
 * @returns {Promise<Blob>} Blob holding the gzip stream.
 */
async function zip(data) {
  const source = new Blob([data]);
  const gzipped = source.stream().pipeThrough(new CompressionStream("gzip"));
  // Response is used only as a convenient way to drain the stream into a Blob.
  const collector = new Response(gzipped);
  return collector.blob();
}

async function hash(message) {
  const msgUint8 = new TextEncoder().encode(message);
  const hashBuffer = await window.crypto.subtle.digest("SHA-1", msgUint8);
  const hashArray = Array.from(new Uint8Array(hashBuffer));
  return hashArray.map((b) => b.toString(16).padStart(2, "0")).join("");
}

/**
 * Promise wrapper around GM_xmlhttpRequest.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string|object} [detail] - Either a response type
 *   ('blob' | 'text' | 'json' | 'arraybuffer' | 'document') or an object of
 *   GM_xmlhttpRequest options. Options given here override the defaults;
 *   `url` defaults to the first argument. Any `onloadend` it carries is replaced.
 * @returns {Promise<*>} Resolves with the response on HTTP 200 and with `false`
 *   for any other outcome. The promise never rejects because of the request.
 * @throws {TypeError} If `url` is not a valid absolute URL.
 */
function xhr(url, detail) {
  const { origin } = new URL(url);
  let option = { url, origin };

  if (typeof detail === 'string') {
    if (/^(?:blob|text|json|arraybuffer|document)$/.test(detail)) option.responseType = detail;
  } else if (detail !== null && typeof detail === 'object') {
    // Copy instead of adopting the caller's object so it is not mutated below.
    option = { url, ...detail };
  }

  return new Promise((resolve) => {
    // onloadend fires for success, error, abort and timeout alike.
    option.onloadend = (res) => resolve(res.status === 200 ? res.response : false);
    GM_xmlhttpRequest(option);
  });
}

// Id of the current thread/forum (digits captured from the page URL); assigned in main().
let threadId;
// Cache of downloaded images: SHA-1 hex of the image URL -> base64 data URL.
let images={};
/**
 * Cleans up one downloaded thread page and packs it for the offline viewer.
 *
 * The page is parsed, everything except the message list is stripped, links
 * and media URLs are made absolute, quotes and spoilers are expanded and
 * pagination links are neutralised. When `saveWithImages` is on, every image
 * is downloaded into the shared `images` cache and tagged with an
 * `image-data` attribute holding its cache key.
 *
 * @param {string} htmlStr - Raw HTML of the thread page.
 * @param {boolean} [fetchWrapper=false] - Also produce the page wrapper
 *   (everything around the thread body, with stylesheets inlined).
 * @returns {Promise<{threadWrapper: string, threadBody: string}>} Data URLs of
 *   the gzipped wrapper ('' when `fetchWrapper` is false) and gzipped body.
 * @throws {Error} If the page has no `div.p-body-main` container.
 */
async function convertContent(htmlStr, fetchWrapper=false) {
  const siteOrigin = 'https://voz.vn';
  // Turns site-relative and protocol-relative URLs into absolute ones.
  const toAbsolute = (url) => {
    if (url.startsWith('//')) return `https:${url}`;
    if (url.startsWith('/')) return siteOrigin + url;
    return url;
  };

  const html = new DOMParser().parseFromString(htmlStr, 'text/html');

  // Strip everything that is not part of the message list.
  html.querySelector('.blockMessage--none')?.remove();
  html.querySelectorAll('form').forEach((el) => el.remove());
  html.querySelectorAll('div.block').forEach((el) => {
    if (!el.matches('.block--messages')) el.remove();
  });
  html.querySelectorAll('div.p-body-main.p-body-main--withSidebar>*').forEach((el) => {
    if (!el.matches('.p-body-content')) el.remove();
  });
  html.querySelector('footer.p-footer')?.remove();

  html.querySelectorAll('[href]').forEach((el) => {
    el.setAttribute('href', toAbsolute(el.getAttribute('href')));
  });

  html.querySelectorAll('[src]').forEach((el) => {
    let src = el.getAttribute('src');
    if (src.startsWith('data:image')) {
      // Placeholder of a lazy-loaded image: the real URL lives in data-src.
      // Without data-src it is a genuine inline image and is left untouched.
      const lazySrc = el.getAttribute('data-src');
      if (!lazySrc) return;
      src = lazySrc;
    }
    el.setAttribute('src', toAbsolute(src));
  });

  html.querySelectorAll('[srcset]').forEach((el) => {
    // Candidates are comma separated and usually padded with spaces; keep the
    // ones that are already absolute instead of dropping them.
    const candidates = el.getAttribute('srcset').split(',').map((c) => toAbsolute(c.trim()));
    el.setAttribute('srcset', candidates.join(', '));
  });

  // Expand long quotes.
  html.querySelectorAll('div.bbCodeBlock-content>div.bbCodeBlock-expandContent.js-expandContent').forEach((el) => { el.className = ''; });

  // Open spoilers.
  html.querySelectorAll('.bbCodeSpoiler-button,.bbCodeSpoiler-content').forEach((el) => el.classList.add('is-active'));

  // Pagination links are handled by the viewer script, so drop their targets.
  html.querySelectorAll('div.pageNav a').forEach((el) => el.removeAttribute('href'));

  if (saveWithImages) {
    for (const img of html.querySelectorAll('img')) {
      const src = img.src;
      if (!src || src.startsWith('data:image')) continue;
      const key = await hash(src);
      if (images[key] === undefined) {
        const blob = await xhr(src, 'blob');  // GM_xmlhttpRequest can bypass CORS
        if (!blob) continue;  // download failed: keep the remote src as is
        images[key] = (await toDataURL(blob)).replace(/data:.*;base64,/, 'data:image;base64,');
      }
      // Tag every occurrence, including images that were already cached.
      img.setAttribute('image-data', key);
    }
  }

  if (fetchWrapper) {
    for (const link of html.querySelectorAll('link[rel="stylesheet"]')) {
      const css = await xhr(link.href, 'blob');
      if (css) link.href = await toDataURL(css);
    }
  }

  const main = html.querySelector('div.p-body-main');
  if (!main) throw new Error('convertContent: div.p-body-main not found in page');

  const threadBody = await toDataURL(await zip(main.outerHTML));
  // Leave a placeholder where the viewer later injects the body of a page.
  main.outerHTML = `{ThreadBody_${threadId}}`;
  const threadWrapper = fetchWrapper
    ? await toDataURL(await zip(new XMLSerializer().serializeToString(html)))
    : '';
  return { threadWrapper, threadBody };
}

/**
 * Encodes data as a base64 `data:` URL.
 *
 * @param {BlobPart} data - String, Blob or buffer; it is wrapped in a new Blob first.
 * @returns {Promise<string>} Resolves with the data URL, rejects with the reader's error.
 */
function toDataURL(data) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = () => {
      reject(reader.error);
    };
    reader.onload = () => {
      resolve(reader.result);
    };
    const payload = new Blob([data]);
    reader.readAsDataURL(payload);
  });
}

/**
 * Downloads every page of the current thread and converts it for the viewer.
 *
 * A progress bar is shown while pages are fetched, either in batches
 * (`agressiveFetch`) or one by one. In batch mode, failed pages are retried
 * a few times before being skipped.
 *
 * @returns {Promise<string>} JSON array string: index 0 is the page wrapper,
 *   index n the body of page n. Pages that could not be saved are `null`.
 */
async function saveThread() {
  const lastPageText = document.querySelector("ul.pageNav-main>li:last-of-type>a")?.textContent;
  // parseInt yields NaN (not null) when there is no pagination, so `??` would never fall back.
  const maxPage = Number.parseInt(lastPageText, 10) || 1;
  document.body.insertAdjacentHTML("beforeend",`<progress id="us_vtstf_progress" value=0 max=100 style="position:fixed;left:2px; bottom: 1px; width:99%"></progress>`);
  const progressBar=document.querySelector('#us_vtstf_progress');
  progressBar.max=maxPage;

  const pages = [];
  let savedCount = 0;

  // Fetches and converts one page; returns true on success, false otherwise.
  async function fetchContent(pageNo) {
    try {
      const pageUrl = `https://voz.vn/t/${threadId}/page-${pageNo}`;
      const res = await fetch(pageUrl);
      if (res.status !== 200) return false;
      const data = await res.text();
      if (!data) return false;

      const { threadWrapper, threadBody } = await convertContent(data, pageNo === 1);
      if (pageNo === 1) pages[0] = threadWrapper;
      pages[pageNo] = threadBody;
      // Count finished pages: with parallel fetches pages.length only reflects the highest index.
      savedCount += 1;
      progressBar.value = savedCount;
      return true;
    } catch (err) {
      console.error(`Failed to save page ${pageNo}`, err);
      return false;
    }
  }

  try {
    if (agressiveFetch) {
      const runsNumber = 15;  // pages fetched in parallel
      const maxRetries = 3;   // extra attempts for pages that failed
      for (let first = 1; first <= maxPage; first += runsNumber) {
        let pending = [];
        for (let p = first; p < first + runsNumber && p <= maxPage; p++) pending.push(p);

        for (let attempt = 0; pending.length > 0; attempt++) {
          const outcomes = await Promise.all(pending.map((p) => fetchContent(p)));
          pending = pending.filter((_, idx) => !outcomes[idx]);
          if (pending.length === 0) break;
          if (attempt >= maxRetries) {
            console.warn('Giving up on pages:', pending.join(', '));
            break;
          }
          console.log('Sâm thinh roong, wait for 2s to continue');
          await sleep(2000);
        }
        await sleep(wT);
      }
    } else {
      for (let i = 1; i <= maxPage; i++) {
        await fetchContent(i);
        await sleep(wT);
      }
    }
  } finally {
    // Never leave the progress bar behind, even when something throws.
    progressBar.remove();
  }
  return JSON.stringify(pages);
}

/**
 * Entry point: reads the thread id from the page URL and, on thread pages,
 * adds a "Save Thread to file" button next to every pagination bar.
 */
(async function main() {
  // Matches https://[sub.]voz.vn/(f|t)/[slug.]<id>; group 1 is the page type, group 2 the id.
  const match = location.href.match(/^https:\/\/(?:[^\/]*\.)?voz\.vn\/(f|t)\/(?:[^\/.]+\.)?(\d+)/);
  if (!match) return;

  const [, fOrT, id] = match;
  threadId = id;
  if (fOrT !== 't') return;

  let saving = false;
  const onSaveClick = async () => {
    if (saving) return;  // ignore clicks while a save is already running
    saving = true;
    try {
      await exportToFile(await saveThread());
    } catch (err) {
      console.error('Voz save thread to file: saving failed', err);
    } finally {
      saving = false;
    }
  };

  // Create SaveThread button, one per pagination bar.
  document.querySelectorAll('ul.pageNav-main').forEach((el) => {
    const btnSave = document.createElement('a');
    btnSave.classList.add('pageNav-jump', 'pageNav-jump--next');
    btnSave.textContent = 'Save Thread to file';
    btnSave.style.cursor = 'pointer';
    btnSave.addEventListener('click', onSaveClick);
    el.parentElement.appendChild(btnSave);
  });
})();

/**
 * Builds the self-contained viewer HTML and triggers its download.
 *
 * The exported file embeds the packed pages, the image cache and a small
 * script that unpacks a page on demand and wires up the pagination controls.
 *
 * @param {string} pages - JSON array string as returned by saveThread():
 *   index 0 is the gzipped wrapper data URL, index n the gzipped body of page n.
 * @returns {Promise<void>}
 */
async function exportToFile(pages) {
  // The data is interpolated directly instead of being spliced in with
  // String.replace, whose replacement string gives `$` sequences a special meaning.
  const html = `<html>
  <head>
    <meta charset="utf-8">
  </head>
  <body>
    <div id="screen"></div>
    <style>
      #screen div.pageNav a{cursor: pointer;}
    </style>
  <script>
const threadBodyReplacement = '{ThreadBody_${threadId}}';

async function unZip(data) {  //return Blob, lấy text thì them await .text()
  let blob=new Blob([data]);
  const ds = new DecompressionStream("gzip");
  const decompressedStream = blob.stream().pipeThrough(ds);
  return await new Response(decompressedStream).blob();
}

let screen = document.getElementById('screen');
let threadWrapper; //load in main()

async function showPage(pageId=1) {
  pageId = parseInt(pageId, 10);
  //ignore invalid, out of range or missing pages
  if (!(pageId >= 1) || !pages[pageId]) return;

  const threadBody= await (await unZip(await fetch(pages[pageId]).then(a=>a.blob()) )).text();
  //function replacer: post content may contain "$" sequences that must not be interpreted
  const pageContent= threadWrapper.replace(threadBodyReplacement, () => threadBody);
  screen.innerHTML=pageContent;

  //load images
  screen.querySelectorAll('img').forEach(el=>{
    const key=el.getAttribute('image-data');
    if (key && images[key]) el.src=images[key]; //createObjectURL tu dataUrl bao loi, khong chuyen qua, mat cong
  });

  //Page number click
  screen.querySelectorAll('ul.pageNav-main a:not([id])').forEach(el=> el.addEventListener('click',e=> {
    e.preventDefault();
    showPage(parseInt(e.target.textContent.trim(), 10));
  }));

    //Goto page Click
  screen.querySelectorAll('ul.pageNav-main a[title="Go to page"]').forEach(el => el.addEventListener('click',e=>{
    e.preventDefault();
    const pageNo = parseInt(prompt('Enter page number','1'), 10);
    if (Number.isNaN(pageNo)) return;
    showPage(pageNo);
    })
  )

  //Next Click
  screen.querySelectorAll('.pageNav-jump.pageNav-jump--next').forEach(el=>el.addEventListener('click',(e)=> {
    showPage(pageId+1);
  }   ));

    //Prev Click
  screen.querySelectorAll('.pageNav-jump.pageNav-jump--prev').forEach(el=>el.addEventListener('click',(e)=> {
    showPage(pageId-1);
  }   ));

  scrollTo(0,0);
}

var pages=${pages};
var images= ${JSON.stringify(images)};
(async function main() {
    threadWrapper=await (await unZip(await fetch(pages[0]).then(r=>r.blob()) )).text();
    await showPage(1);
  })(); </script></body></html>`;

  // A Blob object URL avoids base64-encoding the whole file into one data URL.
  const fileUrl = URL.createObjectURL(new Blob([html], { type: 'text/html;charset=utf-8' }));

  const download = document.createElement('a');
  download.href = fileUrl;
  download.target = '_blank';
  console.log('Saving to html file, size: ', html.length);
  // Thread title: last '-'-separated part of <title>, up to the first '|'.
  const title = document.querySelector('title')?.textContent.split('-').at(-1)?.split('|')[0].trim()
    || `thread_${threadId}`;
  download.download = `${title}_${new Date().toISOString().slice(0, 10)}.html`;
  download.click();

  // Release the object URL once the browser has had time to start the download.
  setTimeout(() => URL.revokeObjectURL(fileUrl), 60000);
}