// ==UserScript==
// @name Voz save thread to file
// @description Save your favorite thread into a html file
// @namespace Violentmonkey Scripts
// @match *://*.voz.vn/*
// @grant GM_xmlhttpRequest
// @version 0.5
// @run-at document-idle
// @license MIT
// ==/UserScript==
// When true, images found in the thread are downloaded and embedded in the saved file.
const saveWithImages = true;
// false: download pages one after another; true: fetch many (15) pages at once.
const agressiveFetch = true;
// Pause between requests, in ms: 20 by default, plus 300 more in aggressive mode.
let wT = agressiveFetch ? 20 + 300 : 20;

/** Return a promise that resolves (with no value) after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
/**
 * Gzip-compress arbitrary data.
 *
 * @param {*} data - Anything accepted as a Blob part (string, Blob, buffer, ...).
 * @returns {Promise<Blob>} Blob holding the gzip stream.
 */
async function zip(data) {
  const gzip = new CompressionStream("gzip");
  const source = new Blob([data]).stream();
  // Response is used only as a convenient way to drain the stream into a Blob.
  const drained = new Response(source.pipeThrough(gzip));
  return drained.blob();
}
/**
 * SHA-1 digest of a string, as lowercase hex.
 *
 * @param {string} message - Text to hash (encoded as UTF-8 first).
 * @returns {Promise<string>} 40-character hexadecimal digest.
 */
async function hash(message) {
  const bytes = new TextEncoder().encode(message);
  const digest = await window.crypto.subtle.digest("SHA-1", bytes);
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Promise wrapper around GM_xmlhttpRequest.
 *
 * @param {string} url - Absolute URL to request (`new URL(url)` throws on anything else).
 * @param {string|object} [detail] - Either a response type name
 *   (blob|text|json|arraybuffer|document) or an object of extra request
 *   options that is merged over the defaults `{url, origin}`.
 * @returns {Promise<*>} Resolves with the response body when the status is 200,
 *   otherwise with `false`.
 */
function xhr(url, detail) {
  const { origin } = new URL(url);
  // Built as a fresh object so the caller's `detail` is never mutated.
  const option = { url, origin };
  if (typeof detail === 'string') {
    if (/^(?:blob|text|json|arraybuffer|document)$/.test(detail)) {
      option.responseType = detail;
    }
  } else if (detail !== null && typeof detail === 'object') {
    Object.assign(option, detail);
  }
  return new Promise((resolve) => {
    option.onloadend = (res) => resolve(res.status === 200 ? res.response : false);
    GM_xmlhttpRequest(option);
  });
}
// Thread id (digits captured from the page URL); assigned by main().
let threadId;
// Embedded image cache filled by convertContent():
// SHA-1 hex of the image URL -> data: URL of the downloaded image.
let images={};
/**
 * Turn the raw HTML of one thread page into the compressed pieces stored in
 * the saved file.
 *
 * The page is parsed, stripped of forms, side blocks and the footer, its
 * relative URLs are made absolute, quotes and spoilers are expanded, and
 * (when `saveWithImages` is on) every image is downloaded into the shared
 * `images` cache and tagged with an `image-data` attribute holding its key.
 *
 * @param {string} htmlStr - HTML source of the thread page.
 * @param {boolean} [fetchWrapper=false] - When true, also inline the
 *   stylesheets and return the page shell around the thread body.
 * @returns {Promise<{threadWrapper: string, threadBody: string}>}
 *   `threadBody` is a data: URL of the gzipped `div.p-body-main` markup.
 *   `threadWrapper` is a data: URL of the gzipped remaining document, with
 *   `{ThreadBody_<threadId>}` where the body was, or '' when `fetchWrapper`
 *   is false.
 * @throws {Error} When the page has no `div.p-body-main`.
 */
async function convertContent(htmlStr, fetchWrapper=false) {
  const siteOrigin = 'https://voz.vn';

  // Make site-relative ("/x") and protocol-relative ("//host/x") URLs absolute.
  const toAbsolute = (url) => {
    if (url.startsWith('//')) return 'https:' + url;
    if (url.startsWith('/')) return siteOrigin + url;
    return url;
  };

  // Download a resource as a data: URL; null (with a warning) when that fails,
  // so one broken resource does not abort the whole page.
  const fetchAsDataURL = async (url) => {
    try {
      const blob = await xhr(url, 'blob'); // GM_xmlhttpRequest can bypass CORS
      if (blob) return await toDataURL(blob);
      console.warn('Download failed, resource left as remote link:', url);
    } catch (err) {
      console.warn('Could not embed resource:', url, err);
    }
    return null;
  };

  const html = new DOMParser().parseFromString(htmlStr, 'text/html');

  // Strip everything that is not thread content.
  html.querySelector('.blockMessage--none')?.remove();
  html.querySelectorAll('form').forEach((el) => el.remove());
  html.querySelectorAll('div.block').forEach((el) => {
    if (!el.matches('.block--messages')) el.remove();
  });
  html.querySelectorAll('div.p-body-main.p-body-main--withSidebar>*').forEach((el) => {
    if (!el.matches('.p-body-content')) el.remove();
  });
  html.querySelector('footer.p-footer')?.remove();

  // Absolute URLs, so links and resources still resolve from a local file.
  html.querySelectorAll('[href]').forEach((el) => {
    el.setAttribute('href', toAbsolute(el.getAttribute('href')));
  });
  html.querySelectorAll('[src]').forEach((el) => {
    let src = el.getAttribute('src');
    if (src.startsWith('data:image')) {
      // Lazy-loaded image: the real URL lives in data-src (when present).
      const lazySrc = el.getAttribute('data-src');
      if (lazySrc) src = lazySrc;
    }
    el.setAttribute('src', toAbsolute(src));
  });
  html.querySelectorAll('[srcset]').forEach((el) => {
    // Candidates are comma separated and usually followed by a space, so trim
    // before testing the prefix; already-absolute candidates are kept as is.
    const candidates = el.getAttribute('srcset')
      .split(',')
      .map((candidate) => candidate.trim())
      .filter(Boolean)
      .map(toAbsolute);
    el.setAttribute('srcset', candidates.join(', '));
  });

  // Expand long quotes
  html.querySelectorAll('div.bbCodeBlock-content>div.bbCodeBlock-expandContent.js-expandContent')
    .forEach((el) => { el.className = ''; });
  // Open spoilers
  html.querySelectorAll('.bbCodeSpoiler-button,.bbCodeSpoiler-content')
    .forEach((el) => el.classList.add('is-active'));
  // Page links are handled by the saved file's own script, so drop their targets
  html.querySelectorAll('div.pageNav a').forEach((el) => el.removeAttribute('href'));

  if (saveWithImages) {
    for (const img of html.querySelectorAll('img')) {
      const src = img.src;
      if (!src || src.startsWith('data:image')) continue;
      const key = await hash(src);
      if (images[key] === undefined) {
        const dataUrl = await fetchAsDataURL(src);
        if (dataUrl === null) continue; // keep the remote src as a fallback
        images[key] = dataUrl.replace(/^data:[^,]*;base64,/, 'data:image;base64,');
      }
      // Tag every occurrence, including images that were already cached.
      img.setAttribute('image-data', key);
    }
  }

  if (fetchWrapper) {
    for (const link of html.querySelectorAll('link[rel="stylesheet"]')) {
      const dataUrl = await fetchAsDataURL(link.href);
      if (dataUrl !== null) link.href = dataUrl;
    }
  }

  const body = html.querySelector('div.p-body-main');
  if (!body) throw new Error('convertContent: div.p-body-main not found in page');
  const threadBody = await toDataURL(await zip(body.outerHTML));
  // Placeholder the saved file's script swaps for the body of the shown page.
  body.outerHTML = `{ThreadBody_${threadId}}`;
  const threadWrapper = fetchWrapper
    ? await toDataURL(await zip(new XMLSerializer().serializeToString(html)))
    : '';
  return { threadWrapper, threadBody };
}
/**
 * Encode data as a base64 data: URL using FileReader.
 *
 * @param {*} data - Anything accepted as a Blob part (string, Blob, buffer, ...).
 * @returns {Promise<string>} The data: URL; rejects with the reader's error.
 */
function toDataURL(data) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = () => {
      reject(reader.error);
    };
    reader.onload = () => {
      resolve(reader.result);
    };
    const payload = new Blob([data]);
    reader.readAsDataURL(payload);
  });
}
/**
 * Download and convert every page of the current thread.
 *
 * Shows a progress bar while working. In aggressive mode pages are fetched in
 * batches of 15; pages that fail are retried (after a 2 s pause) a limited
 * number of times instead of being silently dropped.
 *
 * @returns {Promise<string>} JSON text of an array where index 0 is the page
 *   wrapper and index n is the body of page n (both as data: URLs). Pages that
 *   could not be saved are left as holes (serialized as null).
 */
async function saveThread() {
  // A thread with a single page has no page navigation: parseInt then yields
  // NaN (which `??` does not catch), so fall back to 1 explicitly.
  const lastPageText = document.querySelector("ul.pageNav-main>li:last-of-type>a")?.textContent;
  const parsedMax = Number.parseInt(lastPageText, 10);
  const maxPage = Number.isNaN(parsedMax) ? 1 : parsedMax;

  document.body.insertAdjacentHTML("beforeend",`<progress id="us_vtstf_progress" value=0 max=100 style="position:fixed;left:2px; bottom: 1px; width:99%"></progress>`);
  const progressBar = document.querySelector('#us_vtstf_progress');
  progressBar.max = maxPage;

  const pages = [];
  // Counted separately: `pages` is filled out of order, so its length is not
  // the number of finished pages.
  let donePages = 0;

  // Fetch and convert one page; resolves to true on success, false otherwise.
  async function fetchContent(pageNo) {
    try {
      const pageUrl = `https://voz.vn/t/${threadId}/page-${pageNo}`;
      const res = await fetch(pageUrl);
      if (res.status !== 200) return false;
      const data = await res.text();
      if (!data) return false;
      const { threadWrapper, threadBody } = await convertContent(data, pageNo === 1);
      if (pageNo === 1) pages[0] = threadWrapper;
      pages[pageNo] = threadBody;
      donePages += 1;
      progressBar.value = donePages;
      return true;
    } catch (err) {
      console.error(`Failed to save page ${pageNo}`, err);
      return false;
    }
  }

  try {
    if (agressiveFetch) {
      const runsNumber = 15;
      const maxRetries = 3;
      for (let first = 1; first <= maxPage; first += runsNumber) {
        const last = Math.min(first + runsNumber - 1, maxPage);
        let batch = Array.from({ length: last - first + 1 }, (_, k) => first + k);
        // Re-run only the pages that failed, backing off before each retry.
        for (let attempt = 0; batch.length > 0 && attempt <= maxRetries; attempt++) {
          if (attempt > 0) {
            console.log('Sâm thinh roong, wait for 2s to continue');
            await sleep(2000);
          }
          const outcomes = await Promise.all(batch.map((pageNo) => fetchContent(pageNo)));
          batch = batch.filter((_, idx) => !outcomes[idx]);
        }
        await sleep(wT);
      }
    } else {
      for (let i = 1; i <= maxPage; i++) {
        await fetchContent(i);
        await sleep(wT);
      }
    }
  } finally {
    progressBar.remove();
  }

  const missing = [];
  for (let pageNo = 1; pageNo <= maxPage; pageNo++) {
    if (pages[pageNo] === undefined) missing.push(pageNo);
  }
  if (missing.length > 0) console.warn('Pages that could not be saved:', missing);

  return JSON.stringify(pages);
}
/**
 * Entry point: on thread pages ("/t/..."), add a "Save Thread to file" button
 * next to every page navigation bar. Also records the id from the URL in the
 * file-level `threadId`.
 */
(async function main() {
  // Groups: 1 = "f" (forum) or "t" (thread), 2 = numeric id. The "slug." part
  // before the id is optional, and the dots are escaped so that an id-only URL
  // such as /t/12345/ is not split into slug "1234" + id "5".
  const urlMatch = location.href.match(/https:\/\/(?:.*\.)?voz\.vn\/(f|t)\/(?:[a-z\d\-]+\.)?(\d+)\/?(page-(\d+))?/);
  if (!urlMatch) return;
  const [, fOrT, id] = urlMatch;
  threadId = id;
  if (fOrT !== "t") return;

  // Build one independent button (with its own click handler) per nav bar.
  const createSaveButton = () => {
    const btnSave = document.createElement("a");
    btnSave.classList.add("pageNav-jump", "pageNav-jump--next");
    btnSave.textContent = "Save Thread to file";
    btnSave.style.cursor = "pointer";
    btnSave.onclick = async () => {
      try {
        await exportToFile(await saveThread());
      } catch (err) {
        console.error('Saving the thread failed', err);
      }
    };
    return btnSave;
  };

  document.querySelectorAll("ul.pageNav-main").forEach((el) => {
    el.parentElement.appendChild(createSaveButton());
  });
})();
/**
 * Build the self-contained viewer HTML and trigger its download.
 *
 * The generated file embeds the pages array, the `images` cache and a small
 * script that unzips a page on demand and wires up the page navigation.
 *
 * @param {string} pages - JSON text of the pages array produced by
 *   saveThread() (index 0 = wrapper, index n = body of page n).
 * @returns {Promise<void>}
 */
async function exportToFile(pages) {
  // The data is interpolated directly instead of going through
  // String.replace, whose replacement string gives "$" sequences a meaning.
  const html = `<html>
<head>
</head>
<body>
<div id="screen"></div>
<style>
#screen div.pageNav a{cursor: pointer;}
</style>
<script>
const threadBodyReplacement = '{ThreadBody_${threadId}}';
async function unZip(data) { //return Blob, lấy text thì them await .text()
let blob=new Blob([data]);
const ds = new DecompressionStream("gzip");
const decompressedStream = blob.stream().pipeThrough(ds);
return await new Response(decompressedStream).blob();
}
let screen = document.getElementById('screen');
let threadWrapper; //load in main()
async function showPage(pageId=1) {
pageId = parseInt(pageId, 10);
//Ignore cancelled prompts, non-numeric links and pages that are not in this file
if (!Number.isInteger(pageId) || pageId < 1 || !pages[pageId]) return;
const threadBody= await (await unZip(await fetch(pages[pageId]).then(a=>a.blob()) )).text();
//Function replacer: post text containing "$&" or "$$" must be inserted literally
const pageContent= threadWrapper.replace(threadBodyReplacement, () => threadBody);
screen.innerHTML=pageContent;
//load images
screen.querySelectorAll('img').forEach(el=>{
const key=el.getAttribute('image-data');
/(?:[0-9a-f][0-9a-f])+/.test(key) ? el.src=images[key] :''; //createObjectURL tu dataUrl bao loi, khong chuyen qua, mat cong
});
//Page number click
screen.querySelectorAll('ul.pageNav-main a:not([id])').forEach(el=> el.addEventListener('click',e=> {
e.preventDefault();
showPage(parseInt(e.target.textContent.trim()));
}));
//Goto page Click
screen.querySelectorAll('ul.pageNav-main a[title="Go to page"]')?.forEach(el => el.addEventListener('click',e=>{
let pageNo = prompt('Enter page number','1');
if (pageNo === null) return;
showPage(pageNo);
})
)
//Next Click
screen.querySelectorAll('.pageNav-jump.pageNav-jump--next')?.forEach(el=>el.addEventListener('click',(e)=> {
showPage(parseInt(pageId)+1);
} ));
//Prev Click
screen.querySelectorAll('.pageNav-jump.pageNav-jump--prev')?.forEach(el=>el.addEventListener('click',(e)=> {
showPage(parseInt(pageId)-1);
} ));
scrollTo(0,0);
}
var pages=${pages};
var images= ${JSON.stringify(images)};
(async function main() {
threadWrapper=await (await unZip(await fetch(pages[0]).then(r=>r.blob()) )).text();
await showPage(1);
})(); </script></body></html>`;

  const download = document.createElement('a');
  download.href = await toDataURL(html);
  download.target = '_blank';
  console.log('Saving to html file, size: ', html.length);

  // File name: last "-"-separated part of the page title, up to the first "|".
  // Falls back to the thread id so the file is never called "undefined_...".
  const pageTitle = document.querySelector('title')?.textContent.split('-').at(-1)?.split('|')[0].trim();
  const fileStem = pageTitle || `thread_${threadId}`;
  download.download = fileStem + '_' + (new Date()).toISOString().slice(0, 10) + '.html';
  download.click();
}