SingleFile - 单文件保存网页

保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像

当前为 2021-05-01 提交的版本,查看 最新版本

您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         SingleFile - 单文件保存网页
// @namespace    SingleFile
// @version      1.0.0
// @description  保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像
// @author       PY-DNG
// @include      *
// @connect      *
// @icon         data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @grant        GM_unregisterMenuCommand
// @grant        GM_info
// ==/UserScript==

// /*-pass*/ marks features that are still to be developed

(function () {
	'use strict';

	// Developer Mode
	const developer = true;

	// Inner consts
	// Menu captions shown while idle and while a save is running.
	const TEXT_SAVEPAGE = '保存此网页';
	const TEXT_SAVING = '保存中...';
	// Provenance banner written at the top of every saved file.
	// It is built by interpolation rather than by chained replaceAll() calls with
	// string replacements: in a replacement string, sequences such as "$&" or "$'"
	// are expanded as patterns, so a page URL or script name containing them would
	// corrupt the banner, and a later pass could rewrite placeholder-like text that
	// an earlier pass had just inserted.
	const TEXT_ABOUT = [
		`<!-- Web Page Saved By ${GM_info.script.name} Ver.${GM_info.script.version}, Author ${GM_info.script.author} -->`,
		`<!-- Page URL: ${location.href} -->`,
	].join('\n');

	// Shared variables
	// Scratch loop counters visible to every function in this closure.
	let i, j;

	// Verbosity levels for DoLog, from quietest (None) to most verbose (Elements).
	// DoLog prints a message only when its level is <= g_logLevel.
	let LogLevel = {
		None: 0,
		Error: 1,
		Success: 2,
		Warning: 3,
		Info: 4,
		Elements: 5,
	};
	// Number of messages DoLog has printed since it last cleared the console.
	let g_logCount = 0;
	// Current verbosity threshold: with Success, only Error and Success messages are printed.
	let g_logLevel = LogLevel.Success;

	/**
	 * Writes a message, or a raw value, to the console if its level is within
	 * the current verbosity threshold (g_logLevel).
	 *
	 * @param {number} [level=LogLevel.Info] - One of the LogLevel values.
	 * @param {*} msgOrElement - Text to print, or any value to hand to console.log as-is.
	 * @param {boolean} [isElement=false] - When true the value is logged untouched,
	 *     without the coloured level tag.
	 */
	function DoLog(level = LogLevel.Info, msgOrElement, isElement = false) {
		// Drop everything that is more verbose than the configured threshold.
		if (!(level <= g_logLevel)) {
			return;
		}

		// One row per level: [level value, tag printed after "%c", CSS for the tag].
		const decorations = [
			[LogLevel.Error, '[Error]', 'color:#ff0000'],
			[LogLevel.Success, '[Success]', 'color:#00aa00'],
			[LogLevel.Warning, '[Warning]', 'color:#ffa500'],
			[LogLevel.Info, '[Info]', 'color:#888888'],
			[LogLevel.Elements, 'Elements', 'color:#000000'],
		];
		// Loose comparison is kept on purpose so level matching behaves exactly as before.
		const decoration = decorations.find((row) => row[0] == level);
		const tag = decoration ? decoration[1] : '';
		const css = decoration ? decoration[2] : '';

		if (level == LogLevel.Elements || isElement) {
			// Raw values are logged untouched so the console can render them interactively.
			console.log(msgOrElement);
		} else {
			console.log('%c' + tag + msgOrElement, css);
		}

		// Keep the console from growing without bound: wipe it after 512 messages.
		g_logCount += 1;
		if (g_logCount > 512) {
			console.clear();
			g_logCount = 0;
		}
	}

	// Task list
	// Pipeline steps in the order nextTask() runs them.
	const taskList = [getDom, removeScripts, dealStyles, dealElements, output];
	// Step most recently started by nextTask(); null until the first run.
	let taskNow = null;

	// Working copy of the page: created by getDom, modified by the later steps, serialized by output.
	let Dom;
	// saving: true while a save is in progress.
	// cmdID: handle returned by GM_registerMenuCommand for the current menu entry.
	let saving = false, cmdID;

	// Register the menu entry as soon as the script loads.
	GUI();

	/**
	 * Adds the "save this page" entry to the userscript manager menu and keeps
	 * the returned handle in cmdID.
	 */
	function GUI() {
		const menuHandle = GM_registerMenuCommand(TEXT_SAVEPAGE, saveOnclick);
		cmdID = menuHandle;
	}

	/**
	 * Menu command handler: starts a save run unless one is already in progress.
	 *
	 * @returns {false|undefined} false when a save is already running, otherwise nothing.
	 */
	function saveOnclick() {
		if (!saving) {
			// Flip the menu caption to "saving", then start the first pipeline step.
			switchStatus();
			DoLog(LogLevel.Success, 'SingleFile started.');
			nextTask();
			return;
		}
		return false;
	}

	/**
	 * Toggles the saving flag and replaces the menu entry so that its caption
	 * reflects the new state.
	 */
	function switchStatus() {
		saving = !saving;

		// Drop the previous entry, if there is one, before registering its replacement.
		if (cmdID) {
			GM_unregisterMenuCommand(cmdID);
		}

		let caption = TEXT_SAVEPAGE;
		if (saving) {
			caption = TEXT_SAVING;
		}
		cmdID = GM_registerMenuCommand(caption, saveOnclick);
	}

	/**
	 * Pipeline step: snapshots the current page markup and parses it into a
	 * separate document stored in Dom, which the later steps modify.
	 */
	function getDom() {
		DoLog(LogLevel.Info, 'Getting document...');

		const rootElement = document.querySelector('html');
		const pageMarkup = rootElement.outerHTML;
		const parser = new DOMParser();
		Dom = parser.parseFromString(pageMarkup, 'text/html');

		DoLog(LogLevel.Info, Dom, true);
		nextTask();
	}

	/**
	 * Pipeline step: removes every <script> element from the working copy (Dom),
	 * then hands over to the next step.
	 */
	function removeScripts() {
		DoLog(LogLevel.Info, 'Removing scripts...');

		// querySelectorAll returns a static list, so removing nodes while iterating is safe.
		const scripts = Dom.querySelectorAll('script');
		for (const script of scripts) {
			script.remove();
		}

		// DoLog takes the level first. Without it the object itself was treated as
		// the level, the threshold check failed, and nothing was ever printed.
		DoLog(LogLevel.Info, Dom, true);
		DoLog(LogLevel.Info, scripts, true);

		nextTask();
	}

	/**
	 * Pipeline step: downloads every external stylesheet linked from the working
	 * copy (Dom), merges the CSS into a single <style> element inside Dom, and
	 * removes the original <link> elements.
	 *
	 * Differences from the previous implementation:
	 *  - a page without stylesheet links no longer stalls the pipeline;
	 *  - the merged <style> is placed in Dom, not in the live page;
	 *  - CSS is merged in document order, not in the order the responses arrive,
	 *    so the cascade is preserved.
	 */
	function dealStyles() {
		DoLog(LogLevel.Info, 'Dealing styles...');
		const CSSLinks = Array.from(Dom.querySelectorAll('link[rel="stylesheet"]'));

		// Nothing to download: move on straight away instead of waiting for
		// callbacks that will never fire.
		if (CSSLinks.length === 0) {
			DoLog(LogLevel.Info, 'No external stylesheets to inline.');
			nextTask();
			return;
		}

		// One slot per link so the final text follows document order.
		const styleTexts = new Array(CSSLinks.length).fill('');
		let rest = CSSLinks.length;

		CSSLinks.forEach((cLink, index) => {
			DoLog(LogLevel.Info, 'Requesting style from ' + cLink.href);
			requestText(cLink.href, (styleText) => {
				styleTexts[index] = styleText ?? '';
				rest--;
				DoLog(LogLevel.Info, 'Style got. Rest: ' + String(rest));
				if (rest === 0) {
					finish();
				}
			});
		});

		// Runs once, after the last stylesheet response has been recorded.
		function finish() {
			// Insert style element
			const styleEle = Dom.createElement('style');
			styleEle.textContent = styleTexts.join('\n');

			// Put the downloaded CSS ahead of the page's own <style> blocks so
			// those keep overriding it; look in Dom, never in the live document.
			const firstInnerStyle = Dom.querySelector('style');
			if (firstInnerStyle) {
				firstInnerStyle.before(styleEle);
			} else {
				Dom.head.appendChild(styleEle);
			}

			// Remove link elements
			for (const link of CSSLinks) {
				link.remove();
			}

			nextTask();
		}
	}

	/**
	 * Pipeline step: walks every element of the working copy (Dom) and embeds
	 * the images it references as data URLs: <img src> first, then
	 * background-image. Calls nextTask() once every element has been processed.
	 *
	 * Differences from the previous implementation:
	 *  - <img> elements with an empty src are skipped instead of requesting '';
	 *  - layered backgrounds ("url(a), url(b)") are matched one URL at a time
	 *    (the old greedy pattern swallowed everything between the first and the
	 *    last quote);
	 *  - the inline style is used when the computed style yields nothing.
	 */
	function dealElements() {
		DoLog(LogLevel.Info, 'dealing elements...');
		const allEles = Dom.querySelectorAll('*');
		let restElesCount = allEles.length;

		// Nothing to process: advance right away rather than wait for a countdown
		// that would never reach zero.
		if (restElesCount === 0) {
			nextTask();
			return;
		}

		for (const element of allEles) {
			dealElement(element);
		}

		// Entry point for one element; the stages chain dealImg -> dealBackgroundImg -> elementDealed.
		function dealElement(element) {
			DoLog(LogLevel.Info, element, true);

			dealImg(element);
		}

		// Stage 1: replace a remote <img src> with a data URL.
		function dealImg(element) {
			const nextDealingTask = function() {dealBackgroundImg(element);};
			const src = element.tagName === 'IMG' ? element.src : '';

			// Not an image, no source at all, or already embedded: nothing to fetch.
			if (!src || src.startsWith('data:')) {
				nextDealingTask();
				return;
			}

			requestImageURL(src, function(dataURL) {
				element.src = dataURL;
				// TODO: decide how <canvas> content should be captured.
				nextDealingTask();
			});
		}

		// Stage 2: replace every remote background-image URL with a data URL.
		function dealBackgroundImg(element) {
			// NOTE(review): Dom comes from DOMParser and is never rendered, so the
			// computed style may be empty for its elements (confirm); the inline
			// style is consulted as a fallback.
			const cStyle = getComputedStyle(element);
			const backgroundImage = cStyle?.['background-image'] || element.style?.['background-image'] || '';

			// One match per url("http...") token; [^"]+ stops at the closing quote
			// so each layer of a multi-layer background is captured separately.
			const urlPattern = /url\("(http[^"]+)"\)/g;
			const urlMatches = [...backgroundImage.matchAll(urlPattern)];
			if (urlMatches.length === 0) {
				elementDealed();
				return;
			}

			// Serialized (escaped) URL -> data URL.
			const dataURLs = new Map();
			let pending = urlMatches.length;

			for (const urlMatch of urlMatches) {
				const escapedUrl = urlMatch[1];
				// Undo the backslash doubling applied when the style value was serialized.
				const url = escapedUrl.replaceAll('\\\\', '\\');
				requestImageURL(url, function(dataURL) {
					dataURLs.set(escapedUrl, dataURL);
					pending--;
					if (pending > 0) {
						return;
					}
					// A replacer function avoids "$" patterns being expanded in the result.
					const propValue = backgroundImage.replace(urlPattern, function(whole, matchedUrl) {
						return 'url("' + dataURLs.get(matchedUrl) + '")';
					});
					element.style['background-image'] = propValue;
					elementDealed();
				});
			}
		}

		// Final stage: count the element as done and advance when none are left.
		function elementDealed() {
			restElesCount--;
			DoLog(LogLevel.Info, 'element dealed, rest: ' + String(restElesCount) + ' elements');
			if (restElesCount === 0) {
				nextTask();
			}
		}
	}

	/**
	 * Final pipeline step: serializes the working copy (Dom) and offers it as a
	 * download named "SingleFile - <page title>.html", then switches the menu
	 * back to its idle state.
	 *
	 * The saved text is laid out as: UTF-8 BOM, the page's doctype (if it has
	 * one), the TEXT_ABOUT banner, a blank line, and the <html> markup.
	 */
	function output() {
		DoLog(LogLevel.Success, 'SingleFile finished.');
		DoLog(LogLevel.Success, Dom, true);

		try {
			// Dom was parsed from the <html> element alone, so the doctype has to
			// come from the live document; without one the saved copy would be
			// rendered in quirks mode.
			const doctypeText = serializeDoctype(document.doctype);
			const doctypeLine = doctypeText ? doctypeText + '\n' : '';

			// The file is written as UTF-8. The BOM makes browsers decode it that
			// way even if the page markup still declares a different charset.
			const outputText = '\uFEFF' + doctypeLine + TEXT_ABOUT + '\n\n' + Dom.documentElement.outerHTML;
			saveTextToFile(outputText, 'SingleFile - ' + document.title + '.html');
		} finally {
			// Always leave the "saving" state, even if serializing or saving threw.
			switchStatus();
		}

		// Rebuilds the <!DOCTYPE ...> declaration from a DocumentType node ('' if absent).
		function serializeDoctype(doctype) {
			if (!doctype) {
				return '';
			}
			let text = '<!DOCTYPE ' + doctype.name;
			if (doctype.publicId) {
				text += ' PUBLIC "' + doctype.publicId + '"';
			} else if (doctype.systemId) {
				text += ' SYSTEM';
			}
			if (doctype.systemId) {
				text += ' "' + doctype.systemId + '"';
			}
			return text + '>';
		}
	}

	/**
	 * Starts the pipeline step that follows the one started last (taskNow).
	 *
	 * With no step started yet, or when the last step of taskList was the most
	 * recent one, the pipeline starts again from the first step. That wrap-around
	 * is what lets a second save run: the last step (output) does not call
	 * nextTask(), so taskNow still points at it when the next save begins.
	 * Previously that case only reset taskNow and returned without running
	 * anything, which left the script stuck in the "saving" state.
	 *
	 * Because of the wrap-around, the last step must not call nextTask() itself.
	 */
	function nextTask() {
		// indexOf yields -1 for null (nothing started yet), which maps to index 0.
		const currentIndex = taskList.indexOf(taskNow);
		const nextIndex = (currentIndex + 1) % taskList.length;
		taskNow = taskList[nextIndex];
		taskNow();
	}

	/**
	 * Fetches a URL as text through GM_xmlhttpRequest.
	 *
	 * The callback is invoked exactly once per call. On a network error, timeout,
	 * abort, or an HTTP status of 400 and above it receives an empty string, so
	 * callers that count outstanding requests are never left waiting.
	 *
	 * @param {string} url - Address to fetch.
	 * @param {function} callback - Called as callback(text, ...args).
	 * @param {Array} [args=[]] - Extra arguments appended after the text.
	 */
	function requestText(url, callback, args=[]) {
		let settled = false;

		// Guards against a second notification for the same request.
		const deliver = function(text) {
			if (settled) {
				return;
			}
			settled = true;
			const argvs = [text].concat(args);
			callback(...argvs);
		};

		const fail = function(reason) {
			DoLog(LogLevel.Error, 'Request for ' + url + ' failed (' + reason + '); continuing without it.');
			deliver('');
		};

		GM_xmlhttpRequest({
			method:       'GET',
			url:          url,
			responseType: 'text',
			onload:       function(response) {
				// An error page is not the resource that was asked for.
				if (response.status >= 400) {
					fail('HTTP ' + response.status);
					return;
				}
				deliver(response.responseText);
			},
			onerror:      function() {fail('network error');},
			ontimeout:    function() {fail('timeout');},
			onabort:      function() {fail('aborted');}
		});
	}

	/**
	 * Fetches a URL as a blob through GM_xmlhttpRequest and converts it to a
	 * data URL.
	 *
	 * The callback is invoked exactly once per call. If the download or the
	 * conversion fails it receives the original url, so the caller's countdown
	 * still completes and the saved page simply keeps the remote reference.
	 *
	 * @param {string} url - Address of the image.
	 * @param {function} callback - Called as callback(dataURLOrOriginalUrl, ...args).
	 * @param {Array} [args=[]] - Extra arguments appended after the URL.
	 */
	function requestImageURL(url, callback, args=[]) {
		let settled = false;

		// Guards against a second notification for the same request.
		const deliver = function(resultURL) {
			if (settled) {
				return;
			}
			settled = true;
			const argvs = [resultURL].concat(args);
			callback(...argvs);
		};

		const fail = function(reason) {
			DoLog(LogLevel.Error, 'Image request for ' + url + ' failed (' + reason + '); keeping the original URL.');
			deliver(url);
		};

		GM_xmlhttpRequest({
			method:       'GET',
			url:          url,
			responseType: 'blob',
			onload:       function(response) {
				if (response.status >= 400 || !response.response) {
					fail('HTTP ' + response.status);
					return;
				}
				blobToDataURI(response.response, deliver, function() {fail('unreadable image data');});
			},
			onerror:      function() {fail('network error');},
			ontimeout:    function() {fail('timeout');},
			onabort:      function() {fail('aborted');}
		});

		// Reads a blob into a data URL; exactly one of onDone / onFail is called.
		function blobToDataURI(blob, onDone, onFail) {
			const reader = new FileReader();
			reader.onload = function () {
				onDone(reader.result);
			};
			reader.onerror = function () {
				onFail();
			};
			try {
				reader.readAsDataURL(blob);
			} catch (err) {
				// readAsDataURL throws synchronously when handed something that is not a Blob.
				onFail();
			}
		}
	}

	/**
	 * Offers the given text as a file download by clicking a temporary link
	 * that points at a blob URL.
	 *
	 * @param {string} text - File content.
	 * @param {string} name - Suggested file name.
	 */
	function saveTextToFile(text, name) {
		const blob = new Blob([text],{type:"text/plain;charset=utf-8"});
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		a.href = url;
		a.download = name;
		a.click();

		// The blob stays in memory for as long as its URL is registered. Release
		// it after a generous delay so the browser has had time to start the
		// download; revoking right after click() could cancel it.
		setTimeout(function() {
			URL.revokeObjectURL(url);
		}, 40000);
	}
})();