Markdown Grabber

markdown downloader

目前為 2025-01-03 提交的版本,檢視 最新版本

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         Markdown Grabber
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  markdown downloader
// @author       5ec1cff
// @match        *://*/*
// @license      AGPL
// @grant        unsafeWindow
// @grant        GM_registerMenuCommand
// @grant        GM_xmlhttpRequest
// @connect      *
// ==/UserScript==

// 2021.12.24 Fri: 修正
// 2022.03.15 Tue: 增加下载图片支持(默认启用)
// 2023.03.01 Wed: 支持 xz.aliyun.cn;下载图片时附带 Referer

(function () {
    'use strict';

    if (window.top !== window) return; // 阻止在 iframe 启用

    const downloadPics = true;

    const picMap = new Map();

    const console = unsafeWindow.console.context();

    /**
     * Derive the key under which an image is tracked and referenced in the
     * generated Markdown. At present the key is simply the image URL itself.
     *
     * @param {string} url - Image URL.
     * @returns {string} The key for that image.
     */
    function getPictureKey(url) {
        const key = url;
        return key;
    }

    /**
     * Register an image for the current export and return its reference key.
     *
     * The first call for a given image stores a promise in `picMap` that
     * resolves to `[key, target]`. `target` is a data: URL of the downloaded
     * image, or the original URL when downloading is disabled, the URL is not
     * http(s), or the download fails. Later calls for the same image reuse the
     * stored promise.
     *
     * The promise never rejects: a single broken image must not abort the
     * whole Markdown export, so failures are logged and the remote URL is kept.
     *
     * @param {string} url - Image URL (callers pass `img.src`).
     * @returns {string} Key to use as the Markdown reference label.
     */
    function getPicture(url) {
        const key = getPictureKey(url);
        if (picMap.has(key)) return key;

        picMap.set(key, new Promise((resolve) => {
            // Shared failure path: keep the remote URL instead of rejecting.
            const keepRemote = (reason) => {
                console.warn('picture download failed, keeping remote url:', url, reason);
                resolve([key, url]);
            };

            if (!downloadPics || !url?.startsWith("http")) {
                resolve([key, url]);
                return;
            }

            GM_xmlhttpRequest({
                method: "GET",
                url,
                // Send the current page as Referer with the image request.
                headers: { Referer: location.href },
                responseType: "blob",
                onload(r) {
                    // onload also fires for HTTP error responses; do not embed
                    // an error page as if it were the image.
                    if (r.status < 200 || r.status >= 300) {
                        keepRemote(`HTTP ${r.status}`);
                        return;
                    }
                    const fr = new FileReader();
                    fr.onload = () => {
                        console.log('load done:', url);
                        resolve([key, fr.result]);
                    };
                    fr.onerror = () => keepRemote(fr.error);
                    fr.readAsDataURL(r.response);
                },
                onerror: keepRemote,
                onabort: keepRemote,
                ontimeout: keepRemote,
            });
        }));
        return key;
    }

    /**
     * Render one inline element as Markdown.
     *
     * Handles bold, italic, strikethrough, links, inline code and images; any
     * other element is flattened to the Markdown of its children.
     *
     * @param {HTMLElement} e - Inline element to convert.
     * @returns {string} Markdown fragment for the element.
     */
    function parseSimpleStyle(e) {
        switch (e.tagName.toLowerCase()) {
            case 'b':
            case 'strong':
                return `**${parseSingleLine(e)}**`;
            case 'i':
            case 'em':
                return `*${parseSingleLine(e)}*`;
            case 's':
            case 'strike':
                return `~~${parseSingleLine(e)}~~`;
            case 'a': {
                const text = parseSingleLine(e);
                // An anchor without a target has no link to emit, but its
                // text is still part of the content and must not be dropped.
                // The raw attribute is used so relative links stay as written.
                return e.href ? `[${text}](${e.getAttribute('href')})` : text;
            }
            case 'code':
                return `\`${e.innerText}\``;
            case 'img':
                // Images are emitted as reference-style links; getPicture()
                // records the image and returns the reference label.
                return `\n![][${getPicture(e.src)}]\n`;
            default:
                return parseSingleLine(e);
        }
    }

    /**
     * Flatten a node into a single line of Markdown.
     *
     * A text node yields its trimmed text. An element yields the concatenation
     * of its direct text children (verbatim) and its element children (via
     * parseSimpleStyle), trimmed at both ends. Anything else yields ''.
     *
     * @param {Node} element - Text node or element to flatten.
     * @returns {string} Single-line Markdown.
     */
    function parseSingleLine(element) {
        if (element instanceof Text) {
            return element.data.trim();
        }
        if (!(element instanceof HTMLElement)) {
            return '';
        }

        const pieces = [];
        for (const child of element.childNodes) {
            if (child instanceof Text) {
                pieces.push(child.data);
            } else if (child instanceof HTMLElement) {
                pieces.push(parseSimpleStyle(child));
            }
            // Other node kinds (e.g. comments) contribute nothing.
        }
        return pieces.join('').trim();
    }

    /**
     * Tell whether an element can be rendered on one line, i.e. it has no
     * descendant paragraph, list or line break.
     *
     * @param {Element} node - Element to inspect.
     * @returns {boolean} True when no `p`, `ul`, `ol` or `br` descendant exists.
     */
    function isSingleLine(node) {
        const blockDescendant = node.querySelector('p,ul,ol,br');
        return !blockDescendant;
    }

    /**
     * Convert the children of an element into a list of Markdown lines.
     *
     * Consecutive text nodes and inline elements (a, b, strong, i, em, s,
     * strike, code) are merged into one line. Block-level children are
     * converted by tag: paragraphs, line breaks, lists, preformatted code,
     * block quotes, tables, rules, images, highlighted `figure` code blocks
     * and headings. Any other element is processed recursively. `button`,
     * `style`, `header` and `script` are skipped.
     *
     * @param {Node} element - Container whose child nodes are converted.
     * @returns {string[]} Markdown lines; empty when `element` is not an
     *     HTMLElement. Entries may themselves contain newline characters.
     */
    function parseNode(element) {
        const lines = [];
        // Accumulates the current run of inline content; null means no run is open.
        let singleLine = null;
        if (!(element instanceof HTMLElement)) return lines;

        for (const e of element.childNodes) {
            if (!(e instanceof HTMLElement) && !(e instanceof Text)) continue;
            const tagName = e instanceof Text ? 'TEXT' : e.tagName.toLowerCase();

            // Pass 1: inline content extends the open run; anything else closes it.
            switch (tagName) {
                case 'TEXT':
                case 'a':
                case 'b':
                case 'strong':
                case 'i':
                case 'em':
                case 's':
                case 'strike':
                case 'code': {
                    if (singleLine == null) singleLine = '';
                    singleLine += tagName === 'TEXT' ? e.data.trim() : parseSimpleStyle(e);
                    continue;
                }
                default:
                    if (singleLine != null) {
                        lines.push(singleLine);
                        singleLine = null;
                    }
            }

            // Pass 2: block-level conversion.
            switch (tagName) {
                // ignores
                case 'button':
                case 'style':
                case 'header':
                case 'script':
                    continue;
                case 'p':
                    lines.push(parseSingleLine(e) + '\n');
                    break;
                case 'br':
                    lines.push('\n');
                    break;
                case 'ul':
                case 'ol': {
                    lines.push('');
                    const isOrdered = tagName === 'ol';
                    let index = 1;
                    for (const item of e.childNodes) {
                        if (!(item instanceof HTMLLIElement)) continue;
                        const pref = isOrdered ? `${index}. ` : `- `;
                        if (isSingleLine(item)) {
                            lines.push(`${pref}${parseSingleLine(item)}`);
                        } else {
                            // The marker goes on the first line that is
                            // actually emitted. Blank lines are skipped, and
                            // the first parsed line is often blank (leading
                            // whitespace text), so keying on index 0 would
                            // lose the marker.
                            let markerPending = true;
                            for (const itemLine of parseNode(item)) {
                                if (!itemLine.trim()) continue;
                                lines.push(`${markerPending ? pref : '    '}${itemLine}`);
                                markerPending = false;
                            }
                        }
                        index++;
                    }
                    lines.push('');
                    break;
                }
                case 'pre': {
                    lines.push('```');
                    lines.push(...(e.querySelector('code') || e).innerText.trim().split('\n'));
                    lines.push('```');
                    break;
                }
                case 'blockquote': {
                    lines.push('');
                    for (const quoted of parseNode(e)) {
                        lines.push(`> ${quoted}`);
                    }
                    lines.push('');
                    break;
                }
                case 'table': {
                    lines.push('');
                    const head = e.querySelector('thead');
                    if (!head) {
                        console.warn('unknown table!');
                        // Without a header row a Markdown table cannot be
                        // built; process the body like ordinary content.
                        const plainBody = e.querySelector('tbody');
                        if (plainBody) {
                            lines.push(...parseNode(plainBody));
                        }
                        continue;
                    }
                    let headLine = '|';
                    let sepLine = '|';
                    for (const h of head.querySelectorAll('th')) {
                        headLine += `${parseSingleLine(h)}|`;
                        sepLine += `--|`;
                    }
                    lines.push(headLine);
                    lines.push(sepLine);
                    // A table may have a <thead> but no <tbody>; emit just the header then.
                    const body = e.querySelector('tbody');
                    for (const row of body?.querySelectorAll('tr') ?? []) {
                        let line = '|';
                        for (const cell of row.querySelectorAll('td')) {
                            line += `${parseSingleLine(cell)}|`;
                        }
                        lines.push(line);
                    }
                    lines.push('');
                    break;
                }
                case 'hr':
                    lines.push('\n---\n');
                    break;
                case 'img':
                    lines.push(`\n![][${getPicture(e.src)}]\n`);
                    break;
                case 'figure': {
                    // <figure class="highlight LANG"> wrapping a table whose
                    // td.code cell holds the code: emit a fenced block using
                    // the second class name as the language.
                    if (e.classList.contains('highlight')) {
                        const lang = e.classList[1] || '';
                        const code = e.querySelector('td.code pre');
                        if (code != null) {
                            lines.push('```' + lang);
                            lines.push(...code.innerText.trim().split('\n'));
                            lines.push('```');
                            break;
                        }
                    }
                    // fallthrough
                }
                case 'td': {
                    // Skip cells marked with the "gutter" class.
                    if (e.classList.contains('gutter')) continue;
                    // fallthrough
                }
                default: {
                    // Anchored so that only real heading tags h1..h6 match.
                    const heading = tagName.match(/^h([1-6])$/);
                    if (heading) {
                        lines.push(`\n${'#'.repeat(Number(heading[1]))} ${parseSingleLine(e)} \n`);
                    } else {
                        lines.push(...parseNode(e));
                    }
                }
            }
        }
        if (singleLine != null) lines.push(singleLine);
        return lines;
    }

    /**
     * Locate the element that most likely holds the page's main content.
     *
     * Tried in order: the first `<article>`, then the first
     * `div.markdown-body` / `div.mod-content`, and finally the parent of an
     * `<h1>` that has the most child elements (the last one wins on ties).
     *
     * @returns {Element|null} The content container, or null when none is found.
     */
    function findArticle() {
        const article =
            document.body.querySelector('article') ??
            document.body.querySelector('div.markdown-body,div.mod-content');
        if (article) return article;

        let maxChild = 0;
        let node = null;
        for (const heading of document.querySelectorAll('h1')) {
            const parent = heading.parentNode;
            if (parent && parent.childElementCount >= maxChild) {
                // Track the running maximum; without this every <h1> parent
                // would qualify and the last one would always be returned.
                maxChild = parent.childElementCount;
                node = parent;
            }
        }
        return node;
    }

    /**
     * Build the Markdown document for the current page.
     *
     * The output starts with a level-1 heading (text of the first `<h1>`, or
     * the document title when there is none), followed by the page URL and a
     * blank line, then the converted article content.
     *
     * @returns {Promise<string>} The complete Markdown text.
     */
    async function html2MD() {
        const article = findArticle();
        const heading = document.querySelector('h1');
        const titleText = heading ? parseSingleLine(heading) : document.title;
        const header = `# ${titleText}\n${location.href}\n\n`;
        const content = await nodeToMD(article);
        return header + content;
    }

    /**
     * Convert a DOM node to Markdown, including image reference definitions.
     *
     * The node is parsed into lines (each terminated with a newline), then the
     * image promises collected in `picMap` during parsing are awaited for at
     * most 10 seconds. A blank line and one `[key]:url` definition per image
     * are appended.
     *
     * Note: `picMap` is cleared at the start, so overlapping calls would
     * interfere with each other.
     *
     * @param {Node} node - Root node to convert.
     * @returns {Promise<string>} Markdown text.
     * @throws {Error} 'time out!' when images do not settle within 10 seconds;
     *     also rejects with whatever an image promise rejects with.
     */
    async function nodeToMD(node) {
        picMap.clear();
        const body = parseNode(node).map((line) => `${line}\n`).join('');

        const downloads = Promise.all(picMap.values());
        let timer;
        const timeout = new Promise((_, reject) => {
            console.log("waiting 10s for downloading pictures...", picMap.size);
            timer = setTimeout(() => { reject(new Error('time out!')); }, 10000);
        });

        let pics;
        try {
            pics = await Promise.race([downloads, timeout]);
        } finally {
            // Do not leave the timer running once the race is decided.
            clearTimeout(timer);
        }

        const references = pics.map(([key, url]) => `[${key}]:${url}\n`).join('');
        return `${body}\n${references}`;
    }

    // Expose nodeToMD on the page's window object as `md`
    // (presumably a debugging aid for manual use; the script itself never calls it).
    unsafeWindow.md = nodeToMD;
    // Disabled debug exports, kept for reference:
    // unsafeWindow.__xhr = GM_xmlhttpRequest;
    // unsafeWindow._getpic = getPicture

    /**
     * Convert the current page to Markdown and trigger a download named
     * `<document title>.md`.
     *
     * The text is wrapped in a Blob and downloaded by clicking a temporary
     * `<a download>` element.
     *
     * @returns {Promise<void>} Settles once the download has been triggered;
     *     rejects if the conversion fails.
     */
    async function onClick() {
        const markdown = await html2MD();
        const url = URL.createObjectURL(new Blob([markdown], { type: 'text/plain' }));
        const a = document.createElement('a');
        a.download = `${document.title}.md`;
        a.href = url;
        document.body.append(a);
        try {
            a.click();
        } finally {
            a.remove();
            // Release the blob so repeated exports do not accumulate object
            // URLs. The delay is a heuristic that gives the browser time to
            // start reading the blob before it is revoked.
            setTimeout(() => { URL.revokeObjectURL(url); }, 60 * 1000);
        }
    }

    // Add a "下载 Markdown" ("Download Markdown") entry to the userscript
    // manager's menu. onClick() is async, so failures are reported here
    // instead of being left as unhandled promise rejections.
    GM_registerMenuCommand('下载 Markdown', () => {
        onClick().catch((err) => {
            console.error('Markdown export failed:', err);
        });
    });

})();