Pkg.go.dev 文档转 LLM.txt 格式化工具

从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。

您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Pkg.go.dev to LLM.txt Formatter
// @name:zh-CN   Pkg.go.dev 文档转 LLM.txt 格式化工具
// @namespace    https://x.com/janxin
// @version      0.1.1
// @description  Extracts Go package documentation (API definitions, examples) from pkg.go.dev and converts it into a structured LLM.txt format, suitable for local AI/LLM reference and analysis.
// @description:zh-CN 从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。
// @author       hellowor
// @match        https://pkg.go.dev/*
// @icon         https://pkg.go.dev/favicon.ico
// @grant        GM_addStyle
// @grant        GM_setClipboard
// @homepageURL  https://x.com/janxin
// @supportURL   https://x.com/janxin
// @license MIT
// ==/UserScript==

(function() {
    'use strict';

    // Stylesheet for the floating export button: fixed to the bottom-right corner of the
    // viewport, with a darker background on hover.
    const downloadButtonCss = `
        .llm-download-button {
            position: fixed;
            bottom: 20px;
            right: 20px;
            z-index: 9999;
            padding: 10px 15px;
            background-color: #007d9c;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 14px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.2);
        }
        .llm-download-button:hover {
            background-color: #005f79;
        }
    `;
    GM_addStyle(downloadButtonCss);

    /**
     * Returns the whitespace-trimmed text content of a DOM element.
     *
     * @param {?Element} element - Element to read; may be null or undefined.
     * @returns {string} The element's trimmed `textContent`, or '' when no element is given.
     */
    function getCleanText(element) {
        if (!element) {
            return '';
        }
        return element.textContent.trim();
    }

    /**
     * Extracts the source text held by a <pre> element.
     *
     * The whole subtree of the <pre> is walked: text nodes contribute their text, <br>
     * elements (at any depth) become newlines, and every other element is descended into.
     *
     * The previous implementation returned only the content of the FIRST <span> found
     * inside the <pre>. That silently dropped everything outside that span, e.g. when a
     * declaration block mixes bare text with several <span>/<a> children.
     *
     * @param {?Element} preElement - The <pre> element (or any element) to read.
     * @returns {string} The collected text. As before, the result is trimmed when the
     *     element contains a <span> and is returned untrimmed otherwise; '' when no
     *     element is given.
     */
    function getCodeFromPre(preElement) {
        if (!preElement) return '';

        const parts = [];
        const collect = (parent) => {
            parent.childNodes.forEach(node => {
                if (node.nodeType === Node.TEXT_NODE) {
                    parts.push(node.textContent);
                } else if (node.nodeType === Node.ELEMENT_NODE) {
                    if (node.tagName === 'BR') {
                        // textContent would render <br> as nothing; keep the line break.
                        parts.push('\n');
                    } else {
                        collect(node);
                    }
                }
            });
        };
        collect(preElement);
        const code = parts.join('');

        // Backward compatibility: span-based markup used to yield a trimmed string.
        if (preElement.querySelector('span')) {
            const trimmed = code.trim();
            if (trimmed) {
                return trimmed;
            }
        }
        return code;
    }


    /**
     * Collects the run of descriptive blocks that immediately follows an element.
     *
     * Starting at the element's next sibling, consecutive <p> elements and <ul> elements
     * (the latter only when not inside a `.Documentation-exampleDetails` ancestor) are
     * gathered; the first sibling of any other kind ends the run. Paragraphs contribute
     * their trimmed text; lists contribute one "- item" line per <li> descendant.
     *
     * @param {?Element} element - Element whose following siblings are inspected.
     * @returns {string} The collected blocks joined by blank lines, or '' when there is
     *     no element or nothing was collected.
     */
    function getDirectSiblingParagraphs(element) {
        if (!element) {
            return '';
        }

        const blocks = [];
        for (let node = element.nextElementSibling; node; node = node.nextElementSibling) {
            const tag = node.tagName;
            if (tag === 'P') {
                blocks.push(getCleanText(node));
                continue;
            }
            if (tag !== 'UL' || node.closest('.Documentation-exampleDetails')) {
                break;
            }
            const bullets = Array.from(node.querySelectorAll('li'), li => '- ' + getCleanText(li));
            if (bullets.length > 0) {
                blocks.push(bullets.join('\n'));
            }
        }
        return blocks.join('\n\n');
    }

    /**
     * Renders one example (`details.Documentation-exampleDetails`) as a Markdown fragment.
     *
     * The fragment consists of a heading ("Example: <title>" or just "Example"), the
     * example code in a ```go fence when any code is found, and an "Output:" fence when
     * both an output label and a non-empty output element are present.
     *
     * Code is read from the example's <textarea> first; when that is missing or blank,
     * the example's <pre> is used instead.
     *
     * @param {Element} detailElement - The example's <details> element.
     * @param {number} [level=3] - Markdown heading depth (number of '#' characters).
     * @returns {string} The Markdown fragment; it always contains at least the heading.
     */
    function extractExample(detailElement, level = 3) {
        const headerEl = detailElement.querySelector('.Documentation-exampleDetailsHeader');
        const bodyEl = detailElement.querySelector('.Documentation-exampleDetailsBody');

        let rawCode = '';
        if (bodyEl) {
            const editorEl = bodyEl.querySelector('textarea.Documentation-exampleCode.code');
            if (editorEl) {
                rawCode = editorEl.value;
            }
            // Fall back to the static <pre> when the textarea is absent or empty.
            if (!rawCode.trim()) {
                const staticCodeEl = bodyEl.querySelector('pre.Documentation-exampleCode');
                if (staticCodeEl) {
                    rawCode = getCodeFromPre(staticCodeEl);
                }
            }
        }

        const labelEl = detailElement.querySelector('.Documentation-exampleOutputLabel');
        const resultEl = bodyEl
            ? bodyEl.querySelector('span.Documentation-exampleOutput, pre.Documentation-exampleOutput')
            : null;

        const hashes = '#'.repeat(level);
        const title = getCleanText(headerEl).replace(/ ¶$/, '');
        const pieces = [title ? `${hashes} Example: ${title}\n\n` : `${hashes} Example\n\n`];

        const code = rawCode.trim();
        if (code) {
            pieces.push("```go\n" + code + "\n```\n\n");
        }

        if (labelEl && resultEl) {
            let rawOutput;
            if (resultEl.tagName === 'PRE') {
                rawOutput = getCodeFromPre(resultEl);
            } else {
                // The output element may wrap its text in a nested <pre>.
                const nestedPre = resultEl.querySelector('pre');
                rawOutput = nestedPre ? getCodeFromPre(nestedPre) : getCleanText(resultEl);
            }
            const shownOutput = rawOutput.trim();
            if (shownOutput) {
                pieces.push(`Output:\n\`\`\`\n${shownOutput}\n\`\`\`\n\n`);
            }
        }

        return pieces.join('');
    }

    /**
     * Determines the display name of a documented entity.
     *
     * Sources are tried in order:
     *   1. the text of the first header link that is neither `.Documentation-idLink`
     *      nor `.Documentation-source`;
     *   2. the declaration text in `sigPre`, matched against a pattern chosen by
     *      `entityType` ("Function"/"Constructor", "Method" or "Type");
     *   3. the first word of the header text after removing a trailing " ¶" and a
     *      leading "func"/"type" keyword.
     *
     * @param {?Element} headerEl - Header element of the entity, if any.
     * @param {?Element} sigPre - <pre> element holding the declaration, if any.
     * @param {string} [entityType="Unknown"] - Kind of entity being named.
     * @returns {string} The resolved name, or `Unknown<entityType>` when nothing matched.
     */
    function extractNameFromSignatureOrHeader(headerEl, sigPre, entityType = "Unknown") {
        const linkedName = headerEl
            ? getCleanText(headerEl.querySelector('a:not(.Documentation-idLink):not(.Documentation-source)'))
            : '';
        if (linkedName) {
            return linkedName;
        }

        if (sigPre) {
            const signature = getCodeFromPre(sigPre).trim();
            const funcPattern = /^func\s+([A-Z_][A-Za-z0-9_]*)\s*\(/;
            const patternByKind = new Map([
                ['Function', funcPattern],
                ['Constructor', funcPattern],
                ['Method', /^func\s*\([\s\S]*?\)\s*([A-Z_][A-Za-z0-9_]*)\s*\(/],
                ['Type', /^type\s+([A-Z_][A-Za-z0-9_]*)/],
            ]);
            const pattern = patternByKind.get(entityType);
            const found = pattern ? signature.match(pattern) : null;
            if (found && found[1]) {
                return found[1];
            }
        }

        if (headerEl) {
            const stripped = getCleanText(headerEl)
                .replace(/ ¶$/, '')
                .replace(/^func\s+/, '')
                .replace(/^type\s+/, '');
            const [leadingWord] = stripped.split(/\s|\(/);
            if (leadingWord) {
                return leadingWord;
            }
        }

        return `Unknown${entityType}`;
    }


    /**
     * Tells whether an extracted example carries more than its heading.
     *
     * @param {string} exampleContent - Fragment produced by extractExample.
     * @returns {boolean} True when the fragment has more than two non-blank lines.
     */
    function isSubstantialExample(exampleContent) {
        return exampleContent.split('\n').filter(l => l.trim() !== '').length > 2;
    }

    /**
     * Appends every substantial example from `exampleElements` to `output`.
     *
     * @param {string[]} output - Accumulator of Markdown fragments (mutated).
     * @param {Iterable<Element>} exampleElements - Example <details> elements, in order;
     *     must provide `forEach` (NodeList or Array).
     * @param {number} level - Heading depth passed to extractExample.
     * @param {string} [sectionHeading] - Optional heading pushed once, right before the
     *     first example that is actually emitted.
     */
    function appendExamples(output, exampleElements, level, sectionHeading) {
        let headingEmitted = false;
        exampleElements.forEach(exampleEl => {
            const exampleContent = extractExample(exampleEl, level);
            if (!isSubstantialExample(exampleContent)) {
                return;
            }
            if (sectionHeading && !headingEmitted) {
                output.push(sectionHeading);
                headingEmitted = true;
            }
            output.push(exampleContent);
        });
    }

    /**
     * Resolves an entry of the examples index to the example <details> it points at.
     *
     * The id taken from the link's fragment is escaped with CSS.escape before being placed
     * in a selector: unescaped, an id of the form `example-Type.Method` would be parsed as
     * id `example-Type` plus class `Method` and never match, and other characters could
     * make querySelector throw.
     *
     * @param {Element} docContainer - Root element to search in.
     * @param {Element} exampleLink - Anchor whose `href` contains "#<example id>".
     * @returns {?Element} The matching example element, or null.
     */
    function findListedExample(docContainer, exampleLink) {
        const href = exampleLink.getAttribute('href') || '';
        const hashAt = href.indexOf('#');
        if (hashAt === -1) {
            return null;
        }
        const exampleId = href.slice(hashAt + 1);
        if (!exampleId) {
            return null;
        }
        return docContainer.querySelector(`details#${CSS.escape(exampleId)}.Documentation-exampleDetails`);
    }

    /**
     * Finds the section element that directly follows a section's <h3> heading.
     *
     * @param {Element} docContainer - Root element to search in.
     * @param {string} headingSelector - Selector of the heading (or an element inside it).
     * @param {string} sectionClass - Class the following sibling must carry.
     * @returns {?Element} The section, or null when the heading, its <h3>, or a sibling
     *     with the expected class is missing.
     */
    function findSectionAfterHeading(docContainer, headingSelector, sectionClass) {
        const headingAnchor = docContainer.querySelector(headingSelector);
        const heading = headingAnchor ? headingAnchor.closest('h3') : null;
        const section = heading ? heading.nextElementSibling : null;
        return section && section.classList.contains(sectionClass) ? section : null;
    }

    /**
     * Appends a fenced declaration (when present) followed by its description (when any).
     *
     * @param {string[]} output - Accumulator of Markdown fragments (mutated).
     * @param {?Element} headerEl - Header used to locate the description when there is no
     *     declaration element.
     * @param {?Element} declarationDiv - `div.Documentation-declaration`, if found.
     * @param {?Element} sigPre - <pre> holding the declaration text, if found.
     */
    function appendSignatureAndDescription(output, headerEl, declarationDiv, sigPre) {
        if (sigPre) {
            output.push("```go\n" + getCodeFromPre(sigPre).trim() + "\n```\n");
        }
        // The description is the run of paragraphs/lists after the declaration, or after
        // the header when no declaration element exists.
        const desc = getDirectSiblingParagraphs(declarationDiv || headerEl);
        if (desc) {
            output.push(desc + "\n");
        }
    }

    /**
     * Appends a "## Constants"/"## Variables"-style group: every declaration in the
     * section with its description, each followed by a "---" separator. Nothing is
     * appended when the section holds no declarations.
     *
     * @param {string[]} output - Accumulator of Markdown fragments (mutated).
     * @param {Element} section - Section containing `div.Documentation-declaration` items.
     * @param {string} heading - Heading pushed before the first declaration.
     */
    function appendDeclarationGroup(output, section, heading) {
        const declarations = section.querySelectorAll('div.Documentation-declaration');
        if (declarations.length === 0) {
            return;
        }
        output.push(heading);
        declarations.forEach(decl => {
            appendSignatureAndDescription(output, null, decl, decl.querySelector('pre'));
            output.push("---\n");
        });
    }

    /**
     * Appends one function, constructor or method: heading, signature, description, its
     * examples (one heading level deeper) and a closing "---" separator.
     *
     * @param {string[]} output - Accumulator of Markdown fragments (mutated).
     * @param {Element} containerEl - Element wrapping the callable's documentation.
     * @param {string} entityType - "Function", "Constructor" or "Method"; used both as
     *     the heading label and for name extraction.
     * @param {number} headingLevel - Markdown depth of the callable's heading.
     */
    function appendCallable(output, containerEl, entityType, headingLevel) {
        const headerEl = containerEl.querySelector('h4.Documentation-functionHeader');
        const declarationDiv = containerEl.querySelector('div.Documentation-declaration');
        const sigPre = declarationDiv ? declarationDiv.querySelector('pre') : null;
        const name = extractNameFromSignatureOrHeader(headerEl, sigPre, entityType);

        output.push(`${'#'.repeat(headingLevel)} ${entityType}: ${name}\n`);
        appendSignatureAndDescription(output, headerEl, declarationDiv, sigPre);
        appendExamples(output, containerEl.querySelectorAll('details.Documentation-exampleDetails'), headingLevel + 1);
        output.push("---\n");
    }

    /**
     * Converts the package documentation on the current page into LLM.txt Markdown.
     *
     * Sections are emitted in this order, each only when it is found and non-empty:
     * package overview, overview examples, examples from the examples index, constants,
     * variables, functions, and types (each type followed by its own examples,
     * constructors and methods, and closed by a "===" line). Separator runs and excess
     * blank lines are normalised at the end.
     *
     * @returns {string} The trimmed Markdown text, or '' when the documentation container
     *     is not present on the page (a console warning is logged in that case).
     */
    function processDocumentationSection() {
        const output = [];
        const docContainer = document.querySelector('.Documentation.js-documentation .Documentation-content.js-docContent');
        if (!docContainer) {
            console.warn("Main documentation content (.Documentation-content.js-docContent) not found.");
            return '';
        }

        // --- Overview -------------------------------------------------------------
        const overviewSection = docContainer.querySelector('section.Documentation-overview');
        if (overviewSection) {
            const overviewHeader = overviewSection.querySelector('h3#pkg-overview');
            if (overviewHeader) {
                const packageDescription = getDirectSiblingParagraphs(overviewHeader);
                if (packageDescription) {
                    output.push("## Package Overview\n\n" + packageDescription + "\n\n");
                }
            }
            appendExamples(
                output,
                overviewSection.querySelectorAll('details.Documentation-exampleDetails'),
                3,
                "## Package Examples (from Overview)\n");
        }

        // --- Examples index -------------------------------------------------------
        const examplesSectionHeader = docContainer.querySelector('h4#pkg-examples');
        const examplesList = examplesSectionHeader && examplesSectionHeader.parentElement
            ? examplesSectionHeader.parentElement.querySelector('ul.Documentation-examplesList')
            : null;
        if (examplesList) {
            const listedExamples = [];
            examplesList.querySelectorAll('li a.js-exampleHref').forEach(exLink => {
                const exampleDetail = findListedExample(docContainer, exLink);
                if (exampleDetail) {
                    listedExamples.push(exampleDetail);
                }
            });
            appendExamples(output, listedExamples, 3, "## Examples (Listed)\n");
        }

        // --- Constants and variables ------------------------------------------------
        const constSection = findSectionAfterHeading(docContainer, '#pkg-constants', 'Documentation-constants');
        if (constSection) {
            appendDeclarationGroup(output, constSection, "## Constants\n");
        }

        const varSection = findSectionAfterHeading(docContainer, '#pkg-variables', 'Documentation-variables');
        if (varSection) {
            appendDeclarationGroup(output, varSection, "## Variables\n");
        }

        // --- Functions --------------------------------------------------------------
        const funcSection = findSectionAfterHeading(docContainer, '#pkg-functions', 'Documentation-functions');
        if (funcSection) {
            const functions = funcSection.querySelectorAll('div.Documentation-function');
            if (functions.length > 0) {
                output.push("## Functions\n");
                functions.forEach(fnDiv => appendCallable(output, fnDiv, "Function", 3));
            }
        }

        // --- Types ------------------------------------------------------------------
        const typeSection = findSectionAfterHeading(docContainer, '#pkg-types', 'Documentation-types');
        if (typeSection) {
            const types = typeSection.querySelectorAll('div.Documentation-type');
            if (types.length > 0) {
                output.push("## Types\n");
                types.forEach(typeDiv => {
                    const typeHeaderEl = typeDiv.querySelector('h4.Documentation-typeHeader');
                    const typeDeclarationDiv = typeDiv.querySelector('div.Documentation-declaration');
                    const sigPre = typeDeclarationDiv ? typeDeclarationDiv.querySelector('pre') : null;
                    const typeName = extractNameFromSignatureOrHeader(typeHeaderEl, sigPre, "Type");

                    output.push(`### Type: ${typeName}\n`);
                    appendSignatureAndDescription(output, typeHeaderEl, typeDeclarationDiv, sigPre);

                    // Only examples that are direct children belong to the type itself;
                    // nested ones are emitted with their constructor or method below.
                    appendExamples(output, typeDiv.querySelectorAll(':scope > details.Documentation-exampleDetails'), 4);

                    typeDiv.querySelectorAll('div.Documentation-typeFunc')
                        .forEach(assocFnDiv => appendCallable(output, assocFnDiv, "Constructor", 4));
                    typeDiv.querySelectorAll('div.Documentation-typeMethod')
                        .forEach(assocMethodDiv => appendCallable(output, assocMethodDiv, "Method", 4));

                    output.push("===\n");
                });
            }
        }

        const cleanedOutput = output.join('\n')
            .replace(/(\n---\n)+(\s*(\n---|\n===|$))/g, '\n---\n$2') // Consolidate repeated --- separators
            .replace(/(\n===\n)+/g, '\n===\n') // Consolidate repeated === separators
            .replace(/\n{3,}/g, '\n\n');       // Collapse runs of 3+ newlines (at most one blank line)
        return cleanedOutput.trim();
    }

    /**
     * Derives a filename-friendly package name from the current page's URL path.
     *
     * Only the "@version" segment is removed, so a path such as
     * `/example.com/mod@v1.2.3/sub/pkg` keeps its `sub/pkg` suffix. (Cutting the path at
     * the first '@' dropped that suffix and made every sub-package of a versioned module
     * map to the same name.) The leading and trailing slash are then stripped and the
     * remaining slashes are replaced with underscores.
     *
     * @returns {string} The derived name; '' when the path has no package component.
     */
    function getPackageNameForFilename() {
        return window.location.pathname
            .replace(/@[^/]*/, '')   // drop the version, keep whatever follows it
            .replace(/^\//, '')
            .replace(/\/$/, '')
            .replace(/\//g, '_');
    }

    /**
     * Triggers a browser download of `text` as a UTF-8 plain-text file.
     *
     * The content is handed to the browser through a Blob object URL instead of a
     * percent-encoded `data:` URI: encoding inflates the payload and very long data URIs
     * can be rejected by browsers, which matters for packages with large documentation.
     *
     * @param {string} filename - Suggested name for the downloaded file.
     * @param {string} text - File content.
     */
    function download(filename, text) {
        const blob = new Blob([text], { type: 'text/plain;charset=utf-8' });
        const url = URL.createObjectURL(blob);

        const element = document.createElement('a');
        element.setAttribute('href', url);
        element.setAttribute('download', filename);
        element.style.display = 'none';
        document.body.appendChild(element);
        try {
            element.click();
        } finally {
            // Always detach the temporary anchor, even if click() throws.
            document.body.removeChild(element);
            // Release the object URL a little later so the browser can start reading it first.
            setTimeout(() => URL.revokeObjectURL(url), 1000);
        }
    }

    /**
     * Runs the extraction and offers the result as a download.
     *
     * The file is named `<package>_llm.txt`, or `llm.txt` when no package name can be
     * derived from the URL. When the extraction yields only whitespace, nothing is
     * downloaded and the user is informed through an alert instead.
     */
    function initializeScraper() {
        const data = processDocumentationSection();

        if (!data.trim()) {
            alert("Could not find documentation section or no actual data was extracted. Check console for details (if any logs were enabled).");
            return;
        }

        const packageName = getPackageNameForFilename();
        let filename = 'llm.txt';
        if (packageName) {
            filename = `${packageName}_llm.txt`;
        }
        download(filename, data);
    }

    // Floating "Download llm.txt" button. A click does not extract immediately: the
    // scraper is scheduled one second later (presumably to let lazily rendered page
    // content settle first - TODO confirm whether the delay is still needed).
    const EXTRACTION_DELAY_MS = 1000;
    const downloadButton = Object.assign(document.createElement('button'), {
        textContent: 'Download llm.txt',
        className: 'llm-download-button',
    });
    const scheduleExtraction = () => {
        setTimeout(initializeScraper, EXTRACTION_DELAY_MS);
    };
    downloadButton.addEventListener('click', scheduleExtraction);

    document.body.appendChild(downloadButton);

})();