您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Convert webpage to Markdown via Tampermonkey menu. Significantly optimizes content selection logic, reducing content loss and useless elements.
// ==UserScript==
// @name         Webpage to Markdown
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  Convert webpage to Markdown via Tampermonkey menu. Significantly optimizes content selection logic, reducing content loss and useless elements.
// @author       Feiyt
// @homepageURL  https://github.com/Feiyt
// @license      MIT
// @match        *://*/*
// @require      https://unpkg.com/turndown/dist/turndown.js
// @require      https://unpkg.com/turndown-plugin-gfm@1.0.2/dist/turndown-plugin-gfm.js
// @grant        GM_registerMenuCommand
// @grant        GM_download
// @grant        GM_addStyle
// @run-at       document-idle
// ==/UserScript==

// Copyright (c) 2025 Feiyt
// Released under the MIT license
// https://github.com/Feiyt (or specify the exact repo if available)

// NOTE(review): the GFM plugin @require above had its "package@version" segment
// mangled by e-mail obfuscation in the copy this was restored from. The package
// name follows from the file path; the 1.0.2 pin is presumed - confirm.

(function () {
    'use strict';

    console.log("Webpage to Markdown (v1.0) script starting...");

    // --- Configuration ---

    /** Options handed to the TurndownService constructor. */
    const turndownOptions = {
        headingStyle: 'atx',
        hr: '---',
        bulletListMarker: '*',
        codeBlockStyle: 'fenced',
        emDelimiter: '*',
        strongDelimiter: '**',
        linkStyle: 'inlined',
    };

    /**
     * Candidate content containers, ordered from highest to lowest priority.
     * The first selector that yields a substantial element wins.
     */
    const CONTENT_SELECTORS = [
        // Highest priority: semantic & specific roles/IDs/classes
        'article', '[role="article"]', '.article-body', '.post-content', '.entry-content',
        '#article-content', '.post-body', '.markdown-body',
        // High priority: main content areas
        'main', '[role="main"]', '#main-content', '#main', '.main-content', '.main', '#primary',
        // Medium priority: common generic containers (often need cleaning)
        '#content', '.content',
        // Lower priority: more specific layout patterns
        '#page .content',
        '.container .content',
        // Stack Overflow
        '#mainbar',
    ];

    /** Descendants matching any of these selectors are stripped from the cloned container. */
    const EXCLUDE_SELECTORS = [
        'header', 'footer', 'nav', '.header', '.footer', '.navbar', '.menu', '.toc', '#toc',
        '.breadcrumb', '#breadcrumb',
        '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
        'aside', '.sidebar', '#sidebar', '.widget-area', '#secondary', '.left-column', '.right-column',
        '[role="complementary"]',
        '.actions', '.share', '.social', '.buttons', '.post-meta', '.entry-meta', '.feedback',
        '.related-posts', '.like-button-container', '.feedback-container',
        '#comments', '.comments', '.comment-section', '#respond',
        '.ad', '.ads', '.advertisement', '.adsbygoogle',
        // Anchor "ad-" to the start of an id / class token (or a "-"/"_" separated
        // segment). A bare substring match also hits "head-", "thread-", "download-",
        // "read-" ... and silently deletes real content.
        '[id^="ad-"]', '[id*="-ad-"]', '[id*="_ad-"]',
        '[class^="ad-"]', '[class*=" ad-"]', '[class*="-ad-"]', '[class*="_ad-"]',
        '[class*="advert"]',
        '.edit-link', '.print-link', '[role="search"]',
        // Keep <script type="math/tex..."> nodes: the math rule reads the LaTeX
        // source out of them, so they must survive cleaning.
        'script:not([type^="math/tex"])',
        'style', 'noscript', 'template', 'link[rel="stylesheet"]', 'meta',
        'input[type="hidden"]',
        '.visually-hidden', '.sr-only', '[aria-hidden="true"]',
        '.cookie-banner', '#related-articles', '.related_posts',
    ];

    // --- Helper Functions ---

    /**
     * Sanitizes a string so it can be used as a filename.
     * Forbidden characters become "_", whitespace runs collapse to one space,
     * and the result is trimmed.
     * @param {string} name - Raw name (typically the page title).
     * @returns {string} The sanitized name, or "markdown_export" when nothing is left.
     */
    function sanitizeFilename(name) {
        return String(name ?? '')
            .replace(/[\/\\:*?"<>|#%\n\r]/g, '_')
            .replace(/\s+/g, ' ')
            .trim() || "markdown_export";
    }

    /**
     * Walks CONTENT_SELECTORS in priority order and picks the content container.
     * A candidate with fewer than 100 characters of text AND fewer than 3 child
     * elements is considered suspicious: it is only used when no substantial
     * candidate exists further down the list.
     * @returns {{element: Element, selector: string}|null} The chosen element and
     *     the selector that found it, or null when no selector matched anything.
     */
    function findBestContainer() {
        let smallFallback = null;
        for (const selector of CONTENT_SELECTORS) {
            let element = null;
            try {
                element = document.querySelector(selector);
            } catch (e) {
                console.warn(`Error querying selector "${selector}": ${e.message}`);
                continue;
            }
            if (!element) {
                continue;
            }
            console.log(`Found candidate [${selector}]`);

            const textLength = element.textContent?.trim().length || 0;
            const childCount = element.childElementCount || 0;
            if (textLength < 100 && childCount < 3) {
                console.log(`... Candidate [${selector}] seems too small/empty (Text: ${textLength}, Children: ${childCount}). Lowering confidence.`);
                if (!smallFallback) {
                    smallFallback = { element, selector };
                }
                continue;
            }

            console.log(`>>> New best candidate: [${selector}]`);
            return { element, selector };
        }
        return smallFallback;
    }

    /**
     * Selects the main content container of the page, clones it and strips
     * navigation, ads, scripts and other non-content elements from the clone.
     * The live page is never modified.
     * @returns {object|null} Object containing { title: string, contentNode: Node } or null on failure.
     */
    function getPageContentNode() {
        console.log("getPageContentNode (v1.0 logic): Starting content retrieval...");
        const pageTitle = document.title || window.location.hostname;

        console.log("Searching for best content container...");
        const match = findBestContainer();
        let container;
        if (match) {
            container = match.element;
            console.log(`Selected final container: <${container.tagName.toLowerCase()}> (Selector: ${match.selector})`);
        } else {
            console.warn("No suitable specific container found after checking selectors. Falling back to document.body.");
            container = document.body;
        }

        // --- Clone and Clean ---
        try {
            if (!container || typeof container.cloneNode !== 'function') {
                console.error("Cannot clone the selected content element.");
                return null;
            }
            console.log("Cloning selected container...");
            const clone = container.cloneNode(true);

            console.log("Removing excluded elements from clone...");
            let removedCount = 0;
            for (const selector of EXCLUDE_SELECTORS) {
                try {
                    // querySelectorAll only returns descendants, so the clone
                    // root itself can never be removed here.
                    for (const el of clone.querySelectorAll(selector)) {
                        el.remove();
                        removedCount++;
                    }
                } catch (e) {
                    console.warn(`Error removing elements for selector "${selector}": ${e.message}`);
                }
            }
            console.log(`Removed ${removedCount} elements/subtrees from clone.`);

            // --- Post-cleaning Check ---
            if (clone.childElementCount === 0 && clone.textContent.trim().length < 50) {
                console.warn("Clone seems empty after cleaning! Original selection or exclusion might be wrong.");
            }

            return { title: pageTitle, contentNode: clone };
        } catch (error) {
            console.error("Critical error during cloning or cleaning:", error.message, error.stack);
            return null;
        }
    }

    // --- Math (KaTeX / MathJax / MathML) rule ---

    /**
     * Turndown filter: decides whether an element is a math container whose
     * LaTeX source should replace its rendered content.
     * @param {Element} node - Element being converted.
     * @returns {boolean} True when the math rule should handle the node.
     */
    function isMathNode(node) {
        try {
            // MathML elements report a lowercase nodeName even in HTML
            // documents, so compare case-insensitively.
            const tag = node.nodeName.toLowerCase();
            return (
                (tag === 'span' && (node.classList.contains('katex') || node.classList.contains('MathJax_Preview'))) ||
                node.classList.contains('katex-display') ||
                (tag === 'script' && Boolean(node.getAttribute('type')?.startsWith('math/tex'))) ||
                (tag === 'math' && Boolean(node.getAttribute('alttext'))) ||
                node.getAttribute('role') === 'math'
            );
        } catch (filterError) {
            console.error("Error inside MathJax filter function:", filterError, "Node:", node);
            return false;
        }
    }

    /**
     * Extracts the LaTeX source carried by a math node.
     * Sources are tried in order: script text, data-original-latex, aria-label,
     * MathML alttext, then an embedded <annotation encoding="application/x-tex">.
     * @param {Element} node - Node accepted by isMathNode.
     * @returns {{latex: string, display: boolean}} Raw LaTeX ('' when none was
     *     found) and whether it is display-mode math.
     */
    function extractMath(node) {
        const tag = node.nodeName.toLowerCase();
        // closest() also tests the node itself, so this covers both the
        // wrapper and anything rendered inside it.
        const inDisplayWrapper = () => node.closest('.katex-display, .MathJax_Display') !== null;

        if (tag === 'script') {
            const latex = node.textContent || '';
            const display = Boolean(node.getAttribute('type')?.includes('mode=display')) ||
                latex.trim().startsWith('\\display');
            return { latex, display };
        }
        if (node.dataset?.originalLatex) {
            return { latex: node.dataset.originalLatex, display: inDisplayWrapper() };
        }
        const ariaLabel = node.getAttribute('aria-label');
        if (ariaLabel) {
            return { latex: ariaLabel, display: tag === 'div' || inDisplayWrapper() };
        }
        const altText = tag === 'math' ? node.getAttribute('alttext') : null;
        if (altText) {
            return { latex: altText, display: node.getAttribute('display') === 'block' };
        }
        const annotation = node.querySelector('annotation[encoding="application/x-tex"]');
        if (annotation) {
            return { latex: annotation.textContent || '', display: inDisplayWrapper() };
        }
        return { latex: '', display: false };
    }

    /**
     * Turndown replacement: renders a math node as `$...$` (inline) or
     * `$$...$$` set off by blank lines (display).
     * @param {string} content - Markdown already produced for the children (unused).
     * @param {Element} node - Node accepted by isMathNode.
     * @returns {string} Delimited LaTeX, or '' when no LaTeX source was found.
     */
    function mathReplacement(content, node) {
        try {
            const { latex: rawLatex, display } = extractMath(node);
            const latex = rawLatex.trim();
            if (!latex) {
                return '';
            }
            const alreadyDelimited =
                (latex.startsWith('$$') && latex.endsWith('$$')) ||
                (latex.startsWith('$') && latex.endsWith('$') && !latex.startsWith('$$'));
            if (alreadyDelimited) {
                return latex;
            }
            const delimiter = display ? '$$' : '$';
            const wrapped = `${delimiter}${latex}${delimiter}`;
            // Display math is a block: separate it from the surrounding text.
            return display ? `\n\n${wrapped}\n\n` : wrapped;
        } catch (ruleError) {
            console.error("Error processing math rule replacement for node:", node, ruleError);
            return '';
        }
    }

    // --- Main Conversion and Download Logic ---

    /**
     * Menu-command handler: converts the page's main content to Markdown and
     * offers it as a ".md" download named after the page title.
     * Failures are logged and reported through alert(); nothing is thrown.
     */
    function convertAndDownload() {
        console.log("Convert to Markdown (v1.0): Button clicked...");
        try {
            // --- Initialize Turndown, Apply GFM, Add Math Rule ---
            console.log("Initializing TurndownService...");
            if (typeof TurndownService === 'undefined') {
                throw new Error('TurndownService is not defined.');
            }
            const turndownService = new TurndownService(turndownOptions);

            console.log("Applying GFM plugin...");
            if (typeof turndownPluginGfm !== 'undefined' && typeof turndownPluginGfm.gfm === 'function') {
                try {
                    turndownService.use(turndownPluginGfm.gfm);
                    console.log("GFM applied.");
                } catch (gfmError) {
                    // GFM is an enhancement only; continue without it.
                    console.error("Error applying GFM plugin:", gfmError);
                }
            } else {
                console.warn("GFM plugin not loaded.");
            }

            try {
                console.log("Adding Math rule...");
                turndownService.addRule('mathjaxKatex', { filter: isMathNode, replacement: mathReplacement });
                console.log("Math rule added.");
            } catch (addRuleError) {
                console.error("Failed to add Math rule:", addRuleError);
            }

            // --- Perform Conversion ---
            console.log("Getting page content node...");
            const pageData = getPageContentNode();
            if (!pageData || !pageData.contentNode) {
                console.error("Failed to get valid page content node. Aborting.");
                alert("Could not get a valid page content node for conversion.");
                return;
            }

            console.log(`Content node retrieved. Title: ${pageData.title}. Starting conversion...`);
            let markdownContent = '';
            try {
                markdownContent = turndownService.turndown(pageData.contentNode);
                console.log("Markdown conversion complete.");
            } catch (convertError) {
                console.error("Error during Turndown conversion:", convertError.message, convertError.stack);
                alert(`Error during Markdown conversion: ${convertError.message}`);
                return;
            }

            if (!markdownContent || markdownContent.trim() === '') {
                // Warn, but still let the user save the (empty) file.
                console.warn("Conversion resulted in empty Markdown content.");
                alert("Warning: The converted Markdown content is empty.");
            }

            // --- Prepare Filename & Download ---
            const filename = sanitizeFilename(pageData.title) + ".md";
            const dataUri = `data:text/markdown;charset=utf-8,${encodeURIComponent(markdownContent)}`;

            console.log(`Initiating download for ${filename}...`);
            GM_download({
                url: dataUri,
                name: filename,
                saveAs: true,
                onerror: (err) => {
                    console.error('GM_download error:', err);
                    alert(`Error downloading file: ${err.error || 'Unknown error'}. Check Tampermonkey settings (Advanced -> Downloads BETA -> Whitelist .md).`);
                },
            });
            console.log("Download initiated.");
        } catch (error) {
            console.error("Critical error during convertAndDownload:", error.message, error.stack);
            alert(`A critical error occurred while running the script: ${error.message}`);
        }
    }

    // --- Register Menu Command ---
    if (typeof GM_registerMenuCommand === 'function') {
        try {
            GM_registerMenuCommand("Convert Page to Markdown (v1.0)", convertAndDownload, "m");
            console.log("Menu command registered.");
        } catch (registerError) {
            console.error("Failed to register menu command:", registerError);
            alert("Failed to register menu command!");
        }
    } else {
        console.error("GM_registerMenuCommand is not available.");
        alert("GM_registerMenuCommand is not available!");
    }

    console.log("Webpage to Markdown (v1.0) script finished loading.");
})();