Scribd Enhancer All-in-One (v2.7.3)

Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.

目前為 2025-06-18 提交的版本,檢視 最新版本

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         Scribd Enhancer All-in-One (v2.7.3)
// @namespace    https://greasyfork.org/users/Eliminater74
// @version      2.7.3
// @description  Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
// @author       Eliminater74
// @license      MIT
// @match        *://*.scribd.com/*
// @grant        none
// @icon         https://s-f.scribdassets.com/favicon.ico
// ==/UserScript==

(function () {
  'use strict';

  // localStorage key under which the user's option choices are persisted.
  const SETTINGS_KEY = 'scribdEnhancerSettings';

  // Values used for every option that has not been saved yet (or cannot be read back).
  const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0
  };

  /**
   * Builds the effective settings by overlaying the stored values on the defaults.
   *
   * Reading or parsing can fail (storage disabled, corrupted JSON, a stored value
   * that is not an object); in every such case the defaults are returned so the
   * rest of the script still starts.
   *
   * @param {() => (string|null)} readRaw - Returns the raw stored JSON, or null/'' if absent.
   * @returns {object} A fresh settings object containing every key of defaultSettings.
   */
  function loadSettings(readRaw) {
    try {
      const stored = JSON.parse(readRaw() || '{}');
      const isPlainObject = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
      return { ...defaultSettings, ...(isPlainObject ? stored : {}) };
    } catch (err) {
      console.warn('[Scribd Enhancer] Ignoring unreadable saved settings:', err);
      return { ...defaultSettings };
    }
  }

  // The storage access lives inside the callback so that a throwing `localStorage`
  // getter is caught by loadSettings as well.
  const settings = loadSettings(() => localStorage.getItem(SETTINGS_KEY));

  /** Persists the current settings; a storage failure is logged instead of thrown. */
  const saveSettings = () => {
    try {
      localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));
    } catch (err) {
      console.warn('[Scribd Enhancer] Could not persist settings:', err);
    }
  };

  // Inject Tesseract.js from the jsDelivr CDN. scrapePages() only attempts OCR when
  // the library has attached itself as window.Tesseract, so a failed load simply
  // leaves OCR disabled.
  // The package segment of this URL previously read "[email protected]", which is not a
  // resolvable package path; it is restored to "tesseract.js@<version>".
  // NOTE(review): 4.0.2 is an assumed version — confirm the release this script targets.
  const tesseractScript = document.createElement('script');
  tesseractScript.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@4.0.2/dist/tesseract.min.js';
  tesseractScript.onerror = () => {
    console.warn('[Scribd Enhancer] Tesseract.js failed to load; OCR will be skipped.');
  };
  document.head.appendChild(tesseractScript);

  // Stylesheet for the floating control panel (#se-ui), the text preview
  // (#se-preview) and the optional dark mode.
  //
  // The blanket `.dark-mode *` rule uses !important, so every rule that must win
  // over it in dark mode (the panel, its controls, the preview) needs !important
  // as well plus higher specificity. `html`/`body` get an explicit dark backdrop:
  // the blanket rule only makes descendants transparent and never paints one.
  const style = document.createElement('style');
  style.textContent = `
    #se-ui {
      position: fixed; bottom: 20px; right: 20px; background: #222; color: #fff;
      border-radius: 10px; padding: 10px; z-index: 9999; width: 320px;
      font-family: sans-serif; font-size: 13px; box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
      display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
      background: #444; color: white; border: none; border-radius: 6px; padding: 6px;
    }
    #se-preview {
      position: fixed; top: 10px; right: 20px; bottom: 140px; width: 360px;
      background: #f4f4f4; color: #000; overflow: auto; padding: 10px;
      font-family: monospace; font-size: 12px; white-space: pre-wrap;
      border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    html.dark-mode, body.dark-mode {
      background-color: #121212 !important;
    }
    .dark-mode * {
      background-color: transparent !important;
      color: #e0e0e0 !important;
      border-color: #444 !important;
    }
    .dark-mode #se-ui {
      background-color: #222 !important;
    }
    .dark-mode #se-ui select, .dark-mode #se-ui button {
      background-color: #444 !important;
    }
    .dark-mode #se-preview {
      background-color: #222 !important; color: #eee !important; border-color: #555 !important;
    }
  `;
  document.head.appendChild(style);

  /**
   * Mirrors settings.darkMode onto the page: the `dark-mode` class is forced on
   * (or off) for both the root element and the body.
   */
  function applyDarkMode() {
    const enabled = settings.darkMode;
    for (const target of [document.documentElement, document.body]) {
      target.classList.toggle('dark-mode', enabled);
    }
  }

  /**
   * Removes blur/promo overlay elements and makes hidden text readable, then
   * repeats the sweep whenever nodes are added to or removed from the page.
   * Does nothing when settings.unblur is off.
   */
  function unblurContent() {
    if (!settings.unblur) return;

    // getComputedStyle() serialises colours as rgb()/rgba() strings, so comparing
    // against the keywords alone never matches; accept both spellings.
    const isFullyTransparent = (color) =>
      color === 'transparent' || /^rgba\([^)]*,\s*0(?:\.0+)?\s*\)$/.test(color);
    const hasWhiteShadow = (shadow) =>
      /white|rgba?\(\s*255,\s*255,\s*255\b/i.test(shadow ?? '');

    const cleanup = () => {
      document
        .querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]')
        .forEach((el) => el.remove());
      document.querySelectorAll('*').forEach((el) => {
        const cs = getComputedStyle(el);
        if (isFullyTransparent(cs.color)) el.style.color = '#111';
        if (hasWhiteShadow(cs.textShadow)) el.style.textShadow = 'none';
      });
    };

    // The sweep reads the computed style of every element, so bursts of mutations
    // are coalesced into a single delayed run instead of one sweep per mutation.
    let sweepQueued = false;
    const scheduleCleanup = () => {
      if (sweepQueued) return;
      sweepQueued = true;
      setTimeout(() => {
        sweepQueued = false;
        cleanup();
      }, 200);
    };

    cleanup();
    new MutationObserver(scheduleCleanup).observe(document.body, { childList: true, subtree: true });
  }

  /**
   * Filters OCR output down to lines that look like real text.
   *
   * Each line is trimmed and kept only if it is at least 3 characters long,
   * contains an ASCII letter, and is not made up solely of non-alphanumerics.
   *
   * @param {string} text - Raw multi-line OCR output.
   * @returns {string} The surviving lines joined with '\n'.
   */
  function cleanOCRText(text) {
    const kept = [];
    for (const rawLine of text.split('\n')) {
      const line = rawLine.trim();
      if (line.length < 3) continue;
      if (!/[a-zA-Z]/.test(line)) continue;
      if (/^[^a-zA-Z0-9]{3,}$/.test(line)) continue;
      kept.push(line);
    }
    return kept.join('\n');
  }

  /**
   * Guesses a language code from characteristic accented characters.
   *
   * The candidates are tried in a fixed order (spa, fra, deu, ron) and the first
   * whose pattern occurs anywhere in the text wins; 'eng' is the fallback.
   *
   * @param {string} text - Sample text to inspect.
   * @returns {string} One of 'spa', 'fra', 'deu', 'ron' or 'eng'.
   */
  function detectLanguage(text) {
    const signatures = [
      ['spa', /ñ|á|í|ó|ú/],
      ['fra', /é|è|ê|ç/],
      ['deu', /ä|ö|ü|ß/],
      ['ron', /ș|ț|ă|î|â/],
    ];
    const hit = signatures.find(([, pattern]) => pattern.test(text));
    return hit ? hit[0] : 'eng';
  }

  /**
   * Loads an image and converts it to a grayscale PNG data URL for OCR.
   *
   * The promise always settles: it resolves to null when the URL is empty or
   * contains "logo", "icon" or "watermark", when the image fails to load, when
   * it is smaller than 100px in either dimension, or when the canvas cannot be
   * read back.
   *
   * @param {string} src - URL of the image to process.
   * @returns {Promise<string|null>} Grayscale PNG data URL, or null if skipped.
   */
  async function preprocessImage(src) {
    // Decide from the URL alone before spending a network request on the image.
    if (!src || /logo|icon|watermark/i.test(src)) return null;

    return new Promise((resolve) => {
      const img = new Image();
      img.crossOrigin = 'anonymous';
      // Without this handler a failed load would leave the promise pending
      // forever and stall the caller awaiting it.
      img.onerror = () => resolve(null);
      img.onload = () => {
        try {
          if (img.naturalWidth < 100 || img.naturalHeight < 100) {
            resolve(null);
            return;
          }
          const canvas = document.createElement('canvas');
          canvas.width = img.width;
          canvas.height = img.height;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0);
          const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
          const px = imageData.data;
          // Pixels are stored as R, G, B, A; replace the colour channels with their mean.
          for (let i = 0; i < px.length; i += 4) {
            const avg = (px[i] + px[i + 1] + px[i + 2]) / 3;
            px[i] = avg;
            px[i + 1] = avg;
            px[i + 2] = avg;
          }
          ctx.putImageData(imageData, 0, 0);
          resolve(canvas.toDataURL('image/png'));
        } catch (err) {
          // Canvas calls can throw (e.g. a missing 2D context or a blocked read-back).
          console.warn('[Scribd Enhancer] Could not preprocess image for OCR:', err);
          resolve(null);
        }
      };
      img.src = src;
    });
  }

  /**
   * Creates the `#se-preview` panel that collects scraped text.
   *
   * The element is always created and returned; it is seeded with a banner line
   * and attached to the page only when settings.showPreview is enabled.
   *
   * @returns {HTMLDivElement} The preview element (possibly detached).
   */
  function createPreview() {
    const panel = Object.assign(document.createElement('div'), { id: 'se-preview' });
    if (!settings.showPreview) return panel;

    panel.textContent = '[Preview Initialized]\n';
    document.body.appendChild(panel);
    return panel;
  }

  /**
   * Downloads the given text as one file, or as several files of
   * settings.splitEvery pages each.
   *
   * Page boundaries are the positions where a "[Page N]" marker starts. With
   * splitting off the file is named `scribd_output.<ext>`; otherwise the parts
   * are named `scribd_part1.<ext>`, `scribd_part2.<ext>`, …
   * NOTE: when splitting, the content is cut purely at the markers, so any
   * wrapper markup around the whole content ends up only in the first/last part.
   *
   * @param {string} content - Full text to export.
   * @param {string} ext - File extension without the dot (e.g. 'txt', 'html').
   */
  function exportOutput(content, ext) {
    // `text/txt` is not a registered MIME type; map the known extensions explicitly.
    const mimeByExt = new Map([['txt', 'text/plain'], ['html', 'text/html']]);
    const type = mimeByExt.get(ext) ?? `text/${ext}`;

    const download = (text, filename) => {
      const url = URL.createObjectURL(new Blob([text], { type }));
      const a = document.createElement('a');
      a.href = url;
      a.download = filename;
      a.click();
      // Release the blob once the browser has had time to start the download;
      // otherwise every export stays in memory until the page is closed.
      setTimeout(() => URL.revokeObjectURL(url), 10000);
    };

    const split = settings.splitEvery;
    if (!split || split < 1) {
      download(content, `scribd_output.${ext}`);
      return;
    }

    const pages = content.split(/(?=\[Page \d+])/);
    for (let i = 0; i < pages.length; i += split) {
      const chunk = pages.slice(i, i + split).join('\n');
      download(chunk, `scribd_part${Math.floor(i / split) + 1}.${ext}`);
    }
  }

  /**
   * Opens a window named "PrintView" containing the text inside a <pre> and
   * triggers the browser's print dialog shortly afterwards.
   *
   * @param {string} content - Plain text to print; it is HTML-escaped before
   *   being written so markup characters in it are shown literally.
   */
  function printToPDF(content) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    const escapeHtml = (text) => String(text).replace(/[&<>]/g, (ch) => entities[ch]);

    const win = window.open('', 'PrintView');
    // window.open() returns null when the popup is blocked.
    if (!win) {
      alert('❌ Could not open the print window. Please allow pop-ups for this site.');
      return;
    }

    win.document.write(`<html><head><title>Scribd Print</title></head><body><pre>${escapeHtml(content)}</pre></body></html>`);
    win.document.close();
    win.focus();
    // Short delay before printing so the freshly written document can render.
    setTimeout(() => win.print(), 600);
  }

  /**
   * Extracts text (and, when enabled, OCR text from images) from each page
   * element and writes it into the preview, using up to 4 concurrent workers.
   *
   * Output is kept in page order regardless of which worker finishes first:
   * every page gets its own text node, appended to the preview up front, and
   * only ever writes into that node. A failing OCR call is logged and skipped
   * rather than aborting the whole run.
   *
   * @param {Element[]} pages - Page elements to scrape, in document order.
   * @param {Element} preview - Element that receives the collected text.
   * @returns {Promise<void>} Resolves after every page was processed and the
   *   completion alert was shown.
   */
  async function scrapePages(pages, preview) {
    const concurrency = 4;
    const firstText = [];
    let index = 0;

    // One slot per page, created in page order before any worker starts.
    const slots = pages.map(() => preview.appendChild(document.createTextNode('')));

    // Runs OCR on a single image; returns '' when the image is skipped or empty.
    async function ocrImage(img) {
      const processed = await preprocessImage(img.src || '');
      if (!processed) return '';
      const lang = settings.ocrLang === 'auto' ? detectLanguage(firstText.join(' ')) : settings.ocrLang;
      const result = await window.Tesseract.recognize(processed, lang);
      return cleanOCRText(result?.data?.text || '');
    }

    async function scrape(page, i) {
      page.scrollIntoView();
      // Brief pause after scrolling before the page's content is read.
      await new Promise((resolve) => setTimeout(resolve, 300));

      const slot = slots[i];
      let found = false;

      const text = page.innerText.trim();
      if (text) {
        slot.data += `[Page ${i + 1}] ✅\n${text}\n\n`;
        firstText.push(text);
        found = true;
      }

      if (settings.enableOCR && window.Tesseract) {
        for (const img of page.querySelectorAll('img')) {
          try {
            const ocrText = await ocrImage(img);
            if (ocrText) {
              slot.data += `[OCR] ${ocrText}\n\n`;
              found = true;
            }
          } catch (err) {
            console.warn(`[Scribd Enhancer] OCR failed on page ${i + 1}:`, err);
          }
        }
      }

      if (!found) slot.data += `[Page ${i + 1}] ❌ No content\n\n`;
    }

    // Each worker repeatedly claims the next unprocessed index until none remain.
    const worker = async () => {
      while (index < pages.length) {
        const i = index++;
        await scrape(pages[i], i);
      }
    };
    const workerCount = Math.min(concurrency, pages.length);
    await Promise.all(Array.from({ length: workerCount }, worker));
    alert(`✅ Scraped ${pages.length} pages.`);
  }

  /**
   * Builds the floating control panel, initialises its controls from the
   * current settings and wires up all handlers.
   *
   * Any change to a checkbox or select re-reads every control into `settings`,
   * persists them, re-applies dark mode and shows/hides the preview panel.
   * The buttons start a scrape, export the preview text as TXT or HTML, or
   * send it to the print window.
   *
   * @param {Element} preview - The preview element whose text is exported/printed.
   */
  function createUI(preview) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    const escapeHtml = (text) => String(text).replace(/[&<>]/g, (ch) => entities[ch]);

    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
      <label><input type="checkbox" id="opt-unblur"> Unblur</label>
      <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
      <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
      <label><input type="checkbox" id="opt-preview"> Show Preview</label>
      <label>OCR:
        <select id="opt-lang">
          <option value="auto">Auto</option>
          <option value="eng">English</option>
          <option value="spa">Spanish</option>
          <option value="fra">French</option>
          <option value="deu">German</option>
        </select>
      </label>
      <label>Split Every:
        <select id="opt-split">
          <option value="0">Off</option>
          <option value="100">100</option>
          <option value="250">250</option>
          <option value="500">500</option>
        </select>
      </label>
      <button id="btn-scrape">📖 Scrape Pages</button>
      <button id="btn-export">💾 Export TXT</button>
      <button id="btn-html">🧾 Export HTML</button>
      <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    const unblurBox = ui.querySelector('#opt-unblur');
    const autoScrapeBox = ui.querySelector('#opt-autoscrape');
    const darkBox = ui.querySelector('#opt-dark');
    const previewBox = ui.querySelector('#opt-preview');
    const langSelect = ui.querySelector('#opt-lang');
    const splitSelect = ui.querySelector('#opt-split');

    unblurBox.checked = settings.unblur;
    autoScrapeBox.checked = settings.autoScrape;
    darkBox.checked = settings.darkMode;
    previewBox.checked = settings.showPreview;
    langSelect.value = settings.ocrLang;
    splitSelect.value = String(settings.splitEvery);

    // Makes the "Show Preview" checkbox take effect immediately rather than
    // only on the next page load.
    const syncPreviewVisibility = () => {
      if (!settings.showPreview) {
        preview.remove();
      } else if (!preview.isConnected) {
        document.body.appendChild(preview);
      }
    };

    ui.querySelectorAll('input, select').forEach((input) => {
      input.onchange = () => {
        settings.unblur = unblurBox.checked;
        settings.autoScrape = autoScrapeBox.checked;
        settings.darkMode = darkBox.checked;
        settings.showPreview = previewBox.checked;
        settings.ocrLang = langSelect.value;
        // Explicit radix; an unparsable value falls back to 0 (splitting off)
        // instead of storing NaN.
        settings.splitEvery = Number.parseInt(splitSelect.value, 10) || 0;
        saveSettings();
        applyDarkMode();
        syncPreviewVisibility();
      };
    });

    ui.querySelector('#btn-scrape').onclick = () => {
      // NOTE(review): these selectors can match containers nested inside one
      // another, in which case the same text is scraped more than once — confirm
      // against the live page structure.
      const pages = [...document.querySelectorAll(
        '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer'
      )];
      if (!pages.length) return alert('❌ No pages found.');
      scrapePages(pages, preview).catch((err) => {
        console.error('[Scribd Enhancer] Scrape failed:', err);
        alert('❌ Scrape failed. See the console for details.');
      });
    };
    ui.querySelector('#btn-export').onclick = () => exportOutput(preview.textContent, 'txt');
    // The scraped text is escaped so that '<' and '&' in it survive as literal
    // characters, and the charset is declared so the emoji markers render.
    ui.querySelector('#btn-html').onclick = () => exportOutput(
      `<html><head><meta charset="utf-8"></head><body><pre>${escapeHtml(preview.textContent)}</pre></body></html>`,
      'html'
    );
    ui.querySelector('#btn-print').onclick = () => printToPDF(preview.textContent);
  }

  /**
   * One-time start-up: applies dark mode, starts unblurring, creates the preview
   * and control panel, and triggers a scrape when auto-scrape is enabled.
   */
  function initEnhancer() {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
  }

  // If the script is injected after the page has finished loading, the `load`
  // event has already fired and a listener would never run; start immediately
  // in that case.
  if (document.readyState === 'complete') {
    initEnhancer();
  } else {
    window.addEventListener('load', initEnhancer, { once: true });
  }
})();