Scribd Enhancer All-in-One (v2.7.3)

Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.

Versión del día 18/06/2025. Echa un vistazo a la versión más reciente.

Tendrás que instalar una extensión para tu navegador como Tampermonkey, Greasemonkey o Violentmonkey si quieres utilizar este script.

You will need to install an extension such as Tampermonkey to install this script.

Necesitarás instalar una extensión como Tampermonkey o Violentmonkey para instalar este script.

Necesitarás instalar una extensión como Tampermonkey o Userscripts para instalar este script.

Necesitará instalar una extensión como Tampermonkey para instalar este script.

Necesitarás instalar una extensión para administrar scripts de usuario si quieres instalar este script.

(Ya tengo un administrador de scripts de usuario, déjame instalarlo)

Necesitará instalar una extensión como Stylus para instalar este estilo.

Necesitará instalar una extensión como Stylus para instalar este estilo.

Necesitará instalar una extensión como Stylus para instalar este estilo.

Necesitará instalar una extensión del gestor de estilos de usuario para instalar este estilo.

Necesitará instalar una extensión del gestor de estilos de usuario para instalar este estilo.

Necesitará instalar una extensión del gestor de estilos de usuario para instalar este estilo.

(Ya tengo un administrador de estilos de usuario, déjame instalarlo)

// ==UserScript==
// @name         Scribd Enhancer All-in-One (v2.7.3)
// @namespace    https://greasyfork.org/users/Eliminater74
// @version      2.7.3
// @description  Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
// @author       Eliminater74
// @license      MIT
// @match        *://*.scribd.com/*
// @grant        none
// @icon         https://s-f.scribdassets.com/favicon.ico
// ==/UserScript==

(function () {
  'use strict';

  const SETTINGS_KEY = 'scribdEnhancerSettings';
  const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0
  };
  const settings = { ...defaultSettings, ...JSON.parse(localStorage.getItem(SETTINGS_KEY) || '{}') };
  const saveSettings = () => localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));

  const tesseractScript = document.createElement('script');
  tesseractScript.src = 'https://cdn.jsdelivr.net/npm/[email protected]/dist/tesseract.min.js';
  document.head.appendChild(tesseractScript);

  const style = document.createElement('style');
  style.textContent = `
    #se-ui {
      position: fixed; bottom: 20px; right: 20px; background: #222; color: #fff;
      border-radius: 10px; padding: 10px; z-index: 9999; width: 320px;
      font-family: sans-serif; font-size: 13px; box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
      display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
      background: #444; color: white; border: none; border-radius: 6px; padding: 6px;
    }
    #se-preview {
      position: fixed; top: 10px; right: 20px; bottom: 140px; width: 360px;
      background: #f4f4f4; color: #000; overflow: auto; padding: 10px;
      font-family: monospace; font-size: 12px; white-space: pre-wrap;
      border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    .dark-mode #se-preview {
      background: #222; color: #eee; border-color: #555;
    }
    .dark-mode * {
      background-color: transparent !important;
      color: #e0e0e0 !important;
      border-color: #444 !important;
    }
  `;
  document.head.appendChild(style);

  function applyDarkMode() {
    document.documentElement.classList.toggle('dark-mode', settings.darkMode);
    document.body.classList.toggle('dark-mode', settings.darkMode);
  }

  function unblurContent() {
    if (!settings.unblur) return;
    const cleanup = () => {
      document.querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]').forEach(el => el.remove());
      document.querySelectorAll('*').forEach(el => {
        const cs = getComputedStyle(el);
        if (cs.color === 'transparent') el.style.color = '#111';
        if (cs.textShadow?.includes('white')) el.style.textShadow = 'none';
      });
    };
    cleanup();
    new MutationObserver(cleanup).observe(document.body, { childList: true, subtree: true });
  }

  function cleanOCRText(text) {
    return text.split('\n').map(t => t.trim()).filter(line =>
      line.length >= 3 && /[a-zA-Z]/.test(line) && !/^[^a-zA-Z0-9]{3,}$/.test(line)
    ).join('\n');
  }

  function detectLanguage(text) {
    const map = { spa: /ñ|á|í|ó|ú/, fra: /é|è|ê|ç/, deu: /ä|ö|ü|ß/, ron: /ș|ț|ă|î|â/ };
    for (const [lang, regex] of Object.entries(map)) {
      if (regex.test(text)) return lang;
    }
    return 'eng';
  }

  async function preprocessImage(src) {
    return new Promise(resolve => {
      const img = new Image();
      img.crossOrigin = 'anonymous';
      img.onload = () => {
        if (img.naturalWidth < 100 || img.naturalHeight < 100 || /logo|icon|watermark/i.test(src)) return resolve(null);
        const canvas = document.createElement('canvas');
        canvas.width = img.width; canvas.height = img.height;
        const ctx = canvas.getContext('2d');
        ctx.drawImage(img, 0, 0);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        for (let i = 0; i < imageData.data.length; i += 4) {
          const avg = (imageData.data[i] + imageData.data[i+1] + imageData.data[i+2]) / 3;
          imageData.data[i] = imageData.data[i+1] = imageData.data[i+2] = avg;
        }
        ctx.putImageData(imageData, 0, 0);
        resolve(canvas.toDataURL('image/png'));
      };
      img.src = src;
    });
  }

  function createPreview() {
    const preview = document.createElement('div');
    preview.id = 'se-preview';
    if (settings.showPreview) {
      preview.textContent = '[Preview Initialized]\n';
      document.body.appendChild(preview);
    }
    return preview;
  }

  function exportOutput(content, ext) {
    const split = settings.splitEvery;
    const lines = content.split(/(?=\[Page \d+])/);
    if (!split || split < 1) {
      const blob = new Blob([content], { type: `text/${ext}` });
      const a = document.createElement('a');
      a.href = URL.createObjectURL(blob);
      a.download = `scribd_output.${ext}`;
      a.click();
    } else {
      for (let i = 0; i < lines.length; i += split) {
        const chunk = lines.slice(i, i + split).join('\n');
        const blob = new Blob([chunk], { type: `text/${ext}` });
        const a = document.createElement('a');
        a.href = URL.createObjectURL(blob);
        a.download = `scribd_part${Math.floor(i / split) + 1}.${ext}`;
        a.click();
      }
    }
  }

  function printToPDF(content) {
    const win = window.open('', 'PrintView');
    win.document.write(`<html><head><title>Scribd Print</title></head><body><pre>${content}</pre></body></html>`);
    win.document.close();
    win.focus();
    setTimeout(() => win.print(), 600);
  }

  async function scrapePages(pages, preview) {
    const concurrency = 4;
    let index = 0;
    const firstText = [];

    async function scrape(page, i) {
      page.scrollIntoView();
      await new Promise(r => setTimeout(r, 300));

      let found = false;
      const text = page.innerText.trim();
      if (text) {
        preview.textContent += `[Page ${i + 1}] ✅\n${text}\n\n`;
        firstText.push(text);
        found = true;
      }

      if (settings.enableOCR && window.Tesseract) {
        const imgs = page.querySelectorAll('img');
        for (let img of imgs) {
          const src = img.src || '';
          const processed = await preprocessImage(src);
          if (!processed) continue;
          const lang = settings.ocrLang === 'auto' ? detectLanguage(firstText.join(' ')) : settings.ocrLang;
          const result = await window.Tesseract.recognize(processed, lang);
          const ocrText = cleanOCRText(result.data.text || '');
          if (ocrText) {
            preview.textContent += `[OCR] ${ocrText}\n\n`;
            found = true;
          }
        }
      }

      if (!found) preview.textContent += `[Page ${i + 1}] ❌ No content\n\n`;
    }

    const tasks = Array(concurrency).fill(null).map(async () => {
      while (index < pages.length) {
        const i = index++;
        await scrape(pages[i], i);
      }
    });
    await Promise.all(tasks);
    alert(`✅ Scraped ${pages.length} pages.`);
  }

  function createUI(preview) {
    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
      <label><input type="checkbox" id="opt-unblur"> Unblur</label>
      <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
      <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
      <label><input type="checkbox" id="opt-preview"> Show Preview</label>
      <label>OCR:
        <select id="opt-lang">
          <option value="auto">Auto</option>
          <option value="eng">English</option>
          <option value="spa">Spanish</option>
          <option value="fra">French</option>
          <option value="deu">German</option>
        </select>
      </label>
      <label>Split Every:
        <select id="opt-split">
          <option value="0">Off</option>
          <option value="100">100</option>
          <option value="250">250</option>
          <option value="500">500</option>
        </select>
      </label>
      <button id="btn-scrape">📖 Scrape Pages</button>
      <button id="btn-export">💾 Export TXT</button>
      <button id="btn-html">🧾 Export HTML</button>
      <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    ui.querySelector('#opt-unblur').checked = settings.unblur;
    ui.querySelector('#opt-autoscrape').checked = settings.autoScrape;
    ui.querySelector('#opt-dark').checked = settings.darkMode;
    ui.querySelector('#opt-preview').checked = settings.showPreview;
    ui.querySelector('#opt-lang').value = settings.ocrLang;
    ui.querySelector('#opt-split').value = settings.splitEvery;

    ui.querySelectorAll('input, select').forEach(input => {
      input.onchange = () => {
        settings.unblur = ui.querySelector('#opt-unblur').checked;
        settings.autoScrape = ui.querySelector('#opt-autoscrape').checked;
        settings.darkMode = ui.querySelector('#opt-dark').checked;
        settings.showPreview = ui.querySelector('#opt-preview').checked;
        settings.ocrLang = ui.querySelector('#opt-lang').value;
        settings.splitEvery = parseInt(ui.querySelector('#opt-split').value);
        saveSettings();
        applyDarkMode();
      };
    });

    ui.querySelector('#btn-scrape').onclick = () => {
      const pages = [...document.querySelectorAll(
        '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer'
      )];
      if (!pages.length) return alert('❌ No pages found.');
      scrapePages(pages, preview);
    };
    ui.querySelector('#btn-export').onclick = () => exportOutput(preview.textContent, 'txt');
    ui.querySelector('#btn-html').onclick = () => exportOutput(`<html><body><pre>${preview.textContent}</pre></body></html>`, 'html');
    ui.querySelector('#btn-print').onclick = () => printToPDF(preview.textContent);
  }

  window.addEventListener('load', () => {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
  });
})();