metruyencv.com Tesseract

5/25/2024, 5:50:11 PM

// ==UserScript==
// @name        metruyencv.com Tesseract
// @namespace   Violentmonkey Scripts
// @match       https://metruyencv.com/truyen/*
// @grant       GM.setValue
// @grant       GM.getValue
// @grant       GM.listValues
// @grant       GM.deleteValue
// @grant       GM.setClipboard
// @version     1.22
// @author      -
// @description 5/25/2024, 5:50:11 PM
// @require     https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/5.1.0/tesseract.min.js
// @require     https://cdnjs.cloudflare.com/ajax/libs/dom-to-image/2.6.0/dom-to-image.min.js
// @run-at      document-idle
// ==/UserScript==
let maxChapterNumber;  // xoa = 100 de chay het cac chuong
maxChapterNumber = maxChapterNumber ?? 999999;

function sleep(ms) { return new Promise((rs) => setTimeout(rs, ms)); }

async function waitForKeyElement(el, ms = 3000, attr = false) {
    const delay = 300;
    const retry = Math.floor(ms / delay);
    await sleep(delay);
    for (let i = 0; i < retry; i++) {
        let e = document.querySelector(el);
        if (e) {
            if (attr) {
                switch (attr) {
                    case 'innerHTML':
                    case 'textContent':
                    case 'innerText': if (e.innerText) return true; break;
                    case 'value': if (e.value) return true; break;
                    default: if (e.getAttribute(attr)) return true;
                }
            } else return true;
        }
        await sleep(delay);
    }
    return false;
}

function reEnableConsoleLog() {
    if (typeof console.log !== 'function') {
        const iF = document.createElement('iframe');
        document.body.appendChild(iF);
        iF.style.display = 'none';
        window.console.log = iF.contentWindow.console.log;
    }
}

function isChuongCuoi() { return location.href.endsWith('/chuong-cuoi') }

async function startDownload(url) {
    if (!window.location.href.startsWith('https://metruyencv.com/truyen/')) return;
    await GM.setValue('downloading', true);
    if (/^https:\/\/metruyencv\.com\/truyen\/[a-z\-\d]+\/?$/.test(window.location.href)) window.location.assign(window.location.href + '/chuong-1');
    else window.location.reload();
}

async function loadChapterContent() {
    let chapterContent = '';
    await sleep(100);
    await waitForKeyElement('#load-more', 3000, 'innerText');

    if (document.querySelectorAll('#chapter-detail canvas').length > 0) {
        let im = await domtoimage.toPng(document.querySelector('#chapter-detail'));  //800ms
        const ts = await Tesseract.createWorker('vie');
        const ret = await ts.recognize(im);  //40s-180s
        chapterContent = ret.data.text;
        ts.terminate();
    }
    else chapterContent = document.querySelector('#chapter-detail').innerText;
    return chapterContent;
}

async function getChapterContent() {
    await sleep(100);
    await waitForKeyElement('#load-more', 3000, 'innerText');
    await sleep(300); //cho chac an

    let canvas = document.querySelectorAll('#chapter-detail canvas');
    if (canvas.length > 0) {
        let img = await domtoimage.toPng(document.querySelector('#chapter-detail'));  //800ms
        const scheduler = await Tesseract.createScheduler();
        let workers = Array(navigator.hardwareConcurrency - 1);

        for (let i = 0; i < workers.length; i++) workers[i] = await Tesseract.createWorker('vie');
        workers = await Promise.all(workers.map((wk) => (scheduler.addWorker(wk))));

        let result = await Promise.all(Array.from(canvas).map((cv) => (scheduler.addJob('recognize', cv.toDataURL()))));
        result.forEach((e, i) => canvas[i].outerHTML = `<fromcanvas>${e.data.text}</fromcanvas>`);

        scheduler.terminate();
    }

    return document.querySelector('#chapter-detail')?.innerText;
}

async function finishdownload() {
    let storyName = await GM.getValue('storyName');
    await GM.deleteValue('storyName');
    await GM.deleteValue('downloading');
    let chapters = await GM.listValues();
    let content = await Promise.all(chapters.map(chapter => GM.getValue(`${chapter}`)));
    chapters.forEach(async (val) => await GM.deleteValue(val));

    content = content.join('\n\n').replaceAll(/\n{1,1}/g, '\n\n').replaceAll(/\n{3,}/g, '\n\n');
    download = document.createElement('a');
    download.href = 'data:attachment/text,' + encodeURI(content);
    download.target = '_blank';
    download.download = storyName + '.txt';
    download.click();
}

(async function () {
    window.scrollTo(0, 10000);
    reEnableConsoleLog();

    let arr = window.location.href.match(/^https:\/\/metruyencv\.com\/truyen\/[a-z\-\d]+\/chuong-(\d+|cuoi)/i);
    let chapterNumber = (arr && arr[1]) ? (isNaN(arr[1]) ? 0 : Number(arr[1])) : 0; //0 means 'chuong-cuoi'
    let downloading = await GM.getValue('downloading', false);

    if (downloading) {
        document.body.addEventListener('keypress', async (event) => { if (event.key == 'F9' || event.code == 'F9') await finishdownload() });

        if (isChuongCuoi() || chapterNumber == 0 || chapterNumber > maxChapterNumber) { await finishdownload(); return; }
        else {
            let chapterContent = await getChapterContent();
            const story = document.querySelector('main h1>a').textContent.trim();
            const chapterTitle = document.querySelector('main h2').textContent.trim();
            let storyName = await GM.getValue('storyName', '');
            if (storyName == '') await GM.setValue('storyName', story);
            await GM.setValue(chapterNumber.toString(), chapterTitle + '\n\n' + chapterContent);

            //move to next chapter
            document.querySelector('[data-x-bind="GoNext"]').click();
        }

    } else {  // else of if(downloading)
        await GM.deleteValue('downloading');
        const btn = document.createElement('button');
        document.body.appendChild(btn);
        btn.innerText = 'Bam de tai toan bo truyen';
        btn.onclick = startDownload;
        btn.style.border = "1px solid";
        btn.style.backgroundColor = 'red';

        console.time('Chay trong thoi gian');
        window.scrollTo(0, 10000);
        reEnableConsoleLog();
        let content = await getChapterContent();
        await GM.setClipboard(content);
        console.log(content);
        console.timeEnd('Chay trong thoi gian');
    }
})();