Bachngocsach Vip scraper

Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop.

You will need to install an extension such as Tampermonkey, Greasemonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name        Bachngocsach Vip scraper
// @name:en     Bachngocsach Vip scraper
// @name:vi     Tải truyện vip bachngocsach
// @namespace   Violentmonkey Scripts
// @match       *://bachngocsach.info/*
// @grant       GM_setValue
// @grant       GM_getValue
// @grant       GM_deleteValue
// @grant       GM_listValues
// @version     1.1.0
// @author      Tác giả = tác = làm, giả là giả (dối) = làm giả
// @description Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop.
// @description:vi Tải truyện từ bachngocsach.net.vn. Ctrl+Alt+S để bắt đầu, Ctrl+Alt+C để ngừng
// @description:en Scrape novel content from bachngocsach.net.vn. Ctrl+Alt+S to start, Ctrl+Alt+C to stop
// @license     MIT2
// ==/UserScript==
// Shortcut that starts a download (Ctrl+Alt+S). The `key` value is matched
// against both `event.key` and `event.code` by the keydown handler.
const startKey = {
  key: 'KeyS',
  ctrlKey: true,
  altKey: true,
  shiftKey: false,
};

// Shortcut that stops the download and saves the result (Ctrl+Alt+C).
const stopKey = {
  key: 'KeyC',
  ctrlKey: true,
  altKey: true,
  shiftKey: false,
};

// When true, chapter changes are detected through a synthetic `urlchange`
// event; when false, a MutationObserver on <body> is used instead.
const useEvent = false;

/**
 * Returns a promise that resolves (with no value) after roughly `ms` milliseconds.
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});

/**
 * Simulates a key press by dispatching a bubbling `keydown` event on `window`.
 * The same string is used for both the `key` and the `code` of the event.
 * @param {string} key - Key identifier, e.g. 'ArrowRight'.
 * @returns {boolean} Whatever `window.dispatchEvent` returns.
 */
const pressKey = (key) => {
  const keydown = new KeyboardEvent('keydown', { key, code: key, bubbles: true });
  return window.dispatchEvent(keydown);
};

/**
 * Makes URL changes observable: after every `history.pushState` call and on
 * every `popstate` event, either `window.onurlchange` is invoked (when it is a
 * function) or a shared `urlchange` Event is dispatched on `window`.
 *
 * The original `pushState` is kept on `history._pushState`.
 */
function addUrlChangeEvent() {
  const urlChanged = new Event('urlchange');

  // Prefer a handler property if one is installed, otherwise fire the event.
  const announce = () => {
    if (typeof window.onurlchange === 'function') {
      window.onurlchange();
    } else {
      window.dispatchEvent(urlChanged);
    }
  };

  history._pushState = history.pushState;
  history.pushState = (...args) => {
    history._pushState(...args);
    announce();
  };

  window.addEventListener('popstate', () => {
    announce();
  });
}

/**
 * Replaces `console.log` with another logging function.
 *
 * @param {number} [c] - 1: console.dir, 2: console.info, 3: console.debug,
 *   4: console.warn. Any other value borrows `console.log` from a newly
 *   created hidden iframe appended to the document body.
 */
function reEnableConsoleLog(c) {
  const substitutes = new Map([
    [1, 'dir'],
    [2, 'info'],
    [3, 'debug'],
    [4, 'warn'],
  ]);

  const method = substitutes.get(c);
  if (method !== undefined) {
    console.log = console[method];
    return;
  }

  // Fallback: take the logger from a fresh iframe's own console object.
  const frame = document.createElement('iframe');
  document.body.appendChild(frame);
  frame.style.display = 'none';
  window.console.log = frame.contentWindow.console.log;
}

/**
 * Reads the `order` values declared in the page's `<style class="dynamic-styles">`
 * element and returns them keyed by selector name (a leading '.' is stripped).
 *
 * Returns an empty object when the style element is missing or empty, instead
 * of throwing on a null element.
 *
 * @param {boolean} [useRegex=true] - true: extract `name{order:N}` rules with a
 *   regular expression; false: split the stylesheet text on '}' and '{order:'.
 * @returns {Object<string, number>} Map of selector name to its numeric order.
 */
function getStyles(useRegex=true) {
  const style = {};
  const styleStr = document.querySelector('style.dynamic-styles')?.textContent;
  if (!styleStr) return style;

  if (useRegex) {
    const reg = /\.?([0-9a-zA-Z]+?)\{order:([0-9]+?)\}/g;
    // matchAll returns an iterator; for...of works even where iterator
    // helper methods such as Iterator.prototype.forEach are unavailable.
    for (const m of styleStr.matchAll(reg)) {
      style[m[1]] = Number.parseInt(m[2], 10);
    }
    return style;
  }

  for (const rule of styleStr.split('}')) {
    if (rule === '') continue;
    const [selector, order] = rule.split('{order:');
    const name = selector.startsWith('.') ? selector.slice(1) : selector;
    style[name] = Number.parseInt(order, 10);
  }
  return style;
}

/**
 * Concatenates the text of every text node under `el`, skipping text whose
 * direct parent is a non-content element (scripts, styles, media, form
 * inputs, ...).
 *
 * NOTE: the original author marked this routine as currently producing wrong
 * results; it is kept as an alternative to the style-based extractor.
 *
 * @param {Node} [el=document.body] - Root of the subtree to read.
 * @returns {string} All accepted text node values joined without separators.
 */
function chapterContentByTreeWalker(el = document.body) {
  const ignoredParents = ['META', 'SCRIPT', 'NOSCRIPT', 'STYLE', 'AREA', 'BASE', 'CANVAS', 'CODE', 'EMBED', 'LINK', 'MAP', 'PARAM', 'SOURCE', 'VIDEO', 'IMG', 'PICTURE', 'INPUT', 'TEXTAREA'];

  const acceptText = (node) => (
    ignoredParents.includes(node.parentNode?.tagName)
      ? NodeFilter.FILTER_REJECT
      : NodeFilter.FILTER_ACCEPT
  );

  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, acceptText);

  let text = '';
  for (let current = walker.nextNode(); current; current = walker.nextNode()) {
    text += current.nodeValue;
  }
  return text;
}

/**
 * Extracts the current chapter from the page.
 *
 * The part of the chapter inside `div.published-content` is stored as shuffled
 * child elements whose real position is given by their CSS `order`. Each child
 * of that container is one line (ordered by its class name), and each child of
 * a line is a fragment (ordered by its tag name). Orders come from getStyles()
 * first and fall back to the computed style of the element.
 *
 * @returns {{storyName: string, chapterName: string, chapterNumber: number, chapterContent: string}}
 *   Titles from the two `.line-clamp-1` headings, the chapter number parsed
 *   from the URL (`/chuong-N`), and the reassembled text.
 */
function chapterContentByStyle() {
  console.log('Get chapter content');
  const result = {};

  const headings = document.querySelectorAll('main>div>.container>div>.line-clamp-1');
  result.storyName = headings[0].textContent;
  result.chapterName = headings[1].textContent;
  const urlMatch = location.href.match(/.\/chuong-(\d+)/);
  result.chapterNumber = parseInt(urlMatch[1]);

  const scrambled = document.querySelector('div.published-content');
  // Plain-text part that precedes the scrambled container (empty if absent).
  const plainText = scrambled?.previousElementSibling?.innerText || '';
  if (!scrambled) {
    result.chapterContent = plainText;
    return result;
  }

  const orders = getStyles();
  // Looks up the order for `key`, caching the computed style when the
  // stylesheet lookup gave nothing (or a falsy value).
  const orderOf = (key, element) => {
    if (!orders[key]) orders[key] = parseInt(getComputedStyle(element).order);
    return orders[key];
  };

  const lines = new Array(scrambled.children.length);
  for (const lineEl of scrambled.children) {
    const lineKey = lineEl.className.toLowerCase();
    const linePosition = orderOf(lineKey, lineEl);

    const fragments = new Array(lineEl.children.length);
    for (const fragmentEl of lineEl.children) {
      const fragmentKey = fragmentEl.tagName.toLowerCase();
      fragments[orderOf(fragmentKey, fragmentEl)] = fragmentEl.textContent;
    }
    lines[linePosition] = fragments.join('');
  }

  const combined = plainText.trim() + lines.join('\n\n');
  result.chapterContent = combined.replaceAll(/\n{3,}/g, '\n\n').replaceAll('·', '');
  return result;
}

const getChapterContent=chapterContentByStyle;
// const getChapterContent=chapterContentByTreeWalker;

/**
 * Marks a download as in progress and navigates so that scraping begins.
 *
 * If `url` is a story index page (`/truyen/<slug>` with an optional trailing
 * slash) the browser is sent to that story's first chapter; otherwise the
 * current page is reloaded so the chapter observer picks it up.
 *
 * The check looks only at the path, not the host, so it keeps working on
 * whichever domain the script is matched against, and a trailing slash no
 * longer produces a `//chuong-1` URL.
 *
 * @param {string} [url=window.location.href] - Absolute URL of the current page.
 */
function startDownload(url = window.location.href) {
  console.log('Start downloading');
  GM_setValue('downloading', true);

  const current = new URL(url);
  if (/^\/truyen\/[a-z\-\d]+\/?$/.test(current.pathname)) {
    const storyPath = current.pathname.replace(/\/$/, '');
    window.location.assign(`${current.origin}${storyPath}/chuong-1`);
  } else {
    window.location.reload();
  }
}

/**
 * Ends the download session and saves everything collected so far as one
 * text file.
 *
 * Clears the `storyName` and `downloading` flags, treats every remaining
 * stored key as a chapter number, joins the chapters in ascending numeric
 * order (separated by blank lines), removes them from storage and triggers a
 * browser download named `<storyName>(c<first>-c<last>).txt`.
 *
 * Does nothing beyond clearing the flags when no chapter has been stored.
 *
 * @returns {Promise<void>}
 */
async function stopDownload() {
  console.log('Stop downloading');
  const storyName = GM_getValue('storyName');
  GM_deleteValue('storyName');
  GM_deleteValue('downloading');

  const chapters = GM_listValues();
  // Nothing collected: avoid offering an empty "undefined(...)" file.
  if (chapters.length === 0) return;

  // The comparator must return the difference, otherwise the stored order is kept.
  chapters.sort((a, b) => Number.parseInt(a, 10) - Number.parseInt(b, 10));

  const parts = await Promise.all(chapters.map((chapter) => GM_getValue(`${chapter}`)));
  chapters.forEach((ch) => GM_deleteValue(ch));
  // Normalize every run of newlines to exactly one blank line.
  const content = parts.join('\n\n').replace(/\n+/g, '\n\n');

  const download = document.createElement('a');
  // encodeURIComponent (unlike encodeURI) escapes '#', '?' and '&', which
  // would otherwise cut the data URI short at the first such character.
  download.href = 'data:attachment/text,' + encodeURIComponent(content);
  download.target = '_blank';
  download.download = `${storyName}(c${chapters[0]}-c${chapters.at(-1)}).txt`;
  download.click();
}

/**
 * Moves to the next chapter, choosing at random between two ways of doing it:
 * a simulated ArrowRight key press (taken when Math.random() > 0.4) or
 * scrolling to the navigation link and clicking it.
 */
function nextChapter() {
  console.log('Go to next chapter');

  const useKeyboard = Math.random() > .4;
  if (useKeyboard) {
    pressKey('ArrowRight'); //ArrowRight
    return;
  }

  const nextLink = document.querySelector('.container>div:nth-last-child(2)>div:first-child>a:last-of-type');
  nextLink.scrollIntoView({ behavior: 'smooth' });
  nextLink.click();
}

/**
 * Tells whether the current chapter is the last one, i.e. whether the
 * navigation link used to go forward has an href ending in '#'.
 * @returns {boolean}
 */
function isLastChapter() {
  console.log('Is last chapter?');
  const nextLink = document.querySelector('.container>div:nth-last-child(2)>div:first-child>a:last-of-type');
  const { href } = nextLink;
  return href.endsWith('#');
}

/**
 * Scrapes the chapter currently shown, stores it, then either finishes the
 * download (on the last chapter) or moves on to the next chapter.
 *
 * The page is scrolled down and back up with randomized pauses in between.
 * The chapter is stored under its chapter number, with the chapter name as
 * the first line; the story name is stored under 'storyName'.
 *
 * @returns {Promise<void>}
 */
async function download() {
  console.log('Downloading...');

  scrollTo({ left: 0, top: 10000, behavior: 'smooth' });
  // Per the original author, about 350 ms is needed for the chapter content to load.
  const loadDelay = 500 + Math.random() * 600;
  await sleep(loadDelay);

  const { storyName, chapterNumber, chapterName, chapterContent } = getChapterContent();
  GM_setValue('storyName', storyName);
  GM_setValue(chapterNumber, `${chapterName}\n\n${chapterContent}`);

  scrollTo({ left: 0, top: 0, behavior: 'smooth' });
  const settleDelay = 100 + Math.random() * 300;
  await sleep(settleDelay);

  if (!isLastChapter()) {
    nextChapter();
    return;
  }
  await stopDownload();
}

// Entry point: wires up chapter-change detection and the start/stop shortcuts.
(async function(){
  // Run only in the top-level document, never inside frames.
  if (window !== window.top) return;

  // True when the keyboard event matches a shortcut description; the
  // shortcut's `key` is accepted as either `event.key` or `event.code`.
  const matchesShortcut = (e, shortcut) =>
    e.ctrlKey === shortcut.ctrlKey &&
    e.altKey === shortcut.altKey &&
    e.shiftKey === shortcut.shiftKey &&
    (e.key === shortcut.key || e.code === shortcut.key);

  // Reacts to a newly shown chapter: scrape and continue while a download is
  // in progress, otherwise just log the chapter text.
  const handleChapterShown = async () => {
    if (GM_getValue('downloading', false)) await download();
    else console.log(getChapterContent().chapterContent);
  };

  // Runs the handler without awaiting it, reporting failures instead of
  // leaving an unhandled promise rejection behind.
  const runChapterHandler = () => {
    handleChapterShown().catch((err) => console.error(err));
  };

  if (useEvent) {
    addUrlChangeEvent();
    //await sleep(1000);
    const txt = getChapterContent();
    console.log(txt.chapterContent);

    window.addEventListener('urlchange', runChapterHandler);
  } else {
    let oldURL = '';

    // className is an SVGAnimatedString (no `includes`) on SVG elements, so
    // only string class names are inspected.
    const isPublishedContent = (target) =>
      typeof target.className === 'string' && target.className.includes('published-content');

    const observer = new MutationObserver((mList) => {
      for (const m of mList) {
        if (!isPublishedContent(m.target) || window.location.href === oldURL) continue;
        // Remember the URL first so later records for the same page are ignored.
        oldURL = window.location.href;
        runChapterHandler();
      }
    });
    observer.observe(document.querySelector('body'), { childList: true, subtree: true });
  }

  window.addEventListener('keydown', async (e) => {
    if (matchesShortcut(e, startKey)) startDownload(location.href);
    if (matchesShortcut(e, stopKey)) await stopDownload();
  });
})();