页面合并器(PageMerger)

从链接列表中获取和合并页面,并将每个页面的主要内容放入一个页面中!fetch and merge pages from urls and put main content of each page into one page!

// ==UserScript==
// @name         页面合并器(PageMerger)
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  从链接列表中获取和合并页面,并将每个页面的主要内容放入一个页面中!fetch and merge pages from urls and put main content of each page into one page!
// @author       ghosttk
// @match        */*
// @icon         
// @grant        none
// ==/UserScript==

(function() {
    'use strict';

    // Your code here...
    initialize()
})();

function initialize(){
    const aDiv = createDiv()
    setStyleSheet()
    document.querySelector('body').prepend(aDiv)
}

function getAnchors(){
  const anchors = $x('//a')
  //const groups = anchors.reduce(groupReducer, [])
  const groups = groupBy(anchors, 'parentElement')
}

function createDiv(){
    const aDiv = wwCE('div', {'id': 'PageMergerDiv'})
    aDiv.style.backgroundColor = '#ffffff'
    const aHeader = wwCE('sidebar', {'innerText': 'PageMerger(页面合并器):'})
    aDiv.append(aHeader)
    const cbList = wwCE('input', {'type': 'checkbox', 'id': 'ShowLists'})
    const labelList = wwCE('label', {'innerText': `${cbList.id}(显示列表)`, 'htmlFor': cbList.id , 'className': 'btnLike' })
    labelList.style.margin = '0px 12px 0px  0px'
    aDiv.append(cbList)
    aDiv.append(labelList)
    const startListBtn = wwCE('input', {'type': 'button', 'value': 'startList(全部合并)', 'disabled': 'true'})
    startListBtn.addEventListener('click', onStartList)
    aDiv.append(startListBtn)
    //const fieldset = getLists( '//ul | //ol' )
    const fieldset = getLists( '//div ' )
    fieldset.style.display = 'none'
    aDiv.append(fieldset)
    cbList.addEventListener('click', x => {
        fieldset.style.display = x.target.checked ? '' : 'none'
        startListBtn.disabled = x.target.checked ? '' : 'none'
    })
    return aDiv
}

function setStyleSheet(){
    // Create a new style tag
    const style = document.createElement("style");
    // Append the style tag to head
    document.head.appendChild(style);
    // Grab the stylesheet object
    const sheet = style.sheet
    // Use addRule or insertRule to inject styles
    sheet.insertRule('input[type="checkbox"] { margin: 0px 0px 0px 10px; display:inline}')
    sheet.insertRule('legend {background-color:SkyBlue; }')
    sheet.insertRule('label {display: inline; }')
    sheet.insertRule('input[type="button"] { background-color:DodgerBlue; color:White;}')
    sheet.insertRule('.btnLike { background-color:Salmon; color:Black; border:solid Blue; display:inline}')
}

//traverse all anchros from given elements and create fieldsets
function getLists( xpathParent ){
    //const uls = $x('//ul[//a] | //div[//a]')
    const uls = $x( xpathParent )
    console.log(uls)
    const outerField = wwCE('fieldset', {'id': 'urllist'})
    for(let i=0; i<uls.length; i++){
        const fieldset = wwCE('fieldset', {'id': `fieldset-group-${i}`})
        fieldset.style.border = '1px solid'
        const legend = createGroupLegend(i)
        fieldset.append( legend )
        const items = uls[i].querySelectorAll('a')
        for(let j=0; j<items.length ; j++){
            const url = items[j].href
            let text = items[j].textContent
            const cb = wwCE('input', {'type': 'checkbox', 'className': `group-${i}`, 'id': `group-${i}-url-${j}`})
            cb.setAttribute('url', url)
            const label = wwCE('label', {'innerText':text, 'htmlFor': cb.id})
            label.style.margin = '0px 12px 0px 0px'
            fieldset.append(cb)
            fieldset.append(label)
        }
        outerField.append(fieldset)
    }
    return outerField
}

function groupReducer(prev, curr) {
      let found = 0
      for(let i=0; i<prev.length; i++){
       if(prev[i].parentElement == curr.parentElement){
           found = 1
           break
       }
      }
    //create new category
      if( !found ){
          prev.push( curr )
      }
      return prev
  }

//create sub groups
function createGroupLegend(i){
    const legend = wwCE('legend', {'id': `legend-group-${i}`})
    const cbGroup = wwCE('input', {'type': 'checkbox', 'id': `group-${i}`})
    cbGroup.addEventListener('click', onGroupClicked)
    const labelGroup = wwCE('label', {'htmlFor': cbGroup.id, 'innerText': `group-${i}(选中本组)`})
    const startGroup = wwCE('input', {'type': 'button', 'value': `StartGroup_${i}(合并本组)`})
    startGroup.addEventListener( 'click', onStartGroup )
    legend.append(cbGroup)
    legend.append(labelGroup)
    legend.append(startGroup)
    return legend
}

function onGroupClicked(){
    const checkboxes = document.querySelectorAll(`input.${this.id}`)
    for(let i=0; i<checkboxes.length; i++){
        checkboxes[i].setAttribute('checked', this.checked ? true:false)
        checkboxes[i].checked = this.checked ? true : false
    }
}

//if startLlistBtn clicked
function onStartList(){
    const rtn = []
    const className = this.className
    const urls = $x(`//input[ @type = "checkbox" and @url and @class = ${className}`)
    //const urls = $x('//input[ @type = "checkbox" and @url]')
    for(let i = 0; i < urls.length; i++){
      if(urls[i].checked) rtn.push(urls[i].getAttribute('url'))
    }
    StartCrawler( rtn )
}

function onStartGroup(){
    const rtn = []
    const urls = $x('//input[ @type = "checkbox" and @url ]')
    for(let i = 0; i < urls.length; i++){
      if(urls[i].checked) rtn.push(urls[i].getAttribute('url'))
    }
    StartCrawler( rtn )
}

//@urls array of url
function StartCrawler(urls){
    const oldBody = document.querySelector('body')
    oldBody.innerHTML = ''
    oldBody.style.backgroundColor = '#ffffff'
    for(let i=0; i<urls.length; i++){
        crawler(urls[i], text => {
            const mainContent = getMainContent(text)
            oldBody.append( mainContent )
        })
    }
}
function wwCE(element, option){
    const elem = document.createElement(element)
    for( const prop in option) {
        elem[prop] = option[prop]
    }
    return elem
}
function getMainContent( text ){
    const parser = new DOMParser()
    const DOCUMENT = parser.parseFromString(text, 'text/html')
    const elems = $x('//div[count(p) > 0 ]', DOCUMENT)
    let bigger = []
    let record = 0
    for(let el of elems ){
        const paras = el.querySelectorAll('p')
        if( record < paras.length){
            record = paras.length
            bigger = el
        }
    }
    return bigger
}

function *genParse( title ) {
    let nth = yield title
    while( nth ){
        yield getMainContent( nth )
    }
}

//find elements by xpath
function $x(STR_XPATH, DOCUMENT = document) {
    let xresult = DOCUMENT.evaluate(STR_XPATH, DOCUMENT, null, XPathResult.ANY_TYPE, null);
    let xnodes = [];
    let xres;
    while (xres = xresult.iterateNext()) {
        xnodes.push(xres);
    }
    return xnodes;
}

function groupBy(objectArray, property) {
    return objectArray.reduce(function (acc, obj) {
        let key = obj[property]
        if (!acc[key]) {
            acc[key] = []
        }
          acc[key].push(obj)
          return acc
    }, {})
}

//send request for @url, get text response and excute @func on it
function crawler(url, func) {
    fetch(url)
        .then((response) => response.body)
        .then((rb) => {
        const reader = rb.getReader();
        return new ReadableStream({
            start(controller) {
                // The following function handles each data chunk
                function push() {
                    // "done" is a Boolean and value a "Uint8Array"
                    reader.read().then(({ done, value }) => {
                        // If there is no more data to read
                        if (done) {
                            controller.close();
                            return;
                        }
                        // Get the data and send it to the browser via the controller
                        controller.enqueue(value);
                        push();
                    });
                }
                push();
            },
        });
    })
        .then((stream) =>
              // Respond with our stream
              new Response(stream, { headers: { 'Content-Type': 'text/html' } }).text()
             )
        .then((result) => {
        // Do things with result
        func(result)
    });
}