// try to take over the world!
// ==UserScript==
// @name         CNPatentFetcherClient
// @namespace    http://tampermonkey.net/
// @version      0.6.2
// @description  try to take over the world!
// @author       [email protected]
// @match        http://epub.sipo.gov.cn/patentoutline.action
// @grant        none
// ==/UserScript==

/*
 * Browser-side worker for a patent-announcement crawler.
 *
 * The script injects a small control box into the patent outline page. Once
 * started it repeatedly asks a task server (`<serverURL>/task`) for a date
 * range plus a list of page numbers, requests each of those pages from the
 * patent site, extracts the `.cp_linr` elements and posts their HTML back to
 * `<serverURL>/result`.
 */
(async function() {
    'use strict';

    const PATENT_OUTLINE_URL = "http://epub.sipo.gov.cn/patentoutline.action";
    // A batch that has not completed within this time flags the run for restart.
    const WATCHDOG_TIMEOUT_MS = 45000;
    // Pause used after too many consecutive failures and when no pages are pending.
    const BACKOFF_MS = 10000;
    // The run backs off once the consecutive-failure counter exceeds this value.
    const MAX_CONSECUTIVE_ERRORS = 20;

    /**
     * Resolve after the given delay.
     * @param {number} ms - delay in milliseconds
     * @returns {Promise<void>}
     */
    function sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    /**
     * Wrap an element as a bounded, timestamped log board.
     *
     * Every appended line is stored as a text node followed by a <br>, so the
     * oldest line can be dropped by removing the first two child nodes.
     *
     * @param {HTMLElement} element - element that displays the log
     * @param {number} maxLines - maximum number of lines kept on screen
     * @returns {{append: function(*): void, clear: function(): void}}
     */
    function createLogBoard(element, maxLines) {
        let lineCount = 0;
        return {
            append(text) {
                const line = `${new Date().toISOString()} ${text}`;
                element.appendChild(document.createTextNode(line));
                element.appendChild(document.createElement("br"));
                lineCount += 1;
                while (lineCount > maxLines) {
                    // Drop the oldest line: its text node and its <br>.
                    element.removeChild(element.firstChild);
                    element.removeChild(element.firstChild);
                    lineCount -= 1;
                }
            },
            clear() {
                element.textContent = "";
                lineCount = 0;
            },
        };
    }

    // Both boards are created by insertControlBox() before any task can run.
    let taskInfoBoard = null;
    let taskExceptionBoard = null;

    /**
     * Fetch one result page from the patent site and upload the extracted
     * entries to the task server.
     *
     * @param {string} startDate - lower bound of the AD=BETWEEN[...] filter
     * @param {string} endDate - upper bound of the AD=BETWEEN[...] filter
     * @param {number} page - zero-based page index (sent as pageNow = page + 1)
     * @param {string} serverURL - base URL of the task server
     * @returns {Promise<number>} 0 on success, 1 on failure; never rejects
     */
    async function fetchData(startDate, endDate, page, serverURL) {
        const request = {
            "credentials": "include",
            "headers": {
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "accept-language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6,ja;q=0.5,la;q=0.4",
                "cache-control": "max-age=0",
                "content-type": "application/x-www-form-urlencoded",
                "upgrade-insecure-requests": "1"
            },
            "referrer": PATENT_OUTLINE_URL,
            "referrerPolicy": "no-referrer-when-downgrade",
            "body": `showType=1&strSources=pip&strWhere=AD%3DBETWEEN%5B%27${startDate}%27%2C%27${endDate}%27%5D&numSortMethod=2&strLicenseCode=&numIp=0&numIpc=0&numIg=0&numIgc=0&numIgd=0&numUg=0&numUgc=0&numUgd=&numDg=0&numDgc=0&pageSize=10&pageNow=${page+1}`,
            "method": "POST",
            "mode": "cors"
        };
        try {
            const resp = await fetch(PATENT_OUTLINE_URL, request);
            const html = await resp.text();
            const doc = new DOMParser().parseFromString(html, "text/html");
            const cpLinrs = doc.getElementsByClassName("cp_linr");

            let content = "";
            for (let i = 0; i < cpLinrs.length; i++) {
                const cpLinr = cpLinrs[i];
                // Strip the "cp_botsm" part before serialising. It may be
                // absent or nested, so do not assume it is a direct child.
                const botsm = cpLinr.getElementsByClassName("cp_botsm")[0];
                if (botsm) {
                    botsm.remove();
                }
                content = content.concat(cpLinr.outerHTML, "\n");
            }

            // The result is uploaded even when it is empty, so the server
            // always receives an answer for the page.
            const result = {
                startDate: startDate,
                endDate: endDate,
                page: page,
                content: content
            };
            const upload = await fetch(`${serverURL}/result`, {
                method: "POST",
                headers: {"content-type": "application/json"},
                body: JSON.stringify(result)
            });
            if (!upload.ok) {
                // Without this check a rejected upload was reported as success.
                throw new Error(`result upload failed with HTTP ${upload.status}`);
            }

            if (cpLinrs.length < 1) {
                taskInfoBoard.append(`请求失败= ${startDate}-${endDate} #${page}`);
                return 1;
            }
            taskInfoBoard.append(`请求成功< ${startDate}-${endDate} #${page}`);
            return 0;
        } catch (err) {
            taskInfoBoard.append(`请求失败= ${startDate}-${endDate} #${page}: ${err}`);
            return 1;
        }
    }

    // True while a run is active; the button click toggles it.
    let started = false;
    // Handle of the current watchdog timer.
    let timeout = 0;

    /**
     * Click handler of the start/stop button.
     *
     * When idle it starts a run that keeps pulling tasks from the server.
     * When a run is active it only requests a stop: the run finishes its
     * current batch, then reports the end and re-enables the button.
     *
     * @returns {Promise<void>}
     */
    async function startExecute() {
        const button = document.getElementById("startExecute");

        if (started) {
            // Already running: request a stop and lock the button until the
            // active run has wound down.
            started = false;
            clearTimeout(timeout);
            button.disabled = true;
            return;
        }

        // Start a new run.
        started = true;
        button.value = "停止执行";
        button.disabled = false;

        const serverURL = document.getElementById("serverURL").value;
        const applyTaskURL = `${serverURL}/task`;
        let errCount = 0;
        let stopped = false;

        while (!stopped && started) {
            // Watchdog: if this batch takes longer than the timeout, flag the
            // run so that it ends after the batch and a fresh run is started.
            // NOTE: this only sets a flag; it does not abort a hung request.
            clearTimeout(timeout);
            timeout = setTimeout(() => {
                taskExceptionBoard.append("执行超时");
                stopped = true;
            }, WATCHDOG_TIMEOUT_MS);

            try {
                const taskResp = await fetch(applyTaskURL);
                const task = await taskResp.json();
                if (task.pages.length === 0) {
                    // Nothing to do right now: pause before the run is
                    // restarted instead of polling the server in a tight loop.
                    await sleep(BACKOFF_MS);
                    break;
                }

                const startAt = new Date();
                // Fire all page requests of the batch in parallel.
                const responses = task.pages.map(page => {
                    taskInfoBoard.append(`开始请求> ${task.startDate}-${task.endDate} #${page}`);
                    return fetchData(task.startDate, task.endDate, page, serverURL);
                });
                taskInfoBoard.append(`>>>>>>>`);

                // fetchData never rejects, so awaiting in order is safe. Any
                // success resets the consecutive-failure counter.
                for (const response of responses) {
                    const ec = await response;
                    if (ec === 0) {
                        errCount = 0;
                    } else {
                        errCount += ec;
                    }
                }
                const endAt = new Date();
                taskInfoBoard.append(`>>>>>>>>>>>> ${endAt-startAt}ms`);
            } catch (err) {
                taskInfoBoard.append(`执行异常= ${err}`);
                // Count the failure so an unreachable or misbehaving task
                // server eventually triggers the back-off below.
                errCount += 1;
            }

            if (errCount > MAX_CONSECUTIVE_ERRORS) {
                taskExceptionBoard.append(`连续执行失败= ${errCount}次`);
                stopped = true;
                await sleep(BACKOFF_MS);
            }
        }

        // Never leave a watchdog armed for a loop that has already ended.
        clearTimeout(timeout);

        if (started) {
            // The run ended by itself (watchdog, back-off or no pending
            // pages) and the user did not ask to stop: start a fresh run.
            // Deliberately not awaited, so restarts do not chain promises.
            started = false;
            startExecute();
        } else {
            taskInfoBoard.append(`执行结束`);
            button.value = "开始执行";
            button.disabled = false;
        }
    }

    /**
     * Build the control box, insert it at the top of the page's content
     * column and wire up the log boards and the start/stop button.
     */
    function insertControlBox() {
        const parsed = new DOMParser().parseFromString(`<div id="controlBox" style="border:1px solid red;">
            <h3>爬取中国专利公告任务执行前端</h3>
            server: <input type="text" id="serverURL" value="http://localhost:6789" size="96"><br/>
            <input type="button" id="startExecute" value="开始执行">
            <p id="taskInfoBoard" style="border:1px solid green"></p>
            <p id="taskExceptionBoard" style="color:red;border:1px solid black"></p>
            </div>`, "text/html");
        const controlBox = parsed.getElementById("controlBox");

        // Fall back to <body> when the expected content column is missing.
        const container = document.getElementsByClassName("w790 right")[0] || document.body;
        container.insertBefore(controlBox, container.firstElementChild);

        taskExceptionBoard = createLogBoard(document.getElementById("taskExceptionBoard"), 10);
        taskInfoBoard = createLogBoard(document.getElementById("taskInfoBoard"), 15);

        document.getElementById("startExecute").addEventListener("click", startExecute, false);
    }

    insertControlBox();
})();