// ==UserScript==
// @name CNPatentFetcherClient
// @namespace http://tampermonkey.net/
// @version 0.6.2
// @description Browser-side worker that crawls Chinese patent announcements from epub.sipo.gov.cn and uploads them to a task server
// @author [email protected]
// @match http://epub.sipo.gov.cn/patentoutline.action
// @grant none
// ==/UserScript==
(async function() {
'use strict';
/**
 * Pause an async flow for a given duration.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that fulfils (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise(function (wake) {
    setTimeout(wake, ms);
  });
}
/**
 * Fetch one listing page from the patent site, extract its result items and
 * upload them to the task server.
 *
 * The search form is POSTed with the filter AD=BETWEEN['startDate','endDate']
 * and pageNow = page + 1. Every element with class "cp_linr" in the response
 * is serialised (after dropping its first "cp_botsm" descendant, if any) and
 * the concatenated HTML is POSTed as JSON to `${serverURL}/result`.
 * Progress and failures are written to the taskInfoBoard log panel.
 *
 * @param {string} startDate - Lower bound of the date filter, inserted verbatim into the form body.
 * @param {string} endDate - Upper bound of the date filter, inserted verbatim into the form body.
 * @param {number} page - Page index as issued by the task server; the site is asked for page + 1.
 * @param {string} serverURL - Base URL of the task server (no trailing slash).
 * @returns {Promise<number>} 0 when at least one item was found and uploaded,
 *   1 on any failure (no items, network error, rejected upload). Never rejects.
 */
async function fetchData(startDate, endDate, page, serverURL) {
  const url = "http://epub.sipo.gov.cn/patentoutline.action";
  const params = {
    "credentials": "include",
    "headers": {
      "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
      "accept-language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6,ja;q=0.5,la;q=0.4",
      "cache-control": "max-age=0",
      "content-type": "application/x-www-form-urlencoded",
      "upgrade-insecure-requests": "1"
    },
    "referrer": "http://epub.sipo.gov.cn/patentoutline.action",
    "referrerPolicy": "no-referrer-when-downgrade",
    "body": `showType=1&strSources=pip&strWhere=AD%3DBETWEEN%5B%27${startDate}%27%2C%27${endDate}%27%5D&numSortMethod=2&strLicenseCode=&numIp=0&numIpc=0&numIg=0&numIgc=0&numIgd=0&numUg=0&numUgc=0&numUgd=&numDg=0&numDgc=0&pageSize=10&pageNow=${page+1}`,
    "method": "POST",
    "mode": "cors"
  };
  try {
    const resp = await fetch(url, params);
    const text = await resp.text();
    const doc = new DOMParser().parseFromString(text, "text/html");
    const cpLinrs = doc.getElementsByClassName("cp_linr");
    let content = "";
    // Snapshot the collection so iteration does not depend on it being live.
    for (const cpLinr of Array.from(cpLinrs)) {
      // Drop the item's first "cp_botsm" element. It is removed through the
      // element itself because it may be missing or nested deeper than a
      // direct child; parent.removeChild() throws in both situations.
      const footer = cpLinr.getElementsByClassName("cp_botsm")[0];
      if (footer) {
        footer.remove();
      }
      content += `${cpLinr.outerHTML}\n`;
    }
    const result = { startDate, endDate, page, content };
    // The result is uploaded even when no item was found (content === "").
    const upload = await fetch(`${serverURL}/result`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(result)
    });
    if (!upload.ok) {
      // A rejected upload must not be reported as a successful page.
      throw new Error(`result upload rejected: HTTP ${upload.status}`);
    }
    if (cpLinrs.length < 1) {
      taskInfoBoard.append(`请求失败= ${startDate}-${endDate} #${page}`);
      return 1;
    }
    taskInfoBoard.append(`请求成功< ${startDate}-${endDate} #${page}`);
    return 0;
  } catch (err) {
    taskInfoBoard.append(`请求失败= ${startDate}-${endDate} #${page}: ${err}`);
    return 1;
  }
}
// True while a run is active; setting it to false asks the running loop to stop.
let started = false;
// Handle of the watchdog timer armed for the current loop iteration.
let timeout = 0;

/**
 * Click handler of the start/stop button and main worker loop.
 *
 * When no run is active it starts one: it repeatedly asks `${serverURL}/task`
 * for a task ({startDate, endDate, pages}), fetches all pages of the task
 * concurrently through fetchData() and waits for them before asking again.
 * When a run is already active it only requests a stop; the loop notices the
 * cleared `started` flag once the current iteration has finished.
 *
 * The run ends when the user stops it or when the server returns a task with
 * no pages. It restarts itself when the watchdog fired or after more than 20
 * consecutive failures (following a 10 s pause).
 *
 * @returns {Promise<void>} Settles when this invocation has finished; a
 *   restarted run continues in its own, un-awaited invocation.
 */
async function startExecute() {
  const button = document.getElementById("startExecute");
  if (started) {
    // Already running: request a stop and lock the button until the loop exits.
    started = false;
    clearTimeout(timeout);
    button.disabled = true;
    return;
  }
  // Start a new run.
  started = true;
  button.value = "停止执行";
  button.disabled = false;
  const serverURL = document.getElementById("serverURL").value;
  const applyTaskURL = `${serverURL}/task`;
  let errCount = 0;
  let stopped = false;
  let finished = false;
  while (!stopped && started) {
    clearTimeout(timeout);
    // Watchdog: if an iteration takes longer than 45 s, flag the loop as
    // stopped so that a fresh run is started afterwards. In-flight requests
    // are not aborted; the flag is only seen when the iteration completes.
    timeout = setTimeout(() => {
      taskExceptionBoard.append("执行超时");
      stopped = true;
    }, 45000);
    try {
      const taskResp = await fetch(applyTaskURL);
      const task = await taskResp.json();
      if (task.pages.length === 0) {
        // No work left: finish the run instead of restarting, which would
        // poll the task server in a tight loop.
        finished = true;
        break;
      }
      const startAt = new Date();
      // Launch all page requests first so they run concurrently.
      const responses = task.pages.map((page) => {
        taskInfoBoard.append(`开始请求> ${task.startDate}-${task.endDate} #${page}`);
        return fetchData(task.startDate, task.endDate, page, serverURL);
      });
      taskInfoBoard.append(`>>>>>>>`);
      for (const pending of responses) {
        const ec = await pending;
        if (ec === 0) {
          // Any success resets the consecutive-failure counter.
          errCount = 0;
        } else {
          errCount += ec;
        }
      }
      const endAt = new Date();
      taskInfoBoard.append(`>>>>>>>>>>>> ${endAt - startAt}ms`);
    } catch (err) {
      taskInfoBoard.append(`执行异常= ${err}`);
      // Count the failure so that a dead or misbehaving task server triggers
      // the back-off below instead of an endless immediate retry.
      errCount += 1;
    }
    if (errCount > 20) {
      taskExceptionBoard.append(`连续执行失败= ${errCount}次`);
      stopped = true;
      await sleep(10000);
    }
  }
  // Do not leave the watchdog armed once the loop is over.
  clearTimeout(timeout);
  if (started && !finished) {
    // Stopped by the watchdog or by repeated failures: start a fresh run.
    started = false;
    startExecute();
  } else {
    started = false;
    taskInfoBoard.append(`执行结束`);
    button.value = "开始执行";
    button.disabled = false;
  }
}
// Log panels of the control box; both are assigned by insertControlBox().
let taskInfoBoard = null;
let taskExceptionBoard = null;

/**
 * Turn an element into a rolling log panel that shows the newest lines only.
 *
 * Installs `append(text)` and `clear()` directly on the element (this shadows
 * the element's native `append` method) and keeps `lines` equal to the number
 * of lines currently shown.
 *
 * @param {HTMLElement} board - Element that displays the log.
 * @param {number} maxLines - Maximum number of lines kept on screen.
 * @returns {HTMLElement} The same element, for convenience.
 */
function makeLogBoard(board, maxLines) {
  const entries = [];
  const render = () => {
    board.innerText = entries.map((entry) => `${entry}\n`).join("");
    board.lines = entries.length;
  };
  board.lines = 0;
  board.append = function (t) {
    entries.push(`${new Date().toISOString()} ${t}`);
    if (entries.length > maxLines) {
      entries.shift();
    }
    render();
  };
  board.clear = function () {
    entries.length = 0;
    render();
  };
  return board;
}

/**
 * Build the control box (server URL field, start/stop button and two log
 * panels), insert it at the top of the page's content column and wire the
 * button to startExecute().
 *
 * Side effects: assigns taskInfoBoard (15 lines) and taskExceptionBoard
 * (10 lines).
 */
function insertControlBox() {
  const parsed = new DOMParser().parseFromString(`<div id="controlBox" style="border:1px solid red;">
<h3>爬取中国专利公告任务执行前端</h3>
server: <input type="text" id="serverURL" value="http://localhost:6789" size="96"><br/>
<input type="button" id="startExecute" value="开始执行">
<p id="taskInfoBoard" style="border:1px solid green"></p>
<p id="taskExceptionBoard" style="color:red;border:1px solid black"></p>
</div>`, "text/html");
  const controlBox = parsed.getElementById("controlBox");
  // Fall back to <body> if the expected content column is not on the page.
  const content = document.getElementsByClassName("w790 right")[0] || document.body;
  content.insertBefore(controlBox, content.firstElementChild);
  taskExceptionBoard = makeLogBoard(document.getElementById("taskExceptionBoard"), 10);
  taskInfoBoard = makeLogBoard(document.getElementById("taskInfoBoard"), 15);
  // action
  document.getElementById("startExecute").addEventListener("click", startExecute, false);
}
insertControlBox()
})();