Posts_Dumper

导出帖子内容到数据库

// ==UserScript==
// @name:zh-CN   帖子导出工具
// @name         Posts_Dumper
// @namespace    https://blog.chrxw.com
// @version      1.4
// @description:zh-CN  导出帖子内容到数据库
// @description  导出帖子内容到数据库
// @author       Chr_
// @match        https://keylol.com/*
// @match        https://dev.keylol.com/*
// @connect      127.0.0.1
// @connect      store.steampowered.com
// @license      AGPL-3.0
// @icon         https://blog.chrxw.com/favicon.ico
// @grant        GM_setValue
// @grant        GM_getValue
// @grant        GM_deleteValue
// @grant        GM_xmlhttpRequest
// @grant        GM_addStyle
// ==/UserScript==

setTimeout(async () => {
    'use strict';

    // Address of the local backend that this script talks to.
    const host = '127.0.0.1';
    const port = 8000;

    // Captures the digits that directly follow either the letter `t` or `tid=` in a URL.
    const matchTid = /(?:t|tid=)(\d+)/;

    // Element with id `threadlisttableid`; `null` when the current page has none.
    const treadList = document.getElementById('threadlisttableid');

    if (treadList !== null) {//获取帖子列表

        /**
         * Creates a `<button>` with the `pd_btn` class.
         * @param {string} name - Caption of the button.
         * @param {Function} foo - Listener registered for the `click` event.
         * @returns {HTMLButtonElement} The new button (not attached to the document).
         */
        function genBtn(name, foo) {
            const button = Object.assign(document.createElement('button'), {
                textContent: name,
                className: 'pd_btn',
            });
            button.addEventListener('click', foo);
            return button;
        }
        /**
         * Creates a `<div>`.
         * @param {string} [cls] - Class name; `pd_div` is used when it is `null` or `undefined`.
         * @returns {HTMLDivElement} The new element.
         */
        function genDiv(cls) {
            return Object.assign(document.createElement('div'), {
                className: cls ?? 'pd_div',
            });
        }
        /**
         * Creates a `<span>` showing the given text.
         * @param {string} text - Text content of the span.
         * @returns {HTMLSpanElement} The new element.
         */
        function genSpan(text) {
            return Object.assign(document.createElement('span'), { textContent: text });
        }
        /**
         * Creates an `<hr>` element.
         * @returns {HTMLHRElement} A fresh element on every call.
         */
        function genHr() {
            return document.createElement('hr');
        }
        /**
         * Creates a `<br>` element.
         * @returns {HTMLBRElement} A fresh element on every call.
         */
        function genBr() {
            return document.createElement('br');
        }
        /**
         * Creates an `<iframe>` element with no source set.
         * @returns {HTMLIFrameElement} A fresh element on every call.
         */
        function genIframe() {
            return document.createElement('iframe');
        }
        /**
         * Creates the `<input>` used for entering a thread id by hand.
         * @returns {HTMLInputElement} Input with the `pd_text` class and a placeholder.
         */
        function genText() {
            return Object.assign(document.createElement('input'), {
                placeholder: '帖子ID',
                className: 'pd_text',
            });
        }

        // Container for every control created below.
        const panel = genDiv('pd_panel');

        // Work iframes: the batch handlers load one thread per iframe, so the
        // length of this array is the number of threads scraped at a time.
        const tempIFrames = [genIframe(), genIframe(), genIframe()];
        const [tempIframe, tempIframe2, tempIframe3] = tempIFrames;

        // Probe the backend once; the result decides which controls get shown.
        const status = await testBackend();

        const statusTips = genSpan(status ? '连接成功' : '连接失败');

        /**
         * Scrapes every thread link of the list that matches `selector`.
         *
         * Links are processed in batches of `tempIFrames.length`: each link of a
         * batch is loaded into its own work iframe (with the `?utm=114514` marker
         * appended), flagged `pd_done`, and the batch is awaited through
         * `waitUnitlDone` before the next one starts. Progress is reported in
         * `statusTips`, and the list markers are refreshed at the end.
         *
         * @param {string} selector - CSS selector evaluated against `treadList`.
         * @returns {Promise<void>} Settles once the final list refresh has finished.
         */
        async function grubPostList(selector) {
            const postLists = treadList.querySelectorAll(selector);
            const total = postLists.length;
            if (total > 0) {
                statusTips.textContent = `开始抓取,共 ${total} 篇`;
                const workTread = tempIFrames.length;
                for (let i = 0; i < total; i += workTread) {
                    const max = Math.min(i + workTread, total);
                    const tasks = [];
                    for (let j = i; j < max; j++) {
                        const postTag = postLists[j];
                        const tid = grubTid(postTag.href);
                        // j - i is the slot of this link inside the current batch.
                        tempIFrames[j - i].src = `${genUrl(tid)}?utm=114514`;
                        postTag.classList.remove('pd_not_added');
                        postTag.classList.add('pd_done');
                        tasks.push(waitUnitlDone(tid));
                    }

                    await Promise.all(tasks);

                    statusTips.textContent = `抓取进度 ${max}/${total}`;
                }
                statusTips.textContent = '抓取结束';
            } else {
                statusTips.textContent = '没有可以抓取的帖子';
            }
            await freshPostList();
        }

        // Scrapes only the links currently flagged `pd_not_added`.
        const btnGrubNew = genBtn('抓取尚未记录的', () => grubPostList("th.common>a.pd_not_added.xst,th.new>a.pd_not_added.xst,th.lock>a.pd_not_added.xst"));

        // Scrapes every thread link of the list, whatever its flag.
        const btnGrubAll = genBtn('抓取所有', () => grubPostList("th.common>a.xst,th.new>a.xst,th.lock>a.xst"));

        // Input for a thread id typed by hand, and the button that scrapes that one thread.
        const txtTid = genText();
        const btnGrubOne = genBtn('手动抓取', async () => {
            // Explicit radix so the field is always read as a decimal number.
            const tid = Number.parseInt(txtTid.value, 10);
            if (!(tid > 0)) {
                // Also reached for NaN, which fails every comparison.
                alert('请输入整数 TID');
                return;
            }
            statusTips.textContent = `TID ${tid} 开始抓取`;
            tempIframe.src = `${genUrl(tid)}?utm=114514`;
            const result = await waitUnitlDone(tid);

            // The requested thread is not necessarily part of the visible list,
            // so only flag its link when one is actually found.
            const links = treadList.querySelectorAll("th.common>a.xst,th.new>a.xst,th.lock>a.xst");
            const postTag = [...links].find((link) => String(grubTid(link.href)) === String(tid));
            if (postTag) {
                postTag.classList.remove('pd_not_added');
                postTag.classList.remove('pd_added');
                postTag.classList.add('pd_done');
            }
            statusTips.textContent = `TID ${tid} ${result}`;

            await freshPostList();
        });

        // Builds a click listener that opens `path` of the local backend in a new window.
        const openBackendPage = (path) => () => {
            window.open(`http://${host}:${port}${path}`);
        };

        // Download endpoints served by the backend.
        const btnExportExcel = genBtn('导出Excel', openBackendPage('/api/excel'));

        const btnExportBBCode = genBtn('导出BBCode', openBackendPage('/api/bbcode'));

        // Destructive action: only runs after the user confirms the dialog.
        const btnResetDB = genBtn('重置数据库(删除所有数据)', async () => {
            const confirmed = confirm('真的要删除所有数据吗?');
            if (!confirmed) {
                return;
            }
            await deleteAllData();
        });

        const btnControl = genBtn('在管理面板浏览数据', openBackendPage('/index.html'));

        // The status line is always shown, whatever the backend state.
        panel.appendChild(statusTips);
        panel.appendChild(genHr());

        if (status) {
            // Backend reachable: add the controls, one <hr> between groups,
            // followed by the three work iframes.
            const children = [
                btnGrubNew, btnGrubAll, genHr(),
                txtTid, btnGrubOne, genHr(),
                btnExportExcel, btnExportBBCode, genHr(),
                btnResetDB, genHr(),
                btnControl, genHr(),
                tempIframe, genBr(),
                tempIframe2, genBr(),
                tempIframe3,
            ];
            for (const child of children) {
                panel.appendChild(child);
            }

            // Re-mark the list 500 ms after the `autopbn` element is clicked
            // (presumably the forum's "load next page" button - confirm).
            // The element is looked up defensively: without the `?.` a page that
            // lacks it would throw here and the panel would never be attached.
            document.getElementById('autopbn')?.addEventListener('click', () => {
                setTimeout(() => {
                    freshPostList().catch((error) => console.error(error));
                }, 500);
            });

            // Initial marking of already scraped threads. A failure here must not
            // prevent the panel from being shown, so it is only logged.
            try {
                await freshPostList();
            } catch (error) {
                console.error(error);
            }
        } else {
            // Backend unreachable: show a hint, then hide the panel after 3 s.
            panel.appendChild(genSpan('请检查软件是否运行以及端口是否被占用'));

            setTimeout(() => {
                panel.style.display = 'none';
            }, 3000);
        }

        document.body.appendChild(panel);

    } else if (ifNeedGrub()) {//抓取帖子内容
        // Identity of the thread being scraped, taken from the current URL.
        const tid = grubTid(location.href);
        const post_url = genUrl(tid);

        // Every field falls back to '获取失败' when its element is missing.
        const post_title = document.getElementById('thread_subject')?.textContent ?? '获取失败';
        const eleAuthor = document.querySelector('div.pi>div.authi>a.xw1');
        const author_nick = eleAuthor?.textContent ?? '获取失败';
        // Strips the profile-URL prefix so that only the part after `suid-` remains.
        const author_uid = eleAuthor?.href.replace('https://keylol.com/suid-', '') ?? '获取失败';
        // Drops the first 4 characters of the text (presumably a fixed label in front of the date - confirm).
        const post_date = document.querySelector('div.pti>div.authi>em[id]')?.textContent.substring(4) ?? '获取失败';

        // First element whose id starts with `postmessage`. The attribute selector
        // is now properly closed with `]` instead of relying on parser leniency.
        const eleContent = document.querySelector('td[id^=postmessage]');
        const nodes = eleContent?.childNodes ?? [];
        // Filled by node2text() with the text lines of the post.
        const contentLines = [];
        /**
         * Walks `node` depth-first and appends its usable text to `contentLines`.
         *
         * Skipped entirely (children included): `I`, `A`, `IFRAME`, `STYLE`,
         * `SCRIPT` and `IMG` nodes, and `DIV`s carrying the `aimg_tip` class.
         * A text node contributes its trimmed text only when that text is longer
         * than 2 characters and does not contain the repost notice.
         *
         * @param {Node} node - Node to inspect.
         * @returns {void}
         */
        function node2text(node) {
            const tagName = node.nodeName;
            if (['I', 'A', 'IFRAME', 'STYLE', 'SCRIPT', 'IMG'].includes(tagName)) {
                return;
            }
            if (tagName === 'DIV' && node.classList.contains('aimg_tip')) {
                return;
            }

            if (node.nodeType !== Node.TEXT_NODE) {
                // Element (or other container): descend into its children, if any.
                const children = node.childNodes;
                if (children?.length > 0) {
                    for (const child of children) {
                        node2text(child);
                    }
                }
                return;
            }

            const text = node.textContent?.trim();
            if (text && text.length > 2 && !text.includes('未经许可,严禁转载')) {
                contentLines.push(text);
            }
        }

        // Collect the text of the post body, one entry per usable text node.
        for (const node of nodes) {
            node2text(node);
        }
        const content = contentLines.join('\n');

        // Gather the distinct app ids referenced by Steam store / SteamDB links.
        const steamLinks = document.querySelectorAll("a[href^='https://store.steampowered.com/'],a[href^='https://steamdb.info/app/']");
        const grubAppid = /app\/(\d+)\/?/;
        const appIDsSet = new Set();
        for (const ele of steamLinks) {
            const appID = Number.parseInt(grubAppid.exec(ele.href ?? '')?.[1] ?? '0', 10);
            if (appID > 0) {
                appIDsSet.add(appID);
            }
        }

        const appIDs = [...appIDsSet];
        const bbcodes = [];
        const excels = [];

        // Look up all game names in parallel; results keep the order of appIDs.
        const values = await Promise.all(appIDs.map((appid) => getGameName(appid)));

        values.forEach((value, index) => {
            // A failed lookup may not yield a [success, name, appid] tuple, so the
            // result is validated instead of destructured blindly, and the app id
            // is taken from the request list rather than from the result.
            const [succ, rawName] = Array.isArray(value) ? value : [false, undefined];
            const appid = appIDs[index];
            const name = succ ? rawName : `【${rawName ?? '读取出错'}】`;
            bbcodes.push(`[url=https://store.steampowered.com/app/${appid}/]${name}[/url]`);
            excels.push(`${name} https://store.steampowered.com/app/${appid}/`);
        });

        const game_list = appIDs.join(' | ');
        const game_bbcode = bbcodes.join('\n');
        const game_excel = excels.join('\r\n');
        const data = { tid, post_url, post_title, author_nick, author_uid, post_date, content, game_list, game_bbcode, game_excel };
        console.log(data);
        try {
            // Signal completion only after the upload has settled: the list page
            // reuses this iframe and re-reads the stored ids as soon as the flag
            // appears, which would race with an upload still in flight.
            await savePostData(data);
            GM_setValue(tid, '抓取完成');
        }
        catch (error) {
            // Store a readable message rather than the raw error object.
            GM_setValue(tid, String(error));
        }
    }

    /**
     * Marks every thread link of the list as scraped or not scraped.
     *
     * Fetches the stored thread ids, then resets the three marker classes on
     * each link and sets `pd_added` / `pd_not_added` plus a matching tooltip.
     *
     * @returns {Promise<void>} Rejects when `getPostIds()` rejects.
     */
    async function freshPostList() {
        const knownTids = await getPostIds();
        const links = treadList.querySelectorAll("th.common>a.xst,th.new>a.xst,th.lock>a.xst");
        links.forEach((link) => {
            const isAdded = knownTids.has(grubTid(link.href));

            link.classList.remove('pd_not_added', 'pd_added', 'pd_done');
            link.classList.add(isAdded ? 'pd_added' : 'pd_not_added');
            link.title = isAdded ? '【已抓取】' : '【未抓取】';
        });
    }

    /**
     * Tells whether the current page should be scraped.
     *
     * That is the case when the query string ends with the `utm=114514` marker
     * appended by the list page and the URL contains a thread id.
     *
     * @returns {boolean} `true` only when both conditions hold.
     */
    function ifNeedGrub() {
        // `test()` already returns a boolean; the former `>= 0` comparison was
        // always true because `false >= 0` coerces to `0 >= 0`.
        return location.search.endsWith('utm=114514') && matchTid.test(location.href);
    }

    /**
     * Extracts the thread id from a URL using `matchTid`.
     *
     * @param {string} url - URL to inspect.
     * @returns {string|null} The captured digits, or `null` when the URL has no thread id.
     */
    function grubTid(url) {
        // The former fallback re-ran the same pattern and could only yield null.
        return matchTid.exec(url)?.[1] ?? null;
    }

    /**
     * Builds the address of the first page of a thread.
     *
     * @param {string|number} tid - Thread id.
     * @returns {string} URL of the form `https://keylol.com/t<tid>-1-1`.
     */
    function genUrl(tid) {
        const origin = 'https://keylol.com';
        return `${origin}/t${tid}-1-1`;
    }

    //-----------------------------------
    /**
     * Checks whether the local backend answers its test endpoint.
     *
     * @returns {Promise<boolean>} `true` when the response carries `code === 666`;
     *   `false` for any other response or when the request fails. Never rejects.
     */
    async function testBackend() {
        try {
            const response = await $http.get(`http://${host}:${port}/api/test`);
            return response?.code === 666;
        } catch {
            return false;
        }
    }
    /**
     * Waits until a result has been stored under `tid` in the userscript storage.
     *
     * The stored value is polled every `interval` ms. As soon as a truthy value
     * appears it is removed from storage and returned. When nothing shows up
     * within `timeout` ms the wait ends with the text '操作超时'.
     *
     * @param {string|number} tid - Storage key to watch (the thread id).
     * @param {number} [timeout=10000] - Maximum wait in milliseconds.
     * @param {number} [interval=50] - Polling period in milliseconds.
     * @returns {Promise<*>} The stored value, or '操作超时' on timeout. Never rejects.
     */
    function waitUnitlDone(tid, timeout = 10000, interval = 50) {
        return new Promise((resolve) => {
            // Drop any value left behind by an earlier run that finished after
            // its own timeout; otherwise this wait would resolve immediately
            // with that stale result.
            GM_deleteValue(tid);

            let timeoutId;

            const pollId = setInterval(() => {
                const fin = GM_getValue(tid);
                if (fin) {
                    clearInterval(pollId);
                    // The deadline is a timeout handle, so clear it as one.
                    clearTimeout(timeoutId);
                    GM_deleteValue(tid);
                    resolve(fin);
                }
            }, interval);

            timeoutId = setTimeout(() => {
                clearInterval(pollId);
                GM_deleteValue(tid);
                resolve('操作超时');
            }, timeout);
        });
    }
    /**
     * Fetches the ids of the threads already stored by the backend.
     *
     * @returns {Promise<Set>} The ids from the response's `data` array. The set is
     *   empty (and the message is logged) when the response code is not 0.
     *   Rejects when the request itself fails.
     */
    async function getPostIds() {
        const response = await $http.get(`http://${host}:${port}/api/posts/ids`);
        if (response?.code !== 0) {
            console.error(response?.msg ?? '消息为空');
            return new Set();
        }
        return new Set(response?.data ?? []);
    }
    /**
     * Uploads one scraped post to the backend as JSON.
     *
     * @param {object} data - Post record to store.
     * @returns {Promise<boolean>} `true` when the backend answers with `code === 0`
     *   (the same success code `getPostIds` checks for), `false` for any other
     *   answer or when the request fails. Never rejects.
     */
    async function savePostData(data) {
        try {
            const response = await $http.post(`http://${host}:${port}/api/post`, JSON.stringify(data));
            console.log(response);
            // Was `!== 0`, which reported failures as success and vice versa.
            return response?.code === 0;
        } catch (reason) {
            console.log(reason);
            return false;
        }
    }
    /**
     * Asks the backend to delete every stored post.
     *
     * @returns {Promise<boolean>} `true` when the backend answers with `code === 0`
     *   (the same success code `getPostIds` checks for), `false` for any other
     *   answer or when the request fails. Never rejects.
     */
    async function deleteAllData() {
        try {
            const response = await $http.delete(`http://${host}:${port}/api/posts`);
            console.log(response);
            // Was `!== 0`, which reported failures as success and vice versa.
            return response?.code === 0;
        } catch (reason) {
            console.log(reason);
            return false;
        }
    }
    /**
     * Looks up the (Simplified Chinese) store name of a Steam app.
     *
     * @param {number|string} appid - Steam app id.
     * @returns {Promise<Array>} Always a `[success, name, appid]` tuple:
     *   - `success` is `true` only when the store API reports success;
     *   - `name` is the game name, or on failure the rejection text when the
     *     request was rejected with a string, otherwise `undefined`;
     *   - `appid` is the id that was passed in.
     *   Never rejects.
     */
    async function getGameName(appid) {
        try {
            const response = await $http.get(`https://store.steampowered.com/api/appdetails?appids=${appid}&l=schinese`);
            // The entry (or its `data`) may be missing when the lookup fails, so
            // neither is dereferenced unconditionally.
            const { success, data } = response?.[appid] ?? {};
            return [success === true, data?.name, appid];
        } catch (reason) {
            console.log(reason);
            // A promise settles with a single value, so the tuple has to be one
            // array; passing three arguments to resolve() kept only `false`.
            return [false, typeof reason === 'string' ? reason : undefined, appid];
        }
    }
}, 500);
//-----------------------------------
/**
 * Small promise wrapper around `GM_xmlhttpRequest`.
 *
 * Every method resolves with the response body once the request has loaded
 * with HTTP status 200, and rejects otherwise (with the event passed to
 * `onerror` / `ontimeout`, or with the text '解析出错' for any other status).
 * The option object passed by the caller is never modified.
 */
class Request {
    /**
     * @param {number} [timeout=3000] - Timeout handed to every request.
     */
    constructor(timeout = 3000) {
        this.timeout = timeout;
    }

    /** GET request; resolves with the `response` property (JSON response type). */
    get(url, opt = {}) {
        return this.#baseRequest(url, 'GET', opt, 'json');
    }

    /** GET request with an empty response type; resolves with `responseXML`. */
    getHtml(url, opt = {}) {
        return this.#baseRequest(url, 'GET', opt, '');
    }

    /** GET request; resolves with `responseText`. */
    getText(url, opt = {}) {
        return this.#baseRequest(url, 'GET', opt, 'text');
    }

    /**
     * POST request with a JSON content type; resolves with the `response` property.
     * @param {string} url - Target address.
     * @param {string} data - Request body (already serialized).
     * @param {object} [opt] - Extra request options; other headers are kept.
     */
    post(url, data, opt = {}) {
        const headers = { ...opt.headers, 'Content-Type': 'application/json' };
        return this.#baseRequest(url, 'POST', { ...opt, data, headers }, 'json');
    }

    /** DELETE request; resolves with the `response` property (JSON response type). */
    delete(url, opt = {}) {
        return this.#baseRequest(url, 'DELETE', opt, 'json');
    }

    /**
     * Issues the request and adapts the callback API to a promise.
     * @param {string} url - Target address.
     * @param {string} [method='GET'] - HTTP method.
     * @param {object} [opt] - Extra options copied into the request details.
     * @param {string} [responseType='json'] - 'json', 'text', or anything else for XML.
     * @returns {Promise<*>} See the class description.
     */
    #baseRequest(url, method = 'GET', opt = {}, responseType = 'json') {
        return new Promise((resolve, reject) => {
            GM_xmlhttpRequest({
                // Copy instead of assigning into `opt`, so the caller's object stays untouched.
                ...opt,
                url,
                method,
                responseType,
                timeout: this.timeout,
                ontimeout: reject,
                onerror: reject,
                onload: ({ readyState, status, response, responseXML, responseText }) => {
                    if (readyState === 4 && status === 200) {
                        if (responseType === 'json') {
                            resolve(response);
                        } else if (responseType === 'text') {
                            resolve(responseText);
                        } else {
                            resolve(responseXML);
                        }
                    } else {
                        console.error('网络错误');
                        console.log(readyState);
                        console.log(status);
                        console.log(response);
                        reject('解析出错');
                    }
                },
            });
        });
    }
}
// Shared request helper used by the API wrapper functions of this script;
// created with Request's default timeout (3000 ms).
const $http = new Request();

// Stylesheet for the control panel and the per-thread status markers
GM_addStyle(`
.pd_div {
    vertical-align: middle;
  }
  
  .pd_panel {
    background: rgba(58, 58, 58, 0.5);
    position: fixed;
    top: 50%;
    right: 0px;
    text-align: center;
    transform: translate(0px, -50%);
    z-index: 100;
    padding: 5px;
    border-radius: 5px 0 0 5px;
  }
  
  .pd_panel > *:not(:last-child) {
    margin-right: 5px;
  }
  
  .pd_panel > hr {
    margin: 5px 0 5px;
  }
  
  .pd_panel > span {
    color: #fff;
  }
  
  .pd_panel > iframe {
    width: 200px;
    height: 50px;
  }
  
  .pd_added::before {
    content: "✅";
  }
  
  .pd_not_added::before {
    content: "❌";
  }
  
  .pd_done::before {
    content: "🤔";
  }
  
  .pd_text {
    width: 90px;
    text-align: center;
  }
`);