Greasy Fork is available in English.

mxz_crawler

B站评论区原神玩家纯度检测

// ==UserScript==
// @name         mxz_crawler
// @namespace    www.cber.ltd
// @version      0.2.2
// @description  B站评论区原神玩家纯度检测
// @author       Tom
// @match        https://www.bilibili.com/video/*
// @match        https://t.bilibili.com/*
// @match        https://space.bilibili.com/*
// @match        https://www.bilibili.com/read/*
// @match        https://www.bilibili.com/opus/*
// @icon         https://static.hdslb.com/images/favicon.ico
// @connect      bilibili.com
// @connect      fastly.jsdelivr.net
// @connect      raw.githubusercontent.com
// @grant        GM_xmlhttpRequest
// @grant        GM_setValue
// @grant        GM_getValue
// @grant        GM_deleteValue
// @grant        GM_addValueChangeListener
// @license MIT
// @run-at document-end
// ==/UserScript==

// TODO: 如果已经有相同的人在不同的地方留言,应该直接给他加上level: DONE
// TODO: 如何解决验证的问题?调低每个人需要的动态数量? : DONE
// TODO: 取名!发帖!
// TODO: 翻页有bug,在一个评论区下翻页无法显示新的: DONE
// TODO: 加入缓存机制? 存在 localStorage 里? user-id: level, timestamp (设置30天): DONE
// TODO: 仙的tag?
// TODO: 在个人空间主页分析?更加详细的分析?在评论区粗略分析?
// TODO: 结合关注列表进行分析?看过的主播?


// TODO: 转发仙列表?
// TODO: 在菜单栏中增加一个选项让用户跳转到动态页面进行验证码输入

// Generation counter for crawl passes. The polling loop increments it every time it
// starts a new pass, and running crawls compare their own number against it so they
// can stop once a newer pass has begun.
var thread_number = 0;
// Keyword sources that mark a post as related to miHoYo games. fillLists() later
// replaces every string in this array with a RegExp built from it.
let mxz_tags = ["原神", "原宝", "崩坏", "星铁", "星穹铁道", "米哈游", "芙芙", "提瓦特", "旅行者", "派蒙", "稻妻", "枫丹", "蒙德", "璃月", "尘歌壶",
    "mhy", "绝区零", "散宝", "魈宝"];

let xianLists = [];  // "xian" uids; fillLists() stores four Sets here (xianList, xianLv1List, xianLv2List, xianLv3List)
let xianFavList = [];  // original note: "anti-xian? uid" (the author was unsure); fillLists() stores a Set built from json.xianFavList
let wordLists = [];  // "xian" keyword patterns; fillLists() stores three RegExp arrays here, one per level
// Score added for a keyword hit at level index 0, 1 and 2 of wordLists.
const xian_word_weights = [3, 6, 9];

/**
 * Applies the script's local overrides to a downloaded list of keyword patterns.
 *
 * Every pattern that appears as a key in the override table is either replaced by
 * the table's value or, when that value is the empty string, removed. Patterns that
 * are not in the table pass through unchanged. The input array is not modified.
 *
 * @param {string[]} original_list - Pattern source strings as downloaded.
 * @returns {string[]} A new array with the overrides applied, in the original order.
 */
function filterWordList(original_list) {
    // pattern -> replacement; "" means "drop this pattern entirely".
    const overrides = new Map([
        ["仙(家|庭|帝|友|丹)", "仙(庭|帝|友)"],
        ["镀金旅团", ""],
        ["(百分百|100%)参团", ""],
        ["地心游记", ""],
        ["小麦地", ""],
        ["舫", ""],
        ["米哈[^游基哟]", ""],
        ["@.{0,8}?(毁灭|虚无|爱莉希雅|纳西妲|QM|芝士是猫)", ""],
        ["枘凿六合", ""],
        ["硬核不媚", ""],
        ["尾气厂", ""],
        ["(?=.*米哈[^游])(?=.*(尾气|抄))", ""],
        ["赛博(以色列|犹太|贞操)", ""],
        ["散去吧", ""],
        ["不死孽物", ""],
        ["孽物不除", ""],
        ["巡猎不休", ""],
        ["我也玩.{0,10}?我也喜欢", ""],
        ["以此烈火", ""],
        ["斩无不断", ""],
        ["/(?=.*(海拉|点燃|火把|任天堂|任豚|王国|之泪|吸))(?=.*瘴)/", "/(?=.*(任天堂|任豚))(?=.*瘴)/"],
        ["先驱.{0,4}?(春|夏|秋|冬|梗|爱|派蒙|攻略|农|茶话)", ""],
        ["仙舟.{0,2}?(吃瓜|幼儿园)", ""],
        ["(不是|是不)好惹", ""],
        ["(门|🚪)(酱|🐖)", ""],
        ["悲.*?(铁道|崩铁|星铁).*?[五5]天", ""]
    ]);

    const kept = [];
    original_list.forEach((pattern) => {
        if (!overrides.has(pattern)) {
            kept.push(pattern);
            return;
        }
        const replacement = overrides.get(pattern);
        if (replacement !== "") {
            kept.push(replacement);
        }
    });
    return kept;
}


// Download locations for xianLists.json, keyed by the value stored under the
// "urlSource" setting. getXianListOnline() reads that setting and uses "jsdelivr"
// when nothing is stored.
const urlSourceDic = {
    githubusercontent: "https://raw.githubusercontent.com/Darknights1750/XianLists/main/xianLists.json",
    jsdelivr: "https://fastly.jsdelivr.net/gh/Darknights1750/XianLists@main/xianLists.json"
}
/**
 * Downloads the list bundle (uid lists and keyword pattern lists) from the mirror
 * selected by the "urlSource" setting.
 *
 * The returned promise always resolves and never rejects. On a non-200 status, a
 * network error, a timeout, an abort or an unparsable body it resolves with a
 * bundle in which every list is an empty array. A successfully parsed bundle is
 * merged over those empty defaults, so every list key is always present.
 *
 * @returns {Promise<Object>} Bundle with the keys xianList, xianLv1List, xianLv2List,
 *     xianLv3List, xianFavList, wordLv1List, wordLv2List, wordLv3List, xianLeakList.
 */
const getXianListOnline = function () {
    const emptyBundle = () => ({
        xianList: [],
        xianLv1List: [],
        xianLv2List: [],
        xianLv3List: [],
        xianFavList: [],
        wordLv1List: [],
        wordLv2List: [],
        wordLv3List: [],
        xianLeakList: []
    });

    return new Promise(resolve => {
        const useFallback = (reason) => {
            console.warn(`Could not load the online lists (${reason}); continuing with empty lists.`);
            resolve(emptyBundle());
        };

        GM_xmlhttpRequest({
            method: "GET",
            // An unknown stored source name falls back to the default mirror instead of an undefined URL.
            url: urlSourceDic[GM_getValue("urlSource", "jsdelivr")] ?? urlSourceDic.jsdelivr,
            data: '',
            headers: {
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
            },
            onload: res => {
                if (res.status !== 200) {
                    useFallback(`HTTP status ${res.status}`);
                    return;
                }
                try {
                    const parsed = JSON.parse(res.responseText);
                    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
                    if (!isPlainObject) {
                        useFallback("response is not a JSON object");
                        return;
                    }
                    resolve({ ...emptyBundle(), ...parsed });
                } catch (error) {
                    useFallback(`invalid JSON: ${error.message}`);
                }
            },
            // Without these handlers a failed request would leave the promise pending forever.
            onerror: () => useFallback("network error"),
            ontimeout: () => useFallback("timeout"),
            onabort: () => useFallback("aborted")
        });
    });
}

/**
 * Loads the online list bundle and fills the module-level lookup tables:
 *   - xianLists:   four Sets of uids (xianList, xianLv1List, xianLv2List, xianLv3List)
 *   - xianFavList: a Set of uids
 *   - wordLists:   three RegExp arrays (level 1..3); xianLeakList is appended to level 3
 *   - mxz_tags:    each entry converted to a RegExp
 *
 * A list that is missing from the bundle (or is not an array) is treated as empty,
 * and a pattern that is not a valid regular expression is skipped with a warning,
 * so one bad remote entry cannot stop the script from starting.
 *
 * @returns {Promise<void>}
 */
const fillLists = async function () {
    const json = (await getXianListOnline()) ?? {};

    const listOf = (key) => (Array.isArray(json[key]) ? json[key] : []);
    const compile = (sources) => sources.flatMap((source) => {
        try {
            return [new RegExp(source)];
        } catch (error) {
            console.warn(`Skipping invalid keyword pattern ${String(source)}: ${error.message}`);
            return [];
        }
    });

    xianLists = ["xianList", "xianLv1List", "xianLv2List", "xianLv3List"]
        .map((key) => new Set(listOf(key)));
    xianFavList = new Set(listOf("xianFavList"));
    wordLists = ["wordLv1List", "wordLv2List", "wordLv3List"]
        .map((key) => compile(filterWordList(listOf(key))));
    // Leak-related patterns count as level 3 keywords.
    wordLists[2] = [...wordLists[2], ...compile(listOf("xianLeakList"))];
    // new RegExp(regex) copies an existing RegExp, so this also works on already converted entries.
    mxz_tags = mxz_tags.map((item) => new RegExp(item));
}

/**
 * Looks up a uid in the xianLists sets, checking the last set first.
 *
 * @param {*} uid - The uid to look for (compared with Set.prototype.has).
 * @returns {number} Index of the highest-indexed set containing the uid, or -1 if none does.
 */
function xianListLevel(uid) {
    let tier = xianLists.length;
    while (tier-- > 0) {
        if (xianLists[tier].has(uid)) {
            return tier;
        }
    }
    return -1;
}


/**
 * Walks a value depth-first and concatenates every string that contains at least
 * one character in the range U+4E00..U+9FA5.
 *
 * Only own enumerable properties of objects and arrays are visited, in key order.
 * Strings without such a character and all non-string primitives are ignored.
 *
 * @param {*} obj - Any value; typically a parsed JSON object.
 * @returns {string} The matching strings joined without a separator ('' if none).
 */
function extractAndCombineStringsWithChineseFromObject(obj) {
    const hasChinese = /[\u4e00-\u9fa5]/;

    const collect = (node, found) => {
        if (typeof node === 'string') {
            if (hasChinese.test(node)) {
                found.push(node);
            }
            return found;
        }
        if (node === null || typeof node !== 'object') {
            return found;
        }
        for (const key of Object.keys(node)) {
            collect(node[key], found);
        }
        return found;
    };

    return collect(obj, []).join('');
}

/**
 * Picks one User-Agent string at random from a fixed list.
 *
 * Every entry is a single line with no control characters, so the result can be
 * used directly as an HTTP header value.
 *
 * @returns {string} A User-Agent string.
 */
function getRandomUserAgent() {
    // The first Android entry is listed twice, as in the original list, which doubles
    // its chance of being picked.
    const userAgents = [
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
        // This entry previously ended in "\n", which is not valid inside a header value.
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36 EdgA/121.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1 OPX/2.1.0"
    ];
    const randomIndex = Math.floor(Math.random() * userAgents.length);
    return userAgents[randomIndex];
}

/**
 * Converts a post's age into a weight between 0 and 1.
 *
 * The age is measured in whole calendar months between the post and the current
 * local date. The weight starts at 1 and drops by 0.1 for every full 4 months,
 * never going below 0. A timestamp in a later calendar month than the local clock
 * (for example because of clock skew) is treated as "this month", so the weight
 * never exceeds 1.
 *
 * @param {number} ts - Publication time in seconds since the Unix epoch.
 * @returns {number} Weight in the range [0, 1] (NaN if ts is not a valid number).
 */
function getWeightByTime(ts) {
    const published = new Date(ts * 1000);
    const today = new Date(Date.now());

    // Difference in calendar months (year difference * 12 + month difference).
    const monthDiff = (today.getFullYear() - published.getFullYear()) * 12
        + (today.getMonth() - published.getMonth());

    // Clamp negative differences so a "future" post cannot get a weight above 1.
    const elapsedMonths = Math.max(0, monthDiff);

    // Every 4 months the weight drops by 0.1.
    return Math.max(0, 1 - Math.floor(elapsedMonths / 4) * 0.1);
}



/**
 * Computes a weighted keyword score for a list of feed items (one item per post).
 *
 * For each item the text is extracted with extractAndCombineStringsWithChineseFromObject
 * and checked against wordLists from level index 2 down to 0; the first level with a
 * match contributes xian_word_weights[level]. If no level matches, the item
 * contributes 1 when any mxz_tags pattern matches and 0 otherwise. The contribution
 * is multiplied by the item's time weight, except that contributions greater than 1
 * are not reduced by age.
 *
 * @param {Object[]} items - Feed items; each must provide modules.module_author.pub_ts.
 * @returns {number} Sum of the weighted contributions.
 */
function getKeywordCount(items) {
    // 1 when any pattern matches the text, otherwise 0.
    const matchesAny = (text, patterns) => {
        for (const pattern of patterns) {
            if (pattern.test(text)) {
                return 1;
            }
        }
        return 0;
    };

    let total = 0;
    for (const item of items) {
        const text = extractAndCombineStringsWithChineseFromObject(item);
        let timeWeight = getWeightByTime(item.modules.module_author.pub_ts);

        // Highest level first, so the strongest matching level wins.
        const hitLevel = [2, 1, 0].find((level) => matchesAny(text, wordLists[level]) === 1);
        let wordWeight = 0;
        if (hitLevel !== undefined) {
            console.log(`存在仙关键词,等级为 ${hitLevel+1}, 文字为 ${text}`);
            wordWeight = xian_word_weights[hitLevel];
        }

        if (wordWeight === 0) {
            wordWeight = matchesAny(text, mxz_tags);
        }
        if (wordWeight > 1) {
            // Level keyword hits do not decay with age.
            timeWeight = 1;
        }

        total += timeWeight * wordWeight;
    }
    return total;
}

/**
 * Waits for a random whole number of milliseconds between minDelay and maxDelay
 * (both inclusive).
 *
 * @param {number} minDelay - Shortest wait in milliseconds.
 * @param {number} maxDelay - Longest wait in milliseconds.
 * @returns {Promise<void>} Resolves when the timer fires.
 */
function randomDelay(minDelay, maxDelay) {
    return new Promise((resolve) => {
        const spread = maxDelay - minDelay + 1;
        const waitMs = Math.floor(Math.random() * spread) + minDelay;
        setTimeout(resolve, waitMs);
    });
}

/**
 * Determines the Bilibili uid that belongs to a user-name element.
 *
 * Sources are tried in this order; the first non-empty result wins:
 *   1. the element's data-user-id attribute (dataset.userId);
 *   2. the href of the element's first child: the digits that directly follow
 *      "space.bilibili.com/", or, for any other kind of link, all digits in the href;
 *   3. the uid in the current page URL when it is a space.bilibili.com page.
 *
 * @param {Element|null|undefined} html - The element to inspect.
 * @returns {string|null} The uid, or null when none of the sources provides one.
 */
function get_uid(html) {
    const spaceUidPattern = /space\.bilibili\.com\/(\d+)/;

    const fromDataset = html?.dataset?.userId;
    if (fromDataset) {
        return fromDataset;
    }

    const href = html?.children?.[0]?.href;
    if (typeof href === 'string') {
        // Prefer the digits right after the host: stripping every non-digit would also
        // pick up digits from the query string (e.g. "?spm_id_from=333.788").
        const linkMatch = href.match(spaceUidPattern);
        const fromLink = linkMatch ? linkMatch[1] : href.replace(/[^\d]/g, "");
        if (fromLink) {
            return fromLink;
        }
    }

    const pageUrl = typeof window !== 'undefined' ? window.location?.href : undefined;
    const pageMatch = typeof pageUrl === 'string' ? pageUrl.match(spaceUidPattern) : null;
    return pageMatch ? pageMatch[1] : null;
}

/**
 * Collects every element in the document that carries one of the user-name classes.
 *
 * @returns {Element[]} The matching elements as an array, in document order.
 */
function get_comment_list() {
    const userNameSelector = [".user-name", ".sub-user-name", ".user"].join(",");
    const matches = document.querySelectorAll(userNameSelector);
    return Array.from(matches);
}

/**
 * Runs one crawl pass over all user-name elements currently on the page.
 *
 * The pass goes over the list twice: first with load_local_only = true, so users
 * with cached results are labelled immediately, then with load_local_only = false
 * to fetch the remaining users one at a time. The pass stops as soon as
 * thread_number no longer equals current_thread_number, which means a newer pass
 * has been started. A failure for one user is logged and does not stop the pass.
 *
 * @param {number} current_thread_number - The value of thread_number this pass belongs to.
 * @returns {Promise<void>}
 */
async function readCommentListHtml(current_thread_number) {
    console.log(`starting crawling...`);

    const comment_list = get_comment_list();
    console.log("comment_list.length = " + comment_list.length);

    for (const load_local_only of [true, false]) {
        for (const html of comment_list) {
            if (current_thread_number !== thread_number) {
                return;
            }
            try {
                await updateUserHtml(html, current_thread_number, load_local_only);
            } catch (error) {
                // Keep going: one failing user must not abort the remaining ones.
                console.error("Failed to analyze a user, continuing with the next one:", error);
            }
        }
    }
}

/**
 * Maps a keyword score to a display level between 0 and 18.
 *
 * The score is rounded down first. Scores 0..4 map to the level of the same number,
 * a score of 5 stays on level 4 (level 5 starts at 6), and higher scores use the
 * threshold table below. Negative or non-numeric scores yield level 0.
 *
 * @param {number} count - The keyword score.
 * @param {number} total_count - Number of analyzed posts; accepted for call
 *     compatibility and not used in the calculation.
 * @returns {[number, string]} The level and its label, e.g. [7, "LV.7"].
 */
function analyze(count, total_count) {
    // [minimum score, level], highest first so the first match is the right one.
    const thresholds = [
        [200, 18],
        [150, 17],
        [100, 16],
        [70, 15],
        [55, 14],
        [40, 13],
        [30, 12],
        [25, 11],
        [20, 10],
        [16, 9],
        [13, 8],
        [10, 7],
        [8, 6],
        [6, 5],
    ];

    const score = Math.floor(count);
    let level = 0;
    if (score >= 6) {
        level = thresholds.find(([minimum]) => score >= minimum)[1];
    } else if (score > 0) {
        // Covers 1..5; a score of 5 previously fell between the branches and gave level 0.
        level = Math.min(score, 4);
    }
    return [level, `LV.${level}`];
}

/**
 * Returns the CSS color used to display a level.
 *
 * @param {number} level - The level as returned by analyze().
 * @returns {string} An rgb(...) color string: one color each for levels up to 4,
 *     up to 8, up to 12 and up to 15, and a fifth color for everything else.
 */
function getColorFromLevel(level) {
    // [highest level of the band, color]
    const bands = [
        [4, "rgb(84,93,101)"],
        [8, "rgb(94,228,65)"],
        [12, "rgb(28,71,209)"],
        [15, "rgb(156,7,234)"],
    ];
    for (const [upperBound, color] of bands) {
        if (level <= upperBound) {
            return color;
        }
    }
    return "rgb(243,137,6)";
}


/**
 * Shows the level for a score next to a user name.
 *
 * The label is written into the element's <b class="analyze-result"> child. If that
 * child does not exist yet it is created and appended; otherwise it is updated in place.
 *
 * @param {Element} html - The user-name element that carries the label.
 * @param {number} count - Keyword score, passed to analyze().
 * @param {number} total_count - Number of analyzed posts, passed to analyze().
 */
function updateHtmlWithCount(html, count, total_count) {
    const [level, tag] = analyze(count, total_count);

    let badge = html.querySelector('b.analyze-result');
    const isNewBadge = !badge;
    if (isNewBadge) {
        badge = document.createElement('b');
        badge.className = 'analyze-result';
    }

    badge.innerHTML = tag;
    badge.style.color = getColorFromLevel(level);
    badge.style.fontWeight = '900';  // bold
    badge.style.fontSize = '120%';   // 120% of the surrounding font size

    if (isNewBadge) {
        html.appendChild(badge);
    }
}

/**
 * Requests one page of a user's dynamics feed.
 *
 * The promise never rejects: a non-200 status, a non-zero API code, an unparsable
 * body, a network error, a timeout or an abort all resolve with null.
 *
 * @param {string} url - Full request URL.
 * @param {Object} headers - Request headers.
 * @returns {Promise<Object|null>} The `data` object of the response, or null on failure.
 */
function fetchDynamicPage(url, headers) {
    return new Promise((resolve) => {
        const fail = (message, detail) => {
            console.log(message, detail ?? "");
            resolve(null);
        };
        GM_xmlhttpRequest({
            method: 'GET',
            url: url,
            headers: headers,
            onload: function (res) {
                if (res.status !== 200) {
                    fail(`Request failed: ${res.status} ${res.statusText}`);
                    return;
                }
                try {
                    const body = JSON.parse(res.response);
                    if (body.code === 0 && body.data) {
                        resolve(body.data);
                    } else {
                        fail(`Request success with status 200, but code is ${body.code}`);
                    }
                } catch (error) {
                    fail("Could not parse the response body:", error);
                }
            },
            onerror: (error) => fail("Request error:", error),
            ontimeout: () => fail("Request timed out"),
            onabort: () => fail("Request aborted")
        });
    });
}

/**
 * Analyzes one user and shows the result next to the user name.
 *
 * A cached result younger than 30 days is shown directly. Otherwise, unless
 * load_local_only is set, the user's dynamics feed is read page by page (with a
 * random pause before each request) and scored with getKeywordCount until one of
 * these holds: there are no more pages, more than 144 posts were read, the score
 * exceeds 200, or the oldest post read has a time weight of 0.
 *
 * The result is cached and the label marked as final ("analyze-done") only when the
 * crawl reached one of those end conditions. If a newer pass has started
 * (thread_number changed) or three requests in a row failed, the crawl stops,
 * nothing is cached and the label stays unmarked, so a later pass can retry.
 *
 * @param {Element} html - The user-name element.
 * @param {number} current_thread_number - The value of thread_number this pass belongs to.
 * @param {boolean} load_local_only - When true, only cached results are used.
 * @returns {Promise<void>}
 */
async function updateUserHtml(html, current_thread_number, load_local_only) {
    const localStorageKey = 'uidData';
    const DAY30 = 30 * 24 * 60 * 60 * 1000;
    const MAX_POSTS = 12 * 12;
    const MAX_SCORE = 200;
    const MAX_CONSECUTIVE_FAILURES = 3;
    // Starting score for users found in xianLists[0..3].
    const PRESET_SCORES = [100, 125, 150, 200];
    const minDelay = 600;
    const maxDelay = 1400;

    const markDone = () => {
        const badge = html.querySelector('b.analyze-result');
        if (badge) badge.classList.add('analyze-done');
    };

    const existingB = html.querySelector('b.analyze-result');
    if (existingB && existingB.classList.contains('analyze-done')) return;  // already final

    const uid = get_uid(html);
    if (!uid) return;

    const now = Date.now();
    const currentData = (GM_getValue(localStorageKey, {}) || {})[uid];
    if (currentData && now - currentData.updated_timestamp < DAY30) {
        // Fresh cached result from the userscript storage.
        updateHtmlWithCount(html, currentData["count"], currentData["total_count"]);
        markDone();
        return;
    }
    if (load_local_only) return;

    const headers = {
        'authority': 'api.bilibili.com',
        'method': 'GET',
        'path': `/x/polymer/web-dynamic/v1/feed/space?offset=&host_mid=${uid}&timezone_offset=420&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`,
        'scheme': 'https',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'en-US,en;q=0.9',
        'Origin': 'https://space.bilibili.com',
        'Priority': 'u=1, i',
        'Referer': `https://space.bilibili.com/${uid}/dynamic`,
        'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': getRandomUserAgent()
    };

    let count = PRESET_SCORES[xianListLevel(uid)] ?? 0;
    let offset = "";
    let has_more = true;
    let total_count = 0;
    let last_ts = Math.floor(now / 1000);
    let failures = 0;
    let finished = false;

    while (true) {
        // Negated comparisons keep the original behavior for NaN values (treated as "stop").
        const reachedEnd = !has_more
            || !(total_count <= MAX_POSTS)
            || !(count <= MAX_SCORE)
            || !(getWeightByTime(last_ts) > 0);
        if (reachedEnd) {
            finished = true;
            break;
        }
        if (current_thread_number !== thread_number) break;   // a newer pass took over
        if (failures >= MAX_CONSECUTIVE_FAILURES) break;       // give up instead of retrying forever

        const url = `https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset=${offset}&host_mid=${uid}&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`;
        await randomDelay(minDelay, maxDelay);
        const page = await fetchDynamicPage(url, headers);
        if (page === null) {
            failures++;
            continue;
        }

        const items = Array.isArray(page.items) ? page.items : [];
        let pageScore;
        try {
            pageScore = getKeywordCount(items);
        } catch (error) {
            console.error(`Could not score a feed page for uid ${uid}:`, error);
            failures++;
            continue;
        }
        failures = 0;

        count += pageScore;
        total_count += items.length;
        has_more = page.has_more;
        offset = page.offset;
        const oldestTs = items[items.length - 1]?.modules?.module_author?.pub_ts;
        if (oldestTs != null) last_ts = oldestTs;

        console.log(`uid = ${uid}, count = ${count}, total_count = ${total_count}, thread_number = ${current_thread_number}`);
        updateHtmlWithCount(html, count, total_count);
    }

    if (!finished) return;

    // Read the map again right before writing so entries stored while this crawl was
    // running (by other crawls or other tabs) are not overwritten by a stale copy.
    const uidDataMap = GM_getValue(localStorageKey, {}) || {};
    uidDataMap[uid] = { "updated_timestamp": now, "count": Math.floor(count), "total_count": total_count };
    GM_setValue(localStorageKey, uidDataMap);
    markDone();
}

/**
 * Computes the SHA-256 digest of a string (encoded as UTF-8).
 *
 * @param {string} str - The text to hash.
 * @returns {Promise<string>} The digest as 64 lowercase hexadecimal characters.
 */
async function computeHash(str) {
    const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str));
    let hex = '';
    for (const byte of new Uint8Array(digest)) {
        hex += byte.toString(16).padStart(2, '0');
    }
    return hex;
}

// Entry point.
// 1. Clears the cached per-user results whenever the installed script version differs
//    from the version stored on the previous run.
// 2. Loads the online lists.
// 3. Every 4 seconds compares the user-name elements on the page (their number and a
//    hash of their uids) with the previous check and starts a new crawl pass when
//    something changed. Increasing thread_number makes any pass that is still
//    running stop.
(async function () {
    const POLL_INTERVAL_MS = 4000;
    const isTesting = false;

    // Current script version, read from @version through GM_info.
    const currentVersion = GM_info.script.version;
    const storedVersion = GM_getValue('scriptVersion', ''); // version stored by the previous run
    if (isTesting || currentVersion !== storedVersion) {
        // A different version means the script was installed or updated: drop cached results.
        console.log('Deleting local data...');
        GM_deleteValue('uidData');
        GM_setValue('scriptVersion', currentVersion);
    }

    try {
        await fillLists();
    } catch (error) {
        // Without the lists no user can be scored, so do not start polling.
        console.error('Could not initialize the keyword lists; the script will not run:', error);
        return;
    }

    let lastCommentListSize = 0;
    let lastCommentListHash = "";

    setInterval(async () => {
        try {
            const commentList = get_comment_list();
            // get_comment_list() returns an Array, so the element count is .length (not .size).
            const currentSize = commentList.length;

            const userIds = commentList.map((element) => get_uid(element));
            const commentListHash = await computeHash(JSON.stringify(userIds));

            const hasChanged = currentSize !== lastCommentListSize || lastCommentListHash !== commentListHash;
            lastCommentListSize = currentSize;
            lastCommentListHash = commentListHash;

            if (hasChanged) {
                thread_number++;
                // Deliberately not awaited: the pass runs in the background while polling continues.
                readCommentListHtml(thread_number).catch((error) => {
                    console.error('Crawl pass failed:', error);
                });
            }
        } catch (error) {
            console.error('Comment list check failed:', error);
        }
    }, POLL_INTERVAL_MS);
})();