HLS(m3u8) Ad Remover

Removes ad segments spliced into HLS (m3u8) streams played by HLS.js-based players.

// ==UserScript==
// @name         HLS(m3u8) Ad Remover
// @name:zh-CN   HLS(m3u8) 去广告
// @namespace    http://tampermonkey.net/
// @license      GNU AGPLv3
// @version      0.8.2
// @description  Remove HLS.js based(m3u8) ad stream
// @description:zh-cn   基于HLS.js(m3u8)播放器的去视频流内插广告插件,大部分视频网站都是基于这个库的,欢迎提交视频网址的匹配规则
// @author       douniwan6

// @include      /^https?://[^/]*xiaobaotv[^/]*/player.*/
// @include      /^https?://[^/]*xiaoheimi[^/]*/player.*/
// @include      /^https?://[^/]*xiaoxintv[^/]*/player.*/
// @match        http*://xbyy.app/player*

// @match        http*://danmu.yhdm666.top/player*
// @match        http*://danmu3.yhdm6go.top/player*
// @match        http*://www.yhdmz2.com/tpsf/player*
// @match        http*://player.mcue.cc/yinhua*

// @include      /^https?://[^/]*gimy[^/]*/jcplayer.*/

// @match        http*://www.haitu.tv/static/dmku/player/index.php*

// @match        http*://olevod1.com/addons/dp/player*

// @match        http*://api.tinga88.com/*
// @match        http*://www.kuaikanys.net/jiexi*
// @match        http*://w5cin.com/player*
// @match        http*://nnyy.in/*
// @match        http*://fstoo.com/vod/player.html*

// @icon         https://www.google.com/s2/favicons?sz=64&domain=greasyfork.org
// @supportURL   https://greasyfork.org/en/scripts/463326-hls-m3u8-ad-remover/feedback
// @grant        GM_xmlhttpRequest
// @connect      *
// @run-at       document-start
// ==/UserScript==

(function () {
    'use strict';
    /*global Hls*/

    // debugger

    // Query parameter name for the (currently disabled) cache-busting scheme.
    const CACHE_BUSTER_PARAM = 'cb';
    // Timeout, in milliseconds, applied to every header probe.
    const REQUEST_TIMEOUT = 5000;

    /**
     * Issue a HEAD request through GM_xmlhttpRequest and return the parsed
     * response headers that the ad heuristics rely on.
     *
     * @param {string|URL} url - Target of the request; URL objects are
     *     converted to their string form before being handed to the manager.
     * @returns {Promise<{data: string, headers: {date?: string,
     *     contentLength?: string, server?: string, age?: string,
     *     etag?: string, lastModified?: string}}>} Resolves for any 2xx
     *     status. Header values are trimmed; absent headers are `undefined`.
     *     Rejects with an Error on a non-2xx status, a timeout or a
     *     transport error.
     */
    function gmFetch(url) {
        return new Promise((resolve, reject) => {
            // No cache-busting parameter is appended: an extra query
            // parameter may break signed segment URLs.
            const requestUrl = String(url);

            GM_xmlhttpRequest({
                method: 'HEAD',
                url: requestUrl,
                timeout: REQUEST_TIMEOUT,
                onload: (response) => {
                    // 206 is seen sometimes, so accept the whole 2xx range.
                    if (response.status < 200 || response.status >= 300) {
                        reject(new Error(`Failed to fetch ${requestUrl}: ${response.status}`));
                        return;
                    }

                    // Guard against a missing header blob: throwing inside
                    // this callback would leave the promise pending forever.
                    const rawHeaders = String(response.responseHeaders ?? '');
                    const readHeader = (name) =>
                        rawHeaders.match(new RegExp(`^${name}:(.*)`, 'im'))?.[1]?.trim();

                    resolve({
                        data: response.responseText,
                        headers: {
                            date: readHeader('date'),
                            contentLength: readHeader('content-length'),
                            server: readHeader('server'),
                            age: readHeader('age'),
                            etag: readHeader('etag'),
                            lastModified: readHeader('last-modified'),
                        },
                    });
                },
                ontimeout: () => reject(new Error('Timeout fetching clean manifest')),
                onerror: () => reject(new Error('Error fetching clean manifest')),
            });
        });
    }

    /**
     * Compute the Levenshtein (edit) distance between two strings, counting
     * single-character insertions, deletions and substitutions.
     *
     * @param {string} s - First string.
     * @param {string} t - Second string.
     * @returns {number} Minimum number of edits turning `s` into `t`.
     */
    function levenshteinDistance(s, t) {
        const sLength = s.length;
        const tLength = t.length;

        // An empty string is as far away as the other string is long.
        if (sLength === 0) return tLength;
        if (tLength === 0) return sLength;

        // Only the previous row of the distance table is needed at any time.
        let previousRow = Array.from({ length: sLength + 1 }, (_, column) => column);

        for (let row = 1; row <= tLength; row += 1) {
            const tChar = t.charAt(row - 1);
            const currentRow = [row];

            for (let column = 1; column <= sLength; column += 1) {
                const substitutionCost = s.charAt(column - 1) === tChar ? 0 : 1;
                currentRow[column] = Math.min(
                    currentRow[column - 1] + 1, // insertion
                    previousRow[column] + 1, // deletion
                    previousRow[column - 1] + substitutionCost // substitution
                );
            }

            previousRow = currentRow;
        }

        return previousRow[sLength];
    }

    /**
     * Measure how far apart two strings are when their alphanumeric
     * characters are read as base-36 integers.
     *
     * @param {string} str1 - First string.
     * @param {string} str2 - Second string.
     * @returns {number} Absolute difference of the two base-36 values; a
     *     string with no parsable characters counts as 0.
     */
    function naturalDistance(str1, str2) {
        // Strip everything that is not a letter or digit, then parse as base 36.
        const toBase36Value = (text) => parseInt(text.replace(/\W|_/g, ""), 36) || 0;

        return Math.abs(toBase36Value(str1) - toBase36Value(str2));
    }

    /**
     * Find the most common timestamp in a list, treating values that lie
     * within `tolerance` of an already-seen representative as the same value.
     *
     * Each cluster is represented by the first value that started it. When
     * several clusters tie, the one that reached the winning count first is
     * returned. Non-finite entries (for example the NaN produced by parsing a
     * missing Last-Modified header) are ignored so they can never be chosen
     * as the result.
     *
     * @param {number[]} arr - Timestamps in milliseconds.
     * @param {number} [tolerance=3600000] - Maximum distance, in
     *     milliseconds, for two values to share a cluster (default 1 hour).
     * @returns {number|undefined} Representative of the largest cluster, or
     *     `undefined` when `arr` holds no finite value.
     */
    function getMostFrequent(arr, tolerance = 3600000) {
        const counter = new Map();
        let maxCount = 0;
        let mostFrequent;

        for (const item of arr) {
            if (!Number.isFinite(item)) continue;

            // Reuse the first existing representative close enough to this value.
            let key = item;
            for (const representative of counter.keys()) {
                if (Math.abs(representative - item) < tolerance) {
                    key = representative;
                    break;
                }
            }

            const count = (counter.get(key) ?? 0) + 1;
            counter.set(key, count);
            if (count > maxCount) {
                maxCount = count;
                mostFrequent = key;
            }
        }

        return mostFrequent;
    }

    // const g_ads = GM_getValue("ads", {})

    /**
     * Debug hook invoked for every chunk that was classified as an ad.
     *
     * Recording is switched off, so the function currently resolves without
     * doing anything. The code below the guard is kept for debugging: it
     * would probe each segment of the chunk and persist per-ETag hit
     * statistics.
     * NOTE(review): that code uses `g_ads` and `GM_setValue`, neither of
     * which appears to be declared/granted in this script as it stands —
     * confirm before re-enabling.
     *
     * @param {string} url - URL of the playlist the chunk came from.
     * @param {string} chunk - Raw playlist text of the removed chunk.
     * @returns {Promise<void>}
     */
    async function recordChunk(url, chunk) {
        // debug
        const recordingEnabled = false;
        if (!recordingEnabled) {
            return;
        }

        const segmentLines = chunk.match(/^.*[.]ts.*$/gm);
        if (!segmentLines) return;

        for (const segmentLine of segmentLines) {
            const segmentUrl = URL.parse(segmentLine, url);
            const { headers } = await gmFetch(segmentUrl);

            // One record per ETag, remembering only the 10 latest playlists.
            const record = g_ads[headers.etag] ?? { hits: 0, urls: [] };
            record.hits += 1;
            record.urls.push(url);
            record.urls = record.urls.slice(-10);
            record.headers = headers;

            g_ads[headers.etag] = record;
        }
        GM_setValue("ads", g_ads);
    }

    // Processed playlists keyed by playlist URL. Each entry also keeps the raw
    // text it was derived from, so a playlist that changed since the last load
    // (e.g. a live stream refresh) is processed again instead of served stale.
    const playlistCache = new Map();

    /**
     * Probe one randomly chosen segment of a chunk and return its
     * Last-Modified time.
     *
     * @param {string} url - Playlist URL, used as base for relative segments.
     * @param {string} chunk - Playlist text between two discontinuity tags.
     * @returns {Promise<number>} Epoch milliseconds, or NaN when the chunk
     *     has no segment line, the URL cannot be resolved, the request fails
     *     or the header is missing/unparsable.
     */
    async function probeChunkLastModified(url, chunk) {
        // Lines starting with '#' are tags/comments, never segment URIs.
        const candidates = (chunk.match(/^.*[.]ts.*$/gm) ?? [])
            .filter((line) => !line.trimStart().startsWith('#'));
        if (candidates.length === 0) return NaN;

        // One random sample per chunk keeps the number of requests low.
        const sample = candidates[Math.floor(Math.random() * candidates.length)].trim();
        try {
            const fullurl = new URL(sample, url);
            const resp = await gmFetch(fullurl);
            console.log(fullurl.href);
            console.log("lastModified: ", resp.headers.lastModified);
            return Date.parse(String(resp.headers.lastModified ?? '').trim());
        } catch (error) {
            // A failed probe must not abort the whole playlist; the chunk is
            // simply treated as having an unknown timestamp.
            console.warn("lastModified probe failed: ", sample, error);
            return NaN;
        }
    }

    /**
     * Strip ad chunks from an HLS playlist.
     *
     * The playlist is split on `#EXT-X-DISCONTINUITY`. Chunks without a
     * `.ts` segment, or lasting 60 seconds or more, are always kept. A
     * remaining chunk is dropped when any of these heuristics fires:
     *   1. all its EXTINF values are identical and sum to less than 10 s;
     *   2. its sampled Last-Modified time is more than one hour away from
     *      the most common Last-Modified time across chunks;
     *   3. its first segment path is further (Levenshtein) from the previous
     *      kept segment path than any distance seen inside kept chunks, as
     *      long as that maximum is non-zero and below 10.
     *
     * @param {string} url - URL the playlist was loaded from.
     * @param {string} playlist - Raw playlist text.
     * @returns {Promise<string>} The playlist with ad chunks removed. A
     *     non-string `playlist` is returned unchanged.
     */
    async function process(url, playlist) {
        const DISCONTINUITY_TAG = "#EXT-X-DISCONTINUITY";
        const ONE_HOUR_MS = 3600 * 1000;

        if (typeof playlist !== 'string') return playlist;

        const cached = playlistCache.get(url);
        if (cached !== undefined && cached.source === playlist) {
            console.log("REMOVE Done using playlistCache");
            return cached.result;
        }

        // ad stream usually surrounded by #EXT-X-DISCONTINUITY
        const chunks = playlist.split(DISCONTINUITY_TAG);

        // Probe every chunk in parallel. Results stay index-aligned with
        // `chunks`, so a timestamp can never be attributed to the wrong chunk.
        const lastModifiedByChunk = await Promise.all(
            chunks.map((chunk) => probeChunkLastModified(url, chunk))
        );

        // Unknown timestamps must not take part in choosing the baseline.
        const lastModifiedBase = getMostFrequent(lastModifiedByChunk.filter(Number.isFinite));
        console.log("lastModifiedBase: ", new Date(lastModifiedBase).toUTCString());

        const filtered_playlist = [];
        let avg_distance = 0;
        let max_distance = 0;
        let n = 0;
        let last_uri_path = '';

        for (const [index, chunk] of chunks.entries()) {
            const uri_paths = Array.from(chunk.matchAll(/^.*[.]ts(?=$|\?|#)/gm), (match) => match[0]);

            // skip none .ts chunk
            if (uri_paths.length === 0) {
                filtered_playlist.push(chunk);
                continue;
            }

            const extinfs = Array.from(chunk.matchAll(/EXTINF:\s*([0-9.]+)\s*,/gm), (match) => match[1]);
            const duration = extinfs.reduce((total, value) => total + parseFloat(value), 0);
            if (duration >= 60) {
                console.log("SKIP CHUNK by duration: ", duration);
                filtered_playlist.push(chunk);
                continue;
            }

            // Short chunk whose segments all share one exact duration.
            if (new Set(extinfs).size === 1 && duration < 10) {
                console.log("REMOVED CHUNK by extinfs: ", chunk);
                recordChunk(url, chunk);
                continue;
            }

            // NaN (unknown timestamp) never satisfies the comparison, so
            // such chunks fall through to the path-distance check.
            const lastModified = lastModifiedByChunk[index];
            if (Math.abs(lastModified - lastModifiedBase) > ONE_HOUR_MS) {
                console.log("REMOVED CHUNK by lastModified: ", new Date(lastModified).toUTCString(), chunk);
                recordChunk(url, chunk);
                continue;
            }

            const [first_uri_path, ...other_uri_paths] = uri_paths;
            const jump = levenshteinDistance(first_uri_path, last_uri_path);
            if (max_distance !== 0 && max_distance < 10 && jump > max_distance) {
                console.log("REMOVED CHUNK by distance: ", jump, chunk);
                recordChunk(url, chunk);
                continue;
            }

            last_uri_path = first_uri_path;

            // Learn how much consecutive segment names differ inside a
            // chunk that was kept.
            for (const uri_path of other_uri_paths) {
                const distance = levenshteinDistance(uri_path, last_uri_path);
                max_distance = Math.max(max_distance, distance);

                avg_distance = (n * avg_distance + distance) / (n + 1);
                n += 1;
                console.log(distance, avg_distance, max_distance);
                last_uri_path = uri_path;
            }

            filtered_playlist.push(chunk);
        }

        const result = filtered_playlist.join(DISCONTINUITY_TAG);
        playlistCache.set(url, { source: playlist, result });
        console.log("REMOVE Done");
        return result;
    }

    // Object on which the page will publish its `Hls` global. `unsafeWindow`
    // is probed with typeof because referencing an undeclared identifier
    // would throw before any fallback could apply.
    const hlsHost = typeof unsafeWindow !== 'undefined'
        ? unsafeWindow
        : (typeof window !== 'undefined' ? window : globalThis);

    // The real Hls constructor, captured when the page assigns it.
    let trueHls;

    // Intercept assignment of the `Hls` global so that a playlist loader
    // which filters ad chunks can be installed as the default `pLoader`.
    Object.defineProperty(hlsHost, 'Hls', {
        get() {
            return trueHls;
        },
        set(value) {
            trueHls = value;

            // Values that do not look like an Hls constructor with a default
            // loader are stored as-is so that the page's assignment never
            // throws because of this script.
            const BaseLoader = value?.DefaultConfig?.loader;
            if (typeof BaseLoader !== 'function') return;

            console.log("HLS Ad Remover");

            class pLoader extends BaseLoader {
                constructor(config) {
                    super(config);
                    // `load` is replaced on the instance (rather than
                    // overridden on the prototype) so the wrapper is used
                    // whichever way the base loader defines it.
                    const load = this.load.bind(this);
                    this.load = (context, loadConfig, callbacks) => {
                        if (context.type === 'manifest' || context.type === 'level') {
                            const onSuccess = callbacks.onSuccess;
                            callbacks.onSuccess = async (response, stats, successContext, ...rest) => {
                                console.log(response, stats, successContext);
                                try {
                                    response.data = await process(response.url, response.data);
                                } catch (error) {
                                    // Ad removal is best effort: on failure
                                    // hand over the untouched playlist so
                                    // playback still starts.
                                    console.warn("HLS Ad Remover: playlist left unfiltered", error);
                                }
                                // Extra arguments are forwarded unchanged.
                                onSuccess(response, stats, successContext, ...rest);
                            };
                        }
                        return load(context, loadConfig, callbacks);
                    };
                }
            }

            value.DefaultConfig.pLoader = pLoader;
        }
    });
})();