Speech Recognition Polyfill Userscript

Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!

2026/03/03のページです。最新版はこちら

スクリプトをインストールするには、Tampermonkey、Greasemonkey、Violentmonkey のような拡張機能のインストールが必要です。

You will need to install an extension such as Tampermonkey to install this script.

スクリプトをインストールするには、Tampermonkey や Violentmonkey のような拡張機能のインストールが必要です。

スクリプトをインストールするには、Tampermonkey や Userscripts のような拡張機能のインストールが必要です。

このスクリプトをインストールするには、Tampermonkeyなどの拡張機能をインストールする必要があります。

このスクリプトをインストールするには、ユーザースクリプト管理ツールの拡張機能をインストールする必要があります。

(ユーザースクリプト管理ツールは設定済みなのでインストール!)

このスタイルをインストールするには、Stylusなどの拡張機能をインストールする必要があります。

このスタイルをインストールするには、Stylus などの拡張機能をインストールする必要があります。

このスタイルをインストールするには、Stylus などの拡張機能をインストールする必要があります。

このスタイルをインストールするには、ユーザースタイル管理用の拡張機能をインストールする必要があります。

このスタイルをインストールするには、ユーザースタイル管理用の拡張機能をインストールする必要があります。

このスタイルをインストールするには、ユーザースタイル管理用の拡張機能をインストールする必要があります。

(ユーザースタイル管理ツールは設定済みなのでインストール!)

このスクリプトの質問や評価の投稿はこちら、通報はこちらへお寄せください。
// ==UserScript==
// @name         Speech Recognition Polyfill Userscript
// @namespace    http://tampermonkey.net/
// @version      v1.0
// @description  Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!
// @author       apersongithub
// @match        *://*/*
// @icon         https://raw.githubusercontent.com/apersongithub/Speech-Recognition-Polyfill/refs/heads/main/extension/images/microphone.svg
// @grant        none
// @run-at       document-start
// @license MIT
// ==/UserScript==

(function () {
    'use strict';

    /**
     * Speech Recognition Polyfill Userscript
     *
     * This script provides a custom implementation of the standard Web Speech API
     * (SpeechRecognition) by communicating directly with Google's voice servers.
     * It is useful for environments where the native SpeechRecognition API is
     * unavailable, broken, or needs to be bypassed.
     *
     * Key improvements in this version:
     *
     * 1. Better Response Parsing: Correctly handles Google's proprietary server
     *    responses (stripping security prefixes and parsing structured data frames).
     *
     * 2. Organized Audio Sending (Queue): Audio chunks are queued and sent one
     *    by one. This prevents sending too many requests at the exact same time.
     *
     * 3. Error Tolerance: Minor network glitches when sending audio chunks won't
     *    immediately crash the entire transcription process.
     *
     * 4. Reliable Final Results: Hardened logic for determining when a user is
     *    finished speaking, ensuring we pick the most accurate text result.
     *
     * 5. Crash Prevention: Includes safety checks to prevent crashes if asynchronous
     *    network responses arrive after the microphone has already been turned off.
     *
     * 6. Fallback APIs: Automatically cycles through multiple backup API keys and
     *    Google service endpoints (like YouTube's voice search API) if the primary
     *    connection fails.
     */



    (function initGoogleSpeechPolyfill() {
        // Master switch for the verbose diagnostics emitted through `_dbg` and console.warn/error.
        const DEV_MODE = false;

        // Candidate API keys, tried in order; the list is mutable so a scraped key can be promoted.
        const API_KEYS = [
            "AIzaSyBm7NubC-Swn1nt2nhYfxb58eCdmL2vCVU", // default
            "AIzaSyBU2xE_JHvB6wag3tMfhxXpg2Q_W8xnM-I", // backup 1
            "AIzaSyD6n9asBjvx1yBHfhFhfw_kpS9Faq0BZHM"  // backup 2
        ];

        // Candidate channel endpoints, each paired with the referrer sent along with its requests.
        const API_ENDPOINTS = [
            {
                url: "https://embeddedassistant-webchannel.googleapis.com/google.assistant.embedded.v1.EmbeddedAssistant/Assist/channel",
                referrer: "https://www.google.com/"
            },
            {
                url: "https://embeddedassistant-frontend-clients6.youtube.com/google.assistant.embedded.v1.EmbeddedAssistant/YTAssist/channel",
                referrer: "https://www.youtube.com/"
            }
        ];

        // Cursors into API_ENDPOINTS / API_KEYS selecting the combination currently in use.
        let currentEndpointIndex = 0;
        let currentKeyIndex = 0;

        /** @returns {string} Channel URL of the currently selected endpoint. */
        const getBaseUrl = () => {
            const { url } = API_ENDPOINTS[currentEndpointIndex];
            return url;
        };

        /** @returns {{mode: string, credentials: string, referrer: string}} Shared fetch options for the selected endpoint. */
        const getFetchOpts = () => {
            const { referrer } = API_ENDPOINTS[currentEndpointIndex];
            return { mode: "cors", credentials: "omit", referrer };
        };

        /** @returns {string} The currently selected API key. */
        const getApiKey = () => {
            return API_KEYS[currentKeyIndex];
        };

        // Session created ahead of time (and the promise producing it), shared by all recognizers.
        let preSession = null;
        let preSessionPromise = null;

        /**
         * Looks for an API key embedded in the inline scripts of the Google home page.
         *
         * Only runs when the page is exactly `www.google.com/`; a match is accepted
         * only if the captured key starts with "AIzaSyBm".
         *
         * @returns {string|null} The first accepted key, or null when none is found.
         */
        function findApiKey() {
            const loc = window.location;
            const onGoogleHome = loc.hostname === "www.google.com" && loc.pathname === "/";
            if (!onGoogleHome) return null;

            for (const scriptEl of document.querySelectorAll("script")) {
                const source = scriptEl.textContent || "";
                const found = source.match(/"X-Goog-Api-Key"\s*:\s*"([^"]{33,})"/i);
                if (!found) continue;
                const [, candidate] = found;
                if (candidate.startsWith("AIzaSyBm")) return candidate;
            }
            return null;
        }

        // A key scraped from the page takes priority: drop any existing copy of it
        // from the list, then put it at the front so it is tried first.
        const scrapedKey = findApiKey();
        if (scrapedKey) {
            const existingPosition = API_KEYS.indexOf(scrapedKey);
            const alreadyListed = existingPosition !== -1;
            if (alreadyListed) {
                API_KEYS.splice(existingPosition, 1);
            }
            API_KEYS.unshift(scrapedKey);
        }

        /**
         * Finds the `X-Goog-AuthUser` value embedded in the page, if any.
         *
         * Inline `<script>` elements are searched first, then the serialized markup
         * of the whole document.
         *
         * @returns {string} The captured value, or "0" when nothing matches.
         */
        function findAuthUser() {
            // Single definition of the pattern so both lookups cannot drift apart.
            const authUserPattern = /"X-Goog-AuthUser"\s*:\s*(?:[^"\n]+)?"([^"]+)"/i;

            for (const script of document.querySelectorAll("script")) {
                const m = (script.textContent || "").match(authUserPattern);
                if (m) return m[1];
            }

            // The script is injected at document-start, where the root element may
            // not exist yet; treat that as "nothing to search" instead of throwing.
            const markup = document.documentElement?.innerHTML ?? "";
            const m2 = markup.match(authUserPattern);
            return m2 ? m2[1] : "0";
        }
        // Values resolved once at load time and reused for every request's headers.
        const AUTH_USER = findAuthUser();
        const CURRENT_YEAR = String(new Date().getFullYear());
        // First `x-browser-validation` token seen, either sniffed from page XHRs or scraped below.
        let browserValidation = null;

        // Wrap XMLHttpRequest#setRequestHeader to capture the first
        // `x-browser-validation` value the page sends. The wrapper always defers to
        // the native implementation with the untouched arguments.
        const _origXhrSetHeader = XMLHttpRequest.prototype.setRequestHeader;
        XMLHttpRequest.prototype.setRequestHeader = function (h, v) {
            // Only inspect string names: this hook runs for every XHR on every matched
            // page, so it must never throw on input the native method would accept
            // (e.g. a non-string name that the native method coerces itself).
            if (!browserValidation && typeof h === "string" && h.toLowerCase() === "x-browser-validation") {
                browserValidation = v;
            }
            return _origXhrSetHeader.apply(this, arguments);
        };

        if (!browserValidation) {
            // The root element may not exist yet at document-start; search an empty
            // string in that case rather than dereferencing null.
            const pageMarkup = document.documentElement?.innerHTML ?? "";
            const valMatch = pageMarkup.match(
                /x-browser-validation['":\s]+([A-Za-z0-9+/=]{20,44})/i
            );
            if (valMatch) browserValidation = valMatch[1];
        }

        /**
         * Builds the request headers shared by the channel requests.
         *
         * `x-browser-validation` is included only when a token has been captured.
         *
         * @returns {Object<string, string>} A fresh headers object on every call.
         */
        function getHeaders() {
            const headers = {
                accept: "*/*",
                "accept-language": "en-US,en;q=0.9",
                "content-type": "application/x-www-form-urlencoded",
                "x-browser-channel": "stable",
                "x-browser-copyright": `Copyright ${CURRENT_YEAR} Google LLC. All Rights reserved.`,
                "x-goog-authuser": AUTH_USER
            };
            if (browserValidation) {
                headers["x-browser-validation"] = browserValidation;
            }
            headers["x-browser-year"] = CURRENT_YEAR;
            return headers;
        }

        /**
         * Opens a new channel session, cycling through every API key / endpoint
         * combination until one bind request yields a session id.
         *
         * Each attempt POSTs `count=0` to the bind URL and searches the response for
         * an array shaped like `["c", "<sid>", ...]`. After a failed attempt the key
         * cursor advances; once the keys are exhausted it wraps and the endpoint
         * cursor advances.
         *
         * @returns {Promise<{sid: string, gsessionid: (string|null), ridCounter: number}>}
         *     The session id, the `x-http-session-id` response header (or null) and
         *     the next request id to use.
         * @throws {Error} The last recorded failure, or a generic error when no
         *     attempt recorded one.
         */
        async function createSession() {
            const maxAttempts = API_KEYS.length * API_ENDPOINTS.length;
            let lastError = null;

            // Drops blank and digits-only lines, then parses what remains; if that is
            // not a single JSON value, adjacent arrays are joined into one outer array.
            const parseBindBody = (bodyText) => {
                const jsonStr = bodyText
                    .split("\n")
                    .filter((line) => {
                        const trimmed = line.trim();
                        return trimmed && !/^\d+$/.test(trimmed);
                    })
                    .join("\n");
                try {
                    return JSON.parse(jsonStr);
                } catch {
                    return JSON.parse("[" + jsonStr.replace(/\]\s*\[/g, "],[") + "]");
                }
            };

            // Depth-first walk over nested arrays; the last ["c", <string>] entry wins.
            const lastSid = (node, found = null) => {
                if (!Array.isArray(node)) return found;
                for (const child of node) {
                    if (!Array.isArray(child)) continue;
                    if (child[0] === "c" && typeof child[1] === "string") found = child[1];
                    found = lastSid(child, found);
                }
                return found;
            };

            for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
                const ridCounter = 62480 + Math.floor(Math.random() * 9000);
                const bindUrl =
                    `${getBaseUrl()}?VER=8&RID=${ridCounter}&CVER=22&X-HTTP-Session-Id=gsessionid` +
                    `&%24httpHeaders=x-goog-api-key%3A${getApiKey()}%0D%0A&zx=${Date.now()}&t=1`;

                try {
                    const requestInit = {
                        ...getFetchOpts(),
                        method: "POST",
                        headers: getHeaders(),
                        body: "count=0"
                    };
                    const bindRes = await fetch(bindUrl, requestInit);

                    if (!bindRes.ok) {
                        lastError = new Error(`Bind failed with status ${bindRes.status}`);
                    } else {
                        const parsed = parseBindBody(await bindRes.text());
                        const sid = lastSid(parsed);
                        const gsessionid = bindRes.headers.get("x-http-session-id") || null;
                        if (sid) {
                            return { sid, gsessionid, ridCounter: ridCounter + 1 };
                        }
                    }
                } catch (err) {
                    lastError = err;
                }

                // Move to next key/endpoint combination
                currentKeyIndex += 1;
                if (currentKeyIndex >= API_KEYS.length) {
                    currentKeyIndex = 0;
                    currentEndpointIndex = (currentEndpointIndex + 1) % API_ENDPOINTS.length;
                }
            }

            throw lastError || new Error("No SID or bind failed after trying all backups");
        }

        /**
         * Starts creating a session ahead of time, or returns the creation already
         * in flight / completed.
         *
         * On success the session is stored in `preSession`. On failure both
         * `preSession` and `preSessionPromise` are cleared so a later call can retry.
         *
         * @returns {Promise<Object|null>} Resolves to the session, or null when
         *     creation failed; it never rejects.
         */
        function warmSession() {
            if (!preSessionPromise) {
                const rememberSession = (session) => {
                    preSession = session;
                    return session;
                };
                const forgetAttempt = () => {
                    preSession = null;
                    preSessionPromise = null;
                    return null;
                };
                preSessionPromise = createSession().then(rememberSession).catch(forgetAttempt);
            }
            return preSessionPromise;
        }

        // Base class for the recognizer: the native EventTarget when the environment
        // provides one, otherwise a small stand-in that keeps listeners per event type.
        const BaseClass = (() => {
            if (typeof EventTarget !== "undefined") return EventTarget;

            return class {
                constructor() {
                    // Maps an event type to the array of callbacks registered for it.
                    this.listeners = {};
                }

                addEventListener(type, callback) {
                    if (type in this.listeners) {
                        this.listeners[type].push(callback);
                    } else {
                        this.listeners[type] = [callback];
                    }
                }

                removeEventListener(type, callback) {
                    if (type in this.listeners) {
                        const remaining = this.listeners[type].filter((registered) => registered !== callback);
                        this.listeners[type] = remaining;
                    }
                }

                dispatchEvent(event) {
                    const hasListeners = event.type in this.listeners;
                    if (hasListeners) {
                        this.listeners[event.type].forEach((registered) => {
                            registered.call(this, event);
                        });
                        return !event.defaultPrevented;
                    }
                    return true;
                }
            };
        })();

        class GoogleWebchannelSpeechRecognition extends BaseClass {
            constructor() {
                super();

                // W3C properties
                this.continuous = false;
                this.interimResults = false;
                this.lang = "en-US";
                this.maxAlternatives = 1;
                this.serviceURI = "";
                this.grammars = new SpeechGrammarList();

                // Event handlers
                this.onaudiostart = null;
                this.onaudioend = null;
                this.onend = null;
                this.onerror = null;
                this.onnomatch = null;
                this.onresult = null;
                this.onsoundstart = null;
                this.onsoundend = null;
                this.onspeechstart = null;
                this.onspeechend = null;
                this.onstart = null;

                // Runtime state
                this._stream = null;
                this._audioCtx = null;
                this._processor = null;
                this._dummyAudio = null;
                this._processorConnected = false;

                this._aborting = false;
                this._cleanupCalled = false;
                this._switchingSession = false;
                this._abortController = null;

                this._bcDone = false;
                this._bcBuffer = "";

                this._latestHighStabilityTranscript = null;
                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;

                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                this._finalResults = [];
                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                // Session IDs
                this._currentSid = null;
                this._currentGsessionid = null;
                this._currentRidCounter = 0;
                this._currentOfs = 1;

                // VAD
                this._vadSilenceFrames = 0;
                this._isVadSpeaking = false;

                // pre-session audio buffer (holds chunks captured before session is ready)
                this._preSessionBuffer = [];

                // chunk send queue
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;
                this._maxConsecutiveChunkFailures = 6;

                // stale-session guards
                this._sessionGen = 0;
                this._activeBackchannelGen = 0;
                this._lastStartId = 0;

                // restart coalescing
                this._restartPromise = null;

                this._suppressEndOnce = false;
            }

            _dbg(...args) {
                if (!DEV_MODE) return;
                if (!GoogleWebchannelSpeechRecognition._forceLog) {
                    try {
                        const i = document.createElement('iframe');
                        i.style.display = 'none';
                        i.id = 'speech-polyfill-logger';
                        (document.head || document.documentElement).appendChild(i);
                        GoogleWebchannelSpeechRecognition._forceLog = i.contentWindow.console.log.bind(i.contentWindow.console);
                        // Do NOT remove the iframe, otherwise its console.log is destroyed.
                    } catch (e) {
                        const backupLog = console.log || console.info || console.debug;
                        GoogleWebchannelSpeechRecognition._forceLog = backupLog.bind(console);
                    }
                }
                try {
                    GoogleWebchannelSpeechRecognition._forceLog("[polyfill dbg]", ...args);
                } catch (e) {
                    try {
                        console.log("[polyfill dbg]", ...args);
                    } catch (e2) { }
                }
            }

            _dispatchEvent(name, eventObj) {
                const ev = eventObj || new Event(name);
                if (typeof this["on" + name] === "function") {
                    try {
                        this["on" + name](ev);
                    } catch (e) {
                        if (DEV_MODE) console.warn("[polyfill] on" + name + " handler error:", e);
                    }
                }
                try {
                    this.dispatchEvent(ev);
                } catch (e) {
                    if (DEV_MODE) console.warn("[polyfill] dispatchEvent error:", e);
                }
            }

            _norm(t) {
                return (t || "").replace(/\s+/g, " ").trim();
            }

            _stripXssiPrefix(text) {
                return text.replace(/^\)\]\}'\s*\n?/, "");
            }

            _readFrameFromBuffer() {
                this._bcBuffer = this._stripXssiPrefix(this._bcBuffer).replace(/^\s+/, "");
                if (!this._bcBuffer.length) return null;

                const nl = this._bcBuffer.indexOf("\n");
                if (nl === -1) return null;

                const lenStr = this._bcBuffer.slice(0, nl).trim();
                if (!/^\d+$/.test(lenStr)) {
                    this._bcBuffer = this._bcBuffer.slice(nl + 1);
                    return null;
                }

                const len = Number(lenStr);
                const start = nl + 1;
                const end = start + len;
                if (this._bcBuffer.length < end) return null;

                const payload = this._bcBuffer.slice(start, end);
                this._bcBuffer = this._bcBuffer.slice(end);
                return payload;
            }

            _extractFrameSignals(frameObj) {
                let lastSpeechResults = null;
                let sawEOU = false;
                let sawClose = false;
                let sawNoSpeech = false;

                const walk = (n) => {
                    if (n == null) return;

                    if (typeof n === "string") {
                        if (n === "close") sawClose = true;
                        if (n.includes("END_OF_UTTERANCE")) sawEOU = true;
                        return;
                    }

                    if (Array.isArray(n)) {
                        for (const x of n) walk(x);
                        return;
                    }

                    if (typeof n === "object") {
                        if (n.eventType === "END_OF_UTTERANCE") sawEOU = true;
                        if (n.noSpeech === true) sawNoSpeech = true;

                        if (Array.isArray(n.speechResults) && n.speechResults.length > 0) {
                            lastSpeechResults = n.speechResults;
                        }

                        for (const k of Object.keys(n)) {
                            if (k !== "speechResults" && k !== "transcript" && k !== "stability") {
                                walk(n[k]);
                            }
                        }
                    }
                };

                walk(frameObj);

                const STABILITY_THRESHOLD = 0.5;
                let highParts = [];
                let lowParts = [];
                let bestStability = null;

                if (lastSpeechResults) {
                    for (const sr of lastSpeechResults) {
                        if (sr.noSpeech === true) sawNoSpeech = true;
                        if (typeof sr.transcript === "string") {
                            const s = typeof sr.stability === "number" ? sr.stability : 0;
                            if (bestStability === null || s > bestStability) bestStability = s;
                            if (s < STABILITY_THRESHOLD) lowParts.push(sr.transcript);
                            else highParts.push(sr.transcript);
                        }
                    }
                }

                const highText = highParts.join(" ");
                const lowText = lowParts.join(" ");
                const fullText = (highText + (highText && lowText ? " " : "") + lowText).trim();

                return {
                    fullText: fullText || null,
                    highText: highText || null,
                    bestStability,
                    sawEOU,
                    sawClose,
                    sawNoSpeech
                };
            }

            /**
             * Reads the streaming back-channel response and turns its frames into
             * recognizer state changes and events.
             *
             * Bytes from the response body are decoded and appended to `_bcBuffer`;
             * complete frames are pulled with `_readFrameFromBuffer`, JSON-parsed, and
             * interpreted via `_extractFrameSignals`. The method stops silently as soon
             * as `gen` or `startId` no longer match the instance's current values.
             *
             * @param {Response} bcRes - Response whose `body` stream is read to the end.
             * @param {number} gen - Value compared against `_activeBackchannelGen` to
             *     detect that this reader has been superseded.
             * @param {number} startId - Value compared against `_lastStartId` to detect
             *     that a newer `start()` call has happened.
             * @returns {Promise<void>}
             */
            async _consumeBackchannel(bcRes, gen, startId) {
                const reader = bcRes.body.getReader();
                const decoder = new TextDecoder();

                this._bcBuffer = "";

                while (!this._aborting) {
                    // Stale reader: leave without touching any state.
                    if (gen !== this._activeBackchannelGen) return;
                    if (startId !== this._lastStartId) return;

                    const { done, value } = await reader.read();
                    if (done) break;

                    // Re-check after the await: the session may have changed meanwhile.
                    if (gen !== this._activeBackchannelGen) return;
                    if (startId !== this._lastStartId) return;

                    this._bcBuffer += decoder.decode(value, { stream: true });

                    // Drain every complete frame currently in the buffer.
                    while (!this._aborting) {
                        if (gen !== this._activeBackchannelGen) return;
                        if (startId !== this._lastStartId) return;

                        const payload = this._readFrameFromBuffer();
                        // null means no complete frame is available; go read more bytes.
                        if (payload == null) break;

                        let frameObj;
                        try {
                            frameObj = JSON.parse(payload);
                        } catch {
                            // Unparseable frame: skip it and keep draining.
                            continue;
                        }

                        const {
                            fullText,
                            highText,
                            bestStability,
                            sawEOU,
                            sawClose,
                            sawNoSpeech
                        } = this._extractFrameSignals(frameObj);

                        // Text carried by a frame that also signals "close" is not used.
                        const ignoreTextThisFrame = sawClose;

                        this._dbg("frame", {
                            gen, activeGen: this._activeBackchannelGen,
                            startId, activeStart: this._lastStartId,
                            sawEOU, sawClose, fullText, bestStability
                        });

                        // No-speech marker: report "nomatch", mark the channel done and clean up.
                        if (sawNoSpeech) {
                            this._dispatchEvent("nomatch");
                            this._bcDone = true;
                            this._cleanup();
                            return;
                        }

                        // Record the newest text and offer it as a candidate for the final result.
                        if (fullText && !ignoreTextThisFrame) {
                            this._latestInterimTranscript = fullText;
                            if (highText) this._latestHighStabilityTranscript = highText;
                            if (bestStability !== null) this._latestInterimStability = bestStability;
                            this._considerFinalCandidate(fullText, bestStability);
                        }

                        if (sawEOU) {
                            // End of utterance: a final result is now pending; "speechend"
                            // is dispatched at most once per start (guarded by _speechendFired).
                            this._pendingFinal = true;
                            if (!this._speechendFired) {
                                this._speechendFired = true;
                                this._dispatchEvent("speechend");
                            }

                            // Emit the frame's text as an interim result unless it repeats
                            // the last one emitted for the current utterance.
                            if (fullText && !ignoreTextThisFrame && this.interimResults && !this._finalizedThisUtterance) {
                                if (
                                    fullText !== this._lastEmittedInterimTranscript ||
                                    this._currentUtteranceId !== this._lastEmittedUtteranceId
                                ) {
                                    this._lastEmittedInterimTranscript = fullText;
                                    this._lastEmittedUtteranceId = this._currentUtteranceId;
                                    this._emitResult(fullText, bestStability ?? 0.01, false);
                                }
                            }
                        } else if (fullText && !ignoreTextThisFrame) {
                            if (this._pendingFinal) {
                                // Text arriving after an end-of-utterance marker triggers finalization.
                                this._finalizeCurrentUtteranceOnce();
                            } else if (this.interimResults) {
                                // Ordinary interim update, de-duplicated as above.
                                if (
                                    fullText !== this._lastEmittedInterimTranscript ||
                                    this._currentUtteranceId !== this._lastEmittedUtteranceId
                                ) {
                                    this._lastEmittedInterimTranscript = fullText;
                                    this._lastEmittedUtteranceId = this._currentUtteranceId;
                                    this._emitResult(fullText, bestStability ?? 0.01, false);
                                }
                            }
                        }

                        if (sawClose) {
                            // Channel is closing: make sure the utterance has been finalized.
                            if (!this._finalizedThisUtterance) {
                                this._finalizeCurrentUtteranceOnce();
                            }
                            // NOTE(review): _suppressEndOnce is consumed outside this view —
                            // presumably it suppresses one "end" event; confirm in _cleanup.
                            if (!this.continuous && this._finalizedThisUtterance) {
                                this._suppressEndOnce = true;
                            }
                            this._bcDone = true;

                            // Continuous mode restarts the session; otherwise tear down.
                            if (this.continuous && !this._aborting) {
                                await this._restartSession();
                            } else {
                                this._cleanup();
                            }
                            return;
                        }
                    }
                }

                // Stream ended (or abort was requested) without a "close" frame:
                // finalize whatever text has been collected so far.
                if (this._pendingFinal || this._latestInterimTranscript) {
                    this._finalizeCurrentUtteranceOnce();
                }
            }

            _considerFinalCandidate(transcript, stability) {
                const t = this._norm(transcript);
                if (!t) return;

                const s = typeof stability === "number" ? stability : 0;
                const currentBestLen = this._bestFinalCandidate ? this._bestFinalCandidate.length : 0;

                if (
                    this._bestFinalCandidate == null ||
                    s > this._bestFinalStability ||
                    (s === this._bestFinalStability && t.length >= currentBestLen)
                ) {
                    this._bestFinalCandidate = t;
                    this._bestFinalStability = s;
                }
            }

            _finalizeCurrentUtteranceOnce() {
                if (this._finalizedThisUtterance) return;

                let finalText = this._bestFinalCandidate || this._norm(this._latestInterimTranscript);
                if (!finalText) return;

                const finalStability =
                    this._bestFinalStability >= 0 ? this._bestFinalStability : this._latestInterimStability ?? 0.99;

                if (finalText === this._lastFinalTranscript) {
                    this._finalizedThisUtterance = true;
                    return;
                }

                this._dbg("finalizeOnce", {
                    pending: this._pendingFinal,
                    finalized: this._finalizedThisUtterance,
                    best: this._bestFinalCandidate,
                    latest: this._latestInterimTranscript
                });

                this._emitResult(finalText, finalStability, true);
                this._lastFinalTranscript = finalText;
                this._finalizedThisUtterance = true;
                this._lastEmittedInterimTranscript = null;
                this._lastEmittedUtteranceId = -1;
            }

            /**
             * Begins a recognition run: resets per-run state, acquires the microphone,
             * builds the audio graph and hands over to `_setupSession`.
             *
             * Dispatches "start" and then "audiostart" once the microphone stream is
             * obtained. Failures inside the try block are not rethrown; they are routed
             * to `_handleError` with a SpeechRecognition-style error code.
             *
             * @returns {Promise<void>} Rejects with Error("Already started") when a
             *     stream is already held and no abort is in progress (the method is
             *     async, so this surfaces as a rejection rather than a synchronous throw).
             */
            async start() {
                if (this._stream && !this._aborting) throw new Error("Already started");

                // New run: bump the ids that in-flight async work compares against to
                // detect that it has become stale.
                this._lastStartId++;
                this._sessionGen++;
                this._activeBackchannelGen = this._sessionGen;
                this._dbg("start", { startId: this._lastStartId, sessionGen: this._sessionGen, continuous: this.continuous });

                // Reset lifecycle and per-utterance flags.
                this._aborting = false;
                this._cleanupCalled = false;
                this._switchingSession = false;
                this._bcDone = false;
                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                // Reset transcript tracking.
                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;
                this._finalResults = [];
                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                // Reset VAD state.
                this._vadSilenceFrames = 0;
                this._isVadSpeaking = false;

                // Reset audio buffering and the chunk send queue.
                this._preSessionBuffer = [];
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;

                // Fresh controller; its signal is passed to the back-channel fetch.
                this._abortController = new AbortController();

                try {
                    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
                        throw new Error("getUserMedia not supported (requires HTTPS)");
                    }

                    this._stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                    this._dispatchEvent("start");
                    this._dispatchEvent("audiostart");

                    // Wait for the shared pre-created session attempt; warmSession
                    // resolves to null (never rejects) if creation failed.
                    await warmSession();

                    const AudioContext = window.AudioContext || window.webkitAudioContext;
                    if (!AudioContext) throw new Error("AudioContext not supported");
                    this._audioCtx = new AudioContext();

                    // A muted Audio element is attached to the stream and played;
                    // play() failures are deliberately ignored.
                    // NOTE(review): presumably this keeps the captured stream active in
                    // some browsers — the reason is not stated in this file; confirm.
                    this._dummyAudio = new Audio();
                    this._dummyAudio.muted = true;
                    this._dummyAudio.srcObject = this._stream;
                    try {
                        const p = this._dummyAudio.play();
                        if (p?.catch) p.catch(() => { });
                    } catch { }

                    // Audio graph: microphone source -> script processor -> destination.
                    const source = this._audioCtx.createMediaStreamSource(this._stream);
                    const processor = this._audioCtx.createScriptProcessor(8192, 1, 1);
                    source.connect(processor);
                    processor.connect(this._audioCtx.destination);

                    if (this._audioCtx.state === "suspended") await this._audioCtx.resume();

                    this._processor = processor;
                    // preSession may be null here, in which case _setupSession obtains
                    // a session itself.
                    await this._setupSession(preSession);
                } catch (err) {
                    if (DEV_MODE) console.error("[polyfill] start error:", err);

                    // Map the failure onto an error code: permission denial ->
                    // "not-allowed"; device/capture problems (by error name or by
                    // message text) -> "audio-capture"; anything else -> "network".
                    if (err.name === "NotAllowedError") {
                        this._handleError("not-allowed", "NO_MICROPHONE_PERMISSION");
                    } else if (
                        err.name === "NotFoundError" ||
                        err.name === "NotReadableError" ||
                        err.name === "OverconstrainedError" ||
                        err.name === "SecurityError" ||
                        (err.message && (err.message.includes("getUserMedia") || err.message.includes("AudioContext")))
                    ) {
                        this._handleError("audio-capture", err.message);
                    } else {
                        this._handleError("network", err.message || "Unknown network error");
                    }
                }
            }

            async _setupSession(initialSession = null) {
                try {
                    let session = initialSession;
                    if (!session) {
                        if (preSessionPromise) await preSessionPromise;
                        session = preSession || (await createSession());
                    }
                    preSession = null;

                    const { sid, gsessionid } = session;
                    let { ridCounter } = session;

                    const backchannelUrl =
                        `${getBaseUrl()}?` +
                        (gsessionid ? `gsessionid=${gsessionid}&` : "") +
                        `VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;

                    const myGen = ++this._sessionGen;
                    this._activeBackchannelGen = myGen;
                    const myStartId = this._lastStartId;

                    this._dbg("open backchannel", { myGen, myStartId, sid });

                    fetch(backchannelUrl, {
                        ...getFetchOpts(),
                        method: "GET",
                        headers: { ...getHeaders(), "content-type": undefined },
                        signal: this._abortController.signal
                    })
                        .then(async (bcRes) => {
                            if (myGen !== this._activeBackchannelGen) return;
                            if (myStartId !== this._lastStartId) return;
                            await this._consumeBackchannel(bcRes, myGen, myStartId);
                        })
                        .catch((e) => {
                            if (myGen !== this._activeBackchannelGen) return;
                            if (myStartId !== this._lastStartId) return;
                            if (e.name !== "AbortError") this._handleError("network", e.message);
                        });

                    const configRid = ridCounter++;
                    const assistConfig = {
                        config: {
                            dialogStateIn: { languageCode: this.lang },
                            deviceConfig: { deviceId: "example", deviceModelId: "example" },
                            audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
                            audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
                            requestType: 4
                        }
                    };

                    const configUrl =
                        `${getBaseUrl()}?VER=8` +
                        (gsessionid ? `&gsessionid=${gsessionid}` : "") +
                        `&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;

                    const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
                        JSON.stringify(assistConfig)
                    )}`;

                    fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });

                    this._currentSid = sid;
                    this._currentGsessionid = gsessionid;
                    this._currentRidCounter = ridCounter;
                    this._currentOfs = 1;

                    // Flush any audio captured before the session was ready
                    if (this._preSessionBuffer.length > 0) {
                        this._dbg("flushing pre-session buffer", { chunks: this._preSessionBuffer.length });
                        this._sendQueue.push(...this._preSessionBuffer);
                        this._preSessionBuffer = [];
                        if (!this._sendingChunks) this._drainChunkQueue();
                    }

                    if (!this._processorConnected) {
                        this._processorConnected = true;
                        const processorRef = this._processor;

                        this._processor.onaudioprocess = (e) => {
                            if (this._aborting || this._cleanupCalled || this._switchingSession) return;
                            if (!this._processor || this._processor !== processorRef) return;
                            if (!this._audioCtx || !this._stream) return;
                            if (this._bcDone) return;

                            const float32 = e.inputBuffer.getChannelData(0);

                            let sumSquares = 0;
                            for (let i = 0; i < float32.length; i++) sumSquares += float32[i] ** 2;
                            const rms = Math.sqrt(sumSquares / float32.length);

                            const isSpeech = rms >= 0.01;
                            if (isSpeech) {
                                this._vadSilenceFrames = 0;
                                this._isVadSpeaking = true;
                            } else {
                                this._vadSilenceFrames++;
                            }

                            // Keep sending a short tail of silence so server can endpoint/finalize.
                            // ~8192 samples per frame; at 48kHz that's ~170ms/frame.
                            const TRAILING_SILENCE_FRAMES = 12; // about ~2s tail max
                            const shouldSend =
                                isSpeech ||
                                (this._isVadSpeaking && this._vadSilenceFrames <= TRAILING_SILENCE_FRAMES);

                            if (!shouldSend) {
                                this._isVadSpeaking = false;
                                return;
                            }

                            if (!this._audioCtx) return;
                            const originalSampleRate = this._audioCtx.sampleRate;
                            if (!originalSampleRate) return;

                            const ratio = originalSampleRate / 16000;
                            const targetLength = Math.round(float32.length / ratio);
                            const int16 = new Int16Array(targetLength);

                            for (let i = 0; i < targetLength; i++) {
                                const srcIndex = Math.min(Math.floor(i * ratio), float32.length - 1);
                                int16[i] = Math.max(-1, Math.min(1, float32[srcIndex])) * 0x7fff;
                            }

                            const uint8 = new Uint8Array(int16.buffer);
                            let binary = "";
                            for (let i = 0; i < uint8.length; i += 8192) {
                                binary += String.fromCharCode(...uint8.subarray(i, i + 8192));
                            }
                            const b64 = btoa(binary);

                            this._enqueueChunk(b64);
                        };
                    }
                } catch (err) {
                    this._handleError("network", err.message);
                }
            }

            _enqueueChunk(audioBase64) {
                if (this._aborting || this._cleanupCalled || this._switchingSession) return;
                if (this._pendingFinal) return;

                // If the session isn't ready yet, buffer the audio for later
                if (!this._currentSid) {
                    this._preSessionBuffer.push(audioBase64);
                    this._dbg("buffered pre-session chunk", { buffered: this._preSessionBuffer.length });
                    return;
                }

                this._sendQueue.push(audioBase64);
                if (!this._sendingChunks) this._drainChunkQueue();
            }

            async _drainChunkQueue() {
                if (this._sendingChunks) return;
                this._sendingChunks = true;

                try {
                    while (this._sendQueue.length && !this._aborting && !this._cleanupCalled && !this._switchingSession) {
                        if (!this._currentSid || !this._abortController) break;

                        const audioBase64 = this._sendQueue.shift();

                        const chunkRid = this._currentRidCounter++;
                        const cSid = this._currentSid;
                        const cGsessionid = this._currentGsessionid;
                        const cOfs = this._currentOfs++;

                        const chunkUrl =
                            `${getBaseUrl()}?VER=8` +
                            (cGsessionid ? `&gsessionid=${cGsessionid}` : "") +
                            `&SID=${cSid}&RID=${chunkRid}&AID=0&zx=${Date.now()}&t=1`;

                        const chunkPayload = `count=1&ofs=${cOfs}&req0___data__=${encodeURIComponent(
                            JSON.stringify({ audioIn: audioBase64 })
                        )}`;

                        try {
                            const res = await fetch(chunkUrl, {
                                ...getFetchOpts(),
                                method: "POST",
                                headers: getHeaders(),
                                body: chunkPayload,
                                signal: this._abortController.signal
                            });

                            if (!res.ok) {
                                this._consecutiveChunkFailures++;
                                if (DEV_MODE) console.warn("[polyfill] chunk non-ok:", res.status);

                                if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                                    if (DEV_MODE) console.warn("[polyfill] too many chunk failures, soft-restarting session");
                                    await this._restartSession();
                                    this._consecutiveChunkFailures = 0;
                                }
                            } else {
                                this._consecutiveChunkFailures = 0;
                            }
                        } catch (err) {
                            if (err.name === "AbortError") break;

                            this._consecutiveChunkFailures++;
                            if (DEV_MODE) console.warn("[polyfill] chunk send error:", err.message);

                            if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                                if (DEV_MODE) console.warn("[polyfill] too many chunk exceptions, soft-restarting session");
                                await this._restartSession();
                                this._consecutiveChunkFailures = 0;
                            }
                        }
                    }
                } finally {
                    this._sendingChunks = false;
                }
            }

            async _restartSession(initialSession = null) {
                if (!this.continuous) return;
                if (this._aborting || this._cleanupCalled) return;
                if (this._restartPromise) return this._restartPromise;

                this._dbg("restart requested", {
                    switching: this._switchingSession,
                    hasRestartPromise: !!this._restartPromise,
                    bcDone: this._bcDone
                });

                this._restartPromise = (async () => {
                    if (this._abortController) this._abortController.abort();
                    this._abortController = new AbortController();
                    this._switchingSession = true;

                    this._bcDone = false;
                    this._speechendFired = false;
                    this._pendingFinal = false;
                    this._finalizedThisUtterance = false;
                    this._bestFinalCandidate = null;
                    this._bestFinalStability = -1;

                    this._lastEmittedInterimTranscript = null;
                    this._latestInterimTranscript = null;
                    this._latestInterimStability = null;
                    this._currentUtteranceId++;

                    this._preSessionBuffer = [];
                    this._sendQueue = [];
                    this._sendingChunks = false;
                    this._consecutiveChunkFailures = 0;

                    try {
                        let session = initialSession || preSession;
                        if (!session) session = await warmSession();
                        preSession = null;
                        preSessionPromise = null;
                        if (!session) throw new Error("Failed to warm session");

                        const { sid, gsessionid } = session;
                        let { ridCounter } = session;

                        const backchannelUrl =
                            `${getBaseUrl()}?` +
                            (gsessionid ? `gsessionid=${gsessionid}&` : "") +
                            `VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;

                        const myGen = ++this._sessionGen;
                        this._activeBackchannelGen = myGen;
                        const myStartId = this._lastStartId;

                        this._dbg("open backchannel (restart)", { myGen, myStartId, sid });

                        fetch(backchannelUrl, {
                            ...getFetchOpts(),
                            method: "GET",
                            headers: { ...getHeaders(), "content-type": undefined },
                            signal: this._abortController.signal
                        })
                            .then(async (bcRes) => {
                                if (myGen !== this._activeBackchannelGen) return;
                                if (myStartId !== this._lastStartId) return;
                                await this._consumeBackchannel(bcRes, myGen, myStartId);
                            })
                            .catch((e) => {
                                if (myGen !== this._activeBackchannelGen) return;
                                if (myStartId !== this._lastStartId) return;
                                if (e.name !== "AbortError") this._handleError("network", e.message);
                            });

                        const configRid = ridCounter++;
                        const assistConfig = {
                            config: {
                                dialogStateIn: { languageCode: this.lang },
                                deviceConfig: { deviceId: "example", deviceModelId: "example" },
                                audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
                                audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
                                requestType: 4
                            }
                        };

                        const configUrl =
                            `${getBaseUrl()}?VER=8` +
                            (gsessionid ? `&gsessionid=${gsessionid}` : "") +
                            `&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;

                        const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
                            JSON.stringify(assistConfig)
                        )}`;

                        fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });

                        this._currentSid = sid;
                        this._currentGsessionid = gsessionid;
                        this._currentRidCounter = ridCounter;
                        this._currentOfs = 1;

                        this._switchingSession = false;
                    } catch (err) {
                        this._switchingSession = false;
                        this._handleError("network", err.message);
                    }
                })().finally(() => {
                    this._restartPromise = null;
                });

                return this._restartPromise;
            }

            stop() {
                if (this._aborting) return;
                this._aborting = true;

                if (this._pendingFinal) this._finalizeCurrentUtteranceOnce();
                else if (this._latestInterimTranscript && this._norm(this._latestInterimTranscript) !== this._lastFinalTranscript) {
                    this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
                    this._finalizeCurrentUtteranceOnce();
                }

                if (this._abortController) this._abortController.abort();
                if (!this.continuous && (this._pendingFinal || this._latestInterimTranscript)) {
                    this._suppressEndOnce = true;
                }
                this._cleanup();
            }

            abort() {
                if (this._aborting) return;
                this._aborting = true;
                if (this._abortController) this._abortController.abort();
                this._cleanup();
            }

            _cleanup() {
                if (this._cleanupCalled) return;
                this._cleanupCalled = true;

                if (this._processor) {
                    try { this._processor.onaudioprocess = null; } catch { }
                    try { this._processor.disconnect(); } catch { }
                    this._processor = null;
                }

                if (this._dummyAudio) {
                    try { this._dummyAudio.pause(); } catch { }
                    this._dummyAudio.srcObject = null;
                    this._dummyAudio = null;
                }

                if (this._stream) {
                    this._stream.getTracks().forEach((t) => t.stop());
                    this._stream = null;
                }

                if (this._audioCtx && this._audioCtx.state !== "closed") {
                    try { this._audioCtx.close(); } catch { }
                }
                this._audioCtx = null;

                this._dispatchEvent("audioend");
                if (!this._suppressEndOnce) this._dispatchEvent("end");
                else this._suppressEndOnce = false;

                this._aborting = false;
                this._cleanupCalled = false;
                this._processorConnected = false;
                this._switchingSession = false;
                this._bcDone = false;

                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;

                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                this._bcBuffer = "";

                this._preSessionBuffer = [];
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;
            }

            _emitResult(transcript, stability, isFinal) {
                if (isFinal && transcript && transcript === this._lastFinalTranscript) return;

                this._dbg("emit", { transcript, isFinal, utt: this._currentUtteranceId });

                const alt = new SpeechRecognitionAlternative(transcript, stability ?? 0);
                const res = new SpeechRecognitionResult([alt], isFinal);

                const currentResults = [];
                for (let i = 0; i < this._finalResults.length; i++) currentResults.push(this._finalResults[i]);
                if (transcript) currentResults.push(res);

                const event = new SpeechRecognitionEvent("result", {
                    resultIndex: this._finalResults.length,
                    results: new SpeechRecognitionResultList(currentResults)
                });

                this._dispatchEvent("result", event);

                if (isFinal && transcript) {
                    this._finalResults.push(res);
                }
            }

            _handleError(errorType, message) {
                const ev = new SpeechRecognitionErrorEvent("error", { error: errorType, message });
                this._dispatchEvent("error", ev);
                this._cleanup();
            }
        }

        /**
         * Event carrying recognition results.
         *
         * Copies `resultIndex`, `results`, `interpretation` and `emma` from the
         * init dictionary, falling back to 0, an empty array, null and null.
         */
        class SpeechRecognitionEvent extends Event {
            constructor(type, eventInitDict) {
                super(type, eventInitDict);

                const init = eventInitDict ?? {};
                this.resultIndex = init.resultIndex || 0;
                this.results = init.results || [];
                this.interpretation = init.interpretation || null;
                this.emma = init.emma || null;
            }
        }

        /**
         * Event describing a recognition failure.
         *
         * `error` defaults to "unknown" and `message` to an empty string when
         * the init dictionary omits them.
         */
        class SpeechRecognitionErrorEvent extends Event {
            constructor(type, eventInitDict) {
                super(type, eventInitDict);

                const init = eventInitDict ?? {};
                this.error = init.error || "unknown";
                this.message = init.message || "";
            }
        }

        /** One recognition hypothesis: the text and its confidence score. */
        class SpeechRecognitionAlternative {
            constructor(transcript, confidence) {
                Object.assign(this, { transcript, confidence });
            }
        }

        /**
         * Array-like list of alternatives for one recognition result.
         *
         * Alternatives are exposed as indexed properties together with `length`
         * and `isFinal`. The object is iterable, so `for...of` and spread work
         * the same way they do on native indexed interfaces.
         */
        class SpeechRecognitionResult {
            /**
             * @param {ArrayLike<object>} alternatives Alternatives, in order.
             * @param {boolean} isFinal Whether the result is final.
             */
            constructor(alternatives, isFinal) {
                this.isFinal = isFinal;
                this.length = alternatives.length;
                for (let i = 0; i < alternatives.length; i++) this[i] = alternatives[i];
            }

            /**
             * @param {number} index Zero-based position.
             * @returns {object|null} The alternative at `index`, or null when the
             *     index is out of range.
             */
            item(index) {
                // Coerce like an unsigned long so negative values fall out of range.
                const position = index >>> 0;
                return position < this.length ? this[position] : null;
            }

            /** Iterates over the alternatives in index order. */
            [Symbol.iterator]() {
                return Array.prototype.values.call(this);
            }
        }

        /**
         * Array-like list of recognition results.
         *
         * Results are exposed as indexed properties together with `length`.
         * The object is iterable, so `for...of` and spread work the same way
         * they do on native indexed interfaces.
         */
        class SpeechRecognitionResultList {
            /**
             * @param {ArrayLike<object>} results Results, in order.
             */
            constructor(results) {
                this.length = results.length;
                for (let i = 0; i < results.length; i++) this[i] = results[i];
            }

            /**
             * @param {number} index Zero-based position.
             * @returns {object|null} The result at `index`, or null when the
             *     index is out of range.
             */
            item(index) {
                // Coerce like an unsigned long so negative values fall out of range.
                const position = index >>> 0;
                return position < this.length ? this[position] : null;
            }

            /** Iterates over the results in index order. */
            [Symbol.iterator]() {
                return Array.prototype.values.call(this);
            }
        }

        /** Placeholder grammar entry: an empty source and a weight of 1. */
        class SpeechGrammar {
            constructor() {
                Object.assign(this, { src: "", weight: 1 });
            }
        }

        /**
         * Stub grammar list: always empty. The add methods accept calls and do
         * nothing, and `item()` always returns null.
         */
        class SpeechGrammarList {
            constructor() {
                Object.assign(this, { length: 0 });
            }

            // The add* methods are intentional no-ops.
            addFromURI() {
                return undefined;
            }

            addFromUri() {
                return undefined;
            }

            addFromString() {
                return undefined;
            }

            item() {
                return null;
            }
        }

        // Names to expose on `window`, each under both its standard and its
        // `webkit`-prefixed spelling.
        const globals = {
            SpeechRecognition: GoogleWebchannelSpeechRecognition,
            webkitSpeechRecognition: GoogleWebchannelSpeechRecognition,
            SpeechRecognitionEvent,
            webkitSpeechRecognitionEvent: SpeechRecognitionEvent,
            SpeechRecognitionErrorEvent,
            webkitSpeechRecognitionErrorEvent: SpeechRecognitionErrorEvent,
            SpeechGrammar,
            webkitSpeechGrammar: SpeechGrammar,
            SpeechGrammarList,
            webkitSpeechGrammarList: SpeechGrammarList
        };

        for (const [key, val] of Object.entries(globals)) {
            // Remove an existing configurable property first (best effort).
            try {
                if (Object.getOwnPropertyDescriptor(window, key)?.configurable) {
                    delete window[key];
                }
            } catch { }

            // Install an accessor whose setter ignores writes, so later assignments
            // to the global do not replace the polyfill. defineProperty throws when
            // an existing property is non-configurable; that must not prevent the
            // remaining globals from being installed.
            try {
                Object.defineProperty(window, key, {
                    get() {
                        return val;
                    },
                    set() { },
                    configurable: true,
                    enumerable: true
                });
            } catch (err) {
                // Fall back to a plain assignment, which still works for a
                // non-configurable but writable property.
                try {
                    window[key] = val;
                } catch (assignErr) {
                    if (DEV_MODE) console.warn("[polyfill] could not install global:", key, assignErr);
                }
            }
        }

        if (DEV_MODE) console.log("💉 Speech Recognition Polyfill has been injected! (DEV MODE IS ENABLED)");
    })();

})();