Speech Recognition Polyfill Userscript

Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!

2026-03-03 기준 버전입니다. 최신 버전을 확인하세요.

이 스크립트를 설치하려면 Tampermonkey, Greasemonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램을 설치해야 합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Userscripts와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 유저 스크립트 관리자 확장 프로그램이 필요합니다.

(이미 유저 스크립트 관리자가 설치되어 있습니다. 설치를 진행합니다!)

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

(이미 유저 스타일 관리자가 설치되어 있습니다. 설치를 진행합니다!)

// ==UserScript==
// @name         Speech Recognition Polyfill Userscript
// @namespace    http://tampermonkey.net/
// @version      v1.0
// @description  Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!
// @author       apersongithub
// @match        *://*/*
// @icon         https://raw.githubusercontent.com/apersongithub/Speech-Recognition-Polyfill/refs/heads/main/extension/images/microphone.svg
// @grant        none
// @run-at       document-start
// @license MIT
// ==/UserScript==

(function() {
    'use strict';

/**
 * Google Webchannel SpeechRecognition Polyfill
 *
 * This script provides a custom implementation of the standard Web Speech API
 * (SpeechRecognition) by communicating directly with Google's voice servers.
 * It is useful for environments where the native SpeechRecognition API is
 * unavailable, broken, or needs to be bypassed.
 *
 * Key improvements in this version:
 *
 * 1. Better Response Parsing: Correctly handles Google's proprietary server
 *    responses (stripping security prefixes and parsing structured data frames).
 *
 * 2. Organized Audio Sending (Queue): Audio chunks are queued and sent one
 *    by one. This prevents sending too many requests at the exact same time.
 *
 * 3. Error Tolerance: Minor network glitches when sending audio chunks won't
 *    immediately crash the entire transcription process.
 *
 * 4. Reliable Final Results: Hardened logic for determining when a user is
 *    finished speaking, ensuring we pick the most accurate text result.
 *
 * 5. Crash Prevention: Includes safety checks to prevent crashes if asynchronous
 *    network responses arrive after the microphone has already been turned off.
 *
 * 6. Fallback APIs: Automatically cycles through multiple backup API keys and
 *    Google service endpoints (like YouTube's voice search API) if the primary
 *    connection fails.
 */



(function initGoogleSpeechPolyfill() {
    // Master switch for diagnostic logging; debug output is skipped while this is false.
    const DEV_MODE = false;

    // API keys tried in order. The list is mutable on purpose: a key scraped
    // from the page may be moved to the front at startup.
    const API_KEYS = [
        "AIzaSyBm7NubC-Swn1nt2nhYfxb58eCdmL2vCVU", // default
        "AIzaSyBU2xE_JHvB6wag3tMfhxXpg2Q_W8xnM-I", // backup 1
        "AIzaSyD6n9asBjvx1yBHfhFhfw_kpS9Faq0BZHM"  // backup 2
    ];

    // Channel endpoints, each paired with the referrer sent alongside it.
    const API_ENDPOINTS = [
        {
            url: "https://embeddedassistant-webchannel.googleapis.com/google.assistant.embedded.v1.EmbeddedAssistant/Assist/channel",
            referrer: "https://www.google.com/"
        },
        {
            url: "https://embeddedassistant-frontend-clients6.youtube.com/google.assistant.embedded.v1.EmbeddedAssistant/YTAssist/channel",
            referrer: "https://www.youtube.com/"
        }
    ];

    // Cursor into API_ENDPOINTS / API_KEYS; advanced when a combination fails.
    let currentEndpointIndex = 0;
    let currentKeyIndex = 0;

    /** @returns {string} Channel URL of the currently selected endpoint. */
    function getBaseUrl() {
        return API_ENDPOINTS[currentEndpointIndex].url;
    }

    /** @returns {{mode: string, credentials: string, referrer: string}} Fetch options common to every request. */
    function getFetchOpts() {
        const { referrer } = API_ENDPOINTS[currentEndpointIndex];
        return { mode: "cors", credentials: "omit", referrer };
    }

    /** @returns {string} The currently selected API key. */
    function getApiKey() {
        return API_KEYS[currentKeyIndex];
    }

    // A session created ahead of time, and the in-flight promise creating it.
    let preSession = null;
    let preSessionPromise = null;

    /**
     * Looks for an API key embedded in the inline scripts of the Google home page.
     *
     * Only runs when the current page is exactly https://www.google.com/ (host
     * "www.google.com", path "/"). A key is accepted only if it is quoted as the
     * value of "X-Goog-Api-Key", is at least 33 characters long and starts with
     * "AIzaSyBm".
     *
     * @returns {string|null} The first acceptable key, or null when none is found.
     */
    function findApiKey() {
        const { hostname, pathname } = window.location;
        if (hostname !== "www.google.com" || pathname !== "/") return null;

        const keyPattern = /"X-Goog-Api-Key"\s*:\s*"([^"]{33,})"/i;
        for (const { textContent } of document.querySelectorAll("script")) {
            const found = (textContent || "").match(keyPattern);
            if (found && found[1].startsWith("AIzaSyBm")) return found[1];
        }
        return null;
    }

    // If the page exposes its own key, make it the first one tried: drop any
    // existing copy from the list, then put the key at the front.
    const scrapedKey = findApiKey();
    if (scrapedKey) {
        const existingAt = API_KEYS.indexOf(scrapedKey);
        if (existingAt >= 0) API_KEYS.splice(existingAt, 1);
        API_KEYS.unshift(scrapedKey);
    }

    /**
     * Extracts the quoted value that follows an "X-Goog-AuthUser" key in the page.
     *
     * Inline <script> contents are searched first, one at a time; if none
     * matches, the serialized markup of the whole document is searched.
     *
     * @returns {string} The captured value, or "0" when nothing matches.
     */
    function findAuthUser() {
        const authUserPattern = /"X-Goog-AuthUser"\s*:\s*(?:[^"\n]+)?"([^"]+)"/i;

        for (const { textContent } of document.querySelectorAll("script")) {
            const scriptHit = authUserPattern.exec(textContent || "");
            if (scriptHit) return scriptHit[1];
        }

        const pageHit = authUserPattern.exec(document.documentElement.innerHTML);
        return pageHit ? pageHit[1] : "0";
    }
    // Values reused in the headers of every request built by getHeaders().
    const AUTH_USER = findAuthUser();
    const CURRENT_YEAR = String(new Date().getFullYear());
    // First "x-browser-validation" value observed on this page, or null.
    let browserValidation = null;

    // Observe the page's own XHR traffic to learn the validation header.
    // The wrapper must never alter the page's behaviour: the header name is
    // only inspected when it is a string, so unusual arguments (missing,
    // numeric, ...) are forwarded to the native method untouched instead of
    // throwing from inside the hook.
    const _origXhrSetHeader = XMLHttpRequest.prototype.setRequestHeader;
    XMLHttpRequest.prototype.setRequestHeader = function (h, v) {
        if (!browserValidation && typeof h === "string" && h.toLowerCase() === "x-browser-validation") {
            browserValidation = v;
        }
        return _origXhrSetHeader.apply(this, arguments);
    };

    // Fallback: look for the value in the markup already present in the document.
    if (!browserValidation) {
        const valMatch = document.documentElement.innerHTML.match(
            /x-browser-validation['":\s]+([A-Za-z0-9+/=]{20,44})/i
        );
        if (valMatch) browserValidation = valMatch[1];
    }

    /**
     * Builds the header set sent with channel requests.
     *
     * "x-browser-validation" is included only when a value has been captured;
     * when present it sits between "x-goog-authuser" and "x-browser-year".
     *
     * @returns {Object<string, string>} A fresh headers object.
     */
    function getHeaders() {
        const headers = {
            accept: "*/*",
            "accept-language": "en-US,en;q=0.9",
            "content-type": "application/x-www-form-urlencoded",
            "x-browser-channel": "stable",
            "x-browser-copyright": `Copyright ${CURRENT_YEAR} Google LLC. All Rights reserved.`,
            "x-goog-authuser": AUTH_USER
        };
        if (browserValidation) {
            headers["x-browser-validation"] = browserValidation;
        }
        headers["x-browser-year"] = CURRENT_YEAR;
        return headers;
    }

    /**
     * Opens a new channel session by POSTing a bind request.
     *
     * Every API key / endpoint combination is tried at most once, starting from
     * the currently selected one. After each failed attempt the key index
     * advances; when the keys are exhausted it wraps and the next endpoint is
     * selected. The selection persists across calls.
     *
     * @returns {Promise<{sid: string, gsessionid: (string|null), ridCounter: number}>}
     *     The session id, the value of the "x-http-session-id" response header
     *     (null when absent), and the next request id to use.
     * @throws {Error} The error of the last failed attempt (network error,
     *     non-OK status, unparsable body, or a body without a session id).
     */
    async function createSession() {
        const maxAttempts = API_KEYS.length * API_ENDPOINTS.length;
        let lastError = null;

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            // Randomized starting request id for this session.
            const ridCounter = 62480 + Math.floor(Math.random() * 9000);
            const bindUrl =
                `${getBaseUrl()}?VER=8&RID=${ridCounter}&CVER=22&X-HTTP-Session-Id=gsessionid` +
                `&%24httpHeaders=x-goog-api-key%3A${getApiKey()}%0D%0A&zx=${Date.now()}&t=1`;

            try {
                const bindRes = await fetch(bindUrl, {
                    ...getFetchOpts(),
                    method: "POST",
                    headers: getHeaders(),
                    body: "count=0"
                });

                if (!bindRes.ok) {
                    lastError = new Error(`Bind failed with status ${bindRes.status}`);
                } else {
                    // Drop blank lines and digit-only lines, keeping the JSON.
                    const bindText = await bindRes.text();
                    const jsonStr = bindText
                        .split("\n")
                        .filter((line) => line.trim() && !/^\d+$/.test(line.trim()))
                        .join("\n");

                    let parsed;
                    try {
                        parsed = JSON.parse(jsonStr);
                    } catch {
                        // Several back-to-back arrays: wrap them into one array.
                        parsed = JSON.parse("[" + jsonStr.replace(/\]\s*\[/g, "],[") + "]");
                    }

                    // The session id is the string following "c" in a nested
                    // array; when several match, the last one visited wins.
                    let sid = null;
                    (function findSid(arr) {
                        if (!Array.isArray(arr)) return;
                        for (const item of arr) {
                            if (Array.isArray(item)) {
                                if (item[0] === "c" && typeof item[1] === "string") sid = item[1];
                                findSid(item);
                            }
                        }
                    })(parsed);

                    const gsessionid = bindRes.headers.get("x-http-session-id") || null;
                    if (sid) {
                        return { sid, gsessionid, ridCounter: ridCounter + 1 };
                    }

                    // Record this failure too, so the error finally thrown
                    // describes the most recent attempt rather than a stale one.
                    lastError = new Error("Bind response contained no SID");
                }
            } catch (err) {
                lastError = err;
            }

            // Move to next key/endpoint combination
            currentKeyIndex++;
            if (currentKeyIndex >= API_KEYS.length) {
                currentKeyIndex = 0;
                currentEndpointIndex = (currentEndpointIndex + 1) % API_ENDPOINTS.length;
            }
        }

        throw lastError || new Error("No SID or bind failed after trying all backups");
    }

    /**
     * Starts creating a session ahead of time, at most once at a time.
     *
     * While a creation promise exists it is returned as-is. On success the
     * session is stored in preSession. On failure both preSession and the
     * cached promise are cleared so that a later call can try again.
     *
     * @returns {Promise<Object|null>} Resolves to the session, or to null on failure (never rejects).
     */
    function warmSession() {
        if (!preSessionPromise) {
            preSessionPromise = (async () => {
                try {
                    const session = await createSession();
                    preSession = session;
                    return session;
                } catch {
                    preSession = null;
                    preSessionPromise = null;
                    return null;
                }
            })();
        }
        return preSessionPromise;
    }

    // Base class providing event dispatch: the native EventTarget when the
    // environment defines one, otherwise a small listener registry exposing
    // the same three methods.
    const BaseClass =
        typeof EventTarget === "undefined"
            ? class {
                constructor() {
                    // Event type -> array of registered callbacks.
                    this.listeners = {};
                }
                addEventListener(type, callback) {
                    const known = type in this.listeners;
                    if (!known) this.listeners[type] = [];
                    this.listeners[type].push(callback);
                }
                removeEventListener(type, callback) {
                    if (type in this.listeners) {
                        this.listeners[type] = this.listeners[type].filter(
                            (registered) => registered !== callback
                        );
                    }
                }
                dispatchEvent(event) {
                    const callbacks = event.type in this.listeners ? this.listeners[event.type] : null;
                    if (callbacks === null) return true;
                    callbacks.forEach((cb) => cb.call(this, event));
                    return !event.defaultPrevented;
                }
            }
            : EventTarget;

    class GoogleWebchannelSpeechRecognition extends BaseClass {
        constructor() {
            super();

            // W3C properties
            this.continuous = false;
            this.interimResults = false;
            this.lang = "en-US";
            this.maxAlternatives = 1;
            this.serviceURI = "";
            this.grammars = new SpeechGrammarList();

            // Event handlers
            this.onaudiostart = null;
            this.onaudioend = null;
            this.onend = null;
            this.onerror = null;
            this.onnomatch = null;
            this.onresult = null;
            this.onsoundstart = null;
            this.onsoundend = null;
            this.onspeechstart = null;
            this.onspeechend = null;
            this.onstart = null;

            // Runtime state
            this._stream = null;
            this._audioCtx = null;
            this._processor = null;
            this._dummyAudio = null;
            this._processorConnected = false;

            this._aborting = false;
            this._cleanupCalled = false;
            this._switchingSession = false;
            this._abortController = null;

            this._bcDone = false;
            this._bcBuffer = "";

            this._latestHighStabilityTranscript = null;
            this._latestInterimTranscript = null;
            this._latestInterimStability = null;
            this._lastEmittedInterimTranscript = null;
            this._lastFinalTranscript = null;

            this._speechendFired = false;
            this._pendingFinal = false;
            this._finalizedThisUtterance = false;
            this._bestFinalCandidate = null;
            this._bestFinalStability = -1;

            this._finalResults = [];
            this._currentUtteranceId = 0;
            this._lastEmittedUtteranceId = -1;

            // Session IDs
            this._currentSid = null;
            this._currentGsessionid = null;
            this._currentRidCounter = 0;
            this._currentOfs = 1;

            // VAD
            this._vadSilenceFrames = 0;
            this._isVadSpeaking = false;

            // chunk send queue
            this._sendQueue = [];
            this._sendingChunks = false;
            this._consecutiveChunkFailures = 0;
            this._maxConsecutiveChunkFailures = 6;

            // stale-session guards
            this._sessionGen = 0;
            this._activeBackchannelGen = 0;
            this._lastStartId = 0;

            // restart coalescing
            this._restartPromise = null;

            this._suppressEndOnce = false;
        }

        _dbg(...args) {
            if (!DEV_MODE) return;
            if (!GoogleWebchannelSpeechRecognition._forceLog) {
                try {
                    const i = document.createElement('iframe');
                    i.style.display = 'none';
                    i.id = 'speech-polyfill-logger';
                    (document.head || document.documentElement).appendChild(i);
                    GoogleWebchannelSpeechRecognition._forceLog = i.contentWindow.console.log.bind(i.contentWindow.console);
                    // Do NOT remove the iframe, otherwise its console.log is destroyed.
                } catch (e) {
                    const backupLog = console.log || console.info || console.debug;
                    GoogleWebchannelSpeechRecognition._forceLog = backupLog.bind(console);
                }
            }
            try {
                GoogleWebchannelSpeechRecognition._forceLog("[polyfill dbg]", ...args);
            } catch (e) {
                try {
                    console.log("[polyfill dbg]", ...args);
                } catch (e2) { }
            }
        }

        _dispatchEvent(name, eventObj) {
            const ev = eventObj || new Event(name);
            if (typeof this["on" + name] === "function") {
                try {
                    this["on" + name](ev);
                } catch (e) {
                    if (DEV_MODE) console.warn("[polyfill] on" + name + " handler error:", e);
                }
            }
            try {
                this.dispatchEvent(ev);
            } catch (e) {
                if (DEV_MODE) console.warn("[polyfill] dispatchEvent error:", e);
            }
        }

        _norm(t) {
            return (t || "").replace(/\s+/g, " ").trim();
        }

        _stripXssiPrefix(text) {
            return text.replace(/^\)\]\}'\s*\n?/, "");
        }

        _readFrameFromBuffer() {
            this._bcBuffer = this._stripXssiPrefix(this._bcBuffer).replace(/^\s+/, "");
            if (!this._bcBuffer.length) return null;

            const nl = this._bcBuffer.indexOf("\n");
            if (nl === -1) return null;

            const lenStr = this._bcBuffer.slice(0, nl).trim();
            if (!/^\d+$/.test(lenStr)) {
                this._bcBuffer = this._bcBuffer.slice(nl + 1);
                return null;
            }

            const len = Number(lenStr);
            const start = nl + 1;
            const end = start + len;
            if (this._bcBuffer.length < end) return null;

            const payload = this._bcBuffer.slice(start, end);
            this._bcBuffer = this._bcBuffer.slice(end);
            return payload;
        }

        _extractFrameSignals(frameObj) {
            let lastSpeechResults = null;
            let sawEOU = false;
            let sawClose = false;
            let sawNoSpeech = false;

            const walk = (n) => {
                if (n == null) return;

                if (typeof n === "string") {
                    if (n === "close") sawClose = true;
                    if (n.includes("END_OF_UTTERANCE")) sawEOU = true;
                    return;
                }

                if (Array.isArray(n)) {
                    for (const x of n) walk(x);
                    return;
                }

                if (typeof n === "object") {
                    if (n.eventType === "END_OF_UTTERANCE") sawEOU = true;
                    if (n.noSpeech === true) sawNoSpeech = true;

                    if (Array.isArray(n.speechResults) && n.speechResults.length > 0) {
                        lastSpeechResults = n.speechResults;
                    }

                    for (const k of Object.keys(n)) {
                        if (k !== "speechResults" && k !== "transcript" && k !== "stability") {
                            walk(n[k]);
                        }
                    }
                }
            };

            walk(frameObj);

            const STABILITY_THRESHOLD = 0.5;
            let highParts = [];
            let lowParts = [];
            let bestStability = null;

            if (lastSpeechResults) {
                for (const sr of lastSpeechResults) {
                    if (sr.noSpeech === true) sawNoSpeech = true;
                    if (typeof sr.transcript === "string") {
                        const s = typeof sr.stability === "number" ? sr.stability : 0;
                        if (bestStability === null || s > bestStability) bestStability = s;
                        if (s < STABILITY_THRESHOLD) lowParts.push(sr.transcript);
                        else highParts.push(sr.transcript);
                    }
                }
            }

            const highText = highParts.join(" ");
            const lowText = lowParts.join(" ");
            const fullText = (highText + (highText && lowText ? " " : "") + lowText).trim();

            return {
                fullText: fullText || null,
                highText: highText || null,
                bestStability,
                sawEOU,
                sawClose,
                sawNoSpeech
            };
        }

        /**
         * Reads the streaming back-channel response and turns its frames into
         * recognition events.
         *
         * The body is decoded incrementally into `this._bcBuffer`; every complete
         * frame is JSON-parsed and passed to `_extractFrameSignals`. Frames that
         * fail to parse are skipped. The method returns silently as soon as `gen`
         * or `startId` no longer match the active values, i.e. when a newer
         * session or a newer start() has taken over.
         *
         * @param {Response} bcRes Fetch response whose body is the back-channel stream.
         * @param {number} gen Back-channel generation this reader belongs to.
         * @param {number} startId Value of `_lastStartId` when this reader was opened.
         * @returns {Promise<void>}
         */
        async _consumeBackchannel(bcRes, gen, startId) {
            const reader = bcRes.body.getReader();
            const decoder = new TextDecoder();

            this._bcBuffer = "";

            while (!this._aborting) {
                // Staleness is checked both before and after the await, because
                // the active generation can change while the read is pending.
                if (gen !== this._activeBackchannelGen) return;
                if (startId !== this._lastStartId) return;

                const { done, value } = await reader.read();
                if (done) break;

                if (gen !== this._activeBackchannelGen) return;
                if (startId !== this._lastStartId) return;

                this._bcBuffer += decoder.decode(value, { stream: true });

                // Drain every complete frame currently in the buffer.
                while (!this._aborting) {
                    if (gen !== this._activeBackchannelGen) return;
                    if (startId !== this._lastStartId) return;

                    const payload = this._readFrameFromBuffer();
                    if (payload == null) break;

                    let frameObj;
                    try {
                        frameObj = JSON.parse(payload);
                    } catch {
                        continue;
                    }

                    const {
                        fullText,
                        highText,
                        bestStability,
                        sawEOU,
                        sawClose,
                        sawNoSpeech
                    } = this._extractFrameSignals(frameObj);

                    // Text carried by a frame that also signals "close" is not used.
                    const ignoreTextThisFrame = sawClose;

                    this._dbg("frame", {
                        gen, activeGen: this._activeBackchannelGen,
                        startId, activeStart: this._lastStartId,
                        sawEOU, sawClose, fullText, bestStability
                    });

                    // No speech detected: report "nomatch" and shut down.
                    if (sawNoSpeech) {
                        this._dispatchEvent("nomatch");
                        this._bcDone = true;
                        this._cleanup();
                        return;
                    }

                    // Remember the latest hypothesis and offer it as a final candidate.
                    if (fullText && !ignoreTextThisFrame) {
                        this._latestInterimTranscript = fullText;
                        if (highText) this._latestHighStabilityTranscript = highText;
                        if (bestStability !== null) this._latestInterimStability = bestStability;
                        this._considerFinalCandidate(fullText, bestStability);
                    }

                    if (sawEOU) {
                        // End of utterance: fire "speechend" once and mark a final
                        // result as pending; it is emitted by a later frame, by
                        // "close", or when the stream ends.
                        this._pendingFinal = true;
                        if (!this._speechendFired) {
                            this._speechendFired = true;
                            this._dispatchEvent("speechend");
                        }

                        // Text in the same frame is still emitted as an interim
                        // result, unless it repeats the last one for this utterance.
                        if (fullText && !ignoreTextThisFrame && this.interimResults && !this._finalizedThisUtterance) {
                            if (
                                fullText !== this._lastEmittedInterimTranscript ||
                                this._currentUtteranceId !== this._lastEmittedUtteranceId
                            ) {
                                this._lastEmittedInterimTranscript = fullText;
                                this._lastEmittedUtteranceId = this._currentUtteranceId;
                                // 0.01 stands in when the frame carried no stability value.
                                this._emitResult(fullText, bestStability ?? 0.01, false);
                            }
                        }
                    } else if (fullText && !ignoreTextThisFrame) {
                        if (this._pendingFinal) {
                            // First text frame after end-of-utterance: finalize now.
                            this._finalizeCurrentUtteranceOnce();
                        } else if (this.interimResults) {
                            // Ordinary interim update, de-duplicated per utterance.
                            if (
                                fullText !== this._lastEmittedInterimTranscript ||
                                this._currentUtteranceId !== this._lastEmittedUtteranceId
                            ) {
                                this._lastEmittedInterimTranscript = fullText;
                                this._lastEmittedUtteranceId = this._currentUtteranceId;
                                this._emitResult(fullText, bestStability ?? 0.01, false);
                            }
                        }
                    }

                    if (sawClose) {
                        // The server closed the channel: flush any unfinalized text.
                        if (!this._finalizedThisUtterance) {
                            this._finalizeCurrentUtteranceOnce();
                        }
                        // In one-shot mode with a finalized utterance, set the
                        // _suppressEndOnce flag (it is read outside this method).
                        if (!this.continuous && this._finalizedThisUtterance) {
                            this._suppressEndOnce = true;
                        }
                        this._bcDone = true;

                        // Continuous mode restarts the session; otherwise tear down.
                        if (this.continuous && !this._aborting) {
                            await this._restartSession();
                        } else {
                            this._cleanup();
                        }
                        return;
                    }
                }
            }

            // The stream ended (or an abort began) without a "close" frame:
            // emit whatever text is still outstanding.
            if (this._pendingFinal || this._latestInterimTranscript) {
                this._finalizeCurrentUtteranceOnce();
            }
        }

        _considerFinalCandidate(transcript, stability) {
            const t = this._norm(transcript);
            if (!t) return;

            const s = typeof stability === "number" ? stability : 0;
            const currentBestLen = this._bestFinalCandidate ? this._bestFinalCandidate.length : 0;

            if (
                this._bestFinalCandidate == null ||
                s > this._bestFinalStability ||
                (s === this._bestFinalStability && t.length >= currentBestLen)
            ) {
                this._bestFinalCandidate = t;
                this._bestFinalStability = s;
            }
        }

        _finalizeCurrentUtteranceOnce() {
            if (this._finalizedThisUtterance) return;

            let finalText = this._bestFinalCandidate || this._norm(this._latestInterimTranscript);
            if (!finalText) return;

            const finalStability =
                this._bestFinalStability >= 0 ? this._bestFinalStability : this._latestInterimStability ?? 0.99;

            if (finalText === this._lastFinalTranscript) {
                this._finalizedThisUtterance = true;
                return;
            }

            this._dbg("finalizeOnce", {
                pending: this._pendingFinal,
                finalized: this._finalizedThisUtterance,
                best: this._bestFinalCandidate,
                latest: this._latestInterimTranscript
            });

            this._emitResult(finalText, finalStability, true);
            this._lastFinalTranscript = finalText;
            this._finalizedThisUtterance = true;
            this._lastEmittedInterimTranscript = null;
            this._lastEmittedUtteranceId = -1;
        }

        /**
         * Begins recognition: resets per-run state, acquires the microphone,
         * builds the audio graph and opens a server session.
         *
         * "start" and "audiostart" are dispatched right after microphone access
         * is granted, before the session is set up. Failures inside the setup are
         * not rethrown; they are reported through `_handleError` with one of the
         * codes "not-allowed", "audio-capture" or "network".
         *
         * @returns {Promise<void>}
         * @throws {Error} "Already started" (as a rejected promise, since the
         *     method is async) when a stream is active and no abort is in progress.
         */
        async start() {
            if (this._stream && !this._aborting) throw new Error("Already started");

            // New run: bump the counters that let late callbacks from a previous
            // run detect that they are stale.
            this._lastStartId++;
            this._sessionGen++;
            this._activeBackchannelGen = this._sessionGen;
            this._dbg("start", { startId: this._lastStartId, sessionGen: this._sessionGen, continuous: this.continuous });

            // Reset lifecycle flags and per-utterance state.
            this._aborting = false;
            this._cleanupCalled = false;
            this._switchingSession = false;
            this._bcDone = false;
            this._speechendFired = false;
            this._pendingFinal = false;
            this._finalizedThisUtterance = false;
            this._bestFinalCandidate = null;
            this._bestFinalStability = -1;

            this._latestInterimTranscript = null;
            this._latestInterimStability = null;
            this._lastEmittedInterimTranscript = null;
            this._lastFinalTranscript = null;
            this._finalResults = [];
            this._currentUtteranceId = 0;
            this._lastEmittedUtteranceId = -1;

            this._vadSilenceFrames = 0;
            this._isVadSpeaking = false;

            this._sendQueue = [];
            this._sendingChunks = false;
            this._consecutiveChunkFailures = 0;

            this._abortController = new AbortController();

            try {
                if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
                    throw new Error("getUserMedia not supported (requires HTTPS)");
                }

                this._stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                this._dispatchEvent("start");
                this._dispatchEvent("audiostart");

                // Make sure a pre-created session attempt has settled (it resolves
                // to null on failure rather than rejecting).
                await warmSession();

                const AudioContext = window.AudioContext || window.webkitAudioContext;
                if (!AudioContext) throw new Error("AudioContext not supported");
                this._audioCtx = new AudioContext();

                // Attach the stream to a muted <audio> element as well; play()
                // failures are deliberately ignored.
                // NOTE(review): presumably this keeps the capture stream active in
                // some browsers — confirm the intent.
                this._dummyAudio = new Audio();
                this._dummyAudio.muted = true;
                this._dummyAudio.srcObject = this._stream;
                try {
                    const p = this._dummyAudio.play();
                    if (p?.catch) p.catch(() => { });
                } catch { }

                // Microphone -> script processor (8192-sample buffers, mono in/out) -> destination.
                const source = this._audioCtx.createMediaStreamSource(this._stream);
                const processor = this._audioCtx.createScriptProcessor(8192, 1, 1);
                source.connect(processor);
                processor.connect(this._audioCtx.destination);

                if (this._audioCtx.state === "suspended") await this._audioCtx.resume();

                this._processor = processor;
                // preSession may be null here; _setupSession receives it as given.
                await this._setupSession(preSession);
            } catch (err) {
                if (DEV_MODE) console.error("[polyfill] start error:", err);

                // Map the failure onto a SpeechRecognition error code.
                if (err.name === "NotAllowedError") {
                    this._handleError("not-allowed", "NO_MICROPHONE_PERMISSION");
                } else if (
                    err.name === "NotFoundError" ||
                    err.name === "NotReadableError" ||
                    err.name === "OverconstrainedError" ||
                    err.name === "SecurityError" ||
                    (err.message && (err.message.includes("getUserMedia") || err.message.includes("AudioContext")))
                ) {
                    // Device problems, plus the two "not supported" errors thrown above.
                    this._handleError("audio-capture", err.message);
                } else {
                    this._handleError("network", err.message || "Unknown network error");
                }
            }
        }

        async _setupSession(initialSession = null) {
            try {
                let session = initialSession;
                if (!session) {
                    if (preSessionPromise) await preSessionPromise;
                    session = preSession || (await createSession());
                }
                preSession = null;

                const { sid, gsessionid } = session;
                let { ridCounter } = session;

                const backchannelUrl =
                    `${getBaseUrl()}?` +
                    (gsessionid ? `gsessionid=${gsessionid}&` : "") +
                    `VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;

                const myGen = ++this._sessionGen;
                this._activeBackchannelGen = myGen;
                const myStartId = this._lastStartId;

                this._dbg("open backchannel", { myGen, myStartId, sid });

                fetch(backchannelUrl, {
                    ...getFetchOpts(),
                    method: "GET",
                    headers: { ...getHeaders(), "content-type": undefined },
                    signal: this._abortController.signal
                })
                    .then(async (bcRes) => {
                        if (myGen !== this._activeBackchannelGen) return;
                        if (myStartId !== this._lastStartId) return;
                        await this._consumeBackchannel(bcRes, myGen, myStartId);
                    })
                    .catch((e) => {
                        if (myGen !== this._activeBackchannelGen) return;
                        if (myStartId !== this._lastStartId) return;
                        if (e.name !== "AbortError") this._handleError("network", e.message);
                    });

                const configRid = ridCounter++;
                const assistConfig = {
                    config: {
                        dialogStateIn: { languageCode: this.lang },
                        deviceConfig: { deviceId: "example", deviceModelId: "example" },
                        audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
                        audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
                        requestType: 4
                    }
                };

                const configUrl =
                    `${getBaseUrl()}?VER=8` +
                    (gsessionid ? `&gsessionid=${gsessionid}` : "") +
                    `&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;

                const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
                    JSON.stringify(assistConfig)
                )}`;

                fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });

                this._currentSid = sid;
                this._currentGsessionid = gsessionid;
                this._currentRidCounter = ridCounter;
                this._currentOfs = 1;

                if (!this._processorConnected) {
                    this._processorConnected = true;
                    const processorRef = this._processor;

                    this._processor.onaudioprocess = (e) => {
                        if (this._aborting || this._cleanupCalled || this._switchingSession) return;
                        if (!this._processor || this._processor !== processorRef) return;
                        if (!this._audioCtx || !this._stream) return;
                        if (this._bcDone) return;

                        const float32 = e.inputBuffer.getChannelData(0);

                        let sumSquares = 0;
                        for (let i = 0; i < float32.length; i++) sumSquares += float32[i] ** 2;
                        const rms = Math.sqrt(sumSquares / float32.length);

                        const isSpeech = rms >= 0.01;
                        if (isSpeech) {
                            this._vadSilenceFrames = 0;
                            this._isVadSpeaking = true;
                        } else {
                            this._vadSilenceFrames++;
                        }

                        // Keep sending a short tail of silence so server can endpoint/finalize.
                        // ~8192 samples per frame; at 48kHz that's ~170ms/frame.
                        const TRAILING_SILENCE_FRAMES = 12; // about ~2s tail max
                        const shouldSend =
                            isSpeech ||
                            (this._isVadSpeaking && this._vadSilenceFrames <= TRAILING_SILENCE_FRAMES);

                        if (!shouldSend) {
                            this._isVadSpeaking = false;
                            return;
                        }

                        if (!this._audioCtx) return;
                        const originalSampleRate = this._audioCtx.sampleRate;
                        if (!originalSampleRate) return;

                        const ratio = originalSampleRate / 16000;
                        const targetLength = Math.round(float32.length / ratio);
                        const int16 = new Int16Array(targetLength);

                        for (let i = 0; i < targetLength; i++) {
                            const srcIndex = Math.min(Math.floor(i * ratio), float32.length - 1);
                            int16[i] = Math.max(-1, Math.min(1, float32[srcIndex])) * 0x7fff;
                        }

                        const uint8 = new Uint8Array(int16.buffer);
                        let binary = "";
                        for (let i = 0; i < uint8.length; i += 8192) {
                            binary += String.fromCharCode(...uint8.subarray(i, i + 8192));
                        }
                        const b64 = btoa(binary);

                        this._enqueueChunk(b64);
                    };
                }
            } catch (err) {
                this._handleError("network", err.message);
            }
        }

        _enqueueChunk(audioBase64) {
            if (this._aborting || this._cleanupCalled || this._switchingSession) return;
            if (this._pendingFinal) return;
            this._sendQueue.push(audioBase64);
            if (!this._sendingChunks) this._drainChunkQueue();
        }

        async _drainChunkQueue() {
            if (this._sendingChunks) return;
            this._sendingChunks = true;

            try {
                while (this._sendQueue.length && !this._aborting && !this._cleanupCalled && !this._switchingSession) {
                    if (!this._currentSid || !this._abortController) break;

                    const audioBase64 = this._sendQueue.shift();

                    const chunkRid = this._currentRidCounter++;
                    const cSid = this._currentSid;
                    const cGsessionid = this._currentGsessionid;
                    const cOfs = this._currentOfs++;

                    const chunkUrl =
                        `${getBaseUrl()}?VER=8` +
                        (cGsessionid ? `&gsessionid=${cGsessionid}` : "") +
                        `&SID=${cSid}&RID=${chunkRid}&AID=0&zx=${Date.now()}&t=1`;

                    const chunkPayload = `count=1&ofs=${cOfs}&req0___data__=${encodeURIComponent(
                        JSON.stringify({ audioIn: audioBase64 })
                    )}`;

                    try {
                        const res = await fetch(chunkUrl, {
                            ...getFetchOpts(),
                            method: "POST",
                            headers: getHeaders(),
                            body: chunkPayload,
                            signal: this._abortController.signal
                        });

                        if (!res.ok) {
                            this._consecutiveChunkFailures++;
                            if (DEV_MODE) console.warn("[polyfill] chunk non-ok:", res.status);

                            if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                                if (DEV_MODE) console.warn("[polyfill] too many chunk failures, soft-restarting session");
                                await this._restartSession();
                                this._consecutiveChunkFailures = 0;
                            }
                        } else {
                            this._consecutiveChunkFailures = 0;
                        }
                    } catch (err) {
                        if (err.name === "AbortError") break;

                        this._consecutiveChunkFailures++;
                        if (DEV_MODE) console.warn("[polyfill] chunk send error:", err.message);

                        if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                            if (DEV_MODE) console.warn("[polyfill] too many chunk exceptions, soft-restarting session");
                            await this._restartSession();
                            this._consecutiveChunkFailures = 0;
                        }
                    }
                }
            } finally {
                this._sendingChunks = false;
            }
        }

        async _restartSession(initialSession = null) {
            if (!this.continuous) return;
            if (this._aborting || this._cleanupCalled) return;
            if (this._restartPromise) return this._restartPromise;

            this._dbg("restart requested", {
                switching: this._switchingSession,
                hasRestartPromise: !!this._restartPromise,
                bcDone: this._bcDone
            });

            this._restartPromise = (async () => {
                if (this._abortController) this._abortController.abort();
                this._abortController = new AbortController();
                this._switchingSession = true;

                this._bcDone = false;
                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                this._lastEmittedInterimTranscript = null;
                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._currentUtteranceId++;

                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;

                try {
                    let session = initialSession || preSession;
                    if (!session) session = await warmSession();
                    preSession = null;
                    preSessionPromise = null;
                    if (!session) throw new Error("Failed to warm session");

                    const { sid, gsessionid } = session;
                    let { ridCounter } = session;

                    const backchannelUrl =
                        `${getBaseUrl()}?` +
                        (gsessionid ? `gsessionid=${gsessionid}&` : "") +
                        `VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;

                    const myGen = ++this._sessionGen;
                    this._activeBackchannelGen = myGen;
                    const myStartId = this._lastStartId;

                    this._dbg("open backchannel (restart)", { myGen, myStartId, sid });

                    fetch(backchannelUrl, {
                        ...getFetchOpts(),
                        method: "GET",
                        headers: { ...getHeaders(), "content-type": undefined },
                        signal: this._abortController.signal
                    })
                        .then(async (bcRes) => {
                            if (myGen !== this._activeBackchannelGen) return;
                            if (myStartId !== this._lastStartId) return;
                            await this._consumeBackchannel(bcRes, myGen, myStartId);
                        })
                        .catch((e) => {
                            if (myGen !== this._activeBackchannelGen) return;
                            if (myStartId !== this._lastStartId) return;
                            if (e.name !== "AbortError") this._handleError("network", e.message);
                        });

                    const configRid = ridCounter++;
                    const assistConfig = {
                        config: {
                            dialogStateIn: { languageCode: this.lang },
                            deviceConfig: { deviceId: "example", deviceModelId: "example" },
                            audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
                            audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
                            requestType: 4
                        }
                    };

                    const configUrl =
                        `${getBaseUrl()}?VER=8` +
                        (gsessionid ? `&gsessionid=${gsessionid}` : "") +
                        `&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;

                    const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
                        JSON.stringify(assistConfig)
                    )}`;

                    fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });

                    this._currentSid = sid;
                    this._currentGsessionid = gsessionid;
                    this._currentRidCounter = ridCounter;
                    this._currentOfs = 1;

                    this._switchingSession = false;
                } catch (err) {
                    this._switchingSession = false;
                    this._handleError("network", err.message);
                }
            })().finally(() => {
                this._restartPromise = null;
            });

            return this._restartPromise;
        }

        stop() {
            if (this._aborting) return;
            this._aborting = true;

            if (this._pendingFinal) this._finalizeCurrentUtteranceOnce();
            else if (this._latestInterimTranscript && this._norm(this._latestInterimTranscript) !== this._lastFinalTranscript) {
                this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
                this._finalizeCurrentUtteranceOnce();
            }

            if (this._abortController) this._abortController.abort();
            if (!this.continuous && (this._pendingFinal || this._latestInterimTranscript)) {
                this._suppressEndOnce = true;
            }
            this._cleanup();
        }

        abort() {
            if (this._aborting) return;
            this._aborting = true;
            if (this._abortController) this._abortController.abort();
            this._cleanup();
        }

        _cleanup() {
            if (this._cleanupCalled) return;
            this._cleanupCalled = true;

            if (this._processor) {
                try { this._processor.onaudioprocess = null; } catch { }
                try { this._processor.disconnect(); } catch { }
                this._processor = null;
            }

            if (this._dummyAudio) {
                try { this._dummyAudio.pause(); } catch { }
                this._dummyAudio.srcObject = null;
                this._dummyAudio = null;
            }

            if (this._stream) {
                this._stream.getTracks().forEach((t) => t.stop());
                this._stream = null;
            }

            if (this._audioCtx && this._audioCtx.state !== "closed") {
                try { this._audioCtx.close(); } catch { }
            }
            this._audioCtx = null;

            this._dispatchEvent("audioend");
            if (!this._suppressEndOnce) this._dispatchEvent("end");
            else this._suppressEndOnce = false;

            this._aborting = false;
            this._cleanupCalled = false;
            this._processorConnected = false;
            this._switchingSession = false;
            this._bcDone = false;

            this._speechendFired = false;
            this._pendingFinal = false;
            this._finalizedThisUtterance = false;
            this._bestFinalCandidate = null;
            this._bestFinalStability = -1;

            this._latestInterimTranscript = null;
            this._latestInterimStability = null;
            this._lastEmittedInterimTranscript = null;
            this._lastFinalTranscript = null;

            this._currentUtteranceId = 0;
            this._lastEmittedUtteranceId = -1;

            this._bcBuffer = "";

            this._sendQueue = [];
            this._sendingChunks = false;
            this._consecutiveChunkFailures = 0;
        }

        _emitResult(transcript, stability, isFinal) {
            if (isFinal && transcript && transcript === this._lastFinalTranscript) return;

            this._dbg("emit", { transcript, isFinal, utt: this._currentUtteranceId });

            const alt = new SpeechRecognitionAlternative(transcript, stability ?? 0);
            const res = new SpeechRecognitionResult([alt], isFinal);

            const currentResults = [];
            for (let i = 0; i < this._finalResults.length; i++) currentResults.push(this._finalResults[i]);
            if (transcript) currentResults.push(res);

            const event = new SpeechRecognitionEvent("result", {
                resultIndex: this._finalResults.length,
                results: new SpeechRecognitionResultList(currentResults)
            });

            this._dispatchEvent("result", event);

            if (isFinal && transcript) {
                this._finalResults.push(res);
            }
        }

        _handleError(errorType, message) {
            const ev = new SpeechRecognitionErrorEvent("error", { error: errorType, message });
            this._dispatchEvent("error", ev);
            this._cleanup();
        }
    }

    /**
     * Polyfilled `SpeechRecognitionEvent`: a DOM event that also carries the
     * recognition results.
     */
    class SpeechRecognitionEvent extends Event {
        /**
         * @param {string} type - Event type, e.g. "result".
         * @param {Object} [eventInitDict] - Event init options, optionally with
         *     `resultIndex`, `results`, `interpretation` and `emma`.
         */
        constructor(type, eventInitDict) {
            super(type, eventInitDict);

            const { resultIndex, results, interpretation, emma } = eventInitDict ?? {};

            // Missing or falsy fields fall back to neutral defaults.
            this.resultIndex = resultIndex || 0;
            this.results = results || [];
            this.interpretation = interpretation || null;
            this.emma = emma || null;
        }
    }

    /**
     * Polyfilled `SpeechRecognitionErrorEvent`: a DOM event with an error code
     * and a descriptive message.
     */
    class SpeechRecognitionErrorEvent extends Event {
        /**
         * @param {string} type - Event type, e.g. "error".
         * @param {Object} [eventInitDict] - Event init options, optionally with
         *     `error` and `message`.
         */
        constructor(type, eventInitDict) {
            super(type, eventInitDict);

            const { error, message } = eventInitDict ?? {};

            this.error = error || "unknown";
            this.message = message || "";
        }
    }

    /** Polyfilled `SpeechRecognitionAlternative`: one transcript with its confidence. */
    class SpeechRecognitionAlternative {
        /**
         * @param {string} transcript - Recognized text.
         * @param {number} confidence - Confidence value stored alongside the transcript.
         */
        constructor(transcript, confidence) {
            Object.assign(this, { transcript, confidence });
        }
    }

    /**
     * Polyfilled `SpeechRecognitionResult`: an array-like, iterable list of
     * alternatives for one utterance, plus an `isFinal` flag.
     */
    class SpeechRecognitionResult {
        /**
         * @param {Array} alternatives - Alternatives, copied onto numeric indices.
         * @param {boolean} isFinal - Whether this result is final.
         */
        constructor(alternatives, isFinal) {
            this.isFinal = isFinal;
            this.length = alternatives.length;
            for (let i = 0; i < alternatives.length; i++) this[i] = alternatives[i];
        }

        /**
         * Return the alternative at `index`, or `null` when the index is out of
         * range (matching the native interface rather than returning `undefined`).
         *
         * @param {number} index - Zero-based position, coerced to an unsigned integer.
         * @returns {?Object} The alternative, or `null`.
         */
        item(index) {
            const i = index >>> 0;
            return i < this.length ? this[i] : null;
        }

        /**
         * Make the result usable with `for...of` and spread, as the native
         * array-like interface is.
         *
         * @returns {Iterator} Iterator over the alternatives in index order.
         */
        [Symbol.iterator]() {
            return Array.prototype.values.call(this);
        }
    }

    /**
     * Polyfilled `SpeechRecognitionResultList`: an array-like, iterable list of
     * recognition results, as exposed on a result event's `results`.
     */
    class SpeechRecognitionResultList {
        /**
         * @param {Array} results - Results, copied onto numeric indices.
         */
        constructor(results) {
            this.length = results.length;
            for (let i = 0; i < results.length; i++) this[i] = results[i];
        }

        /**
         * Return the result at `index`, or `null` when the index is out of range
         * (matching the native interface rather than returning `undefined`).
         *
         * @param {number} index - Zero-based position, coerced to an unsigned integer.
         * @returns {?Object} The result, or `null`.
         */
        item(index) {
            const i = index >>> 0;
            return i < this.length ? this[i] : null;
        }

        /**
         * Make the list usable with `for...of` and spread, so page code such as
         * `for (const result of event.results)` works as it does natively.
         *
         * @returns {Iterator} Iterator over the results in index order.
         */
        [Symbol.iterator]() {
            return Array.prototype.values.call(this);
        }
    }

    /** Polyfilled `SpeechGrammar` placeholder: an empty source with weight 1. */
    class SpeechGrammar {
        constructor() {
            Object.assign(this, { src: "", weight: 1 });
        }
    }

    /**
     * Polyfilled `SpeechGrammarList` stub. It is always empty: the add methods
     * accept calls and do nothing.
     */
    class SpeechGrammarList {
        constructor() {
            // Nothing is ever stored, so the list length stays at zero.
            this.length = 0;
        }

        /** No-op. */
        addFromURI() { }

        /** No-op; alternate casing of `addFromURI`. */
        addFromUri() { }

        /** No-op. */
        addFromString() { }

        /**
         * @returns {null} Always `null`, since the list holds no grammars.
         */
        item() {
            return null;
        }
    }

    // Every Web Speech API global (standard and webkit-prefixed) mapped to its
    // polyfill implementation.
    const globals = {
        SpeechRecognition: GoogleWebchannelSpeechRecognition,
        webkitSpeechRecognition: GoogleWebchannelSpeechRecognition,
        SpeechRecognitionEvent,
        webkitSpeechRecognitionEvent: SpeechRecognitionEvent,
        SpeechRecognitionErrorEvent,
        webkitSpeechRecognitionErrorEvent: SpeechRecognitionErrorEvent,
        SpeechGrammar,
        webkitSpeechGrammar: SpeechGrammar,
        SpeechGrammarList,
        webkitSpeechGrammarList: SpeechGrammarList
    };

    // Install each polyfill on `window` behind a getter with a no-op setter, so
    // later assignments to these names cannot replace the polyfill.
    for (const [key, val] of Object.entries(globals)) {
        // Remove any existing configurable definition first (best effort).
        try {
            if (Object.getOwnPropertyDescriptor(window, key)?.configurable) {
                delete window[key];
            }
        } catch { }

        try {
            Object.defineProperty(window, key, {
                get() {
                    return val;
                },
                set() { },
                configurable: true,
                enumerable: true
            });
        } catch (err) {
            // An existing non-configurable property makes defineProperty throw.
            // Fall back to plain assignment and keep installing the remaining
            // globals instead of aborting the whole loop.
            try { window[key] = val; } catch { }
            if (DEV_MODE) console.warn("[polyfill] could not define global:", key, err);
        }
    }

    if (DEV_MODE) console.log("🧩 Google Webchannel SpeechRecognition Polyfill injected!");
})();

    })();