Speech Recognition Polyfill Userscript

Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!

Dovrai installare un'estensione come Tampermonkey, Greasemonkey o Violentmonkey per installare questo script.

You will need to install an extension such as Tampermonkey to install this script.

Dovrai installare un'estensione come Tampermonkey o Violentmonkey per installare questo script.

Dovrai installare un'estensione come Tampermonkey o Userscripts per installare questo script.

Dovrai installare un'estensione come ad esempio Tampermonkey per installare questo script.

Dovrai installare un gestore di script utente per installare questo script.

(Ho già un gestore di script utente, lasciamelo installare!)

Dovrai installare un'estensione come ad esempio Stylus per installare questo stile.

Dovrai installare un'estensione come ad esempio Stylus per installare questo stile.

Dovrai installare un'estensione come ad esempio Stylus per installare questo stile.

Dovrai installare un'estensione per la gestione degli stili utente per installare questo stile.

Dovrai installare un'estensione per la gestione degli stili utente per installare questo stile.

Dovrai installare un'estensione per la gestione degli stili utente per installare questo stile.

(Ho già un gestore di stile utente, lasciamelo installare!)

// ==UserScript==
// @name         Speech Recognition Polyfill Userscript
// @namespace    http://tampermonkey.net/
// @version      1.1
// @description  Get extremely fast, free, & accurate server-side multilingual Speech Recognition. Polyfills Web Speech API on any browser!
// @author       apersongithub
// @match        *://*/*
// @icon         https://raw.githubusercontent.com/apersongithub/Speech-Recognition-Polyfill/refs/heads/main/extension/images/microphone.svg
// @grant        none
// @run-at       document-start
// @license MIT
// ==/UserScript==

(function () {
    'use strict';

    /**
     * Speech Recognition Polyfill Userscript
     *
     * This script provides a custom implementation of the standard Web Speech API
     * (SpeechRecognition) by communicating directly with PUBLIC Google's Voice APIs.
     * It is useful for environments where the native SpeechRecognition API is
     * unavailable, broken, or needs to be bypassed.
     *
     * Credits & Legal:
     *
     * - The original streaming architecture, protobuf definitions, and advanced cloud endpoints 
     *   were designed and engineered entirely by Google. 
     * - All voice transcription, inference models, and internal APIs utilized by this polyfill
     *   belong to Google LLC. All rights are reserved to them. 
     * - DISCLAIMER: Use at your own risk, the author of this script is not responsible for any service bans, 
     *   misuse, abuse, or limits exceeded on the provided Google API keys or webchannel endpoints. These endpoints 
     *   are not intended for use outside of their intended services and may be subject to change or discontinuation at any time.
     *
     * Key features:
     *
     * 1. Dual Backend Support (v1 & v2): 
     *      - 'v1' uses the Embedded Assistant API (JSON payloads, No punctuation)
     *      - 'v2' uses the Cloud Speech v2 StreamingRecognize API (Binary Protobuf payloads, Punctuation)
     *    Controlled via the SERVER_MODE constant.
     *
     * 2. Organized Audio Sending (Queue): Audio chunks are queued and sent one
     *    by one. This prevents sending too many requests at the exact same time.
     *
     * 3. Error Tolerance: Minor network glitches when sending audio chunks won't
     *    immediately crash the entire transcription process.
     *
     * 4. Reliable Final Results: Hardened logic for determining when a user is
     *    finished speaking, ensuring we pick the most accurate text result.
     *
     * 5. Crash Prevention: Includes safety checks to prevent crashes if asynchronous
     *    network responses arrive after the microphone has already been turned off.
     *
     * 6. Fallback APIs: Automatically cycles through multiple backup API keys
     *    if the primary connection fails.
     *
     * 7. Voice Activity Detection (VAD): Features custom trailing-silence detection
     *    logic to perfectly endpoint speech and reduce transcription latency.
     *
     * 8. Audio Pre-Processing: Injects a Web Audio API GainNode to slightly amplify
     *    the user's voice, drastically increasing Google's dictation accuracy.
     *
     * 9. Space Normalization: Cleans up formatting artifacts and ensures transcript
     *    text has proper spacing and punctuation handling automatically.
     *
     * 10. Spam Prevention: Incorporates self 'rate-limiting' features, such as processing
     *     audio frame streaks to detect idle stalled states, and safely draining
     *     chunk queues synchronously to avoid overloading Google's servers.
     */

    // If all else fails, try using the extension equivalent of this script.
    // https://github.com/apersongithub/Speech-Recognition-Polyfill

    (function initialization() {

        // =========================================================================
        // CONFIGURATION BEGINS
        // =========================================================================

        // Enable debug logging (helpful for diagnosing issues)
        const DEV_MODE = false;

        // "v2" (v2 backend, default) or "v1" (v1 backend)
        // See BACKEND_PROFILES below for the keys/endpoints each mode uses.
        const SERVER_MODE = "v2";

        // How long to wait before tearing down the PC Microphone hardware after dictation ends.
        // (Leaving it hot for 5 seconds makes starting back-to-back commands slightly faster)
        // Feel free to make it 0 seconds if you want the mic to turn off right after dictation ends.
        const MIC_IDLE_TIMEOUT_MS = 5000;

        // =========================================================================
        // BACKEND PROFILES: API Keys & Endpoints Reference
        // =========================================================================
        // This dictionary stores the configuration for both 'v1' and 'v2' Google
        // voice APIs. The polyfill will cycle through "backup" keys automatically.
        // There are still other backends that could be added to the script that
        // would need to be reverse engineered (Ex: Google Meet Live Captions, etc).
        // =========================================================================

        const BACKEND_PROFILES = {
            v1: {
                name: "v1", // Resembles Google Cloud Speech v1
                // Keys are tried in order; rotateApiKey() advances on failure.
                apiKeys: [
                    "AIzaSyBm7NubC-Swn1nt2nhYfxb58eCdmL2vCVU",
                    "AIzaSyBU2xE_JHvB6wag3tMfhxXpg2Q_W8xnM-I"
                ],
                // Each endpoint is paired with the referrer its service expects
                // (sent via getFetchOpts()).
                endpoints: [
                    {
                        url: "https://embeddedassistant-webchannel.googleapis.com/google.assistant.embedded.v1.EmbeddedAssistant/Assist/channel", // Google Search Voice API
                        referrer: "https://www.google.com/"
                    },
                    {
                        url: "https://embeddedassistant-frontend-clients6.youtube.com/google.assistant.embedded.v1.EmbeddedAssistant/YTAssist/channel", // YouTube Search Voice API
                        referrer: "https://www.youtube.com/"
                    }
                ]
            },
            v2: {
                name: "v2", // Resembles Google Cloud Speech v2
                apiKeys: [
                    "AIzaSyBm7NubC-Swn1nt2nhYfxb58eCdmL2vCVU",
                    "AIzaSyD6n9asBjvx1yBHfhFhfw_kpS9Faq0BZHM"
                ],
                endpoints: [
                    {
                        url: "https://speechs3proto2-pa.googleapis.com/s3web/prod/streaming/channel", // Gemini Voice API
                        referrer: "https://gemini.google.com/"
                    }
                ]
            }
        };

        // =========================================================================
        // CONFIGURATION ENDS
        // =========================================================================

        // =========================================================================
        // Protobuf Encoder / Decoder Helpers (used by v2 backend)
        // Wire types: 0=varint, 2=length-delimited, 5=32-bit
        // =========================================================================

        // Encodes a number as a Protobuf varint: 7 payload bits per byte,
        // high bit set on every byte except the last.
        function pbEncodeVarint(value) {
            let v = value >>> 0; // treat input as an unsigned 32-bit integer
            const out = [];
            while (v > 0x7f) {
                out.push((v & 0x7f) | 0x80); // low 7 bits + continuation flag
                v >>>= 7;
            }
            out.push(v & 0x7f); // final byte, continuation flag clear
            return new Uint8Array(out);
        }

        // Encodes a Protobuf field key: field number in the upper bits,
        // wire type in the low 3 bits, serialized as a varint.
        function pbEncodeTag(fieldNum, wireType) {
            const key = (fieldNum << 3) | wireType;
            return pbEncodeVarint(key);
        }

        // Joins any number of Uint8Arrays into one contiguous Uint8Array.
        function pbConcat(...arrays) {
            let total = 0;
            for (const a of arrays) total += a.length;
            const out = new Uint8Array(total);
            let cursor = 0;
            for (const a of arrays) {
                out.set(a, cursor);
                cursor += a.length;
            }
            return out;
        }

        // Serializes `str` as a length-delimited (wire type 2) Protobuf field:
        // tag, UTF-8 byte length, then the UTF-8 bytes themselves.
        function pbEncodeStringField(fieldNum, str) {
            const utf8 = new TextEncoder().encode(str);
            const header = pbConcat(pbEncodeTag(fieldNum, 2), pbEncodeVarint(utf8.length));
            return pbConcat(header, utf8);
        }

        // Serializes raw bytes as a length-delimited (wire type 2) Protobuf field.
        // Accepts a Uint8Array or anything Uint8Array's constructor accepts.
        function pbEncodeBytesField(fieldNum, bytes) {
            const payload = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
            const header = pbConcat(pbEncodeTag(fieldNum, 2), pbEncodeVarint(payload.length));
            return pbConcat(header, payload);
        }

        // Serializes `value` as a varint (wire type 0) Protobuf field.
        function pbEncodeVarintField(fieldNum, value) {
            const tag = pbEncodeTag(fieldNum, 0);
            return pbConcat(tag, pbEncodeVarint(value));
        }

        // Wraps an already-encoded submessage as a length-delimited Protobuf field.
        function pbEncodeMessageField(fieldNum, submessageBytes) {
            const sizePrefix = pbEncodeVarint(submessageBytes.length);
            return pbConcat(pbEncodeTag(fieldNum, 2), sizePrefix, submessageBytes);
        }

        // --- Protobuf Decoder ---

        // Decodes a raw Protobuf buffer into a dictionary of field numbers to arrays of values.
        // Handles wire types 0 (varint), 1 (64-bit double), 2 (length-delimited),
        // and 5 (32-bit float); an unknown wire type aborts the scan and any
        // remaining bytes are dropped.
        function pbDecode(buffer) {
            // Accepts either an ArrayBuffer or a typed-array view over one.
            const view = new DataView(buffer.buffer || buffer, buffer.byteOffset || 0, buffer.byteLength || buffer.length);
            const fields = {};
            let pos = 0;

            // NOTE(review): accumulates with 32-bit shifts, so varints wider than
            // 32 bits are truncated — sufficient for the field numbers/flags seen
            // in this protocol, but not a general-purpose decoder.
            function readVarint() {
                let result = 0, shift = 0;
                while (pos < view.byteLength) {
                    const b = view.getUint8(pos++);
                    result |= (b & 0x7f) << shift;
                    if (!(b & 0x80)) return result >>> 0;
                    shift += 7;
                }
                return result >>> 0;
            }

            while (pos < view.byteLength) {
                const tag = readVarint();
                const fieldNum = tag >>> 3; // upper bits: field number
                const wireType = tag & 0x07; // low 3 bits: wire type

                let value;
                if (wireType === 0) {
                    value = readVarint();
                } else if (wireType === 2) {
                    const len = readVarint();
                    // Zero-copy window into the original buffer (not a copy).
                    value = new Uint8Array(view.buffer, view.byteOffset + pos, len);
                    pos += len;
                } else if (wireType === 5) {
                    value = view.getFloat32(pos, true);
                    pos += 4;
                } else if (wireType === 1) {
                    value = view.getFloat64(pos, true);
                    pos += 8;
                } else {
                    break; // unknown wire type
                }

                // Repeated fields accumulate: fields[n] is always an array.
                if (!fields[fieldNum]) fields[fieldNum] = [];
                fields[fieldNum].push(value);
            }
            return fields;
        }

        // Decodes a UTF-8 byte sequence (from a wire-type-2 field) into a string.
        function pbDecodeString(bytes) {
            const utf8 = new TextDecoder();
            return utf8.decode(bytes);
        }

        // =========================================================================
        // V2 Config & Audio Builders (v2 backend)
        // =========================================================================

        // Builds the initial StreamingRecognizeRequest protobuf payload containing the recognition configuration.
        // The large field numbers (293000, 293100, 294000, 294500) are
        // reverse-engineered from Google's private streaming protocol — do not
        // change them or reorder the concatenated fields.
        function buildStreamingConfigProto(lang, interimResults) {
            const langCode = lang || "en-US";

            // Language-selection submessage.
            const langInner = pbEncodeStringField(1, langCode);
            const langWrapper = pbEncodeMessageField(2, langInner);
            const field293000 = pbEncodeMessageField(293000, langWrapper);

            // Audio config: 16 kHz sample rate as a little-endian float32
            // (wire type 5), plus two enum-like varints — presumably
            // codec/encoding selectors; exact meaning unverified.
            const audioConfig = pbConcat(
                pbEncodeTag(2, 5), new Uint8Array(new Float32Array([16000.0]).buffer),
                pbEncodeVarintField(3, 11),
                pbEncodeVarintField(4, 1)
            );
            const field293100 = pbEncodeMessageField(293100, audioConfig);

            // Client identifier submessage (mimics the Gemini web frontend).
            const clientId = pbEncodeStringField(2, "bard-web-frontend");
            const field294000 = pbEncodeMessageField(294000, clientId);

            // Recognition options. NOTE(review): interimResults is accepted but
            // never read here — interim streaming appears to be always enabled
            // by this config; TODO confirm against the backchannel behavior.
            const recogConfig = pbConcat(
                pbEncodeMessageField(1, pbEncodeStringField(10, langCode)),
                pbEncodeVarintField(5, 1),
                pbEncodeVarintField(40, 1),
                pbEncodeVarintField(52, 1)
            );
            const field294500 = pbEncodeMessageField(294500, recogConfig);

            return pbConcat(
                pbEncodeStringField(1, "intelligent-dictation"),
                pbEncodeVarintField(2, 1),
                field293000,
                field293100,
                field294000,
                field294500
            );
        }

        // Wraps one chunk of encoded audio bytes in a StreamingRecognizeRequest
        // message (field 293101 is the reverse-engineered audio-payload field).
        function buildAudioChunkProto(audioBytes) {
            const audioField = pbEncodeBytesField(1, audioBytes);
            return pbEncodeMessageField(293101, audioField);
        }

        // Decodes the StreamingRecognizeResponse protobuf received from the Cloud Speech v2 server.
        // Returns { results: [{ alternatives, isFinal, stability, languageCode }], speechEventType }.
        // Field numbers are reverse-engineered; only results with at least one
        // transcript alternative are kept.
        function decodeStreamingResponse(bytes) {
            const resp = pbDecode(bytes);
            const result = { results: [], speechEventType: 0 };
            if (resp[5]) result.speechEventType = resp[5][0];
            // Field 1253625 holds the repeated result-container messages.
            if (resp[1253625]) {
                for (const cBytes of resp[1253625]) {
                    if (!(cBytes instanceof Uint8Array)) continue;
                    const c = pbDecode(cBytes);
                    // Language code may arrive in field 4 or field 3 — prefer 4.
                    let lang = "";
                    if (c[4] && c[4][0] instanceof Uint8Array) lang = pbDecodeString(c[4][0]);
                    else if (c[3] && c[3][0] instanceof Uint8Array) lang = pbDecodeString(c[3][0]);
                    if (c[1]) {
                        for (const eBytes of c[1]) {
                            if (!(eBytes instanceof Uint8Array)) continue;
                            const e = pbDecode(eBytes);
                            // NOTE: isFinal is undefined (falsy) when field 1 is absent.
                            const pr = { alternatives: [], isFinal: e[1] && e[1][0] === 1, stability: e[2] ? e[2][0] : 0, languageCode: lang };
                            if (e[4] || e[3]) {
                                // Alternatives may likewise be in field 4 or 3.
                                for (const aBytes of (e[4] || e[3])) {
                                    if (!(aBytes instanceof Uint8Array)) continue;
                                    const a = pbDecode(aBytes);
                                    if (a[1]) {
                                        for (const sBytes of a[1]) {
                                            if (!(sBytes instanceof Uint8Array)) continue;
                                            const s = pbDecode(sBytes);
                                            if (s[1] && s[1][0] instanceof Uint8Array) {
                                                // Confidence defaults to 0.9 when the server omits it.
                                                pr.alternatives.push({ transcript: pbDecodeString(s[1][0]), confidence: s[2] ? s[2][0] : 0.9 });
                                            }
                                        }
                                    }
                                }
                            }
                            if (pr.alternatives.length > 0) result.results.push(pr);
                        }
                    }
                }
            }
            return result;
        }

        // Base64-encodes a Uint8Array. Processes the bytes in 8 KiB slices so
        // String.fromCharCode never receives an argument list large enough to
        // overflow the call stack.
        function uint8ToBase64(bytes) {
            const SLICE = 8192;
            const parts = [];
            for (let offset = 0; offset < bytes.length; offset += SLICE) {
                const chunk = bytes.subarray(offset, offset + SLICE);
                parts.push(String.fromCharCode.apply(null, chunk));
            }
            return btoa(parts.join(""));
        }



        // Select the configured backend profile (falls back to v2 if SERVER_MODE is invalid).
        let ACTIVE_BACKEND = BACKEND_PROFILES[SERVER_MODE] || BACKEND_PROFILES.v2;
        let API_KEYS = [...ACTIVE_BACKEND.apiKeys];
        let API_ENDPOINTS = [...ACTIVE_BACKEND.endpoints];

        // Indices advanced by rotateApiKey() when a key/endpoint fails.
        let currentEndpointIndex = 0;
        let currentKeyIndex = 0;

        const getBaseUrl = () => API_ENDPOINTS[currentEndpointIndex].url;
        // Fetch options: spoof the referrer the chosen endpoint expects, omit cookies.
        const getFetchOpts = () => ({
            mode: "cors",
            credentials: "omit",
            referrer: API_ENDPOINTS[currentEndpointIndex].referrer
        });
        const getApiKey = () => API_KEYS[currentKeyIndex];

        // Warm-session cache populated by warmSession().
        let preSession = null;
        let preSessionPromise = null;

        // Attempts to scrape an active Google API key from the current page's scripts.
        // Only runs on the google.com homepage; returns the key string or null.
        function findApiKey() {
            if (window.location.hostname === "www.google.com" && window.location.pathname === "/") {
                for (const script of document.querySelectorAll("script")) {
                    const text = script.textContent || "";
                    const m = text.match(/"X-Goog-Api-Key"\s*:\s*"([^"]{33,})"/i);
                    // Only accept keys from the same "AIzaSyBm" family this
                    // script already ships with.
                    if (m && m[1].startsWith("AIzaSyBm")) return m[1];
                }
            }
            return null;
        }

        // If the page itself exposes a working key, promote it to the front of
        // the rotation so it is tried before the baked-in keys.
        const scrapedKey = findApiKey();
        if (scrapedKey) {
            const idx = API_KEYS.indexOf(scrapedKey);
            if (idx !== -1) API_KEYS.splice(idx, 1); // avoid a duplicate entry
            API_KEYS.unshift(scrapedKey);
        }

        // Attempts to extract the active Google account index (AuthUser) from the page for authentication.
        // Falls back to "0" (the first/default account) when nothing matches.
        function findAuthUser() {
            for (const script of document.querySelectorAll("script")) {
                const text = script.textContent || "";
                const m = text.match(/"X-Goog-AuthUser"\s*:\s*(?:[^"\n]+)?"([^"]+)"/i);
                if (m) return m[1];
            }
            // Broader (and slower) fallback: scan the entire serialized document.
            const m2 = document.documentElement.innerHTML.match(/"X-Goog-AuthUser"\s*:\s*(?:[^"\n]+)?"([^"]+)"/i);
            return m2 ? m2[1] : "0";
        }

        const AUTH_USER = findAuthUser();
        const CURRENT_YEAR = String(new Date().getFullYear());
        // Value for the "x-browser-validation" header, sniffed from page traffic.
        let browserValidation = null;

        // Passively capture x-browser-validation from any XHR the host page
        // sends, so our own requests can replay the same token.
        const _origXhrSetHeader = XMLHttpRequest.prototype.setRequestHeader;
        XMLHttpRequest.prototype.setRequestHeader = function (h, v) {
            if (h.toLowerCase() === "x-browser-validation" && !browserValidation) browserValidation = v;
            return _origXhrSetHeader.apply(this, arguments);
        };

        // NOTE(review): this runs synchronously at document-start, before any
        // page XHR could have fired, so browserValidation is still null here —
        // the HTML scrape below is effectively the first-chance source.
        if (!browserValidation) {
            const valMatch = document.documentElement.innerHTML.match(
                /x-browser-validation['":\s]+([A-Za-z0-9+/=]{20,44})/i
            );
            if (valMatch) browserValidation = valMatch[1];
        }

        // Last-resort hard-coded validation token for the v2 backend.
        if (ACTIVE_BACKEND.name === "v2" && !browserValidation) {
            browserValidation = "JmUDa+WXIcEmBPOq9TTt1Hr7mMI=";
        }

        // Constructs the specific HTTP headers required to initialize a new
        // WebChannel session. v2 authenticates with an API key header and
        // declares the protobuf content type; v1 sends the account index instead.
        function getSessionHeaders() {
            const headers = {
                accept: "*/*",
                "accept-language": "en-US,en;q=0.9",
                "content-type": "application/x-www-form-urlencoded",
                "x-browser-channel": "stable",
                "x-browser-copyright": `Copyright ${CURRENT_YEAR} Google LLC. All Rights reserved.`
            };
            const isV2 = ACTIVE_BACKEND.name === "v2";
            if (isV2) {
                headers["x-goog-api-key"] = getApiKey();
            } else {
                headers["x-goog-authuser"] = AUTH_USER;
            }
            if (browserValidation) {
                headers["x-browser-validation"] = browserValidation;
            }
            headers["x-browser-year"] = CURRENT_YEAR;
            if (isV2) {
                headers["x-webchannel-content-type"] = "application/x-protobuf";
            }
            return headers;
        }

        // Constructs the standard HTTP headers required for Google WebChannel
        // requests. Identical for both backends except that v1 also identifies
        // the active Google account via x-goog-authuser.
        function getHeaders() {
            const headers = {
                accept: "*/*",
                "accept-language": "en-US,en;q=0.9",
                "content-type": "application/x-www-form-urlencoded",
                "x-browser-channel": "stable",
                "x-browser-copyright": `Copyright ${CURRENT_YEAR} Google LLC. All Rights reserved.`
            };
            if (ACTIVE_BACKEND.name !== "v2") {
                headers["x-goog-authuser"] = AUTH_USER;
            }
            if (browserValidation) {
                headers["x-browser-validation"] = browserValidation;
            }
            headers["x-browser-year"] = CURRENT_YEAR;
            return headers;
        }

        // Shows a dismissible error toast in the top-right corner of the page.
        // `messageHtml` is script-internal, trusted markup (never user input),
        // so assigning it via innerHTML is acceptable here.
        // Fix: this script runs at @run-at document-start, where document.body
        // can still be null — the old unconditional appendChild threw. We now
        // defer attachment to DOMContentLoaded when <body> does not exist yet.
        function showPolyfillNotification(messageHtml) {
            const container = document.createElement("div");
            Object.assign(container.style, {
                position: "fixed", top: "20px", right: "20px", zIndex: "999999",
                background: "#fef2f2", color: "#991b1b", border: "1px solid #ef4444",
                padding: "16px", borderRadius: "8px", boxShadow: "0 4px 6px rgba(0,0,0,0.1)",
                fontFamily: "system-ui, -apple-system, sans-serif", fontSize: "14px",
                maxWidth: "350px", display: "flex", flexDirection: "column", gap: "8px"
            });
            container.innerHTML = `
                <div style="display: flex; justify-content: space-between; align-items: flex-start; gap: 12px;">
                    <div>${messageHtml}</div>
                    <button style="background: none; border: none; font-size: 18px; cursor: pointer; color: #991b1b; padding: 0; line-height: 1;">&times;</button>
                </div>
            `;
            container.querySelector("button").onclick = () => container.remove();
            const attach = () => (document.body || document.documentElement).appendChild(container);
            if (document.body) {
                attach();
            } else {
                // <body> not parsed yet — attach as soon as the DOM is ready.
                document.addEventListener("DOMContentLoaded", attach, { once: true });
            }
            // Auto-dismiss after 15s; remove() on a detached node is a no-op.
            setTimeout(() => container.remove(), 15000);
        }

        // Running count of rejected keys (diagnostics only).
        let apiKeyInvalidCount = 0;
        // Set once createSession() exhausts every key/endpoint combination.
        let polyfillPermanentlyFailed = false;
        // Advances to the next API key; when all keys for the current endpoint
        // are exhausted, wraps around and moves to the next endpoint.
        const rotateApiKey = () => {
            currentKeyIndex++;
            apiKeyInvalidCount++;
            if (currentKeyIndex >= API_KEYS.length) {
                currentKeyIndex = 0;
                currentEndpointIndex = (currentEndpointIndex + 1) % API_ENDPOINTS.length;
            }
        };

        // Establishes a new WebChannel session with Google servers, handling fallbacks through backup API keys and endpoints if the primary fails.
        // Resolves to { sid, gsessionid, ridCounter }. On total failure it shows
        // a user-facing notification, marks the polyfill permanently failed, and
        // rethrows the last error.
        async function createSession() {
            let attempts = 0;
            // One attempt per key/endpoint combination before giving up.
            const maxAttempts = API_KEYS.length * API_ENDPOINTS.length;
            let lastError = null;

            while (attempts < maxAttempts) {
                // Randomized request id, mimicking the real WebChannel client.
                const ridCounter = 62480 + Math.floor(Math.random() * 9000);
                // v1 smuggles the API key through the %24httpHeaders query param;
                // v2 sends it as a real header (see getSessionHeaders()).
                const bindUrl =
                    ACTIVE_BACKEND.name === "v2"
                        ? `${getBaseUrl()}?VER=8&RID=${ridCounter}&CVER=22&X-HTTP-Session-Id=gsessionid&zx=${Date.now()}&t=1`
                        : `${getBaseUrl()}?VER=8&RID=${ridCounter}&CVER=22&X-HTTP-Session-Id=gsessionid&%24httpHeaders=x-goog-api-key%3A${getApiKey()}%0D%0A&zx=${Date.now()}&t=1`;

                try {
                    const bindRes = await fetch(bindUrl, {
                        ...getFetchOpts(),
                        method: "POST",
                        headers: getSessionHeaders(),
                        body: "count=0"
                    });

                    if (bindRes.ok) {
                        const bindText = await bindRes.text();
                        // Drop the framing lines (bare integers = chunk lengths)
                        // to recover the JSON payload(s).
                        const jsonLines = bindText
                            .split("\n")
                            .filter((line) => line.trim() && !/^\d+$/.test(line.trim()));
                        const jsonStr = jsonLines.join("\n");

                        let parsed;
                        try {
                            parsed = JSON.parse(jsonStr);
                        } catch {
                            // Multiple concatenated arrays: glue them into one JSON array.
                            parsed = JSON.parse("[" + jsonStr.replace(/\]\s*\[/g, "],[") + "]");
                        }

                        // Depth-first search for the ["c", "<sid>", ...] channel-open frame.
                        let sid = null;
                        (function findSid(arr) {
                            if (!Array.isArray(arr)) return;
                            for (const item of arr) {
                                if (Array.isArray(item)) {
                                    if (item[0] === "c" && typeof item[1] === "string") sid = item[1];
                                    findSid(item);
                                }
                            }
                        })(parsed);

                        const gsessionid = bindRes.headers.get("x-http-session-id") || null;
                        if (sid) {
                            return { sid, gsessionid, ridCounter: ridCounter + 1 };
                        }
                    } else {
                        lastError = new Error(`Bind failed with status ${bindRes.status}`);
                    }
                } catch (err) {
                    lastError = err;
                }

                // This attempt failed: move to the next key/endpoint pair.
                rotateApiKey();
                attempts++;
            }

            const errorMsg = `<strong>🎙️ Speech Recognition Userscript</strong><br><br><strong>Speech Recognition Error</strong><br>Unfortunately, the server backend cannot be reached.<br><br>This means either the server is down, Google disabled the ability to use the script natively, you are rate-limited, or blocked. Try the original extension.`;
            showPolyfillNotification(errorMsg);
            polyfillPermanentlyFailed = true;
            throw lastError || new Error("No SID or bind failed after trying all backups");
        }

        // Pre-emptively creates a session before recognition starts, reducing
        // latency when the user actually begins speaking. Idempotent: concurrent
        // callers share the same in-flight promise.
        function warmSession() {
            if (preSessionPromise) return preSessionPromise;
            preSessionPromise = createSession().then(
                (session) => {
                    preSession = session;
                    return session;
                },
                () => {
                    // Warm-up failed: clear state so a later call retries from scratch.
                    preSession = null;
                    preSessionPromise = null;
                    return null;
                }
            );
            return preSessionPromise;
        }

        // Use the native EventTarget when available; otherwise fall back to a
        // minimal shim with the same addEventListener / removeEventListener /
        // dispatchEvent contract.
        const BaseClass =
            typeof EventTarget !== "undefined"
                ? EventTarget
                : class {
                    constructor() {
                        this.listeners = {}; // event type -> array of callbacks
                    }
                    addEventListener(type, callback) {
                        if (!(type in this.listeners)) this.listeners[type] = [];
                        this.listeners[type].push(callback);
                    }
                    removeEventListener(type, callback) {
                        if (!(type in this.listeners)) return;
                        this.listeners[type] = this.listeners[type].filter((cb) => cb !== callback);
                    }
                    dispatchEvent(event) {
                        // Mirrors the DOM contract: false when preventDefault() was called.
                        if (!(event.type in this.listeners)) return true;
                        this.listeners[event.type].forEach((cb) => cb.call(this, event));
                        return !event.defaultPrevented;
                    }
                };

        /**
         * Main Polyfill Class.
         * Replaces the native SpeechRecognition object and orchestrates audio capture, 
         * WebChannel networking, audio chunk dispatching, and Web Speech API event handling.
         */
        class GoogleWebchannelSpeechRecognition extends BaseClass {
            // Initializes the W3C SpeechRecognition surface (properties and
            // on* handler slots) plus all internal session/VAD/queue state,
            // and registers this instance in a global list for debugging.
            constructor() {
                super();

                this._forcedFinalizeTimer = null;
                // W3C properties
                this.continuous = false;
                this.interimResults = false;
                this.lang = "en-US";
                this.maxAlternatives = 1;
                this.serviceURI = "";
                this.grammars = new SpeechGrammarList();

                // Event handlers
                this.onaudiostart = null;
                this.onaudioend = null;
                this.onend = null;
                this.onerror = null;
                this.onnomatch = null;
                this.onresult = null;
                this.onsoundstart = null;
                this.onsoundend = null;
                this.onspeechstart = null;
                this.onspeechend = null;
                this.onstart = null;

                // Runtime: microphone / Web Audio handles
                this._stream = null;
                this._audioCtx = null;
                this._processor = null;
                this._dummyAudio = null;
                this._recorder = null;

                // Lifecycle flags
                this._aborting = false;
                this._cleanupCalled = false;
                this._switchingSession = false;
                this._stopRequested = false;
                this._abortController = null;

                // Backchannel (server -> client stream) state
                this._bcDone = false;
                this._bcBuffer = "";

                // Transcript tracking across interim/final results
                this._latestHighStabilityTranscript = null;
                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;

                // Per-utterance finalization bookkeeping
                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                this._finalResults = [];
                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                // WebChannel session identifiers (see createSession())
                this._currentSid = null;
                this._currentGsessionid = null;
                this._currentRidCounter = 0;
                this._currentOfs = 1;

                // Voice Activity Detection state
                this._vadSilenceFrames = 0;
                this._isVadSpeaking = false;

                // Outgoing audio chunk queue
                this._preSessionBuffer = [];
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;
                this._maxConsecutiveChunkFailures = 6;

                // Generation counters guard against stale async callbacks
                // arriving after the mic was stopped or the session swapped.
                this._sessionGen = 0;
                this._activeBackchannelGen = 0;
                this._lastStartId = 0;

                this._sessionActive = false;
                this._micIdleTimer = null;
                this._restartPromise = null;
                this._suppressEndOnce = false;

                this._oggHeader = null;

                // s3 stall watchdog
                this._lastMeaningfulFrameTs = 0;
                this._noopFrameStreak = 0;

                this._permanentlyFailed = false;

                // Global registry of live instances (debugging aid only).
                window.__polyfill_active_instances = window.__polyfill_active_instances || [];
                if (!window.__polyfill_active_instances.includes(this)) {
                    window.__polyfill_active_instances.push(this);
                }
                window._polyfillSR = this; // Debugging hook for console access
            }

            _dbg(...args) {
                if (!DEV_MODE) return;
                try { console.log("[polyfill dbg]", ...args); } catch { }
            }

            // Dispatches a standard SpeechRecognitionEvent to attached listeners and triggers corresponding 'on[event]' handlers.
            _dispatchEvent(name, eventObj) {
                const ev = eventObj || new Event(name);
                if (typeof this["on" + name] === "function") {
                    try { this["on" + name](ev); } catch { }
                }
                try { this.dispatchEvent(ev); } catch { }
            }

            _norm(t) { return (t || "").replace(/\s+/g, " ").trim(); }
            _stripXssiPrefix(text) { return text.replace(/^\)\]\}'\s*\n?/, ""); }

            // Reads and extracts a single complete payload frame from the raw incoming WebChannel stream buffer.
            _readFrameFromBuffer() {
                this._bcBuffer = this._stripXssiPrefix(this._bcBuffer).replace(/^\s+/, "");
                if (!this._bcBuffer.length) return null;

                const nl = this._bcBuffer.indexOf("\n");
                if (nl === -1) return null;

                const lenStr = this._bcBuffer.slice(0, nl).trim();
                if (!/^\d+$/.test(lenStr)) {
                    this._bcBuffer = this._bcBuffer.slice(nl + 1);
                    return null;
                }

                const len = Number(lenStr);
                const start = nl + 1;
                const end = start + len;
                if (this._bcBuffer.length < end) return null;

                const payload = this._bcBuffer.slice(start, end);
                this._bcBuffer = this._bcBuffer.slice(end);
                return payload;
            }

            // Extracts transcript and End-Of-Utterance signals from the v1 backend's JSON stream
            _extractFrameSignalsV1(frameObj) {
                let lastSpeechResults = null, sawEOU = false, sawClose = false, sawNoSpeech = false;

                const walk = (n) => {
                    if (n == null) return;
                    if (typeof n === "string") {
                        if (n === "close") sawClose = true;
                        if (n.includes("END_OF_UTTERANCE")) sawEOU = true;
                        return;
                    }
                    if (Array.isArray(n)) return void n.forEach(walk);
                    if (typeof n === "object") {
                        if (n.eventType === "END_OF_UTTERANCE") sawEOU = true;
                        if (n.noSpeech === true) sawNoSpeech = true;
                        if (Array.isArray(n.speechResults) && n.speechResults.length > 0) lastSpeechResults = n.speechResults;
                        for (const k of Object.keys(n)) {
                            if (k !== "speechResults" && k !== "transcript" && k !== "stability") walk(n[k]);
                        }
                    }
                };
                walk(frameObj);

                const STABILITY_THRESHOLD = 0.5;
                let highParts = [], lowParts = [], bestStability = null;

                if (lastSpeechResults) {
                    for (const sr of lastSpeechResults) {
                        if (sr.noSpeech === true) sawNoSpeech = true;
                        if (typeof sr.transcript === "string") {
                            const s = typeof sr.stability === "number" ? sr.stability : 0;
                            if (bestStability === null || s > bestStability) bestStability = s;
                            if (s < STABILITY_THRESHOLD) lowParts.push(sr.transcript);
                            else highParts.push(sr.transcript);
                        }
                    }
                }

                const highText = highParts.join(" ");
                const lowText = lowParts.join(" ");
                const fullText = (highText + (highText && lowText ? " " : "") + lowText).trim();

                return { fullText: fullText || null, highText: highText || null, bestStability, sawEOU, sawClose, sawNoSpeech };
            }

            // Extracts transcript and End-Of-Utterance signals from the v2 backend's binary protobuf stream (encoded in base64 arrays)
            // Returns { fullText, highText, bestStability, sawEOU, sawClose, sawNoSpeech };
            // highText only contains parts with stability >= 0.5.
            _extractFrameSignalsS3(frameObj) {
                let sawEOU = false, sawClose = false, sawNoSpeech = false;

                if (Array.isArray(frameObj)) {
                    // Cheap string-level sniffing of the serialized frame: detect the channel
                    // "close" marker, and short-circuit on tiny keep-alive "noop" frames.
                    const flat = JSON.stringify(frameObj);
                    if (flat.includes('"close"')) sawClose = true;
                    if (flat.includes('"noop"') && !flat.includes('"__sm__"') && flat.length < 50) {
                        return { fullText: null, highText: null, bestStability: null, sawEOU: false, sawClose: false, sawNoSpeech: false };
                    }
                }

                // Depth-first scan for the first base64 string that decodes into a protobuf
                // response carrying results or a speech event (see decodeStreamingResponse).
                let protoResponse = null;
                const findProtoData = (arr) => {
                    if (!Array.isArray(arr)) return null;
                    for (const item of arr) {
                        // Heuristic base64 test; length > 10 skips short tokens like "noop".
                        if (typeof item === "string" && item.length > 10 && /^[A-Za-z0-9+/=]+$/.test(item)) {
                            try {
                                const binary = atob(item);
                                const bytes = new Uint8Array(binary.length);
                                for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
                                const decoded = decodeStreamingResponse(bytes);
                                if (decoded.results.length > 0 || decoded.speechEventType > 0) return decoded;
                            } catch { }
                        }
                        if (Array.isArray(item)) {
                            const found = findProtoData(item);
                            if (found) return found;
                        }
                    }
                    return null;
                };
                protoResponse = findProtoData(frameObj);

                if (!protoResponse) return { fullText: null, highText: null, bestStability: null, sawEOU, sawClose, sawNoSpeech };
                // speechEventType === 1 is treated as end-of-utterance here.
                if (protoResponse.speechEventType === 1) sawEOU = true;

                const STABILITY_THRESHOLD = 0.5;
                let highParts = [], lowParts = [], bestStability = null;

                for (const result of protoResponse.results) {
                    if (result.isFinal) sawEOU = true;
                    // Final results carry no stability value; treat them as fully stable.
                    const s = typeof result.stability === "number" ? result.stability : (result.isFinal ? 1 : 0);
                    if (bestStability === null || s > bestStability) bestStability = s;

                    for (const alt of result.alternatives) {
                        if (!alt.transcript) continue;
                        if (s < STABILITY_THRESHOLD) lowParts.push(alt.transcript);
                        else highParts.push(alt.transcript);
                    }
                    if (result.alternatives.length === 0) sawNoSpeech = true;
                }

                // NOTE(review): unlike the v1 extractor, parts are joined WITHOUT spaces —
                // presumably v2 transcripts carry their own spacing. Confirm before changing.
                const highText = highParts.join("");
                const lowText = lowParts.join("");
                const fullText = (highText + (highText && lowText ? " " : "") + lowText).trim();

                return { fullText: fullText || null, highText: highText || null, bestStability, sawEOU, sawClose, sawNoSpeech };
            }

            _extractFrameSignals(frameObj) {
                return ACTIVE_BACKEND.name === "v1"
                    ? this._extractFrameSignalsV1(frameObj)
                    : this._extractFrameSignalsS3(frameObj);
            }

            // Evaluates an incoming interim transcript to determine if it should become the final settled result based on stability and length.
            _considerFinalCandidate(transcript, stability) {
                const t = this._norm(transcript);
                if (!t) return;

                if (t.length < 6 && /[.?!]$/.test(t)) return;

                const s = typeof stability === "number" ? stability : 0;
                const bestLen = this._bestFinalCandidate ? this._bestFinalCandidate.length : 0;
                if (!this._bestFinalCandidate || s > this._bestFinalStability || (s === this._bestFinalStability && t.length >= bestLen)) {
                    this._bestFinalCandidate = t;
                    this._bestFinalStability = s;
                }
            }

            // Commits the best current transcript as a final result and emits the 'result' event.
            _finalizeCurrentUtteranceOnce() {
                if (this._finalizedThisUtterance) return;

                const finalText = this._bestFinalCandidate || this._norm(this._latestInterimTranscript);
                if (!finalText) return;

                const finalStability = this._bestFinalStability >= 0 ? this._bestFinalStability : (this._latestInterimStability ?? 0.99);

                this._dbg("finalizeOnce", {
                    pending: this._pendingFinal,
                    finalized: this._finalizedThisUtterance,
                    best: this._bestFinalCandidate,
                    latest: this._latestInterimTranscript
                });

                this._emitResult(finalText, finalStability, true);
                this._lastFinalTranscript = finalText;
                this._finalizedThisUtterance = true;
                this._lastEmittedInterimTranscript = null;
                this._lastEmittedUtteranceId = -1;
            }

            // Continuously reads the active streaming response connection (backchannel) for incoming speech recognition results.
            // gen/startId are generation tokens: when either stops matching the instance's
            // current values, a newer session owns the stream and this loop exits silently.
            async _consumeBackchannel(bcRes, gen, startId) {
                const reader = bcRes.body.getReader();
                const decoder = new TextDecoder();
                this._bcBuffer = "";

                while (!this._aborting) {
                    if (gen !== this._activeBackchannelGen || startId !== this._lastStartId) return;
                    const { done, value } = await reader.read();
                    if (done) break;

                    // Re-check ownership after the await: a restart may have happened meanwhile.
                    if (gen !== this._activeBackchannelGen || startId !== this._lastStartId) return;
                    this._bcBuffer += decoder.decode(value, { stream: true });

                    // Drain every complete length-prefixed frame currently in the buffer.
                    while (!this._aborting) {
                        if (gen !== this._activeBackchannelGen || startId !== this._lastStartId) return;
                        const payload = this._readFrameFromBuffer();
                        if (payload == null) break;

                        let frameObj;
                        try { frameObj = JSON.parse(payload); } catch { continue; }

                        this._dbg("raw frame", payload.length > 500 ? payload.substring(0, 500) + "..." : payload);

                        // API-key exhaustion: rotate to the next key; only fail permanently
                        // once every configured key has been rejected.
                        if (payload.includes("API_KEY_INVALID") || payload.includes("SERVICE_DISABLED")) {
                            this._dbg("API exhaustion detected in payload, rotating API key");
                            rotateApiKey();
                            if (apiKeyInvalidCount >= API_KEYS.length) {
                                const errorMsg = `<strong>🎙️ Speech Recognition Userscript</strong><br><br><strong>API Key Exhausted</strong><br>Unfortunately, none of the keys for <code>${ACTIVE_BACKEND.name}</code> worked.<br><br>The keys have likely been redistributed or no longer work with this service (SERVICE_DISABLED). You would need to supply new API keys by navigating google domains and extracting the keys from network traffic.`;
                                showPolyfillNotification(errorMsg);
                                polyfillPermanentlyFailed = true;
                                this._dispatchEvent("error", new SpeechRecognitionErrorEvent("error", { error: "not-allowed", message: "API keys exhausted" }));
                                this._cleanup("api keys exhausted");
                                return;
                            } else {
                                this._dbg("API auth failed during backchannel, silently restarting session with next key");
                                await this._restartSession(true);
                                return;
                            }
                        }

                        const { fullText, highText, bestStability, sawEOU, sawClose, sawNoSpeech } = this._extractFrameSignals(frameObj);

                        this._dbg("frame", {
                            backend: ACTIVE_BACKEND.name,
                            gen, activeGen: this._activeBackchannelGen,
                            startId, activeStart: this._lastStartId,
                            sawEOU, sawClose, fullText, bestStability
                        });

                        // Stall-watchdog bookkeeping: count consecutive frames that carried
                        // neither text nor any boundary signal.
                        const hasMeaningfulText = !!fullText;
                        const hasBoundarySignal = sawEOU || sawClose || sawNoSpeech;
                        if (hasMeaningfulText || hasBoundarySignal) {
                            this._lastMeaningfulFrameTs = Date.now();
                            this._noopFrameStreak = 0;
                        } else {
                            this._noopFrameStreak++;
                        }

                        // v2 stall recovery: still uploading audio but receiving only no-op
                        // frames -> assume the server stream is wedged and restart the session.
                        if (
                            ACTIVE_BACKEND.name === "v2" &&
                            !this._aborting &&
                            !this._stopRequested &&
                            this.continuous &&
                            this._currentSid &&
                            this._noopFrameStreak > 20 &&
                            (this._sendQueue.length > 0 || this._sendingChunks)
                        ) {
                            this._dbg("noop-stall detected; forcing restart");
                            await this._restartSession();
                            return;
                        }

                        // Server reported no recognizable speech: fire 'nomatch' and tear down.
                        if (sawNoSpeech) {
                            this._dispatchEvent("nomatch");
                            this._bcDone = true;
                            this._cleanup("no speech");
                            return;
                        }

                        if (sawEOU) {
                            // End-of-utterance: mark the final as pending; the actual finalize
                            // happens on the next empty frame (see the _pendingFinal branch).
                            this._pendingFinal = true;
                            if (fullText) {
                                this._considerFinalCandidate(fullText, bestStability);
                                this._latestInterimTranscript = fullText;

                                // Emit interim result immediately like v1.js does so UX feels fast
                                if (this.interimResults && !this._finalizedThisUtterance) {
                                    if (fullText !== this._lastEmittedInterimTranscript || this._currentUtteranceId !== this._lastEmittedUtteranceId) {
                                        this._lastEmittedInterimTranscript = fullText;
                                        this._lastEmittedUtteranceId = this._currentUtteranceId;
                                        this._emitResult(fullText, bestStability ?? 0.01, false);
                                    }
                                }
                            }
                            if (!this._speechendFired) {
                                this._speechendFired = true;
                                this._dispatchEvent("speechend");
                            }
                        } else if (this._pendingFinal) {
                            if (fullText) {
                                // More text arrived after EOU: keep refining the candidate.
                                this._considerFinalCandidate(fullText, bestStability);
                                this._latestInterimTranscript = fullText;
                                if (this.interimResults) {
                                    this._lastEmittedInterimTranscript = fullText;
                                    this._lastEmittedUtteranceId = this._currentUtteranceId;
                                    this._emitResult(fullText, bestStability ?? 0.01, false);
                                }
                            } else {
                                // Empty frame after EOU: commit the final and reset per-utterance state.
                                this._finalizeCurrentUtteranceOnce();
                                this._pendingFinal = false;
                                this._finalizedThisUtterance = false;
                                this._bestFinalCandidate = null;
                                this._bestFinalStability = -1;
                                this._currentUtteranceId++;
                                this._lastEmittedInterimTranscript = null;
                                this._latestInterimTranscript = null;
                                this._latestInterimStability = null;
                                this._speechendFired = false;

                                if (!this.continuous || this._stopRequested) {
                                    this._dbg("ending session after final result (stop requested or non-continuous)");
                                    this._cleanup("post-final end");
                                    return;
                                }
                            }

                        } else if (fullText && !sawClose) {
                            // Ordinary interim transcript update.
                            this._latestInterimTranscript = fullText;
                            if (highText) this._latestHighStabilityTranscript = highText;
                            if (bestStability !== null) this._latestInterimStability = bestStability;
                            this._considerFinalCandidate(fullText, bestStability);

                            if (this.interimResults) {
                                this._lastEmittedInterimTranscript = fullText;
                                this._lastEmittedUtteranceId = this._currentUtteranceId;
                                this._emitResult(fullText, bestStability ?? 0.01, false);
                            }

                            // v2 sometimes never sends explicit EOU/final.
                            // If user has gone silent and we already have interim text, force-finalize shortly.
                            // NOTE(review): _forcedFinalizeTimer is only set/cleared here — confirm
                            // it is also cleared on cleanup paths.
                            if (ACTIVE_BACKEND.name === "v2") {
                                if (this._forcedFinalizeTimer) {
                                    clearTimeout(this._forcedFinalizeTimer);
                                    this._forcedFinalizeTimer = null;
                                }

                                if (!this._isVadSpeaking && !this._pendingFinal && !this._finalizedThisUtterance) {
                                    this._forcedFinalizeTimer = setTimeout(() => {
                                        if (this._aborting || !this._sessionActive) return;
                                        if (this._pendingFinal || this._finalizedThisUtterance) return;
                                        if (this._isVadSpeaking) return;
                                        if (!this._latestInterimTranscript) return;

                                        this._considerFinalCandidate(
                                            this._latestInterimTranscript,
                                            this._latestInterimStability ?? 0.99
                                        );
                                        this._finalizeCurrentUtteranceOnce();

                                        this._bestFinalCandidate = null;
                                        this._bestFinalStability = -1;
                                        this._currentUtteranceId++;
                                        this._lastEmittedInterimTranscript = null;
                                        this._latestInterimTranscript = null;
                                        this._latestInterimStability = null;
                                        this._finalizedThisUtterance = false;
                                        this._speechendFired = false;
                                    }, 700);
                                }
                            }
                        } else if (!fullText && !sawEOU && this._latestInterimTranscript) {
                            // Null-text without EOU = utterance boundary in continuous mode
                            this._finalizeCurrentUtteranceOnce();
                            this._bestFinalCandidate = null;
                            this._bestFinalStability = -1;
                            this._currentUtteranceId++;
                            this._lastEmittedInterimTranscript = null;
                            this._latestInterimTranscript = null;
                            this._latestInterimStability = null;
                            this._finalizedThisUtterance = false;
                            this._dbg("utterance boundary (no EOU), ready for next");

                            if (!this.continuous || this._stopRequested) {
                                this._dbg("ending session after final result (stop requested or non-continuous)");
                                this._cleanup("utterance boundary end");
                                return;
                            }
                        }

                        // Server closed the channel: finalize anything outstanding, then either
                        // restart (continuous mode) or clean up.
                        if (sawClose) {
                            if (!this._finalizedThisUtterance) this._finalizeCurrentUtteranceOnce();
                            this._bcDone = true;

                            if (this.continuous && !this._aborting && !this._stopRequested) await this._restartSession();
                            else this._cleanup("server close");
                            return;
                        }
                    }
                }

                // Reader loop ended (done, or aborting): flush any pending final.
                if (this._pendingFinal) this._finalizeCurrentUtteranceOnce();

                this._bcDone = true;
                if (this.continuous && !this._aborting && !this._cleanupCalled && !this._stopRequested) {
                    if (this._latestInterimTranscript && !this._pendingFinal) {
                        this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
                        this._finalizeCurrentUtteranceOnce();
                    }
                    this._dbg("backchannel ended naturally, restarting");
                    await this._restartSession();
                } else if (!this._aborting) {
                    if (this._latestInterimTranscript) this._finalizeCurrentUtteranceOnce();
                    this._cleanup("backchannel end cleanup");
                }
            }

            // Starts the speech recognition process: claims the microphone, connects audio graphs, and negotiates the server session.
            // Mirrors SpeechRecognition.start(): throws "Already started" if a session is active.
            async start() {
                if (polyfillPermanentlyFailed) {
                    this._dbg("start() rejected: polyfill permanently failed");
                    return;
                }
                if (this._sessionActive && !this._aborting) throw new Error("Already started");
                this._sessionActive = true;

                // Assume speech at startup so the first chunks aren't gated away by the VAD.
                this._vadSilenceFrames = 0;
                this._isVadSpeaking = true;

                if (this._micIdleTimer) {
                    clearTimeout(this._micIdleTimer);
                    this._micIdleTimer = null;
                }

                // Bump generation tokens so any lingering backchannel loop from a previous
                // session detects the mismatch and exits (see _consumeBackchannel).
                this._lastStartId++;
                this._sessionGen++;
                this._activeBackchannelGen = this._sessionGen;

                this._dbg("start", {
                    backend: ACTIVE_BACKEND.name,
                    startId: this._lastStartId,
                    sessionGen: this._sessionGen,
                    continuous: this.continuous
                });

                // Reset all per-session flags and transcript state.
                this._aborting = false;
                this._cleanupCalled = false;
                this._switchingSession = false;
                this._bcDone = false;
                this._stopRequested = false;

                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;
                this._finalResults = [];
                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                this._preSessionBuffer = [];
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;

                this._abortController = new AbortController();

                try {
                    // Lazily acquire the microphone and build the audio graph once; it is
                    // reused across session restarts.
                    if (!this._stream) {
                        if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
                            throw new Error("getUserMedia not supported (requires HTTPS)");
                        }

                        this._stream = await navigator.mediaDevices.getUserMedia({
                            audio: {
                                echoCancellation: true,
                                noiseSuppression: true,
                                autoGainControl: true
                            }
                        });
                        const AudioContext = window.AudioContext || window.webkitAudioContext;
                        if (!AudioContext) throw new Error("AudioContext not supported");
                        this._audioCtx = new AudioContext();

                        // Muted <audio> element keeps the capture stream alive in some browsers.
                        this._dummyAudio = new Audio();
                        this._dummyAudio.muted = true;
                        this._dummyAudio.srcObject = this._stream;
                        try { this._dummyAudio.play()?.catch?.(() => { }); } catch { }

                        const source = this._audioCtx.createMediaStreamSource(this._stream);

                        // --- NEW GAIN NODE (Enhanced Recognition) ---
                        this._gainNode = this._audioCtx.createGain();
                        // Google Speech works better with slightly amplified volume but not clipped.
                        // We also added noise suppression / echo cancellation constraints.
                        this._gainNode.gain.value = 1.25;
                        source.connect(this._gainNode);

                        // Destination for MediaRecorder (v2)
                        this._destinationNode = this._audioCtx.createMediaStreamDestination();
                        this._gainNode.connect(this._destinationNode);

                        // ScriptProcessor taps raw PCM for the VAD and for v1 uploads.
                        this._processor = this._audioCtx.createScriptProcessor(8192, 1, 1);
                        this._gainNode.connect(this._processor);
                        this._processor.connect(this._audioCtx.destination);

                        if (this._audioCtx.state === "suspended") await this._audioCtx.resume();

                        this._processor.onaudioprocess = (e) => {
                            if (!this._sessionActive || this._aborting) return;

                            // Simple RMS-based voice activity detection.
                            const float32 = e.inputBuffer.getChannelData(0);
                            let sumSquares = 0;
                            for (let i = 0; i < float32.length; i++) sumSquares += float32[i] ** 2;
                            const rms = Math.sqrt(sumSquares / float32.length);
                            const isSpeech = rms >= 0.01;
                            if (isSpeech) {
                                this._vadSilenceFrames = 0;
                                this._isVadSpeaking = true;
                            } else {
                                this._vadSilenceFrames++;
                            }

                            // If non-continuous dictation has an active transcript but trailing silence is detected (~2.5s), auto-stop.
                            if (!this.continuous && this._latestInterimTranscript && this._vadSilenceFrames > 15) {
                                this._dbg("VAD auto-endpointed non-continuous utterance");
                                this.stop();
                                return;
                            }

                            // The v1 backend accepts raw 16-bit PCM Audio generated via the ScriptProcessor
                            if (ACTIVE_BACKEND.name === "v1") {
                                if (this._aborting || this._cleanupCalled || this._switchingSession || this._bcDone) return;

                                // Keep sending a short tail of silence so the server can correctly endpoint/finalize
                                // ~8192 samples per frame; at 48kHz that's ~170ms/frame (~2 seconds max tail = 12 frames)
                                const shouldSend = isSpeech || (this._isVadSpeaking && this._vadSilenceFrames <= 12);
                                if (!shouldSend) {
                                    this._isVadSpeaking = false;
                                    return;
                                }

                                const originalSampleRate = this._audioCtx.sampleRate;
                                if (!originalSampleRate) return;

                                // Naive nearest-sample downsample to 16 kHz (no low-pass filter).
                                const ratio = originalSampleRate / 16000;
                                const targetLength = Math.round(float32.length / ratio);
                                const int16 = new Int16Array(targetLength);

                                for (let i = 0; i < targetLength; i++) {
                                    const srcIndex = Math.min(Math.floor(i * ratio), float32.length - 1);
                                    int16[i] = Math.max(-1, Math.min(1, float32[srcIndex])) * 0x7fff;
                                }

                                // Base64-encode the PCM in 8 KiB slices to avoid call-stack limits
                                // with String.fromCharCode spread.
                                const uint8 = new Uint8Array(int16.buffer);
                                let binary = "";
                                for (let i = 0; i < uint8.length; i += 8192) {
                                    binary += String.fromCharCode(...uint8.subarray(i, i + 8192));
                                }
                                this._enqueueChunk(btoa(binary));
                            }
                        };
                    }

                    if (ACTIVE_BACKEND.name === "v2") this._setupMediaRecorder();

                    this._dispatchEvent("start");
                    this._dispatchEvent("audiostart");

                    // Use a pre-warmed session if available, otherwise warm one now.
                    if (!preSession) await warmSession();

                    // v2 restarts must resend the cached Ogg container header first so the
                    // server can decode the Opus stream.
                    if (ACTIVE_BACKEND.name === "v2" && this._oggHeader) {
                        const headerProto = buildAudioChunkProto(this._oggHeader);
                        this._preSessionBuffer.unshift(uint8ToBase64(headerProto));
                    }

                    await this._setupSession(preSession);
                } catch (err) {
                    this._handleError("network", err?.message || "Unknown network error");
                }
            }

            // The v2 backend requires OGG/WebM Opus chunks. We use MediaRecorder exclusively for v2.
            _setupMediaRecorder() {
                if (ACTIVE_BACKEND.name !== "v2") return;
                if (this._recorder) {
                    if (this._recorder.state === "paused") {
                        try { this._recorder.resume(); } catch { }
                    }
                    return;
                }

                const mimeType = MediaRecorder.isTypeSupported("audio/ogg;codecs=opus")
                    ? "audio/ogg;codecs=opus"
                    : MediaRecorder.isTypeSupported("audio/webm;codecs=opus")
                        ? "audio/webm;codecs=opus"
                        : "audio/webm";

                const streamToRecord = this._destinationNode ? this._destinationNode.stream : this._stream;
                const recorder = new MediaRecorder(streamToRecord, { mimeType, audioBitsPerSecond: 32000 });
                this._recorder = recorder;

                recorder.ondataavailable = async (e) => {
                    if (!this._sessionActive || this._aborting) return;
                    if (this._recorder !== recorder) return;
                    if (!e.data || e.data.size === 0) return;

                    const audioBytes = new Uint8Array(await e.data.arrayBuffer());

                    if (!this._oggHeader) {
                        this._oggHeader = audioBytes.slice();
                        this._dbg("saved Ogg header", this._oggHeader.length, "bytes");
                    } else if (this._oggHeader === audioBytes) {
                        // ignore if same reference somehow
                    }

                    if (!this._isVadSpeaking && !this._pendingFinal) return;

                    const audioProto = buildAudioChunkProto(audioBytes);
                    this._enqueueChunk(uint8ToBase64(audioProto));
                };

                recorder.onerror = (e) => this._dbg("MediaRecorder error", e.error?.name || e);
                recorder.start(160);
            }

            // Wires up the backchannel (download stream) and initial configuration payloads (upload stream) for a new session.
            async _setupSession(initialSession = null) {
                try {
                    let session = initialSession;
                    if (!session) {
                        if (preSessionPromise) await preSessionPromise;
                        session = preSession || (await createSession());
                    }
                    preSession = null;

                    const { sid, gsessionid } = session;
                    let { ridCounter } = session;

                    const backchannelUrl =
                        `${getBaseUrl()}?` +
                        (gsessionid ? `gsessionid=${gsessionid}&` : "") +
                        `VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;

                    const myGen = ++this._sessionGen;
                    this._activeBackchannelGen = myGen;
                    const myStartId = this._lastStartId;

                    this._dbg("open backchannel", { backend: ACTIVE_BACKEND.name, myGen, myStartId, sid });

                    this._lastMeaningfulFrameTs = Date.now();
                    this._noopFrameStreak = 0;

                    fetch(backchannelUrl, {
                        ...getFetchOpts(),
                        method: "GET",
                        headers: { ...getHeaders(), "content-type": undefined },
                        signal: this._abortController.signal
                    })
                        .then(async (bcRes) => {
                            if (myGen !== this._activeBackchannelGen || myStartId !== this._lastStartId) return;
                            await this._consumeBackchannel(bcRes, myGen, myStartId);
                        })
                        .catch((e) => {
                            if (myGen !== this._activeBackchannelGen || myStartId !== this._lastStartId) return;
                            if (e.name !== "AbortError") {
                                if (this.continuous && !this._stopRequested) {
                                    this._dbg("backchannel network error in continuous mode, soft-restarting", e.name, e.message);
                                    this._restartSession();
                                } else {
                                    this._handleError("network", e.message);
                                }
                            }
                        });

                    const configRid = ridCounter++;
                    const configUrl =
                        `${getBaseUrl()}?VER=8` +
                        (gsessionid ? `&gsessionid=${gsessionid}` : "") +
                        `&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;

                    if (ACTIVE_BACKEND.name === "v1") {
                        const assistConfig = {
                            config: {
                                dialogStateIn: { languageCode: this.lang },
                                deviceConfig: { deviceId: "example", deviceModelId: "example" },
                                audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
                                audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
                                requestType: 4
                            }
                        };
                        const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(JSON.stringify(assistConfig))}`;
                        fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });
                    } else {
                        const configProto = buildStreamingConfigProto(this.lang, this.interimResults);
                        const configB64 = uint8ToBase64(configProto);

                        this._dbg("config proto b64", configB64);
                        this._dbg("api key", getApiKey());
                        this._dbg("browser validation", browserValidation || "(none)");
                        this._dbg("session headers", JSON.stringify(Object.keys(getSessionHeaders())));
                        this._dbg("data headers", JSON.stringify(Object.keys(getHeaders())));

                        const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(configB64)}`;
                        fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });
                    }

                    this._currentSid = sid;
                    this._currentGsessionid = gsessionid;
                    this._currentRidCounter = ridCounter;
                    this._currentOfs = 1;

                    if (ACTIVE_BACKEND.name === "v2" && this._recorder && this._recorder.state === "paused") {
                        try { this._recorder.resume(); } catch { }
                    }

                    if (this._preSessionBuffer.length > 0) {
                        this._dbg("flushing pre-session buffer", { chunks: this._preSessionBuffer.length });
                        this._sendQueue.push(...this._preSessionBuffer);
                        this._preSessionBuffer = [];
                        if (!this._sendingChunks) this._drainChunkQueue();
                    }
                } catch (err) {
                    this._handleError("network", err.message);
                }
            }

            // Adds a base64 encoded audio chunk to the send queue and triggers draining if not already active.
            _enqueueChunk(audioBase64) {
                if (this._aborting || this._cleanupCalled) return;

                if (!this._currentSid) {
                    this._preSessionBuffer.push(audioBase64);
                    this._dbg("buffered pre-session chunk", { buffered: this._preSessionBuffer.length });
                    return;
                }

                this._sendQueue.push(audioBase64);
                if (!this._sendingChunks) this._drainChunkQueue();
            }

            // Sequentially uploads all queued audio chunks to the server via POST requests, handling retries and failures.
            // Re-entrancy is guarded by _sendingChunks, so concurrent callers become no-ops.
            async _drainChunkQueue() {
                if (this._sendingChunks) return;
                this._sendingChunks = true;

                try {
                    // Stop draining as soon as the session is aborting, cleaned up, or being swapped.
                    while (this._sendQueue.length && !this._aborting && !this._cleanupCalled && !this._switchingSession) {
                        // Session identifiers gone mid-drain — nothing to upload to.
                        if (!this._currentSid || !this._abortController) break;

                        // v2 backpressure: while the user is silent and no final is pending,
                        // a growing backlog is stale silence — drop it wholesale.
                        if (
                            ACTIVE_BACKEND.name === "v2" &&
                            !this._isVadSpeaking &&
                            !this._pendingFinal &&
                            this._sendQueue.length > 2
                        ) {
                            this._sendQueue.length = 0;
                            break;
                        }

                        const audioBase64 = this._sendQueue.shift();

                        // Snapshot per-request identifiers; RID and ofs advance monotonically
                        // even across failed attempts (a failed chunk is not re-sent).
                        const chunkRid = this._currentRidCounter++;
                        const cSid = this._currentSid;
                        const cGsessionid = this._currentGsessionid;
                        const cOfs = this._currentOfs++;

                        const chunkUrl =
                            `${getBaseUrl()}?VER=8` +
                            (cGsessionid ? `&gsessionid=${cGsessionid}` : "") +
                            `&SID=${cSid}&RID=${chunkRid}&AID=0&zx=${Date.now()}&t=1`;

                        // v1 wraps audio in an Assistant JSON envelope; v2 sends the base64 proto directly.
                        const chunkPayload = ACTIVE_BACKEND.name === "v1"
                            ? `count=1&ofs=${cOfs}&req0___data__=${encodeURIComponent(JSON.stringify({ audioIn: audioBase64 }))}`
                            : `count=1&ofs=${cOfs}&req0___data__=${encodeURIComponent(audioBase64)}`;

                        try {
                            const res = await fetch(chunkUrl, {
                                ...getFetchOpts(),
                                method: "POST",
                                headers: getHeaders(),
                                body: chunkPayload,
                                signal: this._abortController.signal
                            });

                            if (!res.ok) {
                                // Count consecutive HTTP failures; soft-restart once the cap is hit.
                                this._consecutiveChunkFailures++;
                                if (DEV_MODE) console.warn("[polyfill] chunk non-ok:", res.status);

                                if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                                    if (DEV_MODE) console.warn("[polyfill] too many chunk failures, soft-restarting session");
                                    await this._restartSession();
                                    this._consecutiveChunkFailures = 0;
                                }
                            } else {
                                // Any success resets the failure streak.
                                this._consecutiveChunkFailures = 0;
                            }
                        } catch (err) {
                            // AbortError means the session was deliberately torn down — not a failure.
                            if (err.name === "AbortError") break;

                            this._consecutiveChunkFailures++;
                            if (DEV_MODE) console.warn("[polyfill] chunk send error:", err.message);

                            if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
                                if (DEV_MODE) console.warn("[polyfill] too many chunk exceptions, soft-restarting session");
                                await this._restartSession();
                                this._consecutiveChunkFailures = 0;
                            }
                        }
                    }
                } finally {
                    // Always release the drain lock, even on break/throw.
                    this._sendingChunks = false;
                }
            }

            // Soft-restarts the recording session without resetting completely to zero. 
            // In Continuous mode, the Google backend will close the stream after a period of time. 
            // This function creates a new connection but seamlessly carries over any 
            // buffered audio chunks so the user doesn't experience "interrupted" dictation.
            // To debug you can call window._polyfillSR.restartSession() from the console mid or end of a speech segment.
            async _restartSession(overrideContinuous = false) {
                if (polyfillPermanentlyFailed) return;
                // Restarts only apply to continuous mode (unless explicitly forced) and never during teardown.
                if ((!this.continuous && !overrideContinuous) || this._aborting || this._cleanupCalled) return;
                // Coalesce concurrent restart requests onto the in-flight promise.
                if (this._restartPromise) return this._restartPromise;

                this._dbg("restart requested", {
                    backend: ACTIVE_BACKEND.name,
                    switching: this._switchingSession,
                    hasRestartPromise: !!this._restartPromise,
                    bcDone: this._bcDone
                });

                this._restartPromise = (async () => {
                    // Kill in-flight requests for the old session and arm a fresh controller.
                    if (this._abortController) this._abortController.abort();
                    this._abortController = new AbortController();
                    this._switchingSession = true;

                    // Finalize any pending text before wiping state so it doesn't get lost
                    if (this._latestInterimTranscript && this._norm(this._latestInterimTranscript) !== this._lastFinalTranscript) {
                        this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
                        this._finalizeCurrentUtteranceOnce();
                    } else if (this._pendingFinal) {
                        this._finalizeCurrentUtteranceOnce();
                    }

                    // Reset per-utterance finalization bookkeeping for the new session.
                    this._bcDone = false;
                    this._speechendFired = false;
                    this._pendingFinal = false;
                    this._finalizedThisUtterance = false;
                    this._bestFinalCandidate = null;
                    this._bestFinalStability = -1;

                    // Reset interim tracking and open a new utterance id.
                    this._lastEmittedInterimTranscript = null;
                    this._latestInterimTranscript = null;
                    this._latestInterimStability = null;
                    this._currentUtteranceId++;
                    this._lastEmittedUtteranceId = -1;

                    // v1 carries un-sent chunks over to the new session; for v2 the backlog is
                    // dropped and the saved container header is replayed instead (see below).
                    const carryOver = ACTIVE_BACKEND.name === "v2" ? [] : [...this._sendQueue, ...this._preSessionBuffer];
                    this._sendQueue = [];
                    this._sendingChunks = false;
                    this._consecutiveChunkFailures = 0;

                    // Assume speech immediately after restart so audio keeps flowing.
                    this._vadSilenceFrames = 0;
                    this._isVadSpeaking = true;

                    // Clearing the SID routes new chunks into _preSessionBuffer until _setupSession runs.
                    this._currentSid = null;
                    this._currentGsessionid = null;

                    this._preSessionBuffer = [];

                    if (ACTIVE_BACKEND.name === "v2") {
                        // Replay the saved Ogg/WebM header first so the server can decode the new stream.
                        if (this._oggHeader) {
                            const headerProto = buildAudioChunkProto(this._oggHeader);
                            this._preSessionBuffer.unshift(uint8ToBase64(headerProto));
                        }
                    } else {
                        this._preSessionBuffer.push(...carryOver);
                    }

                    this._dbg("queued carry-over for restart", {
                        backend: ACTIVE_BACKEND.name,
                        carriedChunks: carryOver.length,
                        totalBuffered: this._preSessionBuffer.length
                    });

                    try {
                        // Discard any pre-warmed session; restarts always build a fresh one.
                        preSession = null;
                        preSessionPromise = null;
                        const session = await createSession();
                        if (!session) throw new Error("Failed to create session");

                        await this._setupSession(session);

                        this._lastMeaningfulFrameTs = Date.now();
                        this._noopFrameStreak = 0;
                        this._switchingSession = false;
                    } catch (err) {
                        this._switchingSession = false;
                        this._handleError("network", err.message);
                    }
                })().finally(() => {
                    // Allow future restarts once this one settles.
                    this._restartPromise = null;
                });

                return this._restartPromise;
            }

            // Requests the speech recognition to stop listening. It waits for pending server results to finalize before fully shutting down.
            stop() {
                // Idempotent: ignore while aborting, inactive, or already stopping.
                if (this._aborting || !this._sessionActive || this._stopRequested) return;
                this._dbg("stop() called");

                // Soft Stop: leave audio pipeline intact but disable session and VAD
                this._isVadSpeaking = false;
                this._stopRequested = true;

                // For continuous: false requests, if the server has signaled an End-Of-Utterance but we are 
                // waiting for the final text refinement, we MUST let the backchannel finish naturally instead of killing it.
                if (this._pendingFinal) {
                    this._dbg("stop(): Pending final result exists. Waiting for server refinement.");
                    return;
                }

                // Pause (don't stop) the v2 recorder so the Opus stream can resume on a later start.
                if (ACTIVE_BACKEND.name === "v2" && this._recorder && this._recorder.state === "recording") {
                    try { this._recorder.pause(); } catch { }
                }

                // Promote the best interim hypothesis to a final before tearing down,
                // so speech captured right before stop() is not lost.
                if (this._latestInterimTranscript && this._norm(this._latestInterimTranscript) !== this._lastFinalTranscript) {
                    this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
                    this._finalizeCurrentUtteranceOnce();
                }

                // Abort in-flight requests, then clean up. _suppressEndOnce makes
                // _cleanup skip dispatching the 'end' event exactly once.
                if (this._abortController) this._abortController.abort();
                if (!this.continuous && this._latestInterimTranscript) this._suppressEndOnce = true;
                this._cleanup("stop() called");
            }

            // Immediately aborts the speech recognition session without waiting for final results from the server.
            abort() {
                if (this._aborting || !this._sessionActive) return;
                this._aborting = true;
                if (this._abortController) this._abortController.abort();
                this._cleanup("abort() called");
            }

            // add this helper on the prototype (near other public methods like stop/abort)
            async restartSession() {
                // restart only makes sense when a session is active
                if (!this._sessionActive) {
                    // if not started yet, just start
                    return this.start();
                }
                // restarts are only wired for continuous mode; enforce it
                this.continuous = true;

                // trigger the internal async restart
                return this._restartSession();
            }

            // Cleans up runtime state, resets variables, and emits end events after a session terminates.
            _cleanup(reason = "unknown") {
                // Idempotent: only the first call after a session starts does anything.
                if (!this._sessionActive) return;
                this._sessionActive = false;
                this._dbg("CLEANUP called, reason:", reason);

                this._dispatchEvent("audioend");
                // _suppressEndOnce (set by stop()) skips exactly one 'end' dispatch.
                if (!this._suppressEndOnce) this._dispatchEvent("end");
                else this._suppressEndOnce = false;

                // Session lifecycle flags.
                this._aborting = false;
                this._switchingSession = false;
                this._bcDone = false;
                this._stopRequested = false;

                // Per-utterance finalization state.
                this._speechendFired = false;
                this._pendingFinal = false;
                this._finalizedThisUtterance = false;
                this._bestFinalCandidate = null;
                this._bestFinalStability = -1;

                // Transcript tracking.
                this._latestInterimTranscript = null;
                this._latestInterimStability = null;
                this._lastEmittedInterimTranscript = null;
                this._lastFinalTranscript = null;

                // Utterance counters.
                this._currentUtteranceId = 0;
                this._lastEmittedUtteranceId = -1;

                // Backchannel parse buffer.
                this._bcBuffer = "";

                // Server session identifiers and upload counters.
                this._currentSid = null;
                this._currentGsessionid = null;
                this._currentRidCounter = 0;
                this._currentOfs = 1;

                // Audio upload queues.
                this._preSessionBuffer = [];
                this._sendQueue = [];
                this._sendingChunks = false;
                this._consecutiveChunkFailures = 0;

                // Defer hardware teardown — presumably so a quick restart can reuse the
                // mic without a new permission prompt (see _cleanupMic); confirm intent.
                if (this._micIdleTimer) clearTimeout(this._micIdleTimer);
                this._micIdleTimer = setTimeout(() => this._cleanupMic(), MIC_IDLE_TIMEOUT_MS);
            }

            // Hard shutdown of all local hardware audio resources (Microphone tracks, AudioContext, MediaRecorder).
            _cleanupMic() {
                this._dbg("TEARDOWN MIC hardware");
                if (this._processor) {
                    try { this._processor.onaudioprocess = null; } catch { }
                    try { this._processor.disconnect(); } catch { }
                    this._processor = null;
                }

                if (this._recorder) {
                    try { if (this._recorder.state !== "inactive") this._recorder.stop(); } catch { }
                    this._recorder = null;
                }

                if (this._dummyAudio) {
                    try { this._dummyAudio.pause(); } catch { }
                    this._dummyAudio.srcObject = null;
                    this._dummyAudio = null;
                }

                if (this._stream) {
                    this._stream.getTracks().forEach((t) => t.stop());
                    this._stream = null;
                }

                if (this._gainNode) { try { this._gainNode.disconnect(); } catch { } this._gainNode = null; }
                if (this._destinationNode) { try { this._destinationNode.disconnect(); } catch { } this._destinationNode = null; }

                if (this._audioCtx && this._audioCtx.state !== "closed") {
                    try { this._audioCtx.close(); } catch { }
                }
                this._audioCtx = null;
                this._oggHeader = null;
            }

            // Packages transcript text into standard SpeechRecognitionResult objects and dispatches the 'result' event to the user.
            _emitResult(transcript, stability, isFinal) {
                // guard only duplicate finals
                if (isFinal && transcript && transcript === this._lastFinalTranscript) return;

                // strip punctuation on interim (v2 behavior)
                if (!isFinal && transcript) {
                    transcript = transcript.replace(/[.,?!;:¿¡]/g, "");
                }

                // prepend space for utterances after first final
                if (transcript && this._currentUtteranceId > 0 && this._finalResults.length > 0) {
                    transcript = " " + transcript;
                }

                this._dbg("emit", { transcript, isFinal, utt: this._currentUtteranceId });

                const confidence = isFinal ? Math.max(stability ?? 0, 0.9) : (stability ?? 0);
                const alt = new SpeechRecognitionAlternative(transcript, confidence);
                const res = new SpeechRecognitionResult([alt], isFinal);

                const currentResults = [];
                for (let i = 0; i < this._finalResults.length; i++) currentResults.push(this._finalResults[i]);
                if (transcript) currentResults.push(res);

                const event = new SpeechRecognitionEvent("result", {
                    resultIndex: this._finalResults.length,
                    results: new SpeechRecognitionResultList(currentResults)
                });

                this._dispatchEvent("result", event);

                if (isFinal && transcript) this._finalResults.push(res);
            }

            // Dispatches an error event and initiates cleanup of the current session.
            _handleError(errorType, message) {
                this._dbg(`handling error: [${errorType}]`, message);
                const ev = new SpeechRecognitionErrorEvent("error", { error: errorType, message });
                this._dispatchEvent("error", ev);
                this._cleanup(`error: ${errorType}`);
            }

            getDebugState() {
                return {
                    backend: ACTIVE_BACKEND.name,
                    sessionActive: this._sessionActive,
                    aborting: this._aborting,
                    stopRequested: this._stopRequested,
                    continuous: this.continuous,
                    vadSpeaking: this._isVadSpeaking,
                    preSessionBufferLength: this._preSessionBuffer?.length || 0,
                    sendQueueLength: this._sendQueue?.length || 0,
                    consecutiveChunkFailures: this._consecutiveChunkFailures || 0,
                    noopFrameStreak: this._noopFrameStreak || 0,
                    latestInterimTranscript: this._latestInterimTranscript,
                    lastFinalTranscript: this._lastFinalTranscript || ""
                };
            }
        }

        /**
         * Extended Polyfill Classes.
         */

        class SpeechRecognitionEvent extends Event {
            /**
             * Event carrying the updated recognition results list.
             *
             * @param {string} type - Event type (e.g. "result").
             * @param {object} [eventInitDict] - Optional init bag; missing fields get defaults.
             */
            constructor(type, eventInitDict) {
                super(type, eventInitDict);
                const init = eventInitDict || {};
                this.resultIndex = init.resultIndex || 0;
                this.results = init.results || [];
                this.interpretation = init.interpretation || null;
                this.emma = init.emma || null;
            }
        }

        class SpeechRecognitionErrorEvent extends Event {
            /**
             * Error event with a machine-readable code and human-readable message.
             *
             * @param {string} type - Event type (e.g. "error").
             * @param {object} [eventInitDict] - Optional { error, message } bag.
             */
            constructor(type, eventInitDict) {
                super(type, eventInitDict);
                const init = eventInitDict || {};
                this.error = init.error || "unknown";
                this.message = init.message || "";
            }
        }

        class SpeechRecognitionAlternative {
            /**
             * One hypothesis: transcript text plus its confidence score.
             *
             * @param {string} transcript - Recognized text.
             * @param {number} confidence - Confidence score for this alternative.
             */
            constructor(transcript, confidence) {
                Object.assign(this, { transcript, confidence });
            }
        }

        class SpeechRecognitionResult {
            /**
             * Array-like container of SpeechRecognitionAlternative objects.
             *
             * @param {Array} alternatives - Alternatives, exposed via numeric indices.
             * @param {boolean} isFinal - Whether this result is final.
             */
            constructor(alternatives, isFinal) {
                this.isFinal = isFinal;
                this.length = alternatives.length;
                alternatives.forEach((alt, idx) => { this[idx] = alt; });
            }

            // DOM-style indexed accessor.
            item(index) {
                return this[index];
            }
        }

        class SpeechRecognitionResultList {
            /**
             * Array-like list of the results accumulated during the session.
             *
             * @param {Array} results - Results, exposed via numeric indices.
             */
            constructor(results) {
                this.length = results.length;
                results.forEach((result, idx) => { this[idx] = result; });
            }

            // DOM-style indexed accessor.
            item(index) {
                return this[index];
            }
        }

        class SpeechGrammar {
            /**
             * Dummy implementation of SpeechGrammar for compatibility with certain sites.
             * Holds only the standard `src` and `weight` fields.
             */
            constructor() {
                Object.assign(this, { src: "", weight: 1 });
            }
        }

        class SpeechGrammarList {
            /**
             * Inert SpeechGrammarList stand-in so standard sites don't hit
             * missing-object errors. All mutators are intentional no-ops.
             */
            constructor() {
                this.length = 0;
            }

            addFromURI() { }
            addFromUri() { }
            addFromString() { }

            // Always empty, so every index misses.
            item() {
                return null;
            }
        }

        // Web Speech API globals (plus webkit-prefixed aliases) to install on window,
        // all backed by the polyfill implementations above.
        const globals = {
            SpeechRecognition: GoogleWebchannelSpeechRecognition,
            webkitSpeechRecognition: GoogleWebchannelSpeechRecognition,
            SpeechRecognitionEvent,
            webkitSpeechRecognitionEvent: SpeechRecognitionEvent,
            SpeechRecognitionErrorEvent,
            webkitSpeechRecognitionErrorEvent: SpeechRecognitionErrorEvent,
            SpeechGrammar,
            webkitSpeechGrammar: SpeechGrammar,
            SpeechGrammarList,
            webkitSpeechGrammarList: SpeechGrammarList
        };

        // Install each polyfill global as an accessor whose setter is a no-op,
        // so later page scripts cannot overwrite the polyfill.
        for (const [key, val] of Object.entries(globals)) {
            try {
                // Remove any existing configurable binding first so defineProperty succeeds.
                if (Object.getOwnPropertyDescriptor(window, key)?.configurable) {
                    delete window[key];
                }
            } catch { }

            Object.defineProperty(window, key, {
                get() {
                    return val;
                },
                set() { }, // silently swallow assignments
                configurable: true,
                enumerable: true
            });
        }

        if (DEV_MODE) {
            console.log(`💉 Speech Recognition Polyfill has been successfully injected! BACKEND=${ACTIVE_BACKEND.name}, DEV_MODE=${DEV_MODE}`);
        }
    })();

})();