// Get extremely fast, free, & accurate server-side multilingual Speech Recognition/Web Speech API on any browser!
// ==UserScript==
// @name Speech Recognition Polyfill Userscript
// @namespace http://tampermonkey.net/
// @version v1.0
// @description Get extremely fast, free, & accurate server-side multilingual Speech Recognition/Web Speech API on any browser!
// @author apersongithub
// @match *://*/*
// @icon https://addons.mozilla.org/user-media/addon_icons/2970/2970008-64.png
// @grant none
// @run-at document-start
// @license MIT
// ==/UserScript==
(function() {
'use strict';
/**
* Google Webchannel SpeechRecognition Polyfill
*
* This script provides a custom implementation of the standard Web Speech API
* (SpeechRecognition) by communicating directly with Google's voice servers.
* It is useful for environments where the native SpeechRecognition API is
* unavailable, broken, or needs to be bypassed.
*
* Key improvements in this version:
*
* 1. Better Response Parsing: Correctly handles Google's proprietary server
* responses (stripping security prefixes and parsing structured data frames).
*
* 2. Organized Audio Sending (Queue): Audio chunks are queued and sent one
* by one. This prevents sending too many requests at the exact same time.
*
* 3. Error Tolerance: Minor network glitches when sending audio chunks won't
* immediately crash the entire transcription process.
*
* 4. Reliable Final Results: Hardened logic for determining when a user is
* finished speaking, ensuring we pick the most accurate text result.
*
* 5. Crash Prevention: Includes safety checks to prevent crashes if asynchronous
* network responses arrive after the microphone has already been turned off.
*
* 6. Fallback APIs: Automatically cycles through multiple backup API keys and
* Google service endpoints (like YouTube's voice search API) if the primary
* connection fails.
*/
(function initGoogleSpeechPolyfill() {
// Master debug switch: when false, _dbg() is a no-op and no console noise is produced.
const DEV_MODE = false;
// Public Google API keys tried in order; createSession() rotates through them on failure.
const API_KEYS = [
"AIzaSyBm7NubC-Swn1nt2nhYfxb58eCdmL2vCVU", // default
"AIzaSyBU2xE_JHvB6wag3tMfhxXpg2Q_W8xnM-I", // backup 1
"AIzaSyD6n9asBjvx1yBHfhFhfw_kpS9Faq0BZHM" // backup 2
];
// Webchannel endpoints (Assistant channel plus the YouTube variant); each
// must be paired with its matching referrer.
const API_ENDPOINTS = [
{
url: "https://embeddedassistant-webchannel.googleapis.com/google.assistant.embedded.v1.EmbeddedAssistant/Assist/channel",
referrer: "https://www.google.com/"
},
{
url: "https://embeddedassistant-frontend-clients6.youtube.com/google.assistant.embedded.v1.EmbeddedAssistant/YTAssist/channel",
referrer: "https://youtube.com"
}
];
// Indices of the endpoint/key currently in use; advanced by createSession() on failure.
let currentEndpointIndex = 0;
let currentKeyIndex = 0;
const getBaseUrl = () => API_ENDPOINTS[currentEndpointIndex].url;
// Fetch options shared by every request: CORS without cookies, spoofed referrer.
const getFetchOpts = () => ({
mode: "cors",
credentials: "omit",
referrer: API_ENDPOINTS[currentEndpointIndex].referrer
});
const getApiKey = () => API_KEYS[currentKeyIndex];
// Pre-warmed webchannel session (and its in-flight promise) so start() is faster.
let preSession = null;
let preSessionPromise = null;
/**
 * Scrape a live "X-Goog-Api-Key" value from inline scripts on the Google
 * homepage. Returns the key string, or null when not on google.com/ or when
 * no matching key is found.
 */
function findApiKey() {
  const { hostname, pathname } = window.location;
  if (hostname !== "www.google.com" || pathname !== "/") return null;
  const keyPattern = /"X-Goog-Api-Key"\s*:\s*"([^"]{33,})"/i;
  for (const scriptEl of document.querySelectorAll("script")) {
    const match = (scriptEl.textContent || "").match(keyPattern);
    if (match && match[1].startsWith("AIzaSyBm")) return match[1];
  }
  return null;
}
// Promote a freshly scraped page key to the front of the rotation so the
// most current key is tried first (removing any stale duplicate of it).
const scrapedKey = findApiKey();
if (scrapedKey) {
const idx = API_KEYS.indexOf(scrapedKey);
if (idx !== -1) API_KEYS.splice(idx, 1);
API_KEYS.unshift(scrapedKey);
}
/**
 * Best-effort discovery of the page's "X-Goog-AuthUser" value (multi-login
 * account index). Scans inline scripts first, then the full document HTML;
 * defaults to "0" when nothing matches.
 */
function findAuthUser() {
  const pattern = /"X-Goog-AuthUser"\s*:\s*(?:[^"\n]+)?"([^"]+)"/i;
  for (const scriptEl of document.querySelectorAll("script")) {
    const hit = (scriptEl.textContent || "").match(pattern);
    if (hit) return hit[1];
  }
  const fallback = document.documentElement.innerHTML.match(pattern);
  return fallback ? fallback[1] : "0";
}
const AUTH_USER = findAuthUser();
const CURRENT_YEAR = String(new Date().getFullYear());
// Captured "x-browser-validation" header value (Chrome anti-abuse token), if any.
let browserValidation = null;
// Hook XHR so that if the host page ever sends x-browser-validation itself,
// we capture and reuse the genuine value for our own requests.
const _origXhrSetHeader = XMLHttpRequest.prototype.setRequestHeader;
XMLHttpRequest.prototype.setRequestHeader = function (h, v) {
if (h.toLowerCase() === "x-browser-validation" && !browserValidation) browserValidation = v;
return _origXhrSetHeader.apply(this, arguments);
};
// Fallback: try to find an inline validation token in the initial HTML.
if (!browserValidation) {
const valMatch = document.documentElement.innerHTML.match(
/x-browser-validation['":\s]+([A-Za-z0-9+/=]{20,44})/i
);
if (valMatch) browserValidation = valMatch[1];
}
/**
 * Build the Chrome-like request headers sent with every webchannel call.
 * The captured x-browser-validation token is included only when one was
 * found on the page.
 */
function getHeaders() {
  const headers = {
    accept: "*/*",
    "accept-language": "en-US,en;q=0.9",
    "content-type": "application/x-www-form-urlencoded",
    "x-browser-channel": "stable",
    "x-browser-copyright": `Copyright ${CURRENT_YEAR} Google LLC. All Rights reserved.`,
    "x-goog-authuser": AUTH_USER
  };
  if (browserValidation) headers["x-browser-validation"] = browserValidation;
  headers["x-browser-year"] = CURRENT_YEAR;
  return headers;
}
/**
 * Perform the initial webchannel "bind" POST to obtain a session.
 * Rotates through every API key / endpoint combination until one yields a
 * SID; returns { sid, gsessionid, ridCounter } or throws the last error.
 */
async function createSession() {
let attempts = 0;
const maxAttempts = API_KEYS.length * API_ENDPOINTS.length;
let lastError = null;
while (attempts < maxAttempts) {
// Randomized starting request id; later requests on the session increment it.
const ridCounter = 62480 + Math.floor(Math.random() * 9000);
const bindUrl =
`${getBaseUrl()}?VER=8&RID=${ridCounter}&CVER=22&X-HTTP-Session-Id=gsessionid` +
`&%24httpHeaders=x-goog-api-key%3A${getApiKey()}%0D%0A&zx=${Date.now()}&t=1`;
try {
const bindRes = await fetch(bindUrl, {
...getFetchOpts(),
method: "POST",
headers: getHeaders(),
body: "count=0"
});
if (bindRes.ok) {
const bindText = await bindRes.text();
// Drop the frame-length lines (bare digits) that separate JSON chunks.
const jsonLines = bindText
.split("\n")
.filter((line) => line.trim() && !/^\d+$/.test(line.trim()));
const jsonStr = jsonLines.join("\n");
let parsed;
try {
parsed = JSON.parse(jsonStr);
} catch {
// Adjacent top-level arrays ("][") — stitch them into one outer array.
parsed = JSON.parse("[" + jsonStr.replace(/\]\s*\[/g, "],[") + "]");
}
// Depth-first search for the ["c", "<sid>", ...] tuple the server returns.
let sid = null;
(function findSid(arr) {
if (!Array.isArray(arr)) return;
for (const item of arr) {
if (Array.isArray(item)) {
if (item[0] === "c" && typeof item[1] === "string") sid = item[1];
findSid(item);
}
}
})(parsed);
const gsessionid = bindRes.headers.get("x-http-session-id") || null;
if (sid) {
// ridCounter + 1: the bind request consumed the first RID.
return { sid, gsessionid, ridCounter: ridCounter + 1 };
}
} else {
lastError = new Error(`Bind failed with status ${bindRes.status}`);
}
} catch (err) {
lastError = err;
}
// Move to next key/endpoint combination
currentKeyIndex++;
if (currentKeyIndex >= API_KEYS.length) {
currentKeyIndex = 0;
currentEndpointIndex = (currentEndpointIndex + 1) % API_ENDPOINTS.length;
}
attempts++;
}
throw lastError || new Error("No SID or bind failed after trying all backups");
}
/**
 * Kick off (or reuse) a background createSession() so a session is ready
 * before the user starts speaking. Resolves with the session, or null on
 * failure (in which case the cached promise is cleared for a later retry).
 */
function warmSession() {
  if (!preSessionPromise) {
    preSessionPromise = createSession().then(
      (session) => {
        preSession = session;
        return session;
      },
      () => {
        preSession = null;
        preSessionPromise = null;
        return null;
      }
    );
  }
  return preSessionPromise;
}
/**
 * Event-target base for the recognizer: the native EventTarget when the
 * environment provides one, otherwise a minimal stand-in implementing
 * addEventListener / removeEventListener / dispatchEvent.
 */
const BaseClass =
  typeof EventTarget !== "undefined"
    ? EventTarget
    : class {
        constructor() {
          this.listeners = {};
        }
        addEventListener(type, callback) {
          const list = this.listeners[type] || (this.listeners[type] = []);
          list.push(callback);
        }
        removeEventListener(type, callback) {
          const list = this.listeners[type];
          if (list) this.listeners[type] = list.filter((fn) => fn !== callback);
        }
        dispatchEvent(event) {
          const list = this.listeners[event.type];
          if (!list) return true;
          for (const fn of list) fn.call(this, event);
          return !event.defaultPrevented;
        }
      };
class GoogleWebchannelSpeechRecognition extends BaseClass {
// Construct an idle recognizer: W3C-spec properties at their defaults, all
// event-handler slots null, and internal session/VAD/queue state reset.
constructor() {
super();
// W3C properties
this.continuous = false;
this.interimResults = false;
this.lang = "en-US";
this.maxAlternatives = 1;
this.serviceURI = "";
this.grammars = new SpeechGrammarList();
// Event handlers (on* properties per the Web Speech API)
this.onaudiostart = null;
this.onaudioend = null;
this.onend = null;
this.onerror = null;
this.onnomatch = null;
this.onresult = null;
this.onsoundstart = null;
this.onsoundend = null;
this.onspeechstart = null;
this.onspeechend = null;
this.onstart = null;
// Runtime state (mic stream, audio graph, lifecycle flags)
this._stream = null;
this._audioCtx = null;
this._processor = null;
this._dummyAudio = null;
this._processorConnected = false;
this._aborting = false;
this._cleanupCalled = false;
this._switchingSession = false;
this._abortController = null;
this._bcDone = false;
this._bcBuffer = "";
// Transcript tracking used to pick interim emissions and the final result
this._latestHighStabilityTranscript = null;
this._latestInterimTranscript = null;
this._latestInterimStability = null;
this._lastEmittedInterimTranscript = null;
this._lastFinalTranscript = null;
this._speechendFired = false;
this._pendingFinal = false;
this._finalizedThisUtterance = false;
this._bestFinalCandidate = null;
this._bestFinalStability = -1;
this._finalResults = [];
this._currentUtteranceId = 0;
this._lastEmittedUtteranceId = -1;
// Session IDs
this._currentSid = null;
this._currentGsessionid = null;
this._currentRidCounter = 0;
this._currentOfs = 1;
// VAD
this._vadSilenceFrames = 0;
this._isVadSpeaking = false;
// chunk send queue
this._sendQueue = [];
this._sendingChunks = false;
this._consecutiveChunkFailures = 0;
this._maxConsecutiveChunkFailures = 6;
// stale-session guards (bumped per start/backchannel; old callbacks bail out)
this._sessionGen = 0;
this._activeBackchannelGen = 0;
this._lastStartId = 0;
// restart coalescing
this._restartPromise = null;
this._suppressEndOnce = false;
}
// Dev-only logger. Routes output through a hidden iframe's console.log so
// messages survive pages that stub or override the top-level console.
_dbg(...args) {
if (!DEV_MODE) return;
if (!GoogleWebchannelSpeechRecognition._forceLog) {
try {
const i = document.createElement('iframe');
i.style.display = 'none';
i.id = 'speech-polyfill-logger';
(document.head || document.documentElement).appendChild(i);
GoogleWebchannelSpeechRecognition._forceLog = i.contentWindow.console.log.bind(i.contentWindow.console);
// Do NOT remove the iframe, otherwise its console.log is destroyed.
} catch (e) {
const backupLog = console.log || console.info || console.debug;
GoogleWebchannelSpeechRecognition._forceLog = backupLog.bind(console);
}
}
try {
GoogleWebchannelSpeechRecognition._forceLog("[polyfill dbg]", ...args);
} catch (e) {
// Last resort: fall back to the page console, and swallow even that failing.
try {
console.log("[polyfill dbg]", ...args);
} catch (e2) { }
}
}
_dispatchEvent(name, eventObj) {
const ev = eventObj || new Event(name);
if (typeof this["on" + name] === "function") {
try {
this["on" + name](ev);
} catch (e) {
if (DEV_MODE) console.warn("[polyfill] on" + name + " handler error:", e);
}
}
try {
this.dispatchEvent(ev);
} catch (e) {
if (DEV_MODE) console.warn("[polyfill] dispatchEvent error:", e);
}
}
_norm(t) {
return (t || "").replace(/\s+/g, " ").trim();
}
_stripXssiPrefix(text) {
return text.replace(/^\)\]\}'\s*\n?/, "");
}
// Try to pull one length-prefixed frame ("<len>\n<payload>") off the
// backchannel buffer. Returns the payload string, or null when no complete
// frame is buffered yet (or when a junk line was discarded to resync).
_readFrameFromBuffer() {
this._bcBuffer = this._stripXssiPrefix(this._bcBuffer).replace(/^\s+/, "");
if (!this._bcBuffer.length) return null;
const nl = this._bcBuffer.indexOf("\n");
if (nl === -1) return null;
const lenStr = this._bcBuffer.slice(0, nl).trim();
if (!/^\d+$/.test(lenStr)) {
// Not a length header — drop the line and resync on the next call.
this._bcBuffer = this._bcBuffer.slice(nl + 1);
return null;
}
const len = Number(lenStr);
const start = nl + 1;
const end = start + len;
// Frame not fully received yet; wait for more bytes.
if (this._bcBuffer.length < end) return null;
const payload = this._bcBuffer.slice(start, end);
this._bcBuffer = this._bcBuffer.slice(end);
return payload;
}
// Walk one parsed backchannel frame and distill the signals we act on:
// the latest speechResults transcripts (split into high/low-stability
// parts), END_OF_UTTERANCE, channel "close", and no-speech markers.
_extractFrameSignals(frameObj) {
let lastSpeechResults = null;
let sawEOU = false;
let sawClose = false;
let sawNoSpeech = false;
const walk = (n) => {
if (n == null) return;
if (typeof n === "string") {
if (n === "close") sawClose = true;
if (n.includes("END_OF_UTTERANCE")) sawEOU = true;
return;
}
if (Array.isArray(n)) {
for (const x of n) walk(x);
return;
}
if (typeof n === "object") {
if (n.eventType === "END_OF_UTTERANCE") sawEOU = true;
if (n.noSpeech === true) sawNoSpeech = true;
if (Array.isArray(n.speechResults) && n.speechResults.length > 0) {
lastSpeechResults = n.speechResults;
}
// Recurse into everything except the transcript fields read above.
for (const k of Object.keys(n)) {
if (k !== "speechResults" && k !== "transcript" && k !== "stability") {
walk(n[k]);
}
}
}
};
walk(frameObj);
// Partition transcript parts by stability; parts at or above the threshold
// are treated as "high stability" and ordered before the rest.
const STABILITY_THRESHOLD = 0.5;
let highParts = [];
let lowParts = [];
let bestStability = null;
if (lastSpeechResults) {
for (const sr of lastSpeechResults) {
if (sr.noSpeech === true) sawNoSpeech = true;
if (typeof sr.transcript === "string") {
const s = typeof sr.stability === "number" ? sr.stability : 0;
if (bestStability === null || s > bestStability) bestStability = s;
if (s < STABILITY_THRESHOLD) lowParts.push(sr.transcript);
else highParts.push(sr.transcript);
}
}
}
const highText = highParts.join(" ");
const lowText = lowParts.join(" ");
const fullText = (highText + (highText && lowText ? " " : "") + lowText).trim();
return {
fullText: fullText || null,
highText: highText || null,
bestStability,
sawEOU,
sawClose,
sawNoSpeech
};
}
// Read the streaming backchannel response frame-by-frame, translating
// server signals into Web Speech events. Exits silently whenever this
// generation/start has been superseded or the recognizer is aborting.
async _consumeBackchannel(bcRes, gen, startId) {
const reader = bcRes.body.getReader();
const decoder = new TextDecoder();
this._bcBuffer = "";
while (!this._aborting) {
// Stale-session guards: a newer backchannel/start takes precedence.
if (gen !== this._activeBackchannelGen) return;
if (startId !== this._lastStartId) return;
const { done, value } = await reader.read();
if (done) break;
if (gen !== this._activeBackchannelGen) return;
if (startId !== this._lastStartId) return;
this._bcBuffer += decoder.decode(value, { stream: true });
// Drain every complete frame currently buffered.
while (!this._aborting) {
if (gen !== this._activeBackchannelGen) return;
if (startId !== this._lastStartId) return;
const payload = this._readFrameFromBuffer();
if (payload == null) break;
let frameObj;
try {
frameObj = JSON.parse(payload);
} catch {
// Unparseable frame: skip it and keep reading.
continue;
}
const {
fullText,
highText,
bestStability,
sawEOU,
sawClose,
sawNoSpeech
} = this._extractFrameSignals(frameObj);
// Text in the same frame as "close" is ignored for emission purposes.
const ignoreTextThisFrame = sawClose;
this._dbg("frame", {
gen, activeGen: this._activeBackchannelGen,
startId, activeStart: this._lastStartId,
sawEOU, sawClose, fullText, bestStability
});
if (sawNoSpeech) {
// Server decided nothing was said: report nomatch and shut down.
this._dispatchEvent("nomatch");
this._bcDone = true;
this._cleanup();
return;
}
if (fullText && !ignoreTextThisFrame) {
// Track the newest interim text and keep the best final candidate.
this._latestInterimTranscript = fullText;
if (highText) this._latestHighStabilityTranscript = highText;
if (bestStability !== null) this._latestInterimStability = bestStability;
this._considerFinalCandidate(fullText, bestStability);
}
if (sawEOU) {
// End of utterance: a later text frame (or stream end) finalizes.
this._pendingFinal = true;
if (!this._speechendFired) {
this._speechendFired = true;
this._dispatchEvent("speechend");
}
// Still surface the EOU frame's own text as an interim (deduped).
if (fullText && !ignoreTextThisFrame && this.interimResults && !this._finalizedThisUtterance) {
if (
fullText !== this._lastEmittedInterimTranscript ||
this._currentUtteranceId !== this._lastEmittedUtteranceId
) {
this._lastEmittedInterimTranscript = fullText;
this._lastEmittedUtteranceId = this._currentUtteranceId;
this._emitResult(fullText, bestStability ?? 0.01, false);
}
}
} else if (fullText && !ignoreTextThisFrame) {
if (this._pendingFinal) {
// Text arriving after EOU: lock in the final result.
this._finalizeCurrentUtteranceOnce();
} else if (this.interimResults) {
// Plain interim update, deduped against the last emission.
if (
fullText !== this._lastEmittedInterimTranscript ||
this._currentUtteranceId !== this._lastEmittedUtteranceId
) {
this._lastEmittedInterimTranscript = fullText;
this._lastEmittedUtteranceId = this._currentUtteranceId;
this._emitResult(fullText, bestStability ?? 0.01, false);
}
}
}
if (sawClose) {
// Channel closed by server: finalize, then either restart (continuous
// mode) or clean up. When a final was emitted in one-shot mode, flag
// _cleanup() to skip dispatching "end" once.
if (!this._finalizedThisUtterance) {
this._finalizeCurrentUtteranceOnce();
}
if (!this.continuous && this._finalizedThisUtterance) {
this._suppressEndOnce = true;
}
this._bcDone = true;
if (this.continuous && !this._aborting) {
await this._restartSession();
} else {
this._cleanup();
}
return;
}
}
}
// Stream ended without an explicit close: finalize whatever we have.
if (this._pendingFinal || this._latestInterimTranscript) {
this._finalizeCurrentUtteranceOnce();
}
}
_considerFinalCandidate(transcript, stability) {
const t = this._norm(transcript);
if (!t) return;
const s = typeof stability === "number" ? stability : 0;
const currentBestLen = this._bestFinalCandidate ? this._bestFinalCandidate.length : 0;
if (
this._bestFinalCandidate == null ||
s > this._bestFinalStability ||
(s === this._bestFinalStability && t.length >= currentBestLen)
) {
this._bestFinalCandidate = t;
this._bestFinalStability = s;
}
}
// Emit the current utterance's final result at most once, preferring the
// best candidate tracked so far and falling back to the newest interim.
// A final identical to the previous one is absorbed without re-emitting.
_finalizeCurrentUtteranceOnce() {
if (this._finalizedThisUtterance) return;
let finalText = this._bestFinalCandidate || this._norm(this._latestInterimTranscript);
if (!finalText) return;
// Use the candidate's stability; fall back to the latest interim's, then 0.99.
const finalStability =
this._bestFinalStability >= 0 ? this._bestFinalStability : this._latestInterimStability ?? 0.99;
if (finalText === this._lastFinalTranscript) {
// Same text already emitted as final — mark done without re-emitting.
this._finalizedThisUtterance = true;
return;
}
this._dbg("finalizeOnce", {
pending: this._pendingFinal,
finalized: this._finalizedThisUtterance,
best: this._bestFinalCandidate,
latest: this._latestInterimTranscript
});
this._emitResult(finalText, finalStability, true);
this._lastFinalTranscript = finalText;
this._finalizedThisUtterance = true;
// Reset interim dedupe tracking for the next utterance.
this._lastEmittedInterimTranscript = null;
this._lastEmittedUtteranceId = -1;
}
// W3C start(): acquire the microphone, build the audio pipeline, and bind
// a recognition session. Dispatches "start"/"audiostart" on success and a
// classified SpeechRecognitionErrorEvent on failure.
async start() {
if (this._stream && !this._aborting) throw new Error("Already started");
// Bump the start/session generation; in-flight callbacks from older
// sessions compare against these counters and bail out.
this._lastStartId++;
this._sessionGen++;
this._activeBackchannelGen = this._sessionGen;
this._dbg("start", { startId: this._lastStartId, sessionGen: this._sessionGen, continuous: this.continuous });
// Reset all per-run state.
this._aborting = false;
this._cleanupCalled = false;
this._switchingSession = false;
this._bcDone = false;
this._speechendFired = false;
this._pendingFinal = false;
this._finalizedThisUtterance = false;
this._bestFinalCandidate = null;
this._bestFinalStability = -1;
this._latestInterimTranscript = null;
this._latestInterimStability = null;
this._lastEmittedInterimTranscript = null;
this._lastFinalTranscript = null;
this._finalResults = [];
this._currentUtteranceId = 0;
this._lastEmittedUtteranceId = -1;
this._vadSilenceFrames = 0;
this._isVadSpeaking = false;
this._sendQueue = [];
this._sendingChunks = false;
this._consecutiveChunkFailures = 0;
this._abortController = new AbortController();
try {
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
throw new Error("getUserMedia not supported (requires HTTPS)");
}
this._stream = await navigator.mediaDevices.getUserMedia({ audio: true });
this._dispatchEvent("start");
this._dispatchEvent("audiostart");
// Pre-bind a session while the audio graph is being built.
await warmSession();
const AudioContext = window.AudioContext || window.webkitAudioContext;
if (!AudioContext) throw new Error("AudioContext not supported");
this._audioCtx = new AudioContext();
// Muted <audio> element fed by the stream; presumably keeps the stream
// active in some browsers — TODO confirm. Playback errors are ignored.
this._dummyAudio = new Audio();
this._dummyAudio.muted = true;
this._dummyAudio.srcObject = this._stream;
try {
const p = this._dummyAudio.play();
if (p?.catch) p.catch(() => { });
} catch { }
const source = this._audioCtx.createMediaStreamSource(this._stream);
const processor = this._audioCtx.createScriptProcessor(8192, 1, 1);
source.connect(processor);
processor.connect(this._audioCtx.destination);
if (this._audioCtx.state === "suspended") await this._audioCtx.resume();
this._processor = processor;
await this._setupSession(preSession);
} catch (err) {
if (DEV_MODE) console.error("[polyfill] start error:", err);
// Classify the failure into a W3C SpeechRecognition error code.
if (err.name === "NotAllowedError") {
this._handleError("not-allowed", "NO_MICROPHONE_PERMISSION");
} else if (
err.name === "NotFoundError" ||
err.name === "NotReadableError" ||
err.name === "OverconstrainedError" ||
err.name === "SecurityError" ||
(err.message && (err.message.includes("getUserMedia") || err.message.includes("AudioContext")))
) {
this._handleError("audio-capture", err.message);
} else {
this._handleError("network", err.message || "Unknown network error");
}
}
}
async _setupSession(initialSession = null) {
try {
let session = initialSession;
if (!session) {
if (preSessionPromise) await preSessionPromise;
session = preSession || (await createSession());
}
preSession = null;
const { sid, gsessionid } = session;
let { ridCounter } = session;
const backchannelUrl =
`${getBaseUrl()}?` +
(gsessionid ? `gsessionid=${gsessionid}&` : "") +
`VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;
const myGen = ++this._sessionGen;
this._activeBackchannelGen = myGen;
const myStartId = this._lastStartId;
this._dbg("open backchannel", { myGen, myStartId, sid });
fetch(backchannelUrl, {
...getFetchOpts(),
method: "GET",
headers: { ...getHeaders(), "content-type": undefined },
signal: this._abortController.signal
})
.then(async (bcRes) => {
if (myGen !== this._activeBackchannelGen) return;
if (myStartId !== this._lastStartId) return;
await this._consumeBackchannel(bcRes, myGen, myStartId);
})
.catch((e) => {
if (myGen !== this._activeBackchannelGen) return;
if (myStartId !== this._lastStartId) return;
if (e.name !== "AbortError") this._handleError("network", e.message);
});
const configRid = ridCounter++;
const assistConfig = {
config: {
dialogStateIn: { languageCode: this.lang },
deviceConfig: { deviceId: "example", deviceModelId: "example" },
audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
requestType: 4
}
};
const configUrl =
`${getBaseUrl()}?VER=8` +
(gsessionid ? `&gsessionid=${gsessionid}` : "") +
`&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;
const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
JSON.stringify(assistConfig)
)}`;
fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });
this._currentSid = sid;
this._currentGsessionid = gsessionid;
this._currentRidCounter = ridCounter;
this._currentOfs = 1;
if (!this._processorConnected) {
this._processorConnected = true;
const processorRef = this._processor;
this._processor.onaudioprocess = (e) => {
if (this._aborting || this._cleanupCalled || this._switchingSession) return;
if (!this._processor || this._processor !== processorRef) return;
if (!this._audioCtx || !this._stream) return;
if (this._bcDone) return;
const float32 = e.inputBuffer.getChannelData(0);
let sumSquares = 0;
for (let i = 0; i < float32.length; i++) sumSquares += float32[i] ** 2;
const rms = Math.sqrt(sumSquares / float32.length);
const isSpeech = rms >= 0.01;
if (isSpeech) {
this._vadSilenceFrames = 0;
this._isVadSpeaking = true;
} else {
this._vadSilenceFrames++;
}
// Keep sending a short tail of silence so server can endpoint/finalize.
// ~8192 samples per frame; at 48kHz that's ~170ms/frame.
const TRAILING_SILENCE_FRAMES = 12; // about ~2s tail max
const shouldSend =
isSpeech ||
(this._isVadSpeaking && this._vadSilenceFrames <= TRAILING_SILENCE_FRAMES);
if (!shouldSend) {
this._isVadSpeaking = false;
return;
}
if (!this._audioCtx) return;
const originalSampleRate = this._audioCtx.sampleRate;
if (!originalSampleRate) return;
const ratio = originalSampleRate / 16000;
const targetLength = Math.round(float32.length / ratio);
const int16 = new Int16Array(targetLength);
for (let i = 0; i < targetLength; i++) {
const srcIndex = Math.min(Math.floor(i * ratio), float32.length - 1);
int16[i] = Math.max(-1, Math.min(1, float32[srcIndex])) * 0x7fff;
}
const uint8 = new Uint8Array(int16.buffer);
let binary = "";
for (let i = 0; i < uint8.length; i += 8192) {
binary += String.fromCharCode(...uint8.subarray(i, i + 8192));
}
const b64 = btoa(binary);
this._enqueueChunk(b64);
};
}
} catch (err) {
this._handleError("network", err.message);
}
}
_enqueueChunk(audioBase64) {
if (this._aborting || this._cleanupCalled || this._switchingSession) return;
if (this._pendingFinal) return;
this._sendQueue.push(audioBase64);
if (!this._sendingChunks) this._drainChunkQueue();
}
// Serially upload queued audio chunks (one in-flight POST at a time).
// Transient failures are tolerated; after _maxConsecutiveChunkFailures in a
// row the session is soft-restarted. Guarded so only one drain loop runs.
async _drainChunkQueue() {
if (this._sendingChunks) return;
this._sendingChunks = true;
try {
while (this._sendQueue.length && !this._aborting && !this._cleanupCalled && !this._switchingSession) {
if (!this._currentSid || !this._abortController) break;
const audioBase64 = this._sendQueue.shift();
// Snapshot session identifiers so a mid-flight session switch cannot
// mix RIDs/offsets across sessions.
const chunkRid = this._currentRidCounter++;
const cSid = this._currentSid;
const cGsessionid = this._currentGsessionid;
const cOfs = this._currentOfs++;
const chunkUrl =
`${getBaseUrl()}?VER=8` +
(cGsessionid ? `&gsessionid=${cGsessionid}` : "") +
`&SID=${cSid}&RID=${chunkRid}&AID=0&zx=${Date.now()}&t=1`;
const chunkPayload = `count=1&ofs=${cOfs}&req0___data__=${encodeURIComponent(
JSON.stringify({ audioIn: audioBase64 })
)}`;
try {
const res = await fetch(chunkUrl, {
...getFetchOpts(),
method: "POST",
headers: getHeaders(),
body: chunkPayload,
signal: this._abortController.signal
});
if (!res.ok) {
this._consecutiveChunkFailures++;
if (DEV_MODE) console.warn("[polyfill] chunk non-ok:", res.status);
if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
if (DEV_MODE) console.warn("[polyfill] too many chunk failures, soft-restarting session");
await this._restartSession();
this._consecutiveChunkFailures = 0;
}
} else {
this._consecutiveChunkFailures = 0;
}
} catch (err) {
// AbortError means stop()/restart cancelled us — exit quietly.
if (err.name === "AbortError") break;
this._consecutiveChunkFailures++;
if (DEV_MODE) console.warn("[polyfill] chunk send error:", err.message);
if (this._consecutiveChunkFailures >= this._maxConsecutiveChunkFailures) {
if (DEV_MODE) console.warn("[polyfill] too many chunk exceptions, soft-restarting session");
await this._restartSession();
this._consecutiveChunkFailures = 0;
}
}
}
} finally {
this._sendingChunks = false;
}
}
async _restartSession(initialSession = null) {
if (!this.continuous) return;
if (this._aborting || this._cleanupCalled) return;
if (this._restartPromise) return this._restartPromise;
this._dbg("restart requested", {
switching: this._switchingSession,
hasRestartPromise: !!this._restartPromise,
bcDone: this._bcDone
});
this._restartPromise = (async () => {
if (this._abortController) this._abortController.abort();
this._abortController = new AbortController();
this._switchingSession = true;
this._bcDone = false;
this._speechendFired = false;
this._pendingFinal = false;
this._finalizedThisUtterance = false;
this._bestFinalCandidate = null;
this._bestFinalStability = -1;
this._lastEmittedInterimTranscript = null;
this._latestInterimTranscript = null;
this._latestInterimStability = null;
this._currentUtteranceId++;
this._sendQueue = [];
this._sendingChunks = false;
this._consecutiveChunkFailures = 0;
try {
let session = initialSession || preSession;
if (!session) session = await warmSession();
preSession = null;
preSessionPromise = null;
if (!session) throw new Error("Failed to warm session");
const { sid, gsessionid } = session;
let { ridCounter } = session;
const backchannelUrl =
`${getBaseUrl()}?` +
(gsessionid ? `gsessionid=${gsessionid}&` : "") +
`VER=8&RID=rpc&SID=${sid}&AID=0&CI=0&TYPE=xmlhttp&zx=${Date.now()}&t=1`;
const myGen = ++this._sessionGen;
this._activeBackchannelGen = myGen;
const myStartId = this._lastStartId;
this._dbg("open backchannel (restart)", { myGen, myStartId, sid });
fetch(backchannelUrl, {
...getFetchOpts(),
method: "GET",
headers: { ...getHeaders(), "content-type": undefined },
signal: this._abortController.signal
})
.then(async (bcRes) => {
if (myGen !== this._activeBackchannelGen) return;
if (myStartId !== this._lastStartId) return;
await this._consumeBackchannel(bcRes, myGen, myStartId);
})
.catch((e) => {
if (myGen !== this._activeBackchannelGen) return;
if (myStartId !== this._lastStartId) return;
if (e.name !== "AbortError") this._handleError("network", e.message);
});
const configRid = ridCounter++;
const assistConfig = {
config: {
dialogStateIn: { languageCode: this.lang },
deviceConfig: { deviceId: "example", deviceModelId: "example" },
audioInConfig: { encoding: "LINEAR16", sampleRateHertz: 16000 },
audioOutConfig: { encoding: "MP3", sampleRateHertz: 22050, volumePercentage: 0 },
requestType: 4
}
};
const configUrl =
`${getBaseUrl()}?VER=8` +
(gsessionid ? `&gsessionid=${gsessionid}` : "") +
`&SID=${sid}&RID=${configRid}&AID=0&zx=${Date.now()}&t=1`;
const configPayload = `count=1&ofs=0&req0___data__=${encodeURIComponent(
JSON.stringify(assistConfig)
)}`;
fetch(configUrl, { ...getFetchOpts(), method: "POST", headers: getHeaders(), body: configPayload });
this._currentSid = sid;
this._currentGsessionid = gsessionid;
this._currentRidCounter = ridCounter;
this._currentOfs = 1;
this._switchingSession = false;
} catch (err) {
this._switchingSession = false;
this._handleError("network", err.message);
}
})().finally(() => {
this._restartPromise = null;
});
return this._restartPromise;
}
// Graceful stop: finalize any pending/interim transcript before tearing
// the session down, per the Web Speech API stop() contract.
stop() {
if (this._aborting) return;
this._aborting = true;
// Prefer the server-confirmed pending final; otherwise promote the newest
// interim text (unless it merely repeats the last final).
if (this._pendingFinal) this._finalizeCurrentUtteranceOnce();
else if (this._latestInterimTranscript && this._norm(this._latestInterimTranscript) !== this._lastFinalTranscript) {
this._considerFinalCandidate(this._latestInterimTranscript, this._latestInterimStability ?? 0.99);
this._finalizeCurrentUtteranceOnce();
}
if (this._abortController) this._abortController.abort();
// In one-shot mode with text still in flight, flag _cleanup() to skip
// dispatching "end" once.
if (!this.continuous && (this._pendingFinal || this._latestInterimTranscript)) {
this._suppressEndOnce = true;
}
this._cleanup();
}
abort() {
if (this._aborting) return;
this._aborting = true;
if (this._abortController) this._abortController.abort();
this._cleanup();
}
// Tear down the audio graph and mic stream, dispatch "audioend"/"end"
// (unless _suppressEndOnce is set), and reset all per-run state so the
// instance can be start()ed again. Guarded against double entry.
_cleanup() {
if (this._cleanupCalled) return;
this._cleanupCalled = true;
if (this._processor) {
try { this._processor.onaudioprocess = null; } catch { }
try { this._processor.disconnect(); } catch { }
this._processor = null;
}
if (this._dummyAudio) {
try { this._dummyAudio.pause(); } catch { }
this._dummyAudio.srcObject = null;
this._dummyAudio = null;
}
if (this._stream) {
// Stop every track so the browser releases the microphone.
this._stream.getTracks().forEach((t) => t.stop());
this._stream = null;
}
if (this._audioCtx && this._audioCtx.state !== "closed") {
try { this._audioCtx.close(); } catch { }
}
this._audioCtx = null;
this._dispatchEvent("audioend");
// _suppressEndOnce is a one-shot flag set by stop()/close handling so a
// final result can reach listeners without an interleaved "end" event.
if (!this._suppressEndOnce) this._dispatchEvent("end");
else this._suppressEndOnce = false;
// Reset state for a potential future start().
this._aborting = false;
this._cleanupCalled = false;
this._processorConnected = false;
this._switchingSession = false;
this._bcDone = false;
this._speechendFired = false;
this._pendingFinal = false;
this._finalizedThisUtterance = false;
this._bestFinalCandidate = null;
this._bestFinalStability = -1;
this._latestInterimTranscript = null;
this._latestInterimStability = null;
this._lastEmittedInterimTranscript = null;
this._lastFinalTranscript = null;
this._currentUtteranceId = 0;
this._lastEmittedUtteranceId = -1;
this._bcBuffer = "";
this._sendQueue = [];
this._sendingChunks = false;
this._consecutiveChunkFailures = 0;
}
_emitResult(transcript, stability, isFinal) {
if (isFinal && transcript && transcript === this._lastFinalTranscript) return;
this._dbg("emit", { transcript, isFinal, utt: this._currentUtteranceId });
const alt = new SpeechRecognitionAlternative(transcript, stability ?? 0);
const res = new SpeechRecognitionResult([alt], isFinal);
const currentResults = [];
for (let i = 0; i < this._finalResults.length; i++) currentResults.push(this._finalResults[i]);
if (transcript) currentResults.push(res);
const event = new SpeechRecognitionEvent("result", {
resultIndex: this._finalResults.length,
results: new SpeechRecognitionResultList(currentResults)
});
this._dispatchEvent("result", event);
if (isFinal && transcript) {
this._finalResults.push(res);
}
}
_handleError(errorType, message) {
const ev = new SpeechRecognitionErrorEvent("error", { error: errorType, message });
this._dispatchEvent("error", ev);
this._cleanup();
}
}
/**
 * W3C SpeechRecognitionEvent shim: carries the result list and index plus
 * the legacy interpretation/emma fields (always null in this polyfill).
 */
class SpeechRecognitionEvent extends Event {
  constructor(type, eventInitDict) {
    super(type, eventInitDict);
    const init = eventInitDict ?? {};
    this.resultIndex = init.resultIndex || 0;
    this.results = init.results || [];
    this.interpretation = init.interpretation || null;
    this.emma = init.emma || null;
  }
}
/** W3C SpeechRecognitionErrorEvent shim: `error` code plus human-readable message. */
class SpeechRecognitionErrorEvent extends Event {
  constructor(type, eventInitDict) {
    super(type, eventInitDict);
    const init = eventInitDict ?? {};
    this.error = init.error || "unknown";
    this.message = init.message || "";
  }
}
/** Single recognition hypothesis: transcript text plus its confidence score. */
class SpeechRecognitionAlternative {
  constructor(transcript, confidence) {
    Object.assign(this, { transcript, confidence });
  }
}
/**
 * Array-like collection of alternatives for one utterance segment,
 * indexable (result[0]) with a legacy item() accessor. `isFinal` marks
 * whether the segment may still be revised.
 */
class SpeechRecognitionResult {
  constructor(alternatives, isFinal) {
    this.isFinal = isFinal;
    this.length = alternatives.length;
    alternatives.forEach((alt, i) => {
      this[i] = alt;
    });
  }
  item(index) {
    return this[index];
  }
}
/** Array-like, read-only list of SpeechRecognitionResult objects with item(). */
class SpeechRecognitionResultList {
  constructor(results) {
    this.length = results.length;
    results.forEach((result, i) => {
      this[i] = result;
    });
  }
  item(index) {
    return this[index];
  }
}
/** Stub SpeechGrammar carrying only the standard src/weight fields. */
class SpeechGrammar {
  constructor() {
    Object.assign(this, { src: "", weight: 1 });
  }
}
// Stub SpeechGrammarList: this polyfill ignores grammars, so the list is
// permanently empty and all add* methods are no-ops.
class SpeechGrammarList {
constructor() {
this.length = 0;
}
addFromURI() { }
addFromUri() { }
addFromString() { }
item() {
return null;
}
}
// Global names to overwrite so pages pick up the polyfill whether they use
// the prefixed (webkit*) or unprefixed API names.
const globals = {
SpeechRecognition: GoogleWebchannelSpeechRecognition,
webkitSpeechRecognition: GoogleWebchannelSpeechRecognition,
SpeechRecognitionEvent,
webkitSpeechRecognitionEvent: SpeechRecognitionEvent,
SpeechRecognitionErrorEvent,
webkitSpeechRecognitionErrorEvent: SpeechRecognitionErrorEvent,
SpeechGrammar,
webkitSpeechGrammar: SpeechGrammar,
SpeechGrammarList,
webkitSpeechGrammarList: SpeechGrammarList
};
// Replace each global with a getter that always returns the polyfill and a
// no-op setter so pages cannot swap the native implementation back in.
for (const [key, val] of Object.entries(globals)) {
try {
// Remove any existing (configurable) property before redefining it.
if (Object.getOwnPropertyDescriptor(window, key)?.configurable) {
delete window[key];
}
} catch { }
Object.defineProperty(window, key, {
get() {
return val;
},
set() { },
configurable: true,
enumerable: true
});
}
if (DEV_MODE) console.log("🧩 Google Webchannel SpeechRecognition Polyfill injected!");
})();
})();