// ==UserScript==
// @name Google Cloud TTS Downloader
// @description Add a Download button, language flags, voice gender for Google Cloud Text-to-Speech AI.
// @icon https://www.google.com/s2/favicons?sz=64&domain=cloud.google.com
// @version 1.3
// @author afkarxyz
// @namespace https://github.com/afkarxyz/userscripts/
// @supportURL https://github.com/afkarxyz/userscripts/issues
// @license MIT
// @match https://www.gstatic.com/cloud-site-ux/text_to_speech/text_to_speech.min.html
// @grant none
// ==/UserScript==
;(() => {
// Base URL of the 4x3 SVG country flags from the lipis/flag-icons repository,
// served through jsDelivr. A flag URL is formed as `${FLAG_BASE_URL}${code}.svg`.
// NOTE(review): the pinned release tag was reconstructed — confirm it against upstream.
const FLAG_BASE_URL = "https://cdn.jsdelivr.net/gh/lipis/flag-icons@7.2.3/flags/4x3/"
// Menu labels of the audio device profiles; a click on one of these entries is
// remembered in localStorage so the choice can be restored later.
const AUDIO_DEVICE_PROFILES = [
  "Default",
  "Smart watch or wearable",
  "Smartphone",
  "Headphones or earbuds",
  "Small home speaker",
  "Smart home speaker",
  "Home entertainment system or smart TV",
  "Car speaker",
  "Interactive Voice Response (IVR) system",
]
// Lookup table for the language menu. `textMap` is keyed by the exact label the
// page renders and yields the flag file code plus the English name shown instead.
// Each row below is [page label, flag code, English label].
const languageMap = {
  textMap: [
    ["Arabic, multi-region", "sa", "Arabic (Multi-region)"],
    ["Bahasa Indonesia (Indonesia)", "id", "Indonesian (Indonesia)"],
    ["Deutsch (Deutschland)", "de", "German (Germany)"],
    ["English (Australia)", "au", "English (Australia)"],
    ["English (Great Britain)", "gb", "English (Great Britain)"],
    ["English (India)", "in", "English (India)"],
    ["English (United States)", "us", "English (United States)"],
    ["Español (España)", "es", "Spanish (Spain)"],
    ["Español (Estados Unidos)", "us", "Spanish (United States)"],
    ["Français (Canada)", "ca", "French (Canada)"],
    ["Français (France)", "fr", "French (France)"],
    ["Italiano (Italia)", "it", "Italian (Italy)"],
    ["Nederlands (Nederland)", "nl", "Dutch (Netherlands)"],
    ["Polski (Polska)", "pl", "Polish (Poland)"],
    ["Português (Brasil)", "br", "Portuguese (Brazil)"],
    ["Tiếng Việt (Việt Nam)", "vn", "Vietnamese (Vietnam)"],
    ["Türkçe (Türkiye)", "tr", "Turkish (Turkey)"],
    ["Русский (Россия)", "ru", "Russian (Russia)"],
    ["मराठी (भारत)", "in", "Marathi (India)"],
    ["हिन्दी (भारत)", "in", "Hindi (India)"],
    ["বাংলা (ভারত)", "in", "Bengali (India)"],
    ["ગુજરાતી (ભારત)", "in", "Gujarati (India)"],
    ["தமிழ் (இந்தியா)", "in", "Tamil (India)"],
    ["తెలుగు (భారతదేశం)", "in", "Telugu (India)"],
    ["ಕನ್ನಡ (ಭಾರತ)", "in", "Kannada (India)"],
    ["മലയാളം (ഇന്ത്യ)", "in", "Malayalam (India)"],
    ["ไทย (ประเทศไทย)", "th", "Thai (Thailand)"],
    ["日本語(日本)", "jp", "Japanese (Japan)"],
    ["普通话 (中国大陆)", "cn", "Mandarin (Mainland China)"],
    ["한국어 (대한민국)", "kr", "Korean (South Korea)"],
  ].reduce((table, [label, code, text]) => {
    table[label] = { code, text }
    return table
  }, {}),
}
// Chirp3-HD voice names grouped by the gender label displayed next to them.
const voiceModelMap = {
  female: ["Aoede", "Kore", "Leda", "Zephyr"],
  male: ["Charon", "Fenrir", "Orus", "Puck"],
}
// Script-wide state: the most recent synthesize response audio (base64) and
// request payload, plus the currently created player and download button.
let lastResponse = null
let lastPayload = null
let audioPlayer = null
let downloadButton = null
/**
 * Look up the gender group a voice name is listed under.
 * @param {string} voiceName - Bare voice name, e.g. "Kore".
 * @returns {string} The group name with its first letter capitalised
 *   ("Female" / "Male"), or "Unknown" when the name is in no group.
 */
function getVoiceGender(voiceName) {
  const group = Object.keys(voiceModelMap).find((candidate) => voiceModelMap[candidate].includes(voiceName))
  if (group === undefined) {
    return "Unknown"
  }
  const initial = group.charAt(0).toUpperCase()
  return initial + group.slice(1)
}
// Hook XMLHttpRequest#open so responses of the text:synthesize endpoint can be
// captured. The request URL is remembered on the instance (as a string) for the
// send() hook.
const originalOpen = XMLHttpRequest.prototype.open
XMLHttpRequest.prototype.open = function (_method, url) {
  // open() also accepts URL objects, which have no includes(); normalise to a
  // string so the hook can never throw before the native open() runs.
  const requestUrl = String(url)
  this.customURL = requestUrl
  if (requestUrl.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")) {
    this.addEventListener("readystatechange", function () {
      // readyState 4 is DONE: the response body is complete.
      if (this.readyState !== 4) return
      try {
        const response = JSON.parse(this.responseText)
        lastResponse = response.audioContent
        updateAudioPlayerAndDownload()
      } catch (error) {
        // Best effort: a response that cannot be read or parsed must not break
        // the page's own request handling, but leave a trace for debugging.
        console.debug("[Google Cloud TTS Downloader] Could not read synthesize response:", error)
      }
    })
  }
  // Forward every original argument and the native return value.
  return originalOpen.apply(this, arguments)
}
// Hook XMLHttpRequest#send to remember the JSON payload of text:synthesize
// requests; the payload is later used to build the download file name.
const originalSend = XMLHttpRequest.prototype.send
XMLHttpRequest.prototype.send = function (data) {
  // customURL is set by the open() hook; coerce defensively because it may be
  // missing or not a string.
  const requestUrl = this.customURL ? String(this.customURL) : ""
  if (requestUrl.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")) {
    try {
      const payload = typeof data === "string" ? JSON.parse(data) : data
      // Only keep object payloads: anything else has no input/voice fields to
      // read and would make the download handler fail.
      if (payload !== null && typeof payload === "object") {
        lastPayload = payload
      }
    } catch (error) {
      // Best effort: never block the page's request because of a parse failure.
      console.debug("[Google Cloud TTS Downloader] Could not parse synthesize payload:", error)
    }
  }
  // Forward every original argument and the native return value.
  return originalSend.apply(this, arguments)
}
/**
 * Decode a base64 string into raw bytes.
 * @param {string} base64 - Base64-encoded data.
 * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
 */
const base64ToArrayBuffer = (base64) => {
  const decoded = atob(base64)
  // atob yields one character per byte; copy each char code into the array.
  const bytes = Uint8Array.from({ length: decoded.length }, (_, index) => decoded.charCodeAt(index))
  return bytes.buffer
}
/**
 * Save the most recently captured audio as a .wav file.
 *
 * The file name is `<YYYYMMDD_HHMMSS>_<voice name>_<start of the input>.wav`.
 * Does nothing until both a response and a request payload were captured.
 */
const downloadAudio = () => {
  if (!lastResponse || !lastPayload) return

  const pad = (value) => String(value).padStart(2, "0")
  const now = new Date()
  const datePart = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}`
  const timePart = `${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`
  const timestamp = `${datePart}_${timePart}`

  // The request carries either plain text or SSML; fall back to an empty
  // string so a payload without `input.text` no longer throws.
  const input = lastPayload.input || {}
  let sourceText = ""
  if (typeof input.text === "string") {
    sourceText = input.text
  } else if (typeof input.ssml === "string") {
    sourceText = input.ssml
  }
  const voiceName = lastPayload.voice && lastPayload.voice.name ? String(lastPayload.voice.name) : "voice"

  // Replace characters that are not allowed in file names (SSML markup is full
  // of them) as well as control characters such as line breaks.
  const safeText = sourceText.replace(/[\\/:*?"<>|\u0000-\u001f]/g, "_").trim()
  // Truncate by code point so a surrogate pair is never cut in half, and only
  // mark the name with an ellipsis when something was actually cut off.
  const codePoints = Array.from(safeText)
  const truncatedText = codePoints.length > 25 ? `${codePoints.slice(0, 25).join("")}...` : safeText
  const filename = `${timestamp}_${voiceName}_${truncatedText}.wav`

  const blob = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" })
  const objectUrl = URL.createObjectURL(blob)
  const link = document.createElement("a")
  link.href = objectUrl
  link.download = filename
  link.click()
  URL.revokeObjectURL(objectUrl)
}
/**
 * Build the custom player UI: a container holding an <audio> element and a
 * "Download" paper-button wired to downloadAudio.
 *
 * Side effect: stores the new elements in the script-wide `audioPlayer` and
 * `downloadButton` variables.
 * @returns {HTMLDivElement} The detached container element.
 */
const createAudioPlayerContainer = () => {
  // Renders declarations as a block that starts and ends with a line break.
  const cssBlock = (...declarations) => `\n${declarations.join("\n")}\n`

  const playerContainer = document.createElement("div")
  playerContainer.id = "custom-audio-container"
  playerContainer.style.cssText = cssBlock(
    "display: flex;",
    "flex-direction: column;",
    "align-items: center;",
    "justify-content: center;",
    "width: 100%;",
    "margin-top: 15px;",
    "padding: 10px;",
    "border-radius: 8px;",
  )

  audioPlayer = document.createElement("audio")
  audioPlayer.id = "custom-audio-player"
  audioPlayer.controls = true
  audioPlayer.style.cssText = cssBlock("width: 100%;", "max-width: 500px;", "margin-bottom: 10px;")

  downloadButton = document.createElement("paper-button")
  const buttonAttributes = [
    ["role", "button"],
    ["tabindex", "0"],
    ["animated", ""],
    ["elevation", "0"],
  ]
  for (const [attribute, value] of buttonAttributes) {
    downloadButton.setAttribute(attribute, value)
  }
  downloadButton.classList.add("state-paused")
  downloadButton.style.backgroundColor = "var(--google-blue-500)"
  downloadButton.style.color = "#fff"
  downloadButton.innerHTML = [
    "",
    '<span class="button-inner">',
    '<span class="label">',
    '<span class="ready">Download</span>',
    "</span>",
    "</span>",
    "",
  ].join("\n")
  downloadButton.addEventListener("click", downloadAudio)

  // Player first, button below it.
  for (const child of [audioPlayer, downloadButton]) {
    playerContainer.appendChild(child)
  }
  return playerContainer
}
/**
 * Replace the custom audio player with one that plays the latest captured
 * response.
 *
 * The player lives inside the <ts-app> shadow root, right after the
 * `.control-playback` element. Previous players are removed and their blob
 * URLs revoked so repeated syntheses do not accumulate audio blobs. Nothing is
 * created when there is no captured response or no place to insert the player.
 */
const updateAudioPlayerAndDownload = () => {
  if (!lastResponse) return

  // Remove a stale player and release the blob URL its <audio> element holds.
  const discardContainer = (container) => {
    const staleAudio = container.querySelector("audio")
    if (staleAudio && staleAudio.src) {
      URL.revokeObjectURL(staleAudio.src)
    }
    container.remove()
  }

  const documentContainer = document.getElementById("custom-audio-container")
  if (documentContainer) {
    discardContainer(documentContainer)
  }

  // Resolve the insertion point before allocating anything, so no blob URL is
  // created (and leaked) when the player cannot be attached.
  const app = document.querySelector("ts-app")
  const shadowRoot = app ? app.shadowRoot : null
  const controlPlayback = shadowRoot ? shadowRoot.querySelector(".control-playback") : null
  if (!controlPlayback) return

  // document.getElementById does not see into shadow roots, so the players
  // inserted by earlier runs have to be looked up here.
  shadowRoot.querySelectorAll("#custom-audio-container").forEach((container) => discardContainer(container))

  const blob = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" })
  const audioUrl = URL.createObjectURL(blob)
  const playerContainer = createAudioPlayerContainer()
  audioPlayer.src = audioUrl
  controlPlayback.insertAdjacentElement("afterend", playerContainer)
}
/**
 * Decorate the <paper-item> menu entries of the page.
 *
 * - Language entries listed in `languageMap.textMap` get a flag image and their
 *   English label; clicking them stores the label in localStorage.
 * - Chirp3-HD voice entries with a known gender are renamed to "Name (Gender)".
 * - Audio device profile entries store their label in localStorage on click.
 *
 * The document is walked once, descending into every open shadow root
 * (including nested ones). Entries already handled are marked with
 * `data-enhanced="true"` and skipped on later runs.
 *
 * @returns {number} How many language and voice entries were newly enhanced
 *   (audio device profile entries are not counted).
 */
function enhanceLanguageAndVoice() {
  const voiceNamePattern = /^[a-z]{2,3}(-[A-Z]{1,2})?-Chirp3-HD-(\w+)$/

  // Builds the "flag + English label" content for a language entry.
  function buildLanguageLabel(langInfo) {
    const wrapper = document.createElement("div")
    wrapper.style.display = "flex"
    wrapper.style.alignItems = "center"
    wrapper.style.gap = "8px"
    const flagImg = document.createElement("img")
    flagImg.src = `${FLAG_BASE_URL}${langInfo.code}.svg`
    flagImg.style.width = "24px"
    flagImg.style.height = "18px"
    flagImg.style.marginRight = "5px"
    const textSpan = document.createElement("span")
    textSpan.textContent = langInfo.text
    wrapper.appendChild(flagImg)
    wrapper.appendChild(textSpan)
    return wrapper
  }

  // Enhances a single entry; returns 1 when it counts as enhanced, else 0.
  function enhanceItem(item) {
    if (!item || item.dataset.enhanced === "true") return 0
    const originalText = item.textContent ? item.textContent.trim() : ""
    let count = 0

    // Own-property check so labels like "constructor" never hit the prototype.
    const langInfo = Object.prototype.hasOwnProperty.call(languageMap.textMap, originalText)
      ? languageMap.textMap[originalText]
      : undefined
    if (langInfo) {
      item.innerHTML = ""
      item.appendChild(buildLanguageLabel(langInfo))
      item.dataset.enhanced = "true"
      count++
      item.addEventListener("click", () => {
        localStorage.setItem("lastSelectedLanguage", langInfo.text)
      })
    }

    const voiceModelMatch = originalText.match(voiceNamePattern)
    if (voiceModelMatch) {
      const voiceModelName = voiceModelMatch[2]
      const voiceGender = getVoiceGender(voiceModelName)
      if (voiceGender !== "Unknown") {
        item.textContent = `${voiceModelName} (${voiceGender})`
        item.dataset.enhanced = "true"
        count++
      }
    }

    if (AUDIO_DEVICE_PROFILES.includes(originalText)) {
      item.dataset.enhanced = "true"
      item.addEventListener("click", () => {
        localStorage.setItem("lastSelectedAudioDeviceProfile", originalText)
      })
    }
    return count
  }

  // Processes one document or shadow root, then every shadow root below it.
  function processRoot(root, visitedRoots) {
    if (!root || visitedRoots.has(root)) return 0
    visitedRoots.add(root)
    let count = 0
    try {
      for (const item of root.querySelectorAll("paper-item")) {
        count += enhanceItem(item)
      }
      // querySelectorAll does not cross shadow boundaries, so each shadow host
      // found here needs its own pass.
      for (const element of root.querySelectorAll("*")) {
        if (element.shadowRoot) {
          count += processRoot(element.shadowRoot, visitedRoots)
        }
      }
    } catch (error) {
      // Best effort: keep what was enhanced so far, but leave a trace.
      console.debug("[Google Cloud TTS Downloader] Failed to enhance menu items:", error)
    }
    return count
  }

  return processRoot(document, new Set())
}
/**
 * Re-select the language and audio device profile remembered in localStorage
 * by clicking the <paper-item> whose trimmed text equals the stored label.
 *
 * The document is searched once, descending into every open shadow root
 * (including nested ones), and only the first matching entry is clicked.
 */
function restoreLastSelection() {
  const lastLanguage = localStorage.getItem("lastSelectedLanguage")
  const lastAudioDeviceProfile = localStorage.getItem("lastSelectedAudioDeviceProfile")

  // Returns the first matching entry in `root` or any shadow root below it.
  function findItem(root, text) {
    for (const item of root.querySelectorAll("paper-item")) {
      if (item.textContent && item.textContent.trim() === text) {
        return item
      }
    }
    // querySelectorAll does not cross shadow boundaries; search each host's
    // shadow root separately.
    for (const element of root.querySelectorAll("*")) {
      if (element.shadowRoot) {
        const nested = findItem(element.shadowRoot, text)
        if (nested) return nested
      }
    }
    return null
  }

  // Clicks the first entry labelled `text`; reports whether one was found.
  function findAndClickItem(text) {
    const item = findItem(document, text)
    if (!item) return false
    item.click()
    return true
  }

  if (lastLanguage) findAndClickItem(lastLanguage)
  if (lastAudioDeviceProfile) findAndClickItem(lastAudioDeviceProfile)
}
/**
 * Run the enhancement pass until it enhances at least one entry, retrying
 * every 200 ms. After the first successful pass the remembered selection is
 * restored and the observers are installed.
 */
function waitForElementsAndEnhance() {
  const newlyEnhanced = enhanceLanguageAndVoice()
  const menusAreReady = newlyEnhanced > 0
  if (!menusAreReady) {
    // Menu entries are not rendered yet; try again shortly.
    setTimeout(waitForElementsAndEnhance, 200)
    return
  }
  restoreLastSelection()
  setupObserver()
}
/**
 * Keep the menus enhanced after the initial pass.
 *
 * Installs a MutationObserver on document.body (child list, subtree and
 * attribute changes) that re-runs the enhancement, and a capturing click
 * listener on the document that re-runs it 100 ms after every click.
 */
function setupObserver() {
  const rerunOnMutation = () => {
    enhanceLanguageAndVoice()
  }
  const watchedChanges = { childList: true, subtree: true, attributes: true }
  const observer = new MutationObserver(rerunOnMutation)
  observer.observe(document.body, watchedChanges)

  const rerunAfterClick = () => {
    setTimeout(enhanceLanguageAndVoice, 100)
  }
  const useCapture = true
  document.addEventListener("click", rerunAfterClick, useCapture)
}
/**
 * Wait until the <ts-app> element exists and has a shadow root, checking once
 * per animation frame, then start the enhancement loop.
 */
function waitForApp() {
  const host = document.querySelector("ts-app")
  const hostIsReady = Boolean(host && host.shadowRoot)
  if (!hostIsReady) {
    // Not rendered yet; look again on the next frame.
    requestAnimationFrame(waitForApp)
    return
  }
  waitForElementsAndEnhance()
}
// Entry point: start waiting for the <ts-app> host as soon as the script runs.
waitForApp()
})()