Google Cloud TTS Downloader

Adds a Download button, language flags, and voice gender labels to Google Cloud Text-to-Speech AI.

// ==UserScript==
// @name         Google Cloud TTS Downloader
// @description  Add a Download button, language flags, voice gender for Google Cloud Text-to-Speech AI.
// @icon         https://www.google.com/s2/favicons?sz=64&domain=cloud.google.com
// @version      1.3
// @author       afkarxyz
// @namespace    https://github.com/afkarxyz/userscripts/
// @supportURL   https://github.com/afkarxyz/userscripts/issues
// @license      MIT
// @match        https://www.gstatic.com/cloud-site-ux/text_to_speech/text_to_speech.min.html
// @grant        none
// ==/UserScript==

;(() => {
  // Base URL of the 4x3 SVG flag set served from jsDelivr. A country code plus
  // ".svg" is appended to it to build each flag image URL.
  // NOTE(review): the package/version segment was reconstructed. The previous
  // value contained the literal text "[email protected]" (an e-mail-obfuscation
  // artifact), which produced URLs that cannot resolve. Confirm that 7.2.3 is
  // the release that was originally pinned.
  const FLAG_BASE_URL = "https://cdn.jsdelivr.net/gh/lipis/flag-icons@7.2.3/flags/4x3/"

  // Labels of the audio device profile menu entries. A menu item whose trimmed
  // text equals one of these strings gets a click listener that stores the
  // choice in localStorage under "lastSelectedAudioDeviceProfile".
  const AUDIO_DEVICE_PROFILES = [
    "Default",
    "Smart watch or wearable",
    "Smartphone",
    "Headphones or earbuds",
    "Small home speaker",
    "Smart home speaker",
    "Home entertainment system or smart TV",
    "Car speaker",
    "Interactive Voice Response (IVR) system",
  ]

  // Lookup table keyed by the exact (trimmed) label text of a language menu item.
  //   code - file name stem appended to FLAG_BASE_URL to pick the flag image
  //   text - English label that replaces the item's original text
  // Several languages intentionally share a flag code (e.g. "in", "us").
  const languageMap = {
    textMap: {
      "Arabic, multi-region": { code: "sa", text: "Arabic (Multi-region)" },
      "Bahasa Indonesia (Indonesia)": { code: "id", text: "Indonesian (Indonesia)" },
      "Deutsch (Deutschland)": { code: "de", text: "German (Germany)" },
      "English (Australia)": { code: "au", text: "English (Australia)" },
      "English (Great Britain)": { code: "gb", text: "English (Great Britain)" },
      "English (India)": { code: "in", text: "English (India)" },
      "English (United States)": { code: "us", text: "English (United States)" },
      "Español (España)": { code: "es", text: "Spanish (Spain)" },
      "Español (Estados Unidos)": { code: "us", text: "Spanish (United States)" },
      "Français (Canada)": { code: "ca", text: "French (Canada)" },
      "Français (France)": { code: "fr", text: "French (France)" },
      "Italiano (Italia)": { code: "it", text: "Italian (Italy)" },
      "Nederlands (Nederland)": { code: "nl", text: "Dutch (Netherlands)" },
      "Polski (Polska)": { code: "pl", text: "Polish (Poland)" },
      "Português (Brasil)": { code: "br", text: "Portuguese (Brazil)" },
      "Tiếng Việt (Việt Nam)": { code: "vn", text: "Vietnamese (Vietnam)" },
      "Türkçe (Türkiye)": { code: "tr", text: "Turkish (Turkey)" },
      "Русский (Россия)": { code: "ru", text: "Russian (Russia)" },
      "मराठी (भारत)": { code: "in", text: "Marathi (India)" },
      "हिन्दी (भारत)": { code: "in", text: "Hindi (India)" },
      "বাংলা (ভারত)": { code: "in", text: "Bengali (India)" },
      "ગુજરાતી (ભારત)": { code: "in", text: "Gujarati (India)" },
      "தமிழ் (இந்தியா)": { code: "in", text: "Tamil (India)" },
      "తెలుగు (భారతదేశం)": { code: "in", text: "Telugu (India)" },
      "ಕನ್ನಡ (ಭಾರತ)": { code: "in", text: "Kannada (India)" },
      "മലയാളം (ഇന്ത്യ)": { code: "in", text: "Malayalam (India)" },
      "ไทย (ประเทศไทย)": { code: "th", text: "Thai (Thailand)" },
      "日本語(日本)": { code: "jp", text: "Japanese (Japan)" },
      "普通话 (中国大陆)": { code: "cn", text: "Mandarin (Mainland China)" },
      "한국어 (대한민국)": { code: "kr", text: "Korean (South Korea)" },
    },
  }

  // Voice model names grouped by gender. getVoiceGender() searches these lists
  // with the model-name suffix extracted from "<locale>-Chirp3-HD-<name>" labels;
  // names missing from both lists are reported as "Unknown".
  const voiceModelMap = {
    female: ["Aoede", "Kore", "Leda", "Zephyr"],
    male: ["Charon", "Fenrir", "Orus", "Puck"],
  }

  // Shared state for the capture/download flow.
  // `audioContent` field of the most recent synthesize response (decoded as base64 before use).
  let lastResponse = null
  // Body of the most recent synthesize request (parsed from JSON when it was sent as a string).
  let lastPayload = null
  // <audio> element created by the latest createAudioPlayerContainer() call.
  let audioPlayer = null
  // <paper-button> element created by the latest createAudioPlayerContainer() call.
  let downloadButton = null

  /**
   * Find the gender group that lists a voice model name.
   *
   * @param {string} voiceName - Bare model name, e.g. "Kore".
   * @returns {string} The group key with its first letter upper-cased
   *   ("Female" / "Male"), or "Unknown" when no group contains the name.
   */
  function getVoiceGender(voiceName) {
    const matchingGroup = Object.keys(voiceModelMap).find((groupName) =>
      voiceModelMap[groupName].includes(voiceName),
    )

    if (matchingGroup === undefined) {
      return "Unknown"
    }

    const initial = matchingGroup.charAt(0).toUpperCase()
    const remainder = matchingGroup.slice(1)
    return `${initial}${remainder}`
  }

  // Patch XMLHttpRequest#open so that every request remembers its URL (as the
  // string property `customURL`) and so that the response of the Text-to-Speech
  // synthesize call is captured into `lastResponse`, after which the custom
  // player is rebuilt.
  const originalOpen = XMLHttpRequest.prototype.open
  XMLHttpRequest.prototype.open = function (_method, url) {
    // `open` accepts URL objects as well as strings. Calling `.includes` on a
    // URL object throws, which would abort the page's own request before it is
    // ever opened, so only the string form is used from here on.
    const requestUrl = String(url)
    this.customURL = requestUrl

    if (requestUrl.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")) {
      this.addEventListener("readystatechange", function () {
        // readyState 4 is DONE: the response body is complete.
        if (this.readyState !== 4) return

        try {
          const response = JSON.parse(this.responseText)
          lastResponse = response.audioContent
          updateAudioPlayerAndDownload()
        } catch (error) {
          // Capturing is best-effort and must never break the page's own
          // request handling, but a failure should at least be visible.
          console.warn("[Google Cloud TTS Downloader] Could not read the synthesize response:", error)
        }
      })
    }

    // Forward the untouched argument list so the optional async/user/password
    // arguments keep their native meaning.
    return originalOpen.apply(this, arguments)
  }

  // Patch XMLHttpRequest#send: when the request was opened against the
  // synthesize endpoint (see the `customURL` tag set by the patched `open`),
  // remember its body in `lastPayload` before handing over to the native send.
  const originalSend = XMLHttpRequest.prototype.send
  XMLHttpRequest.prototype.send = function (data) {
    const isSynthesizeRequest =
      Boolean(this.customURL) &&
      this.customURL.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")

    if (isSynthesizeRequest) {
      try {
        if (typeof data === "string") {
          // String bodies are expected to be JSON; keep the parsed object.
          lastPayload = JSON.parse(data)
        } else {
          lastPayload = data
        }
      } catch (e) {}
    }

    originalSend.apply(this, arguments)
  }

  /**
   * Decode a base64 string into its raw bytes.
   *
   * @param {string} base64 - Base64-encoded data.
   * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
   */
  const base64ToArrayBuffer = (base64) => {
    // atob yields a "binary string": every character code is one byte value.
    const decoded = atob(base64)
    const bytes = Uint8Array.from(decoded, (character) => character.charCodeAt(0))
    return bytes.buffer
  }

  /**
   * Derive a short, filename-safe snippet from a synthesize request body.
   *
   * Uses `input.text` when present and otherwise `input.ssml` with its markup
   * removed. Characters that are not allowed in file names on common platforms
   * are replaced by spaces, and whitespace runs are collapsed.
   *
   * @param {object} payload - Captured request body.
   * @returns {string} At most 25 characters, followed by "..." only when the
   *   source text was actually cut. Empty when the payload carries no text.
   */
  const buildFilenameSnippet = (payload) => {
    const input = (payload && payload.input) || {}

    let spokenText = ""
    if (typeof input.text === "string") {
      spokenText = input.text
    } else if (typeof input.ssml === "string") {
      // Drop the tags first so only the words that are spoken remain.
      spokenText = input.ssml.replace(/<[^>]*>/g, " ")
    }

    const cleaned = spokenText
      .replace(/[\\/:*?"<>|\u0000-\u001f\u007f]/g, " ")
      .replace(/\s+/g, " ")
      .trim()

    // Split by code point so a surrogate pair is never cut in half.
    const characters = Array.from(cleaned)
    if (characters.length <= 25) return cleaned
    return `${characters.slice(0, 25).join("")}...`
  }

  /**
   * Save the most recently captured audio as a .wav file.
   *
   * Does nothing until both a response and a request payload were captured.
   * The file is named "<YYYYMMDD_HHMMSS>_<voice name>_<text snippet>.wav";
   * parts that are unavailable are left out.
   */
  const downloadAudio = () => {
    if (!lastResponse || !lastPayload) return

    const now = new Date()
    const pad = (value) => String(value).padStart(2, "0")
    const datePart = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}`
    const timePart = `${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`

    const voiceName = lastPayload.voice && lastPayload.voice.name
    const nameParts = [`${datePart}_${timePart}`, voiceName, buildFilenameSnippet(lastPayload)]
    const filename = `${nameParts.filter(Boolean).join("_")}.wav`

    const blob = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" })
    const link = document.createElement("a")
    link.href = URL.createObjectURL(blob)
    link.download = filename
    link.click()
    URL.revokeObjectURL(link.href)
  }

  /**
   * Build the (still detached) player widget: a wrapper <div> holding an
   * <audio> element with controls and a "Download" <paper-button>.
   *
   * Side effects: stores the new <audio> element in `audioPlayer` and the new
   * button in `downloadButton`. The button's click handler is `downloadAudio`.
   *
   * @returns {HTMLDivElement} The wrapper element, id "custom-audio-container".
   */
  const createAudioPlayerContainer = () => {
    const wrapperEl = document.createElement("div")
    wrapperEl.id = "custom-audio-container"
    wrapperEl.style.cssText = `
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            width: 100%;
            margin-top: 15px;
            padding: 10px;
            border-radius: 8px;
        `

    const playerEl = document.createElement("audio")
    audioPlayer = playerEl
    playerEl.id = "custom-audio-player"
    playerEl.controls = true
    playerEl.style.cssText = `
            width: 100%;
            max-width: 500px;
            margin-bottom: 10px;
        `

    const buttonEl = document.createElement("paper-button")
    downloadButton = buttonEl

    // Attributes and class copied onto the button so it is styled like the
    // page's own paper-button controls.
    const buttonAttributes = [
      ["role", "button"],
      ["tabindex", "0"],
      ["animated", ""],
      ["elevation", "0"],
    ]
    for (const [attributeName, attributeValue] of buttonAttributes) {
      buttonEl.setAttribute(attributeName, attributeValue)
    }
    buttonEl.classList.add("state-paused")
    buttonEl.style.backgroundColor = "var(--google-blue-500)"
    buttonEl.style.color = "#fff"

    buttonEl.innerHTML = `
            <span class="button-inner">
                <span class="label">
                    <span class="ready">Download</span>
                </span>
            </span>
        `

    buttonEl.addEventListener("click", downloadAudio)

    for (const childEl of [playerEl, buttonEl]) {
      wrapperEl.appendChild(childEl)
    }

    return wrapperEl
  }

  /**
   * Rebuild the custom audio player for the latest captured audio and insert
   * it directly after the `.control-playback` element inside the <ts-app>
   * shadow root.
   *
   * Any previously inserted player is removed first and its blob URL is
   * released. Does nothing when no audio was captured or when the insertion
   * point cannot be found.
   */
  const updateAudioPlayerAndDownload = () => {
    if (!lastResponse) return

    const app = document.querySelector("ts-app")
    const appShadowRoot = app ? app.shadowRoot : null
    const controlPlayback = appShadowRoot ? appShadowRoot.querySelector(".control-playback") : null
    // Without an insertion point the player could never be shown; stop before
    // creating a blob URL that nothing would ever release.
    if (!controlPlayback) return

    // Decode first: if the payload is not valid base64 this throws while the
    // previous player is still intact.
    const blob = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" })

    // Remove an earlier player and free the blob URL its <audio> element holds.
    const discardContainer = (container) => {
      const staleAudio = container.querySelector("audio")
      if (staleAudio && staleAudio.src) {
        URL.revokeObjectURL(staleAudio.src)
      }
      container.remove()
    }

    // The player is inserted into the shadow root, so a document-level lookup
    // alone never finds it; check both places.
    const lightDomContainer = document.getElementById("custom-audio-container")
    if (lightDomContainer) {
      discardContainer(lightDomContainer)
    }
    appShadowRoot.querySelectorAll("#custom-audio-container").forEach((container) => {
      discardContainer(container)
    })

    const playerContainer = createAudioPlayerContainer()
    audioPlayer.src = URL.createObjectURL(blob)

    controlPlayback.insertAdjacentElement("afterend", playerContainer)
  }

  /**
   * Decorate the <paper-item> menu entries found in the document and in every
   * reachable (open) shadow root:
   *   - language entries get a flag image and an English label,
   *   - "<locale>-Chirp3-HD-<name>" voice entries become "<name> (<gender>)",
   *   - language and audio-device-profile entries remember the user's choice in
   *     localStorage when clicked.
   *
   * Items already handled carry `data-enhanced="true"` and are skipped, so the
   * function can be called repeatedly.
   *
   * @returns {number} How many language/voice items were newly decorated by
   *   this call (audio device profile items are tagged but not counted).
   */
  function enhanceLanguageAndVoice() {
    const voiceNamePattern = /^[a-z]{2,3}(-[A-Z]{1,2})?-Chirp3-HD-(\w+)$/

    // Decorate a single item; returns how many decorations were applied to it.
    function enhanceItem(item) {
      if (!item || item.dataset.enhanced === "true") return 0

      const originalText = item.textContent ? item.textContent.trim() : ""
      let count = 0

      // Own-property check: a label such as "constructor" must not pick up an
      // inherited Object.prototype member as if it were a table entry.
      const langInfo = Object.prototype.hasOwnProperty.call(languageMap.textMap, originalText)
        ? languageMap.textMap[originalText]
        : undefined
      if (langInfo) {
        const wrapper = document.createElement("div")
        wrapper.style.display = "flex"
        wrapper.style.alignItems = "center"
        wrapper.style.gap = "8px"

        const flagImg = document.createElement("img")
        flagImg.src = `${FLAG_BASE_URL}${langInfo.code}.svg`
        flagImg.style.width = "24px"
        flagImg.style.height = "18px"
        flagImg.style.marginRight = "5px"

        const textSpan = document.createElement("span")
        textSpan.textContent = langInfo.text

        wrapper.appendChild(flagImg)
        wrapper.appendChild(textSpan)

        item.innerHTML = ""
        item.appendChild(wrapper)
        item.dataset.enhanced = "true"
        count++

        item.addEventListener("click", () => {
          localStorage.setItem("lastSelectedLanguage", langInfo.text)
        })
      }

      const voiceModelMatch = originalText.match(voiceNamePattern)
      if (voiceModelMatch) {
        const voiceModelName = voiceModelMatch[2]
        const voiceGender = getVoiceGender(voiceModelName)

        // Voices of unknown gender keep their original label and stay
        // untagged, so they are looked at again on the next pass.
        if (voiceGender !== "Unknown") {
          item.textContent = `${voiceModelName} (${voiceGender})`
          item.dataset.enhanced = "true"
          count++
        }
      }

      if (AUDIO_DEVICE_PROFILES.includes(originalText)) {
        item.dataset.enhanced = "true"
        item.addEventListener("click", () => {
          localStorage.setItem("lastSelectedAudioDeviceProfile", originalText)
        })
      }

      return count
    }

    // Collect `root` plus every shadow root nested below it, each exactly once.
    function collectSearchRoots(root, roots) {
      roots.push(root)
      root.querySelectorAll("*").forEach((element) => {
        if (element.shadowRoot) {
          collectSearchRoots(element.shadowRoot, roots)
        }
      })
      return roots
    }

    let enhancedItems = 0

    try {
      // One pass from `document` covers body and documentElement as well; each
      // root is queried once instead of once per contained element.
      for (const root of collectSearchRoots(document, [])) {
        root.querySelectorAll("paper-item").forEach((item) => {
          try {
            enhancedItems += enhanceItem(item)
          } catch (error) {
            // Best effort: one item that cannot be decorated must not keep the
            // remaining items from being processed.
          }
        })
      }
    } catch (error) {
      // Best effort: this runs on every DOM mutation and must never throw into
      // the page; report whatever was decorated so far.
    }

    return enhancedItems
  }

  /**
   * Re-apply the language and audio device profile the user picked last time
   * by clicking the matching <paper-item>, if one exists.
   *
   * The choices are read from localStorage ("lastSelectedLanguage" and
   * "lastSelectedAudioDeviceProfile"). Each stored label is clicked at most
   * once per call.
   */
  function restoreLastSelection() {
    const lastLanguage = localStorage.getItem("lastSelectedLanguage")
    const lastAudioDeviceProfile = localStorage.getItem("lastSelectedAudioDeviceProfile")

    // Search `root`, then the shadow roots below it, for the first item whose
    // trimmed text equals `text`. Clicks it and reports true; false if absent.
    function clickFirstMatch(root, text) {
      if (!root) return false

      for (const item of root.querySelectorAll("paper-item")) {
        if (item.textContent && item.textContent.trim() === text) {
          item.click()
          return true
        }
      }

      for (const element of root.querySelectorAll("*")) {
        if (element.shadowRoot && clickFirstMatch(element.shadowRoot, text)) {
          return true
        }
      }

      return false
    }

    // `document` already contains body and documentElement; searching those
    // separately would only click the same item again.
    function findAndClickItem(text) {
      return clickFirstMatch(document, text)
    }

    if (lastLanguage) findAndClickItem(lastLanguage)
    if (lastAudioDeviceProfile) findAndClickItem(lastAudioDeviceProfile)
  }

  /**
   * Run the enhancement pass until it decorates at least one item, retrying
   * every 200 ms. After the first successful pass the stored selection is
   * restored and the change observers are installed.
   */
  function waitForElementsAndEnhance() {
    const newlyEnhanced = enhanceLanguageAndVoice()
    const menusReady = newlyEnhanced > 0

    if (!menusReady) {
      // The menus are not rendered yet; try again shortly.
      setTimeout(waitForElementsAndEnhance, 200)
      return
    }

    restoreLastSelection()
    setupObserver()
  }

  /**
   * Keep the menus decorated after the initial pass:
   *   - a MutationObserver on document.body (child list, subtree and attribute
   *     changes) re-runs the enhancement pass on every reported mutation;
   *   - a capturing click listener on the document schedules another pass
   *     100 ms after any click.
   */
  function setupObserver() {
    const rerunOnMutation = () => {
      enhanceLanguageAndVoice()
    }
    const watchOptions = {
      childList: true,
      subtree: true,
      attributes: true,
    }
    const domWatcher = new MutationObserver(rerunOnMutation)
    domWatcher.observe(document.body, watchOptions)

    const rerunShortlyAfterClick = () => {
      setTimeout(enhanceLanguageAndVoice, 100)
    }
    const useCapture = true
    document.addEventListener("click", rerunShortlyAfterClick, useCapture)
  }

  /**
   * Poll once per animation frame until the <ts-app> element exists and has a
   * shadow root, then start the enhancement loop.
   */
  function waitForApp() {
    const appElement = document.querySelector("ts-app")
    const appIsReady = Boolean(appElement) && Boolean(appElement.shadowRoot)

    if (!appIsReady) {
      requestAnimationFrame(waitForApp)
      return
    }

    waitForElementsAndEnhance()
  }

  // Entry point: start polling for the <ts-app> element as soon as the script runs.
  waitForApp()
})()