Azure Speech Download

为微软的文本转语音服务的 demo 页面添加下载按钮

נכון ליום 16-09-2022. ראה הגרסה האחרונה.

// ==UserScript==
// @name         Azure Speech Download
// @namespace
// @version      0.9.0
// @description  为微软的文本转语音服务的 demo 页面添加下载按钮
// @author       Puteulanus
// @homepage     https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
// @match        https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
// @icon         https://www.microsoft.com/favicon.ico
// @require      https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @grant        none
// @run-at       document-end
// @namespace https://greasyfork.org/users/909438
// ==/UserScript==

/* globals saveAs */
/* jshint esversion: 6 */
(function() {
    'use strict';

    // Your code here...
    // Fallback used only when the @require'd FileSaver.js did not define saveAs.
    if (!window.saveAs) {
        /**
         * Minimal stand-in for FileSaver's saveAs: downloads a blob under a file name
         * by clicking a temporary hidden link.
         * @param {Blob} blob - data to download
         * @param {string} name - suggested file name
         */
        window.saveAs = (blob, name) => {
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.style.display = "none";
            a.href = url;
            a.download = name;
            document.body.appendChild(a);
            a.click();
            // Remove the helper link so repeated downloads do not pile up hidden anchors.
            a.remove();
            // Revoke on a later tick instead of synchronously.
            // NOTE(review): the delay is a precaution so the browser can start reading
            // the blob before the URL disappears; tune it if downloads are very large.
            setTimeout(() => window.URL.revokeObjectURL(url), 40 * 1000);
        };
    }

    // Speech SDK object exposed on the page's window.
    const SpeechSDK = window.SpeechSDK;

    // Received audio: the buffers themselves plus running byte counters.
    const wavFragments = [];   // buffers pushed by the stream handler, in arrival order
    let fileSize = 0;          // bytes currently held in wavFragments
    let streamSize = 0;        // bytes received since the download button was last clicked

    // Mode flags.
    let enableDownload = false;     // raised only while the download button forwards its click to the play button
    let enableCollect = false;      // collect mode on/off
    let autoProcessing = false;     // an auto-split run is in progress
    let enableSaveOptions = false;  // whether option changes are written to localStorage

    // Remaining text pieces of an auto-split run.
    let tasks = [];

    // Extension appended to the downloaded file name.
    let fileExt = '.mp3';

    /**
     * Build a new button by deep-cloning the page's '#playli' element.
     * @param {string} id - id assigned to the clone
     * @param {string} color - used for both background and border of the inner <button>
     * @param {string} content - caption written into the clone's last <span>
     * @returns {Node} the configured clone (not attached to the document)
     */
    function createButton(id, color, content) {
        const clone = document.getElementById('playli').cloneNode(true);
        const caption = clone.querySelector('span:last-of-type');
        const inner = clone.querySelector('button');
        clone.id = id;
        caption.textContent = content;
        inner.style.backgroundColor = color;
        inner.style.borderColor = color;
        return clone;
    }

    /**
     * Update the caption and colour of a button created by createButton.
     * @param {Element} button - wrapper element containing a <span> caption and a <button>
     * @param {string} color - used for both background and border of the inner <button>
     * @param {string} content - new caption text
     */
    function setButton(button, color, content) {
        const inner = button.querySelector('button');
        button.querySelector('span:last-of-type').textContent = content;
        Object.assign(inner.style, { backgroundColor: color, borderColor: color });
    }

    /**
     * Concatenate every collected audio fragment, hand the result to saveAs and
     * reset the collection state (fileSize, streamSize, wavFragments).
     *
     * The file name is the current UTC timestamp from toISOString() without the
     * fractional part, with 'T' replaced by a space and every ':' replaced by '_',
     * followed by fileExt.
     */
    function downloadAndClean() {
        const merged = new window.Uint8Array(fileSize);
        let offset = 0;
        for (const fragment of wavFragments) {
            merged.set(new window.Uint8Array(fragment), offset);
            offset += fragment.byteLength;
        }
        fileSize = 0;
        streamSize = 0;
        wavFragments.length = 0;
        // Replace ALL colons: a string pattern would only replace the first one and
        // leave ':' in the name, which is not a valid file name character on Windows.
        const stamp = new Date().toISOString().replace('T', ' ').replace(/:/g, '_').split('.')[0];
        saveAs(new Blob([merged]), stamp + fileExt);
    }

    /**
     * Show the auto-split button, the option area and the preview player while
     * collect mode is on, and hide all three while it is off.
     */
    function switchOptionDisplay() {
        const visible = enableCollect;
        autoSplitButton.style.display = visible ? 'block' : 'none';
        optionArea.style.display = visible ? 'block' : 'none';
        previewPlayer.style.display = visible ? 'inline-block' : 'none';
    }

    /**
     * Load everything collected so far into the preview player.
     *
     * The fragments are concatenated into one blob (the collection itself is left
     * untouched) and the player's src is pointed at a fresh object URL for it.
     * The blob URL the player used before, if any, is revoked so that repeated
     * syncs do not keep every earlier copy of the audio alive.
     */
    function syncAudioToPlayer() {
        const merged = new window.Uint8Array(fileSize);
        let offset = 0;
        for (const fragment of wavFragments) {
            merged.set(new window.Uint8Array(fragment), offset);
            offset += fragment.byteLength;
        }
        const audioBlob = new Blob([merged], { type: 'audio/ogg' });
        const previousUrl = previewPlayer.src;
        previewPlayer.src = URL.createObjectURL(audioBlob);
        // Only revoke URLs this function could have created.
        if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
            URL.revokeObjectURL(previousUrl);
        }
    }

    /**
     * Fire a bubbling, cancelable 'input' event on the text box so that listeners
     * on the page see a value that was assigned from script.
     */
    function dispatchTextChange() {
        // Event constructor instead of the deprecated createEvent()/initEvent() pair.
        ttstext.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
    }

    /**
     * Persist the current option values to localStorage under 'savedOptions'.
     * Does nothing unless saving has been enabled (enableSaveOptions).
     *
     * codecInput stays undefined when building the codec selector failed; in that
     * case the codec entry is simply left out instead of throwing.
     */
    function saveOptions() {
        if (!enableSaveOptions) return;
        const options = {
            language: languageInput.value,
            voice: voiceInput.value,
            style: styleInput.value,
            // undefined values are dropped by JSON.stringify
            codec: codecInput ? codecInput.value : undefined,
            speed: speedInput.value,
            pitch: pitchInput.value,
            splitLength: maxSizeInput.value,
            delimiter: delimiterInput.value
        };
        localStorage.setItem('savedOptions', JSON.stringify(options));
    }

    /**
     * Re-apply the options stored in localStorage under 'savedOptions', if any.
     *
     * Order of operations:
     *   1. language, voice, style, codec, speed and pitch are assigned in that order,
     *      each followed by a bubbling 'change' event on its input;
     *   2. speed and pitch additionally receive an 'input' event;
     *   3. split length and delimiter are assigned without events;
     *   4. the save checkbox is ticked and saving is enabled.
     *
     * Stored data that is not valid JSON (or not an object) is logged/ignored, and
     * values missing from the stored object leave the matching input untouched
     * rather than assigning undefined to it. codecInput may be undefined when the
     * codec selector could not be built; it is skipped in that case.
     */
    function restoreOptions() {
        const optionsJSON = localStorage.getItem('savedOptions');
        if (!optionsJSON) return;

        let options;
        try {
            options = JSON.parse(optionsJSON);
        } catch (e) {
            console.log(e);
            return;
        }
        if (options === null || typeof options !== 'object') return;

        const fire = (target, type) => {
            target.dispatchEvent(new Event(type, { bubbles: true, cancelable: true }));
        };
        const isPresent = (value) => value !== undefined && value !== null;

        const restored = [
            [languageInput, options.language],
            [voiceInput, options.voice],
            [styleInput, options.style],
            [codecInput, options.codec],
            [speedInput, options.speed],
            [pitchInput, options.pitch]
        ];
        for (const [input, value] of restored) {
            if (!input || !isPresent(value)) continue;
            input.value = value;
            fire(input, 'change');
        }

        fire(speedInput, 'input');
        fire(pitchInput, 'input');

        if (isPresent(options.splitLength)) maxSizeInput.value = options.splitLength;
        if (isPresent(options.delimiter)) delimiterInput.value = options.delimiter;
        saveCheckBox.checked = true;
        enableSaveOptions = true;
    }

    /**
     * Make every option input persist the options (via saveOptions) on 'change'.
     *
     * codecInput stays undefined when building the codec selector failed; it is
     * skipped then, so the remaining inputs still get their listener.
     */
    function bindSaveOption() {
        const inputs = [
            languageInput,
            voiceInput,
            styleInput,
            speedInput,
            pitchInput,
            maxSizeInput,
            delimiterInput
        ];
        if (codecInput) inputs.push(codecInput);
        for (const input of inputs) {
            input.addEventListener('change', saveOptions);
        }
    }

    /**
     * Reset the speed and pitch inputs to '0' and fire a bubbling, cancelable
     * 'input' event on each so listeners on the page pick up the new value.
     */
    function initSpeedAndPitch() {
        for (const slider of [speedInput, pitchInput]) {
            slider.value = '0';
            // Event constructor instead of the deprecated createEvent()/initEvent() pair.
            slider.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
        }
    }

    // Status lines created by this script (attached to the page further down).
    const downloadStatus = document.createElement('div');
    const downloadSize = document.createElement('div');

    // Elements that already exist on the demo page.
    const ttstext = document.getElementById('ttstext');
    const languageInput = document.getElementById('languageselect');
    const voiceInput = document.getElementById('voiceselect');
    const styleInput = document.getElementById('voicestyleselect');
    const speedInput = document.getElementById('speed');
    const pitchInput = document.getElementById('pitch');

    // Containers derived from the elements above.
    const buttonArea = document.getElementById('playli').parentElement;
    const styleSelecter = styleInput.parentElement;

    // Dropping exactly one 'text/plain' file onto the text box replaces its content
    // with the file's text; any other drop is left to the browser's default handling.
    ttstext.ondrop = async (event) => {
        const dropped = event.dataTransfer.files;
        if (dropped.length !== 1 || dropped[0].type !== 'text/plain') return;
        event.preventDefault();
        ttstext.value = await dropped[0].text();
        dispatchTextChange();
    };

    // Required by Firefox: cancel dragover on the text box.
    ttstext.ondragover = (event) => {
        event.preventDefault();
    };

    // One-shot hook on the language selector's onchange handler.
    // Two nested zero-delay timeouts postpone the hook by two timer turns.
    // NOTE(review): presumably this waits until the page has assigned
    // languageInput.onchange and is about to trigger it — confirm against the page.
    setTimeout(() => {
        setTimeout(() => {
            // Keep whatever handler is installed at this point.
            const onchange = languageInput.onchange
            languageInput.onchange = (...args) => {
                // Run the original handler first, then put it back so this wrapper runs only once.
                onchange(...args)
                languageInput.onchange = onchange
                // Reset speed/pitch, apply any saved options, then start persisting changes.
                initSpeedAndPitch()
                restoreOptions()
                bindSaveOption()
                // Append usage notes (collect mode, auto split, drag-and-drop of txt files) to the text box.
                ttstext.value += "\n\n\n收集模式:\n\n打开之后,点击\“下载\”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载"
                ttstext.value += "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果"
                ttstext.value += "\n\n\n\n拖拽 txt 文件至此框可加载文本文件"
            }
        }, 0)
    }, 0)

    // Download button: forwards the click to the page's play button while the
    // enableDownload flag is raised.
    const downloadButton = createButton('donwloadli', 'green', '下载');
    // NOTE(review): this sets the margin on the status line, not on the button;
    // the collect button sets its own marginRight — confirm which element was intended.
    downloadStatus.style.marginRight = '10px';
    downloadButton.addEventListener('click', () => {
        streamSize = 0;
        downloadStatus.textContent = '下载中';
        // The flag is up only for the duration of the forwarded click.
        enableDownload = true;
        document.getElementById('playbtn').click();
        enableDownload = false;
    });
    buttonArea.appendChild(downloadButton);
    // Collect-mode toggle. Turning the mode off downloads whatever was collected.
    const collectButton = createButton('collectli', 'red', '收集模式关');
    collectButton.style.marginRight = '10px';
    collectButton.addEventListener('click', () => {
        enableCollect = !enableCollect;
        switchOptionDisplay();
        if (enableCollect) {
            setButton(collectButton, 'green', '收集模式开');
            return;
        }
        setButton(collectButton, 'red', '收集模式关');
        // Nothing collected: nothing to download.
        if (fileSize) downloadAndClean();
    });
    buttonArea.appendChild(collectButton);
    // Auto-split options (hidden until collect mode is switched on).
    const optionArea = document.createElement('div');
    optionArea.id = 'optiondiv';
    optionArea.style.display = 'none';

    // Segment length, default 300.
    const maxSizeLabel = document.createElement('span');
    maxSizeLabel.textContent = '段落长度';
    const maxSizeInput = document.createElement('input');
    Object.assign(maxSizeInput.style, { width: '50px', margin: '10px' });
    maxSizeInput.value = '300';

    // Characters after which a segment may be cut.
    const delimiterLabel = document.createElement('span');
    delimiterLabel.textContent = '分隔符';
    const delimiterInput = document.createElement('input');
    Object.assign(delimiterInput.style, { width: '100px', margin: '10px' });
    delimiterInput.value = ',。?,.?';

    optionArea.append(maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput);

    // Option area first, then the two download status lines.
    const statusHost = buttonArea.parentElement;
    statusHost.appendChild(optionArea);
    statusHost.appendChild(downloadStatus);
    statusHost.appendChild(downloadSize);
    /**
     * Split text into pieces of at most maxSize characters.
     *
     * Characters are buffered until the buffer holds maxSize of them. The buffer is
     * then cut right after the last delimiter it contains, and the rest carries over
     * into the next piece. When the full buffer contains no delimiter at all, the
     * whole buffer is emitted as one piece, so the size limit is still honoured.
     * Empty pieces are dropped.
     *
     * @param {string} text - text to split (iterated by code point)
     * @param {number} maxSize - buffer length that triggers a cut; values that the
     *     buffer length never equals (NaN, 0, negative, fractional) mean "no splitting"
     * @param {string[]} delimiters - characters after which a cut is allowed
     * @returns {string[]} the non-empty pieces, in order
     */
    function splitTextAtDelimiters(text, maxSize, delimiters) {
        const pieces = []
        const buffer = []
        // Position inside `buffer` of the most recent delimiter, -1 when there is none.
        let lastDelimiter = -1
        for (const char of text) {
            buffer.push(char)
            if (delimiters.includes(char)) lastDelimiter = buffer.length - 1
            if (buffer.length === maxSize) {
                const cut = lastDelimiter >= 0 ? lastDelimiter + 1 : buffer.length
                pieces.push(buffer.splice(0, cut).join(''))
                // Everything left in the buffer came after the last delimiter.
                lastDelimiter = -1
            }
        }
        pieces.push(buffer.join(''))
        return pieces.filter(piece => piece !== '')
    }

    // set auto split button
    const autoSplitButton = createButton('autosplit', 'red', '自动拆分')
    autoSplitButton.addEventListener('click', () => {
        setButton(autoSplitButton, 'green', '拆分中')
        autoProcessing = true
        const pieces = splitTextAtDelimiters(
            ttstext.value,
            +maxSizeInput.value,
            [...delimiterInput.value]
        )
        // The first piece is synthesized right away; the rest is queued in `tasks`
        // and picked up one by one when the stream handler's close() runs.
        ttstext.value = pieces.length ? pieces.shift() : ''
        tasks = pieces
        dispatchTextChange()
        downloadButton.click()
    })
    autoSplitButton.style.display = 'none'
    buttonArea.appendChild(autoSplitButton)
    // Preview player: an <audio> element with controls, inserted right after the
    // text box and hidden until collect mode is switched on.
    const previewPlayer = document.createElement('audio');
    previewPlayer.controls = true;
    Object.assign(previewPlayer.style, {
        display: 'none',
        width: '100%',
        marginTop: '10px'
    });
    ttstext.after(previewPlayer);
    // Codec selector, built by cloning the voice-style selector block.
    // codecInput stays undefined if anything in the try block throws.
    let codecInput;
    try {
        const optionSelector = styleSelecter.cloneNode(true);
        const label = optionSelector.querySelector('label');
        label.textContent = '音频编码';
        label.htmlFor = 'voiceformatselect';
        codecInput = optionSelector.querySelector('select');
        codecInput.id = 'voiceformatselect';
        codecInput.innerHTML = '';

        // Keep only the entries with a numeric key (their values are the format
        // names), and of those only MP3, Ogg and WebM formats.
        const offeredFormat = /(^Audio.+Mp3$)|(^Ogg)|(^Webm)/;
        for (const [key, format] of Object.entries(SpeechSDK.SpeechSynthesisOutputFormat)) {
            if (isNaN(key) || !offeredFormat.test(format)) continue;
            const option = document.createElement("option");
            option.value = format;
            option.text = format;
            if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true;
            codecInput.appendChild(option);
        }
        styleSelecter.after(optionSelector);

        // The chosen codec is applied by overwriting the Audio24Khz96KBitRateMonoMp3
        // entry of the SDK's format table; its original value is remembered here so
        // it can be put back when that format itself is selected.
        // NOTE(review): presumably the page always requests Audio24Khz96KBitRateMonoMp3 — confirm.
        const audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3;
        codecInput.addEventListener('change', () => {
            const chosen = codecInput.value;
            SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3 =
                chosen === 'Audio24Khz96KBitRateMonoMp3'
                    ? audio24Khz96KBitRateMonoMp3
                    : SpeechSDK.SpeechSynthesisOutputFormat[chosen];
            // File extension follows the codec family.
            if (chosen.startsWith('Ogg')) {
                fileExt = '.ogg';
            } else {
                fileExt = chosen.startsWith('Webm') ? '.webm' : '.mp3';
            }
        });
    } catch (e) {
        console.log(e);
    }
    // "Save options" checkbox, placed next to the pitch control.
    const saveCheckBox = document.createElement("input");
    saveCheckBox.type = 'checkbox';
    const saveLabel = document.createElement("span");
    saveLabel.innerText = '保存配置';
    saveLabel.style.marginLeft = '5px';
    const pitchArea = document.getElementById('pitchlabel').parentElement;
    pitchArea.append(saveCheckBox, saveLabel);
    // Ticking the box saves the current options at once; unticking deletes the saved copy.
    saveCheckBox.addEventListener('change', () => {
        enableSaveOptions = saveCheckBox.checked;
        if (enableSaveOptions) {
            saveOptions();
            return;
        }
        localStorage.removeItem('savedOptions');
    });

    // Stream size (bytes) at which the "free quota exceeded" message is shown.
    const STREAM_BYTE_LIMIT = 1900800;
    // Number of trailing fragments discarded in collect mode when that size is hit.
    // NOTE(review): 1900800 / 1320 = 1440, so this presumably assumes 1440-byte fragments — confirm.
    const LIMIT_FRAGMENT_COUNT = 1320;

    // Callbacks handed to the SDK's push output stream.
    const streamHandler = {
        // Called with each chunk of synthesized audio.
        write(dataBuffer) {
            const received = dataBuffer.byteLength;
            streamSize += received;
            // Chunks are kept only while the stream is within the limit.
            if (streamSize <= STREAM_BYTE_LIMIT) {
                fileSize += received;
                const progress = `已接收 ${fileSize / 1000} kb`;
                downloadSize.textContent = autoProcessing
                    ? `剩余分段 ${tasks.length} ` + progress
                    : progress;
                wavFragments.push(dataBuffer);
            }
            if (streamSize !== STREAM_BYTE_LIMIT) return;
            // Exactly at the limit: report it and throw away what this stream produced.
            downloadStatus.textContent = '下载长度超过免费限额,请分割文本后使用收集模式';
            if (enableCollect) {
                fileSize -= STREAM_BYTE_LIMIT;
                wavFragments.length -= LIMIT_FRAGMENT_COUNT;
            } else {
                fileSize = 0;
                wavFragments.length = 0;
            }
        },
        // Called when the stream is finished.
        close() {
            downloadStatus.textContent = '下载完成';
            if (!enableCollect) {
                // Plain download: save at once.
                downloadAndClean();
            } else if (!autoProcessing) {
                // Manual collecting: just refresh the preview.
                syncAudioToPlayer();
            } else if (tasks.length) {
                // Auto split: continue with the next queued piece.
                ttstext.value = tasks.shift();
                dispatchTextChange();
                downloadButton.click();
            } else {
                // Auto split finished.
                autoProcessing = false;
                setButton(autoSplitButton, 'red', '自动拆分');
                ttstext.value = "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件";
                syncAudioToPlayer();
            }
        }
    };

    // Push stream that feeds synthesized audio into streamHandler.
    const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler);

    // Wrap the SDK's speaker-output factory: while enableDownload is raised the audio
    // goes to outputStream instead of the speaker; otherwise the original factory is used.
    const originalFromSpeakerOutput = SpeechSDK.AudioConfig.fromSpeakerOutput;
    SpeechSDK.AudioConfig.fromSpeakerOutput = function (audioDestination) {
        if (!enableDownload) {
            return originalFromSpeakerOutput(audioDestination);
        }
        // onAudioEnd() is invoked first; its result is returned if truthy,
        // otherwise the stream-backed config is.
        return audioDestination.onAudioEnd() || SpeechSDK.AudioConfig.fromStreamOutput(outputStream);
    };
})();