// ==UserScript==
// @name Azure Speech Download
// @namespace
// @version 1.0.0
// @description 为微软的文本转语音服务的 demo 页面添加下载按钮
// @author Puteulanus
// @homepage https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
// @match https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
// @icon https://www.microsoft.com/favicon.ico
// @require https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @grant none
// @run-at document-end
// @namespace https://greasyfork.org/users/909438
// ==/UserScript==
/* globals saveAs */
/* jshint esversion: 6 */
(function() {
'use strict';
// Your code here...
// Fallback used when the @require'd FileSaver.js did not define window.saveAs:
// download `blob` under the file name `name` through a temporary hidden
// <a download> element.
if (!window.saveAs) {
  window.saveAs = (blob, name) => {
    const anchor = document.createElement('a')
    const url = window.URL.createObjectURL(blob)
    anchor.style.display = 'none'
    anchor.href = url
    anchor.download = name
    document.body.appendChild(anchor)
    anchor.click()
    // The anchor is only needed for the click; leaving it behind would add one
    // hidden element to the page per download.
    anchor.remove()
    // Revoke later instead of in the same tick: the browser may not have
    // started reading the blob yet when click() returns.
    setTimeout(() => {
      window.URL.revokeObjectURL(url)
    }, 40000)
  }
}
// Speech SDK object published on the page's window
const { SpeechSDK } = window

// --- collected audio ---
// fragments received through the stream handler, in arrival order
let wavFragments = []
// total number of bytes currently held in wavFragments
let fileSize = 0
// number of bytes received for the stream currently being written
let streamSize = 0
// extension appended to the name of the saved file
let fileExt = '.mp3'

// --- mode flags ---
// true only while the download button forwards its click to the play button
let enableDownload = false
// collection mode: keep audio of several downloads and save them as one file
let enableCollect = false
// true while the auto-split queue is being worked through
let autoProcessing = false
// persist the current settings to localStorage on every change
let enableSaveOptions = false

// text segments still waiting to be converted by auto split
let tasks = []
// User-facing strings, keyed by language (`zh` for Chinese, `eng` for English).
// Both tables must expose the same keys; the table to use is stored in
// `i18n.lang` right after this declaration.
const i18n = {
// Chinese strings
zh: {
// document1..document3 are appended to the text box as built-in help text
document1: "\n\n\n收集模式:\n\n打开之后,点击“下载”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载",
document2: "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果",
document3: "\n\n\n\n拖拽 txt 文件至此框可加载文本文件",
// button labels and status messages
download: '下载',
downloading: '下载中',
downloaded: '下载完成',
split: '自动拆分',
spliting: '拆分中',
codec: '音频编码',
saveSetting: '保存设置',
lengthWarning: '下载长度超过免费限额,请分割文本后使用收集模式',
splitedMsg: "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件",
length: '段落长度',
delimiter: '分隔符',
collectionOn: '收集模式开',
collectionOff: '收集模式关',
received: '已接收',
taskQueue: '剩余分段',
profileName: '配置名',
createProfile: '创建配置',
},
// English strings
eng: {
// document1..document3 are appended to the text box as built-in help text
document1: "\n\n\nCollection:\n\nCollect audio files converted by clicking \"Download\" button, do the really download when it is turned off",
document2: "\n\nSplit:\n\nSplit long text into segments close to the \"paragraph length\", which only truncate at \"delimiter\"",
document3: "\n\n\n\nYou can drag .txt file to this text box to load a text file",
// button labels and status messages
download: 'Download',
downloading: 'Downloading',
downloaded: 'Download complete',
split: 'Split',
spliting: 'Spliting',
codec: 'Codec',
saveSetting: 'Save settings',
lengthWarning: 'Text length exceeds the free limit, please split the text and use collection mode',
splitedMsg: "Split finished\n\nUse the player below to play, or turn off collection mode to download the audio file",
length: 'Paragraph length',
delimiter: 'Delimiter',
collectionOn: 'Collection On',
collectionOff: 'Collection Off',
received: 'Received:',
taskQueue: 'Task queue:',
profileName: 'Profile name',
createProfile: 'Create profile',
}
}
// Choose the string table from the culture code published by the page.
// `window.Acom` is page-owned and may be absent; fall back to the document or
// browser language instead of throwing, which would abort the whole script.
const lang = String(
  (window.Acom && window.Acom.currentCulture) ||
  document.documentElement.lang ||
  navigator.language ||
  ''
).toLowerCase()
// Chinese cultures get the Chinese table, everything else gets English.
if (lang === 'zh-cn' || lang === 'zh-tw') {
  i18n.lang = i18n.zh
} else {
  i18n.lang = i18n.eng
}
/**
 * Create a new button by deep-cloning the element with id "playli".
 * @param {string} id - id given to the clone
 * @param {string} color - background and border color of the inner <button>
 * @param {string} content - text placed in the clone's last <span>
 * @returns {Element} the configured clone (not yet attached to the page)
 */
function createButton(id, color, content) {
  const clone = document.getElementById('playli').cloneNode(true)
  const label = clone.querySelector('span:last-of-type')
  const inner = clone.querySelector('button')
  clone.id = id
  label.textContent = content
  inner.style.backgroundColor = color
  inner.style.borderColor = color
  return clone
}
/**
 * Change the label and color of a button made by createButton.
 * @param {Element} button - wrapper element containing a <span> label and a <button>
 * @param {string} color - new background and border color of the inner <button>
 * @param {string} content - new label text
 */
function setButton(button, color, content) {
  const label = button.querySelector('span:last-of-type')
  const inner = button.querySelector('button')
  label.textContent = content
  Object.assign(inner.style, { backgroundColor: color, borderColor: color })
}
/**
 * Concatenate every collected fragment into one file, pass it to saveAs and
 * reset the collection state (fileSize, streamSize, wavFragments).
 * The file is named after the current UTC time, "YYYY-MM-DD HH_MM_SS",
 * followed by fileExt.
 */
function downloadAndClean() {
  const merged = new Uint8Array(fileSize)
  fileSize = 0
  streamSize = 0
  let offset = 0
  for (const fragment of wavFragments) {
    merged.set(new Uint8Array(fragment), offset)
    offset += fragment.byteLength
  }
  wavFragments.length = 0
  // Replace every colon, not just the first one: ':' is not allowed in file
  // names on some platforms.
  const timestamp = new Date().toISOString().split('.')[0].replace('T', ' ').replace(/:/g, '_')
  saveAs(new Blob([merged]), timestamp + fileExt)
}
/**
 * Show the auto-split button, the option area and the preview player while
 * collection mode is on, and hide all three while it is off.
 */
function switchOptionDisplay() {
  autoSplitButton.style.display = enableCollect ? 'block' : 'none'
  optionArea.style.display = enableCollect ? 'block' : 'none'
  previewPlayer.style.display = enableCollect ? 'inline-block' : 'none'
}
/**
 * Load everything collected so far into the preview player.
 * The collection state is left untouched; only previewPlayer.src changes.
 */
function syncAudioToPlayer() {
  // MIME type matching the extension chosen by the codec selector
  const mimeTypes = { '.mp3': 'audio/mpeg', '.ogg': 'audio/ogg', '.webm': 'audio/webm' }
  const merged = new Uint8Array(fileSize)
  let offset = 0
  for (const fragment of wavFragments) {
    merged.set(new Uint8Array(fragment), offset)
    offset += fragment.byteLength
  }
  const previousUrl = previewPlayer.src
  const audioBlob = new Blob([merged], { type: mimeTypes[fileExt] || 'audio/ogg' })
  previewPlayer.src = URL.createObjectURL(audioBlob)
  // Release the blob of the previous preview; otherwise every call keeps
  // another full copy of the audio alive until the page is closed.
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl)
  }
}
/**
 * Fire a bubbling, cancelable "input" event on the text box so that listeners
 * on the page notice a value that was changed from script.
 */
function dispatchTextChange() {
  const inputEvent = new Event('input', { bubbles: true, cancelable: true })
  ttstext.dispatchEvent(inputEvent)
}
/**
 * Store the current settings under the localStorage key "savedOptions".
 * Does nothing unless saving has been enabled.
 */
function saveOptions() {
  if (enableSaveOptions) {
    const serialized = JSON.stringify(getCurrentSettings())
    localStorage.setItem('savedOptions', serialized)
  }
}
/**
 * Re-apply the settings stored under "savedOptions", tick the save checkbox
 * and turn saving back on. Does nothing when no settings are stored.
 * Unreadable stored data is discarded instead of throwing, so that the
 * initialisation that calls this function can continue.
 */
function restoreOptions() {
  const optionsJSON = localStorage.getItem('savedOptions')
  if (!optionsJSON) return
  let options = null
  try {
    options = JSON.parse(optionsJSON)
  } catch (e) {
    console.log(e)
  }
  if (!options || typeof options !== 'object') {
    // corrupted entry: drop it so it cannot break later page loads either
    localStorage.removeItem('savedOptions')
    return
  }
  setSettings(options)
  saveCheckBox.checked = true
  enableSaveOptions = true
}
/**
 * Call saveOptions whenever one of the setting inputs fires "change".
 * codecInput is assigned inside a guarded setup block and stays undefined when
 * that setup fails early, so missing inputs are skipped instead of aborting
 * the binding of the remaining ones.
 */
function bindSaveOption() {
  const inputs = [
    languageInput,
    voiceInput,
    styleInput,
    codecInput,
    speedInput,
    pitchInput,
    maxSizeInput,
    delimiterInput
  ]
  inputs.filter(Boolean).forEach(input => {
    input.addEventListener('change', saveOptions)
  })
}
/**
 * Reset the speed and pitch inputs to '0' and fire a bubbling, cancelable
 * "input" event on each of them right after its value is set.
 */
function initSpeedAndPitch() {
  for (const slider of [speedInput, pitchInput]) {
    slider.value = '0'
    slider.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }))
  }
}
/**
 * Save a settings profile under `name` in the "savedProfiles" list, replacing
 * any existing profile with the same name, then redraw the profile list.
 * A missing or unreadable list is treated as empty.
 * @param {string} name - profile name
 * @param {Object} profile - settings object to store
 */
function createProfile(name, profile) {
  let profiles = null
  try {
    profiles = JSON.parse(localStorage.getItem('savedProfiles'))
  } catch (e) {
    console.log(e)
  }
  if (!Array.isArray(profiles)) profiles = []
  const others = profiles.filter(saved => saved && saved.name !== name)
  localStorage.setItem('savedProfiles', JSON.stringify([...others, {
    name,
    setting: profile
  }]))
  refreshProfile()
}
/**
 * Delete the profile called `name` from the "savedProfiles" list and redraw
 * the profile list. A missing or unreadable list is treated as empty.
 * @param {string} name - name of the profile to delete
 */
function removeProfile(name) {
  let profiles = null
  try {
    profiles = JSON.parse(localStorage.getItem('savedProfiles'))
  } catch (e) {
    console.log(e)
  }
  if (!Array.isArray(profiles)) profiles = []
  const remaining = profiles.filter(saved => saved && saved.name !== name)
  localStorage.setItem('savedProfiles', JSON.stringify(remaining))
  refreshProfile()
}
/**
 * Rebuild the profile list shown in profileContainer from the "savedProfiles"
 * entry in localStorage. Each profile gets a name label (click: apply the
 * profile while keeping the current text) and an "X" label (click: delete).
 * A missing, unreadable or non-array entry is reset to an empty list instead
 * of throwing, because this function also runs during script start-up.
 */
function refreshProfile() {
  let profiles = null
  try {
    profiles = JSON.parse(localStorage.getItem('savedProfiles'))
  } catch (e) {
    console.log(e)
  }
  if (!Array.isArray(profiles)) {
    profiles = []
    localStorage.setItem('savedProfiles', JSON.stringify(profiles))
  }
  profileContainer.innerHTML = ''
  profiles.forEach(profile => {
    const profileDiv = document.createElement("div")
    const profileName = document.createElement("span")
    const profileDelete = document.createElement("span")
    profileDiv.style.display = 'inline-block'
    profileDiv.style.border = '1px solid'
    profileDiv.style.marginLeft = '5px'
    profileDiv.style.cursor = 'pointer'
    profileName.innerText = profile.name
    profileName.style.padding = '5px'
    profileDelete.innerText = 'X'
    profileDelete.style.backgroundColor = 'black'
    profileDelete.style.color = 'white'
    profileDelete.style.padding = '2px'
    profileDiv.appendChild(profileName)
    profileDiv.append(profileDelete)
    profileContainer.append(profileDiv)
    profileName.addEventListener('click', () => {
      // the text box content is saved and written back after the profile
      // has been applied
      const textBackup = ttstext.value
      setSettings(profile.setting)
      ttstext.value = textBackup
      dispatchTextChange()
    })
    profileDelete.addEventListener('click', () => {
      removeProfile(profile.name)
    })
  })
}
/**
 * Read the current value of every setting input.
 * @returns {Object} settings object in the shape accepted by setSettings;
 *   `codec` is undefined when the codec selector could not be created
 */
function getCurrentSettings() {
  return {
    language: languageInput.value,
    voice: voiceInput.value,
    style: styleInput.value,
    // codecInput is assigned inside a guarded setup block and may be undefined
    codec: codecInput ? codecInput.value : undefined,
    speed: speedInput.value,
    pitch: pitchInput.value,
    splitLength: maxSizeInput.value,
    delimiter: delimiterInput.value
  }
}
/**
 * Apply a settings object (as produced by getCurrentSettings) to the inputs.
 * Language, voice, style, codec, speed and pitch are set in that order, each
 * followed by a "change" event; speed and pitch then also receive an "input"
 * event. Split length and delimiter are set without any event.
 * Fields missing from `setting` and inputs that do not exist are skipped, so
 * that incomplete stored data never writes "undefined" into the page.
 * @param {Object} setting - settings to apply
 */
function setSettings(setting) {
  const fire = (input, type) => {
    input.dispatchEvent(new Event(type, { bubbles: true, cancelable: true }))
  }
  const applyAndNotify = (input, value) => {
    if (!input || value == null) return
    input.value = value
    fire(input, 'change')
  }
  applyAndNotify(languageInput, setting.language)
  applyAndNotify(voiceInput, setting.voice)
  applyAndNotify(styleInput, setting.style)
  // codecInput may be undefined when the codec selector could not be created
  applyAndNotify(codecInput, setting.codec)
  applyAndNotify(speedInput, setting.speed)
  applyAndNotify(pitchInput, setting.pitch)
  fire(speedInput, 'input')
  fire(pitchInput, 'input')
  if (setting.splitLength != null) maxSizeInput.value = setting.splitLength
  if (setting.delimiter != null) delimiterInput.value = setting.delimiter
}
// Elements created by this script to show progress
const downloadStatus = document.createElement('div')
const downloadSize = document.createElement('div')
// Elements that already exist on the page, looked up by id
const buttonArea = document.querySelector('#playli').parentElement
const ttstext = document.querySelector('#ttstext')
const styleSelecter = document.querySelector('#voicestyleselect').parentElement
const languageInput = document.querySelector('#languageselect')
const voiceInput = document.querySelector('#voiceselect')
const styleInput = document.querySelector('#voicestyleselect')
const speedInput = document.querySelector('#speed')
const pitchInput = document.querySelector('#pitch')
// Dropping exactly one plain-text file on the text box replaces the text with
// the file's content. Any other drop is left to the browser's default handling.
ttstext.ondrop = async (e) => {
  const files = e.dataTransfer.files
  if (files.length !== 1) return
  const file = files[0]
  // File.type is empty when the browser cannot determine the type, so fall
  // back to the .txt extension in that case.
  const isPlainText = file.type === 'text/plain' ||
    (file.type === '' && /\.txt$/i.test(file.name))
  if (!isPlainText) return
  e.preventDefault()
  try {
    ttstext.value = await file.text()
    dispatchTextChange()
  } catch (err) {
    // reading failed: keep the current text and report the error instead of
    // leaving an unhandled promise rejection
    console.log(err)
  }
}
// required by Firefox
ttstext.ondragover = function (e) {
  e.preventDefault()
}
// set document
// Wrap the language selector's onchange handler for exactly one invocation.
// On that invocation the page's handler runs first, then the handler is put
// back and the script finishes its own setup: reset speed/pitch, restore saved
// options, start saving on change and append the help text to the text box.
// NOTE(review): the two nested zero-delay timers presumably wait until the page
// has installed its own handler — confirm against the page script.
setTimeout(() => {
  setTimeout(() => {
    const pageOnChange = languageInput.onchange
    languageInput.onchange = function (...args) {
      // Forward with the caller's `this` (the element when invoked as a
      // handler) and tolerate the page not having set a handler at all.
      if (typeof pageOnChange === 'function') pageOnChange.apply(this, args)
      languageInput.onchange = pageOnChange
      initSpeedAndPitch()
      restoreOptions()
      bindSaveOption()
      ttstext.value += i18n.lang.document1
      ttstext.value += i18n.lang.document2
      ttstext.value += i18n.lang.document3
    }
  }, 0)
}, 0)
// set download button
const downloadButton = createButton('donwloadli', 'green', i18n.lang.download)
downloadStatus.style.marginTop = '10px'
buttonArea.appendChild(downloadButton)
downloadButton.addEventListener('click', function onDownloadClick() {
  downloadStatus.textContent = i18n.lang.downloading
  streamSize = 0
  // enableDownload is raised only for the duration of the forwarded click on
  // the page's play button
  enableDownload = true
  document.getElementById('playbtn').click()
  enableDownload = false
})
// set collect button
const collectButton = createButton('collectli', 'red', i18n.lang.collectionOff)
collectButton.style.marginRight = '10px'
buttonArea.appendChild(collectButton)
collectButton.addEventListener('click', () => {
  // toggle collection mode and show or hide the related controls
  enableCollect = !enableCollect
  switchOptionDisplay()
  if (enableCollect) {
    setButton(collectButton, 'green', i18n.lang.collectionOn)
    return
  }
  setButton(collectButton, 'red', i18n.lang.collectionOff)
  // leaving collection mode saves whatever has been collected, if anything
  if (fileSize) downloadAndClean()
})
// set options
// Hidden container holding the two auto-split inputs and their labels
const optionArea = document.createElement('div')
optionArea.id = 'optiondiv'
optionArea.style.display = 'none'

// segment length input, default 300
const maxSizeLabel = document.createElement('span')
maxSizeLabel.textContent = i18n.lang.length
const maxSizeInput = document.createElement('input')
Object.assign(maxSizeInput.style, { width: '50px', margin: '10px' })
maxSizeInput.value = '300'

// delimiter input: every character typed here counts as one delimiter
const delimiterLabel = document.createElement('span')
delimiterLabel.textContent = i18n.lang.delimiter
const delimiterInput = document.createElement('input')
Object.assign(delimiterInput.style, { width: '100px', margin: '10px' })
delimiterInput.value = ',。?,.?'

optionArea.append(maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput)
const controlsParent = buttonArea.parentElement
controlsParent.appendChild(optionArea)
// set download status
controlsParent.appendChild(downloadStatus)
controlsParent.appendChild(downloadSize)
// set auto split button
/**
 * Split `text` into consecutive segments of at most `maxSize` UTF-16 code
 * units. Each segment ends right after the last delimiter found inside its
 * window; a window without any delimiter is cut at `maxSize` so that splitting
 * always makes progress. Joining the segments gives back `text`.
 * @param {string} text - text to split
 * @param {number} maxSize - maximum segment length; not split when not > 0
 * @param {string[]} delimiters - single characters a segment may end with
 * @returns {string[]} non-empty segments in order
 */
function splitTextAtDelimiters(text, maxSize, delimiters) {
  if (!text) return []
  if (!(maxSize > 0)) return [text]
  const segments = []
  let start = 0
  while (text.length - start >= maxSize) {
    const windowEnd = start + maxSize
    let cut = -1
    for (let i = windowEnd - 1; i >= start; i--) {
      if (delimiters.includes(text[i])) {
        cut = i + 1
        break
      }
    }
    if (cut === -1) {
      cut = windowEnd
      // do not separate the two halves of a surrogate pair
      const last = text.charCodeAt(cut - 1)
      if (last >= 0xD800 && last <= 0xDBFF && cut - 1 > start) cut -= 1
    }
    segments.push(text.slice(start, cut))
    start = cut
  }
  if (start < text.length) segments.push(text.slice(start))
  return segments
}
const autoSplitButton = createButton('autosplit', 'red', i18n.lang.split)
autoSplitButton.addEventListener('click', () => {
  const segments = splitTextAtDelimiters(
    ttstext.value,
    +maxSizeInput.value,
    delimiterInput.value.split('')
  )
  // nothing to convert: do not start the queue
  if (!segments.length) return
  setButton(autoSplitButton, 'green', i18n.lang.spliting)
  autoProcessing = true
  // The first segment is converted now; the stream handler takes the
  // remaining ones from `tasks` each time a stream is closed.
  ttstext.value = segments.shift()
  tasks = segments
  dispatchTextChange()
  downloadButton.click()
})
autoSplitButton.style.display = 'none'
buttonArea.appendChild(autoSplitButton)
// set preview player
// Audio element placed right after the text box; hidden until
// switchOptionDisplay shows it
const previewPlayer = document.createElement('audio')
previewPlayer.controls = true
Object.assign(previewPlayer.style, {
  display: 'none',
  width: '100%',
  marginTop: '10px'
})
ttstext.after(previewPlayer)
// set formatting options
// Adds a codec <select> by cloning the voice-style selector. Any failure is
// logged and the rest of the script continues; codecInput keeps whatever value
// it had at the point of failure.
let codecInput
try {
  const optionSelector = styleSelecter.cloneNode(true)
  const label = optionSelector.querySelector('label')
  label.textContent = i18n.lang.codec
  label.htmlFor = 'voiceformatselect'
  codecInput = optionSelector.querySelector('select')
  codecInput.id = 'voiceformatselect'
  codecInput.innerHTML = ''
  // Only entries with a numeric key are used (their value is the format
  // name), and of those only MP3, Ogg and WebM formats are offered.
  const offeredFormats = /(^Audio.+Mp3$)|(^Ogg)|(^Webm)/
  for (const [key, format] of Object.entries(SpeechSDK.SpeechSynthesisOutputFormat)) {
    if (isNaN(key) || !offeredFormats.test(format)) continue
    const option = document.createElement("option")
    option.value = format
    option.text = format
    if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true
    codecInput.appendChild(option)
  }
  styleSelecter.after(optionSelector)
  // Value of the default entry before it is overwritten for the first time
  const audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
  codecInput.addEventListener('change', () => {
    const formats = SpeechSDK.SpeechSynthesisOutputFormat
    const chosen = codecInput.value
    // The chosen format is written into the Audio24Khz96KBitRateMonoMp3 entry;
    // choosing that entry itself restores the remembered original value.
    formats.Audio24Khz96KBitRateMonoMp3 = chosen === 'Audio24Khz96KBitRateMonoMp3'
      ? audio24Khz96KBitRateMonoMp3
      : formats[chosen]
    fileExt = chosen.startsWith('Ogg')
      ? '.ogg'
      : chosen.startsWith('Webm') ? '.webm' : '.mp3'
  })
} catch (e) {
  console.log(e)
}
// set save options
// Checkbox and label appended next to the pitch control
const saveCheckBox = document.createElement("input")
saveCheckBox.type = 'checkbox'
const saveLabel = document.createElement("span")
saveLabel.innerText = i18n.lang.saveSetting
saveLabel.style.marginLeft = '5px'
const pitchArea = document.getElementById('pitchlabel').parentElement
pitchArea.append(saveCheckBox, saveLabel)
saveCheckBox.addEventListener('change', () => {
  enableSaveOptions = saveCheckBox.checked
  if (enableSaveOptions) {
    // ticking the box stores the current settings right away
    saveOptions()
  } else {
    // unticking it forgets the stored settings
    localStorage.removeItem('savedOptions')
  }
})
// set profile manage
// Row placed after the preview player: name input, create button and the
// list of saved profiles
const createProfileInput = document.createElement("input")
createProfileInput.placeholder = i18n.lang.profileName
createProfileInput.style.width = '120px'

const createProfileButton = document.createElement("button")
createProfileButton.innerText = i18n.lang.createProfile
Object.assign(createProfileButton.style, {
  border: '1px solid',
  marginLeft: '5px',
  padding: '2px'
})

const profileContainer = document.createElement("div")
profileContainer.style.display = 'inline-block'

const profileArea = document.createElement("div")
profileArea.append(createProfileInput, createProfileButton, profileContainer)
profileArea.style.marginTop = '10px'
previewPlayer.after(profileArea)
refreshProfile()
createProfileButton.addEventListener('click', () => {
  const name = createProfileInput.value
  // a profile needs a name
  if (!name) return
  createProfile(name, getCurrentSettings())
  createProfileInput.value = ''
})
// Callback object handed to the SDK's push output stream: `write` receives
// the audio data, `close` is called when a stream ends.
const streamHandler = (() => {
  // NOTE(review): presumably the byte count at which the free demo cuts the
  // audio off (see i18n lengthWarning) — the number is not explained in this file.
  const STREAM_LIMIT = 1900800
  // what the current stream has added to the collection so far
  let keptBytes = 0
  let keptFragments = 0
  // whether the current stream has hit STREAM_LIMIT
  let limitReached = false
  const resetStream = () => {
    keptBytes = 0
    keptFragments = 0
    limitReached = false
  }
  return {
    /**
     * Append one chunk of audio to the collection and update the progress text.
     * @param {ArrayBuffer} dataBuffer - audio chunk; only byteLength is read here
     */
    write: function (dataBuffer) {
      // streamSize is reset to 0 before each stream is started
      if (streamSize === 0) resetStream()
      streamSize += dataBuffer.byteLength
      if (streamSize <= STREAM_LIMIT) {
        fileSize += dataBuffer.byteLength
        keptBytes += dataBuffer.byteLength
        keptFragments += 1
        downloadSize.textContent = `${i18n.lang.received} ${fileSize / 1000} kb`
        if (autoProcessing) downloadSize.textContent = `${i18n.lang.taskQueue} ${tasks.length} ` + downloadSize.textContent
        wavFragments.push(dataBuffer)
      }
      if (streamSize === STREAM_LIMIT) {
        limitReached = true
        downloadStatus.textContent = i18n.lang.lengthWarning
        if (!enableCollect) {
          fileSize = 0
          wavFragments.length = 0
        } else {
          // Drop exactly what this stream contributed. The fragments are
          // counted instead of assumed, so the chunk size used by the SDK
          // does not matter.
          fileSize -= keptBytes
          wavFragments.length -= keptFragments
        }
        keptBytes = 0
        keptFragments = 0
      }
    },
    /**
     * Finish the current stream: save the file (normal mode), refresh the
     * preview player (collection mode) or start the next queued segment
     * (auto split).
     */
    close: function () {
      const hitLimit = limitReached
      resetStream()
      // keep the limit warning visible instead of replacing it immediately
      if (!hitLimit) downloadStatus.textContent = i18n.lang.downloaded
      if (!enableCollect) {
        // nothing was kept (e.g. the limit was hit): do not save an empty file
        if (fileSize) downloadAndClean()
        return
      }
      if (!autoProcessing) {
        syncAudioToPlayer()
        return
      }
      if (tasks.length) {
        ttstext.value = tasks.shift()
        dispatchTextChange()
        downloadButton.click()
      } else {
        autoProcessing = false
        setButton(autoSplitButton, 'red', i18n.lang.split)
        ttstext.value = i18n.lang.splitedMsg
        syncAudioToPlayer()
      }
    }
  }
})()
// Push stream that delivers synthesized audio to streamHandler
const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler)
// Replace the SDK's speaker-output factory. While enableDownload is set, the
// audio destination's onAudioEnd() is called and, unless it returns something
// truthy, the audio is routed into outputStream; otherwise the original
// factory is used unchanged.
const speakerOutputFactory = SpeechSDK.AudioConfig.fromSpeakerOutput
SpeechSDK.AudioConfig.fromSpeakerOutput = function (audioDestination) {
  if (!enableDownload) {
    return speakerOutputFactory(audioDestination)
  }
  return audioDestination.onAudioEnd() || SpeechSDK.AudioConfig.fromStreamOutput(outputStream)
}
})();