- // ==UserScript==
- // @name Azure Speech Download
- // @namespace
- // @version 1.0.0
- // @description 为微软的文本转语音服务的 demo 页面添加下载按钮
- // @author Puteulanus
- // @homepage https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
- // @match https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
- // @icon https://www.microsoft.com/favicon.ico
- // @require https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
- // @grant none
- // @run-at document-end
- // @namespace https://greasyfork.org/users/909438
- // ==/UserScript==
-
- /* globals saveAs */
- /* jshint esversion: 6 */
- (function() {
- 'use strict';
-
- // Your code here...
- if(!window.saveAs) {
- window.saveAs = (blob, name) => {
- const a = document.createElement("a");
- document.body.appendChild(a);
- a.style = "display: none";
-
- const url = window.URL.createObjectURL(blob);
- a.href = url;
- a.download = name;
- a.click();
- window.URL.revokeObjectURL(url);
- }
- }
-
- const SpeechSDK = window.SpeechSDK
- let fileSize = 0
- let streamSize = 0
- let wavFragments = []
- let enableDownload = false
- let enableCollect = false
- let autoProcessing = false
- let tasks = []
- let fileExt = '.mp3'
- let enableSaveOptions = false
- const i18n = {
- zh: {
- document1: "\n\n\n收集模式:\n\n打开之后,点击“下载”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载",
- document2: "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果",
- document3: "\n\n\n\n拖拽 txt 文件至此框可加载文本文件",
- download: '下载',
- downloading: '下载中',
- downloaded: '下载完成',
- split: '自动拆分',
- spliting: '拆分中',
- codec: '音频编码',
- saveSetting: '保存设置',
- lengthWarning: '下载长度超过免费限额,请分割文本后使用收集模式',
- splitedMsg: "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件",
- length: '段落长度',
- delimiter: '分隔符',
- collectionOn: '收集模式开',
- collectionOff: '收集模式关',
- received: '已接收',
- taskQueue: '剩余分段',
- profileName: '配置名',
- createProfile: '创建配置',
- },
- eng: {
- document1: "\n\n\nCollection:\n\nCollect audio files converted by clicking \"Download\" button, do the really download when it is turned off",
- document2: "\n\nSplit:\n\nSplit long text into segments close to the \"paragraph length\", which only truncate at \"delimiter\"",
- document3: "\n\n\n\nYou can drag .txt file to this text box to load a text file",
- download: 'Download',
- downloading: 'Downloading',
- downloaded: 'Download complete',
- split: 'Split',
- spliting: 'Spliting',
- codec: 'Codec',
- saveSetting: 'Save settings',
- lengthWarning: 'Text length exceeds the free limit, please split the text and use collection mode',
- splitedMsg: "Split finished\n\nUse the player below to play, or turn off collection mode to download the audio file",
- length: 'Paragraph length',
- delimiter: 'Delimiter',
- collectionOn: 'Collection On',
- collectionOff: 'Collection Off',
- received: 'Received:',
- taskQueue: 'Task queue:',
- profileName: 'Profile name',
- createProfile: 'Create profile',
- }
- }
- const lang = window.Acom.currentCulture
- if (lang === 'zh-cn' || lang === 'zh-tw') {
- i18n.lang = i18n.zh
- } else {
- i18n.lang = i18n.eng
- }
-
- function createButton(id, color, content) {
- const button = document.getElementById('playli').cloneNode(true)
- button.id = id
- button.querySelector('span:last-of-type').textContent = content
- button.querySelector('button').style.backgroundColor = color
- button.querySelector('button').style.borderColor = color
- return button
- }
-
- function setButton(button, color, content) {
- button.querySelector('span:last-of-type').textContent = content
- button.querySelector('button').style.backgroundColor = color
- button.querySelector('button').style.borderColor = color
- }
-
- function downloadAndClean() {
- const sentAudio = new window.Uint8Array(fileSize)
- fileSize = 0
- streamSize = 0
- wavFragments.reduce((size, fragment) => {
- sentAudio.set(new window.Uint8Array(fragment), size)
- return size + fragment.byteLength
- }, 0)
- wavFragments.length = 0
- saveAs(new Blob([sentAudio]), (new Date()).toISOString().replace('T', ' ').replace(':', '_').split('.')[0] + fileExt)
- }
-
- function switchOptionDisplay() {
- if (enableCollect) {
- autoSplitButton.style.display = 'block'
- optionArea.style.display = 'block'
- previewPlayer.style.display = 'inline-block'
- } else {
- autoSplitButton.style.display = 'none'
- optionArea.style.display = 'none'
- previewPlayer.style.display = 'none'
- }
- }
-
- function syncAudioToPlayer() {
- const sentAudio = new window.Uint8Array(fileSize)
- wavFragments.reduce((size, fragment) => {
- sentAudio.set(new window.Uint8Array(fragment), size)
- return size + fragment.byteLength
- }, 0)
- const audioBlob = new Blob([sentAudio], {type : 'audio/ogg'})
- previewPlayer.src = URL.createObjectURL(audioBlob)
- }
-
- function dispatchTextChange() {
- const evt = document.createEvent('HTMLEvents')
- evt.initEvent('input', true, true)
- ttstext.dispatchEvent(evt)
- }
-
- function saveOptions() {
- if (!enableSaveOptions) return
- localStorage.setItem('savedOptions', JSON.stringify(getCurrentSettings()))
- }
-
- function restoreOptions() {
- const optionsJSON = localStorage.getItem('savedOptions')
- if (!optionsJSON) return
- const options = JSON.parse(optionsJSON)
- setSettings(options)
- saveCheckBox.checked = true
- enableSaveOptions = true
- }
-
- function bindSaveOption() {
- languageInput.addEventListener('change', saveOptions)
- voiceInput.addEventListener('change', saveOptions)
- styleInput.addEventListener('change', saveOptions)
- codecInput.addEventListener('change', saveOptions)
- speedInput.addEventListener('change', saveOptions)
- pitchInput.addEventListener('change', saveOptions)
- maxSizeInput.addEventListener('change', saveOptions)
- delimiterInput.addEventListener('change', saveOptions)
- }
-
- function initSpeedAndPitch() {
- const evt = document.createEvent('HTMLEvents')
- evt.initEvent('input', true, true)
- speedInput.value = '0'
- speedInput.dispatchEvent(evt)
- pitchInput.value = '0'
- pitchInput.dispatchEvent(evt)
- }
-
- function createProfile(name, profile) {
- const profiles = JSON.parse(localStorage.getItem('savedProfiles'))
- localStorage.setItem('savedProfiles', JSON.stringify([...profiles.filter(profile => profile.name !== name),{
- name,
- setting: profile
- }]))
- refreshProfile()
- }
-
- function removeProfile(name) {
- let profiles = JSON.parse(localStorage.getItem('savedProfiles'))
- localStorage.setItem('savedProfiles', JSON.stringify(profiles.filter(profile => profile.name !== name)))
- refreshProfile()
- }
-
- function refreshProfile() {
- let profilesJSON = localStorage.getItem('savedProfiles')
- let profiles
- if (!profilesJSON) {
- profiles = []
- localStorage.setItem('savedProfiles', JSON.stringify(profiles))
- } else {
- profiles = JSON.parse(profilesJSON)
- }
- profileContainer.innerHTML = ''
- profiles.forEach(profile => {
- const profileDiv = document.createElement("div")
- const profileName = document.createElement("span")
- const profileDelete = document.createElement("span")
- profileDiv.style.display = 'inline-block'
- profileDiv.style.border = '1px solid'
- profileDiv.style.marginLeft = '5px'
- profileDiv.style.cursor = 'pointer'
- profileName.innerText = profile.name
- profileName.style.padding = '5px'
- profileDelete.innerText = 'X'
- profileDelete.style.backgroundColor = 'black'
- profileDelete.style.color = 'white'
- profileDelete.style.padding = '2px'
- profileDiv.appendChild(profileName)
- profileDiv.append(profileDelete)
- profileContainer.append(profileDiv)
- profileName.addEventListener('click', () => {
- const textBackup = ttstext.value
- setSettings(profile.setting)
- ttstext.value = textBackup
- dispatchTextChange()
- })
- profileDelete.addEventListener('click', () => {
- removeProfile(profile.name)
- })
- })
- }
-
- function getCurrentSettings() {
- return {
- language: languageInput.value,
- voice: voiceInput.value,
- style: styleInput.value,
- codec: codecInput.value,
- speed: speedInput.value,
- pitch: pitchInput.value,
- splitLength: maxSizeInput.value,
- delimiter: delimiterInput.value
- }
- }
-
- function setSettings(setting) {
- let evt = document.createEvent('HTMLEvents')
- evt.initEvent('change', true, true)
- languageInput.value = setting.language
- languageInput.dispatchEvent(evt)
- voiceInput.value = setting.voice
- voiceInput.dispatchEvent(evt)
- styleInput.value = setting.style
- styleInput.dispatchEvent(evt)
- codecInput.value = setting.codec
- codecInput.dispatchEvent(evt)
- speedInput.value = setting.speed
- speedInput.dispatchEvent(evt)
- pitchInput.value = setting.pitch
- pitchInput.dispatchEvent(evt)
- evt = document.createEvent('HTMLEvents')
- evt.initEvent('input', true, true)
- speedInput.dispatchEvent(evt)
- pitchInput.dispatchEvent(evt)
- maxSizeInput.value = setting.splitLength
- delimiterInput.value = setting.delimiter
- }
-
- const downloadStatus = document.createElement('div')
- const downloadSize = document.createElement('div')
- const buttonArea = document.getElementById('playli').parentElement
- const ttstext = document.getElementById('ttstext')
- const styleSelecter = document.getElementById('voicestyleselect').parentElement
- const languageInput = document.getElementById('languageselect')
- const voiceInput = document.getElementById('voiceselect')
- const styleInput = document.getElementById('voicestyleselect')
- const speedInput = document.getElementById('speed')
- const pitchInput = document.getElementById('pitch')
-
- ttstext.ondrop = async (e) => {
- const files = e.dataTransfer.files
- if (files.length === 1 && files[0].type === 'text/plain') {
- e.preventDefault()
- const file = files[0]
- ttstext.value = await file.text()
- dispatchTextChange()
- }
- }
-
- // reuqired by Firefox
- ttstext.ondragover = function(e){
- e.preventDefault();
- }
-
- // set document
- setTimeout(() => {
- setTimeout(() => {
- const onchange = languageInput.onchange
- languageInput.onchange = (...args) => {
- onchange(...args)
- languageInput.onchange = onchange
- initSpeedAndPitch()
- restoreOptions()
- bindSaveOption()
- ttstext.value += i18n.lang.document1
- ttstext.value += i18n.lang.document2
- ttstext.value += i18n.lang.document3
- }
- }, 0)
- }, 0)
-
- // set download button
- const downloadButton = createButton('donwloadli', 'green', i18n.lang.download)
- downloadButton.addEventListener('click', () => {
- downloadStatus.textContent = i18n.lang.downloading
- enableDownload = true
- streamSize = 0
- document.getElementById('playbtn').click()
- enableDownload = false
- })
- downloadStatus.style.marginTop = '10px'
- buttonArea.appendChild(downloadButton)
- // set collect button
- const collectButton = createButton('collectli', 'red', i18n.lang.collectionOff)
- collectButton.addEventListener('click', () => {
- if(!enableCollect) {
- enableCollect = true
- switchOptionDisplay()
- setButton(collectButton, 'green', i18n.lang.collectionOn)
- } else {
- enableCollect = false
- switchOptionDisplay()
- setButton(collectButton, 'red', i18n.lang.collectionOff)
- if (!fileSize) return
- downloadAndClean()
- }
- })
- collectButton.style.marginRight = '10px'
- buttonArea.appendChild(collectButton)
- // set options
- const optionArea = document.createElement('div')
- const maxSizeInput = document.createElement('input')
- const delimiterInput = document.createElement('input')
- const maxSizeLabel = document.createElement('span')
- const delimiterLabel = document.createElement('span')
- optionArea.id = 'optiondiv'
- optionArea.style.display = 'none'
- maxSizeLabel.textContent = i18n.lang.length
- maxSizeInput.style.width = '50px'
- maxSizeInput.style.margin = '10px'
- maxSizeInput.value = '300'
- delimiterLabel.textContent = i18n.lang.delimiter
- delimiterInput.style.width = '100px'
- delimiterInput.style.margin = '10px'
- delimiterInput.value = ',。?,.?'
- optionArea.appendChild(maxSizeLabel)
- optionArea.appendChild(maxSizeInput)
- optionArea.appendChild(delimiterLabel)
- optionArea.appendChild(delimiterInput)
- buttonArea.parentElement.appendChild(optionArea)
- // set download status
- buttonArea.parentElement.appendChild(downloadStatus)
- buttonArea.parentElement.appendChild(downloadSize)
- // set auto split button
- const autoSplitButton = createButton('autosplit', 'red', i18n.lang.split)
- autoSplitButton.addEventListener('click', () => {
- setButton(autoSplitButton, 'green', i18n.lang.spliting)
- autoProcessing = true
- const maxSize = +maxSizeInput.value
- const delimiters = delimiterInput.value.split('')
- const text = ttstext.value
- const textHandler = text.split('').reduce(
- (obj, char, index, arr) => {
- obj.buffer.push(char)
- if (delimiters.indexOf(char) >= 0) obj.end = index
- if (obj.buffer.length === maxSize) {
- obj.res.push(obj.buffer.splice(0, obj.end + 1 - obj.offset).join(''))
- obj.offset += obj.res[obj.res.length - 1].length
- }
- return obj
- }, {
- buffer: [],
- end: 0,
- offset:0,
- res: []
- })
- textHandler.res.push(textHandler.buffer.join(''))
- ttstext.value = textHandler.res.shift()
- tasks = textHandler.res
- dispatchTextChange()
- downloadButton.click()
- })
- autoSplitButton.style.display = 'none'
- buttonArea.appendChild(autoSplitButton)
- // set preview player
- const previewPlayer = document.createElement('audio')
- previewPlayer.controls = true
- previewPlayer.style.display = 'none'
- previewPlayer.style.width = '100%'
- previewPlayer.style.marginTop = '10px'
- ttstext.after(previewPlayer)
- // set formatting options
- let codecInput
- try {
- const optionSelector = styleSelecter.cloneNode(true)
- const label = optionSelector.querySelector('label')
- label.textContent = i18n.lang.codec
- label.htmlFor = 'voiceformatselect'
- codecInput = optionSelector.querySelector('select')
- codecInput.id = 'voiceformatselect'
- codecInput.innerHTML = ''
- Object.entries(SpeechSDK.SpeechSynthesisOutputFormat).filter(item => !isNaN(item[0]))
- .filter(item => /(^Audio.+Mp3$)|(^Ogg)|(^Webm)/.test(item[1]))
- .forEach(item => {
- const format = item[1]
- const option = document.createElement("option")
- option.value = format
- option.text = format
- if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true
- codecInput.appendChild(option)
- })
- styleSelecter.after(optionSelector)
- const audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
- codecInput.addEventListener('change', () => {
- SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat[codecInput.value]
- if (codecInput.value === 'Audio24Khz96KBitRateMonoMp3') {
- SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3 = audio24Khz96KBitRateMonoMp3
- }
- if (codecInput.value.startsWith('Ogg')) {
- fileExt = '.ogg'
- } else if (codecInput.value.startsWith('Webm')) {
- fileExt = '.webm'
- } else {
- fileExt = '.mp3'
- }
- })
- } catch (e) {
- console.log(e)
- }
- // set save options
- const saveLabel = document.createElement("span")
- saveLabel.innerText = i18n.lang.saveSetting
- saveLabel.style.marginLeft = '5px'
- const saveCheckBox = document.createElement("input")
- saveCheckBox.type = 'checkbox'
- const pitchArea = document.getElementById('pitchlabel').parentElement
- pitchArea.appendChild(saveCheckBox)
- pitchArea.appendChild(saveLabel)
- saveCheckBox.addEventListener('change', () => {
- if (saveCheckBox.checked) {
- enableSaveOptions = true
- saveOptions()
- } else {
- enableSaveOptions = false
- localStorage.removeItem('savedOptions')
- }
- })
- // set profile manage
- const profileArea = document.createElement("div")
- const createProfileInput = document.createElement("input")
- const createProfileButton = document.createElement("button")
- const profileContainer = document.createElement("div")
- createProfileInput.placeholder = i18n.lang.profileName
- createProfileInput.style.width = '120px'
- createProfileButton.innerText = i18n.lang.createProfile
- createProfileButton.style.border = '1px solid'
- createProfileButton.style.marginLeft = '5px'
- createProfileButton.style.padding = '2px'
- profileContainer.style.display = 'inline-block'
- profileArea.appendChild(createProfileInput)
- profileArea.appendChild(createProfileButton)
- profileArea.appendChild(profileContainer)
- profileArea.style.marginTop = '10px'
- previewPlayer.after(profileArea)
- refreshProfile()
- createProfileButton.addEventListener('click', () => {
- if (!createProfileInput.value) return
- const profile = getCurrentSettings()
- createProfile(createProfileInput.value, profile)
- createProfileInput.value = ''
- })
-
- const streamHandler = {
- write: function (dataBuffer) {
- streamSize += dataBuffer.byteLength
- if (streamSize <= 1900800) {
- fileSize += dataBuffer.byteLength
- downloadSize.textContent = `${i18n.lang.received} ${fileSize / 1000} kb`
- if (autoProcessing) downloadSize.textContent = `${i18n.lang.taskQueue} ${tasks.length} ` + downloadSize.textContent
- wavFragments.push(dataBuffer)
- }
- if (streamSize === 1900800) {
- downloadStatus.textContent = i18n.lang.lengthWarning
- if (!enableCollect) {
- fileSize = 0
- wavFragments.length = 0
- } else {
- fileSize -= 1900800
- wavFragments.length -= 1320
- }
- }
- },
- close: function () {
- downloadStatus.textContent = i18n.lang.downloaded
- if (!enableCollect) {
- downloadAndClean()
- return
- }
- if (!autoProcessing) {
- syncAudioToPlayer()
- return
- }
- if (tasks.length) {
- ttstext.value = tasks.shift()
- dispatchTextChange()
- downloadButton.click()
- } else {
- autoProcessing = false
- setButton(autoSplitButton, 'red', i18n.lang.split)
- ttstext.value = i18n.lang.splitedMsg
- syncAudioToPlayer()
- }
- }
- }
-
- const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler)
-
- SpeechSDK.AudioConfig.fromSpeakerOutput = (() => {
- const fromSpeakerOutput = SpeechSDK.AudioConfig.fromSpeakerOutput
- return function (audioDestination) {
- return enableDownload ? audioDestination.onAudioEnd() || SpeechSDK.AudioConfig.fromStreamOutput(outputStream) : fromSpeakerOutput(audioDestination)
- }
- })()
- })();