Azure Speech Download

为微软的文本转语音服务的 demo 页面添加下载按钮

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey,之後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey,之後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts,之後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         Azure Speech Download
// @namespace
// @version      1.0.0
// @description  为微软的文本转语音服务的 demo 页面添加下载按钮
// @author       Puteulanus
// @homepage     https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
// @match        https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
// @icon         https://www.microsoft.com/favicon.ico
// @require      https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @grant        none
// @run-at       document-end
// @namespace https://greasyfork.org/users/909438
// ==/UserScript==

/* globals saveAs */
/* jshint esversion: 6 */
(function() {
    'use strict';

    // Fallback for saveAs, used only when FileSaver.js (loaded through @require)
    // did not define window.saveAs.
    if (!window.saveAs) {
        /**
         * Triggers a browser download of `blob` under the file name `name`.
         * @param {Blob} blob - data to save.
         * @param {string} name - suggested file name.
         */
        window.saveAs = (blob, name) => {
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.style.display = "none";
            a.href = url;
            a.download = name;
            document.body.appendChild(a);
            a.click();
            // Remove the temporary anchor so repeated downloads do not pile up hidden nodes.
            document.body.removeChild(a);
            // Revoking synchronously can cancel the download in some browsers, so the
            // URL is released only after the download has had time to start.
            setTimeout(() => {
                window.URL.revokeObjectURL(url);
            }, 40000);
        };
    }

    // Speech SDK object exposed by the demo page.
    const SpeechSDK = window.SpeechSDK

    // Audio chunks received from the synthesizer, plus running byte counters:
    // fileSize counts the bytes held in wavFragments, streamSize counts the bytes
    // received since the download button was last clicked.
    const wavFragments = []
    let fileSize = 0
    let streamSize = 0

    // Mode flags toggled by the buttons this script adds.
    let enableDownload = false
    let enableCollect = false
    let autoProcessing = false
    let enableSaveOptions = false

    // Text segments still waiting to be synthesized by the auto-split feature.
    let tasks = []

    // Extension appended to the downloaded file name; follows the selected codec.
    let fileExt = '.mp3'
    // UI strings for the two supported languages. `i18n.lang` is pointed at one
    // of the two tables below once the page culture has been inspected.
    const i18n = {
        zh: {
            document1: "\n\n\n收集模式:\n\n打开之后,点击“下载”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载",
            document2: "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果",
            document3: "\n\n\n\n拖拽 txt 文件至此框可加载文本文件",
            download: '下载',
            downloading: '下载中',
            downloaded: '下载完成',
            split: '自动拆分',
            spliting: '拆分中',
            codec: '音频编码',
            saveSetting: '保存设置',
            lengthWarning: '下载长度超过免费限额,请分割文本后使用收集模式',
            splitedMsg: "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件",
            length: '段落长度',
            delimiter: '分隔符',
            collectionOn: '收集模式开',
            collectionOff: '收集模式关',
            received: '已接收',
            taskQueue: '剩余分段',
            profileName: '配置名',
            createProfile: '创建配置',
        },
        eng: {
            document1: "\n\n\nCollection:\n\nCollect audio files converted by clicking \"Download\" button, do the really download when it is turned off",
            document2: "\n\nSplit:\n\nSplit long text into segments close to the \"paragraph length\", which only truncate at \"delimiter\"",
            document3: "\n\n\n\nYou can drag .txt file to this text box to load a text file",
            download: 'Download',
            downloading: 'Downloading',
            downloaded: 'Download complete',
            split: 'Split',
            spliting: 'Spliting',
            codec: 'Codec',
            saveSetting: 'Save settings',
            lengthWarning: 'Text length exceeds the free limit, please split the text and use collection mode',
            splitedMsg: "Split finished\n\nUse the player below to play, or turn off collection mode to download the audio file",
            length: 'Paragraph length',
            delimiter: 'Delimiter',
            collectionOn: 'Collection On',
            collectionOff: 'Collection Off',
            received: 'Received:',
            taskQueue: 'Task queue:',
            profileName: 'Profile name',
            createProfile: 'Create profile',
        }
    }
    // The page culture is read from window.Acom.currentCulture. If the page stops
    // exposing Acom, fall back to English instead of throwing and aborting the
    // whole script. The comparison is case-insensitive.
    const acom = window.Acom
    const lang = String((acom && acom.currentCulture) || '').toLowerCase()
    if (lang === 'zh-cn' || lang === 'zh-tw') {
        i18n.lang = i18n.zh
    } else {
        i18n.lang = i18n.eng
    }

    /**
     * Creates a new toolbar item by deep-cloning the page's existing `#playli` element.
     * @param {string} id - id assigned to the clone.
     * @param {string} color - CSS colour used for the inner button's background and border.
     * @param {string} content - text written into the clone's last `span`.
     * @returns {Node} the configured clone (not yet attached to the document).
     */
    function createButton(id, color, content) {
        const item = document.getElementById('playli').cloneNode(true)
        item.id = id

        const label = item.querySelector('span:last-of-type')
        label.textContent = content

        const innerStyle = item.querySelector('button').style
        innerStyle.backgroundColor = color
        innerStyle.borderColor = color

        return item
    }

    /**
     * Updates the label and colour of a toolbar item produced by createButton.
     * @param {Element} button - the toolbar item to update.
     * @param {string} color - CSS colour for the inner button's background and border.
     * @param {string} content - new label text.
     */
    function setButton(button, color, content) {
        const label = button.querySelector('span:last-of-type')
        label.textContent = content

        const innerStyle = button.querySelector('button').style
        innerStyle.backgroundColor = color
        innerStyle.borderColor = color
    }

    /**
     * Concatenates every collected audio fragment into one file, resets the
     * collection state (fileSize, streamSize, wavFragments) and hands the result
     * to saveAs under a timestamp-based file name ending in the current fileExt.
     */
    function downloadAndClean() {
        const merged = new window.Uint8Array(fileSize)
        fileSize = 0
        streamSize = 0
        let offset = 0
        for (const fragment of wavFragments) {
            merged.set(new window.Uint8Array(fragment), offset)
            offset += fragment.byteLength
        }
        wavFragments.length = 0
        // "YYYY-MM-DD HH_MM_SS": every colon must go, because ':' is not allowed in
        // Windows file names (a string pattern would replace only the first one).
        const timestamp = new Date().toISOString().split('.')[0].replace('T', ' ').replace(/:/g, '_')
        saveAs(new Blob([merged]), timestamp + fileExt)
    }

    /**
     * Shows the auto-split button, the split options and the preview player while
     * collection mode is on, and hides all three while it is off.
     */
    function switchOptionDisplay() {
        const blockDisplay = enableCollect ? 'block' : 'none'
        const playerDisplay = enableCollect ? 'inline-block' : 'none'
        autoSplitButton.style.display = blockDisplay
        optionArea.style.display = blockDisplay
        previewPlayer.style.display = playerDisplay
    }

    /**
     * Loads everything collected so far into the preview player without clearing
     * the collection. The blob is tagged with the MIME type matching the current
     * file extension, and the blob URL previously assigned to the player is
     * revoked so repeated calls do not leak object URLs.
     */
    function syncAudioToPlayer() {
        const merged = new window.Uint8Array(fileSize)
        let offset = 0
        for (const fragment of wavFragments) {
            merged.set(new window.Uint8Array(fragment), offset)
            offset += fragment.byteLength
        }
        const mimeByExt = {'.mp3': 'audio/mpeg', '.ogg': 'audio/ogg', '.webm': 'audio/webm'}
        const audioBlob = new Blob([merged], {type : mimeByExt[fileExt] || 'audio/ogg'})
        const previousUrl = previewPlayer.src
        previewPlayer.src = URL.createObjectURL(audioBlob)
        // Only URLs created here are blob: URLs; anything else is left alone.
        if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
            URL.revokeObjectURL(previousUrl)
        }
    }

    /**
     * Fires a bubbling, cancelable `input` event on the text box so that listeners
     * on the page see a value that was changed programmatically.
     * Uses the Event constructor instead of the deprecated createEvent/initEvent pair.
     */
    function dispatchTextChange() {
        ttstext.dispatchEvent(new Event('input', {bubbles: true, cancelable: true}))
    }

    /**
     * Writes the current settings to localStorage under `savedOptions`.
     * Does nothing unless the "save settings" option is enabled.
     */
    function saveOptions() {
        if (enableSaveOptions) {
            const snapshot = JSON.stringify(getCurrentSettings())
            localStorage.setItem('savedOptions', snapshot)
        }
    }

    /**
     * Applies the settings stored under `savedOptions`, if any, and turns the
     * "save settings" option back on.
     * An entry that is not valid JSON, or does not hold an object, is removed and
     * ignored, so that a corrupt entry cannot abort the rest of the initialisation.
     */
    function restoreOptions() {
        const optionsJSON = localStorage.getItem('savedOptions')
        if (!optionsJSON) return
        let options = null
        try {
            options = JSON.parse(optionsJSON)
        } catch (e) {
            console.warn('Azure Speech Download: saved options are not valid JSON, discarding them', e)
        }
        if (!options || typeof options !== 'object') {
            localStorage.removeItem('savedOptions')
            return
        }
        setSettings(options)
        saveCheckBox.checked = true
        enableSaveOptions = true
    }

    /**
     * Registers saveOptions as a `change` listener on every settings control, so
     * that any change is persisted (when saving is enabled).
     */
    function bindSaveOption() {
        const watchedInputs = [
            languageInput,
            voiceInput,
            styleInput,
            codecInput,
            speedInput,
            pitchInput,
            maxSizeInput,
            delimiterInput,
        ]
        for (const input of watchedInputs) {
            input.addEventListener('change', saveOptions)
        }
    }

    /**
     * Resets the speed and pitch sliders to '0' and fires a bubbling, cancelable
     * `input` event on each one after its value has been set.
     * Uses the Event constructor instead of the deprecated createEvent/initEvent pair.
     */
    function initSpeedAndPitch() {
        for (const slider of [speedInput, pitchInput]) {
            slider.value = '0'
            slider.dispatchEvent(new Event('input', {bubbles: true, cancelable: true}))
        }
    }

    /**
     * Stores `profile` under `name` in the `savedProfiles` list, replacing any
     * existing profile with the same name, then re-renders the profile list.
     * A missing or unreadable `savedProfiles` entry is treated as an empty list.
     * @param {string} name - profile name; acts as the unique key.
     * @param {Object} profile - settings object, as returned by getCurrentSettings.
     */
    function createProfile(name, profile) {
        let profiles = null
        try {
            profiles = JSON.parse(localStorage.getItem('savedProfiles'))
        } catch (e) {
            // Unreadable data is overwritten below with a fresh list.
            profiles = null
        }
        if (!Array.isArray(profiles)) profiles = []
        const others = profiles.filter(saved => saved.name !== name)
        localStorage.setItem('savedProfiles', JSON.stringify([...others, {
            name,
            setting: profile
        }]))
        refreshProfile()
    }

    /**
     * Removes the profile called `name` from the `savedProfiles` list and
     * re-renders the profile list.
     * A missing or unreadable `savedProfiles` entry is treated as an empty list.
     * @param {string} name - name of the profile to delete.
     */
    function removeProfile(name) {
        let profiles = null
        try {
            profiles = JSON.parse(localStorage.getItem('savedProfiles'))
        } catch (e) {
            // Unreadable data is overwritten below with a fresh list.
            profiles = null
        }
        if (!Array.isArray(profiles)) profiles = []
        localStorage.setItem('savedProfiles', JSON.stringify(profiles.filter(saved => saved.name !== name)))
        refreshProfile()
    }

    /**
     * Rebuilds the profile list UI from the `savedProfiles` entry in localStorage.
     * When the entry is absent an empty list is written back. Each profile is
     * rendered as a bordered box with a clickable name (applies the profile while
     * keeping the current text) and an "X" (deletes the profile).
     */
    function refreshProfile() {
        // Builds and attaches the box for one stored profile.
        function renderProfile(profile) {
            const box = document.createElement("div")
            const nameTag = document.createElement("span")
            const deleteTag = document.createElement("span")

            Object.assign(box.style, {
                display: 'inline-block',
                border: '1px solid',
                marginLeft: '5px',
                cursor: 'pointer',
            })

            nameTag.innerText = profile.name
            nameTag.style.padding = '5px'

            deleteTag.innerText = 'X'
            Object.assign(deleteTag.style, {
                backgroundColor: 'black',
                color: 'white',
                padding: '2px',
            })

            box.appendChild(nameTag)
            box.append(deleteTag)
            profileContainer.append(box)

            nameTag.addEventListener('click', () => {
                // Applying the settings may change the text box, so put the text back afterwards.
                const keptText = ttstext.value
                setSettings(profile.setting)
                ttstext.value = keptText
                dispatchTextChange()
            })
            deleteTag.addEventListener('click', () => {
                removeProfile(profile.name)
            })
        }

        const stored = localStorage.getItem('savedProfiles')
        let profiles
        if (stored) {
            profiles = JSON.parse(stored)
        } else {
            profiles = []
            localStorage.setItem('savedProfiles', JSON.stringify(profiles))
        }
        profileContainer.innerHTML = ''
        profiles.forEach(profile => renderProfile(profile))
    }

    /**
     * Reads the current value of every settings control.
     * @returns {{language, voice, style, codec, speed, pitch, splitLength, delimiter}}
     *   a snapshot of the control values.
     */
    function getCurrentSettings() {
        const snapshot = {}
        snapshot.language = languageInput.value
        snapshot.voice = voiceInput.value
        snapshot.style = styleInput.value
        snapshot.codec = codecInput.value
        snapshot.speed = speedInput.value
        snapshot.pitch = pitchInput.value
        snapshot.splitLength = maxSizeInput.value
        snapshot.delimiter = delimiterInput.value
        return snapshot
    }

    /**
     * Applies a settings object (as produced by getCurrentSettings) to the controls.
     * Each select/slider gets its value assigned and then a `change` event fired
     * before the next control is touched. The speed and pitch sliders additionally
     * receive an `input` event. The split length and delimiter fields are only
     * assigned, with no event.
     * Uses the Event constructor instead of the deprecated createEvent/initEvent pair.
     * @param {Object} setting - settings object with the keys written by getCurrentSettings.
     */
    function setSettings(setting) {
        const fire = (target, type) => {
            target.dispatchEvent(new Event(type, {bubbles: true, cancelable: true}))
        }
        const assignments = [
            [languageInput, setting.language],
            [voiceInput, setting.voice],
            [styleInput, setting.style],
            [codecInput, setting.codec],
            [speedInput, setting.speed],
            [pitchInput, setting.pitch],
        ]
        // Order matters: each control is set and notified before the next one is assigned.
        for (const [input, value] of assignments) {
            input.value = value
            fire(input, 'change')
        }
        fire(speedInput, 'input')
        fire(pitchInput, 'input')
        maxSizeInput.value = setting.splitLength
        delimiterInput.value = setting.delimiter
    }

    // Status lines created by this script (attached to the page further down).
    const downloadStatus = document.createElement('div')
    const downloadSize = document.createElement('div')

    // Controls that already exist on the demo page.
    const ttstext = document.getElementById('ttstext')
    const languageInput = document.getElementById('languageselect')
    const voiceInput = document.getElementById('voiceselect')
    const styleInput = document.getElementById('voicestyleselect')
    const speedInput = document.getElementById('speed')
    const pitchInput = document.getElementById('pitch')

    // Containers derived from the controls above.
    const buttonArea = document.getElementById('playli').parentElement
    const styleSelecter = styleInput.parentElement

    // Dropping exactly one plain-text file onto the text box loads its content.
    // Any other drop is left to the browser's default handling.
    ttstext.ondrop = async (event) => {
        const dropped = event.dataTransfer.files
        if (dropped.length !== 1) return
        const file = dropped[0]
        if (file.type !== 'text/plain') return
        event.preventDefault()
        ttstext.value = await file.text()
        dispatchTextChange()
    }

    // required by Firefox
    ttstext.ondragover = (event) => {
        event.preventDefault()
    }

    // set document
    // Installs a one-shot wrapper around languageInput.onchange. The wrapper is
    // installed from inside two nested zero-delay timers.
    // NOTE(review): presumably the delay lets the page's own script assign
    // languageInput.onchange first — confirm. If onchange is still unset when the
    // wrapper runs, the call below throws.
    setTimeout(() => {
        setTimeout(() => {
            // Handler currently assigned to the language selector.
            const onchange = languageInput.onchange
            languageInput.onchange = (...args) => {
                // Run the original handler, then restore it so this wrapper runs only once.
                onchange(...args)
                languageInput.onchange = onchange
                // One-time initialisation: reset the sliders, re-apply saved options,
                // start persisting changes, and append the usage notes to the text box.
                initSpeedAndPitch()
                restoreOptions()
                bindSaveOption()
                ttstext.value += i18n.lang.document1
                ttstext.value += i18n.lang.document2
                ttstext.value += i18n.lang.document3
            }
        }, 0)
    }, 0)

    // set download button
    const downloadButton = createButton('donwloadli', 'green', i18n.lang.download)
    downloadStatus.style.marginTop = '10px'
    buttonArea.appendChild(downloadButton)
    downloadButton.addEventListener('click', function onDownloadClick() {
        streamSize = 0
        downloadStatus.textContent = i18n.lang.downloading
        // enableDownload is raised only for the duration of the simulated click on
        // the page's play button, and lowered again right after it returns.
        enableDownload = true
        document.getElementById('playbtn').click()
        enableDownload = false
    })
    // set collect button
    const collectButton = createButton('collectli', 'red', i18n.lang.collectionOff)
    collectButton.style.marginRight = '10px'
    collectButton.addEventListener('click', () => {
        // Toggle collection mode and show or hide the collection-only controls.
        enableCollect = !enableCollect
        switchOptionDisplay()
        if (enableCollect) {
            setButton(collectButton, 'green', i18n.lang.collectionOn)
            return
        }
        setButton(collectButton, 'red', i18n.lang.collectionOff)
        // Leaving collection mode downloads whatever has been collected, if anything.
        if (fileSize) downloadAndClean()
    })
    buttonArea.appendChild(collectButton)
    // set options
    // Container for the split options; hidden until collection mode is switched on.
    const optionArea = document.createElement('div')
    optionArea.id = 'optiondiv'
    optionArea.style.display = 'none'

    // Paragraph length field.
    const maxSizeLabel = document.createElement('span')
    maxSizeLabel.textContent = i18n.lang.length
    const maxSizeInput = document.createElement('input')
    Object.assign(maxSizeInput.style, {width: '50px', margin: '10px'})
    maxSizeInput.value = '300'

    // Delimiter field; every character of its value is treated as a delimiter.
    const delimiterLabel = document.createElement('span')
    delimiterLabel.textContent = i18n.lang.delimiter
    const delimiterInput = document.createElement('input')
    Object.assign(delimiterInput.style, {width: '100px', margin: '10px'})
    delimiterInput.value = ',。?,.?'

    for (const child of [maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput]) {
        optionArea.appendChild(child)
    }

    // set download status
    const statusHost = buttonArea.parentElement
    statusHost.appendChild(optionArea)
    statusHost.appendChild(downloadStatus)
    statusHost.appendChild(downloadSize)
    /**
     * Splits `text` into consecutive segments of at most `maxSize` characters.
     * Whenever at least `maxSize` characters remain, the next segment ends right
     * after the last delimiter found within the next `maxSize` characters. If that
     * window holds no delimiter, the segment is cut at exactly `maxSize` characters
     * so that splitting keeps making progress. Whatever is left at the end becomes
     * the last segment. Empty segments are never produced.
     * @param {string} text - text to split.
     * @param {number} maxSize - maximum segment length; values below 1 or NaN disable splitting.
     * @param {string[]} delimiters - single characters at which a segment may end.
     * @returns {string[]} the segments, in order; empty when `text` is empty.
     */
    function splitTextByDelimiters(text, maxSize, delimiters) {
        const limit = maxSize >= 1 ? Math.floor(maxSize) : Infinity
        const delimiterSet = new Set(delimiters)
        const segments = []
        let start = 0
        while (text.length - start >= limit) {
            // Default to a hard cut; shortened below if the window holds a delimiter.
            let cut = limit
            for (let i = start + limit - 1; i >= start; i--) {
                if (delimiterSet.has(text[i])) {
                    cut = i - start + 1
                    break
                }
            }
            segments.push(text.slice(start, start + cut))
            start += cut
        }
        if (start < text.length) segments.push(text.slice(start))
        return segments
    }

    // set auto split button
    const autoSplitButton = createButton('autosplit', 'red', i18n.lang.split)
    autoSplitButton.addEventListener('click', () => {
        const segments = splitTextByDelimiters(ttstext.value, +maxSizeInput.value, delimiterInput.value.split(''))
        // Nothing to synthesize: leave the button and the processing state untouched.
        if (!segments.length) return
        setButton(autoSplitButton, 'green', i18n.lang.spliting)
        autoProcessing = true
        // The first segment is synthesized now; the rest are queued and picked up
        // one by one when the output stream reports that it was closed.
        ttstext.value = segments.shift()
        tasks = segments
        dispatchTextChange()
        downloadButton.click()
    })
    autoSplitButton.style.display = 'none'
    buttonArea.appendChild(autoSplitButton)
    // set preview player
    // Audio element inserted right after the text box; hidden until collection mode is on.
    const previewPlayer = document.createElement('audio')
    previewPlayer.controls = true
    Object.assign(previewPlayer.style, {
        display: 'none',
        width: '100%',
        marginTop: '10px',
    })
    ttstext.after(previewPlayer)
    // set formatting options
    // Builds the codec selector by cloning the voice-style selector row and
    // repurposing its label and <select>. Any failure in here is logged and
    // otherwise ignored; if it happens before the assignment below, codecInput
    // stays undefined.
    let codecInput
    try {
        const optionSelector = styleSelecter.cloneNode(true)
        const label = optionSelector.querySelector('label')
        label.textContent = i18n.lang.codec
        label.htmlFor = 'voiceformatselect'
        codecInput = optionSelector.querySelector('select')
        codecInput.id = 'voiceformatselect'
        // Drop the options copied from the style selector.
        codecInput.innerHTML = ''
        // Keep entries whose key is numeric and whose value is a format name that
        // starts with "Audio" and ends with "Mp3", or starts with "Ogg" or "Webm".
        // NOTE(review): presumably the numeric keys are the reverse (number -> name)
        // half of the SDK enum — confirm.
        Object.entries(SpeechSDK.SpeechSynthesisOutputFormat).filter(item => !isNaN(item[0]))
            .filter(item => /(^Audio.+Mp3$)|(^Ogg)|(^Webm)/.test(item[1]))
            .forEach(item => {
            const format = item[1]
            const option = document.createElement("option")
            option.value = format
            option.text = format
            // Preselect the format whose enum member is overridden below.
            if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true
            codecInput.appendChild(option)
        })
        styleSelecter.after(optionSelector)
        // Remember the genuine value of the member before it gets overwritten.
        const audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
        codecInput.addEventListener('change', () => {
            // Overwrite the Audio24Khz96KBitRateMonoMp3 member with the value of the
            // selected format.
            // NOTE(review): presumably the page always requests this member, so the
            // override redirects synthesis to the chosen codec — confirm.
            SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat[codecInput.value]
            // Selecting the member itself must restore its genuine value, because the
            // lookup above would otherwise return an earlier override.
            if (codecInput.value === 'Audio24Khz96KBitRateMonoMp3') {
                SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3 = audio24Khz96KBitRateMonoMp3
            }
            // Keep the download file extension in step with the codec family.
            if (codecInput.value.startsWith('Ogg')) {
                fileExt = '.ogg'
            } else if (codecInput.value.startsWith('Webm')) {
                fileExt = '.webm'
            } else {
                fileExt = '.mp3'
            }
        })
    } catch (e) {
        console.log(e)
    }
    // set save options
    // Checkbox plus label appended next to the pitch control.
    const saveCheckBox = document.createElement("input")
    saveCheckBox.type = 'checkbox'
    const saveLabel = document.createElement("span")
    saveLabel.innerText = i18n.lang.saveSetting
    saveLabel.style.marginLeft = '5px'

    const pitchArea = document.getElementById('pitchlabel').parentElement
    pitchArea.appendChild(saveCheckBox)
    pitchArea.appendChild(saveLabel)

    saveCheckBox.addEventListener('change', () => {
        // Checking the box saves the current settings at once; unchecking it
        // deletes the stored settings.
        enableSaveOptions = saveCheckBox.checked
        if (enableSaveOptions) {
            saveOptions()
        } else {
            localStorage.removeItem('savedOptions')
        }
    })
    // set profile manage
    // Row placed after the preview player: name field, "create" button and the
    // list of saved profiles.
    const profileArea = document.createElement("div")
    profileArea.style.marginTop = '10px'

    const createProfileInput = document.createElement("input")
    createProfileInput.placeholder = i18n.lang.profileName
    createProfileInput.style.width = '120px'

    const createProfileButton = document.createElement("button")
    createProfileButton.innerText = i18n.lang.createProfile
    Object.assign(createProfileButton.style, {
        border: '1px solid',
        marginLeft: '5px',
        padding: '2px',
    })

    const profileContainer = document.createElement("div")
    profileContainer.style.display = 'inline-block'

    for (const child of [createProfileInput, createProfileButton, profileContainer]) {
        profileArea.appendChild(child)
    }
    previewPlayer.after(profileArea)
    refreshProfile()

    createProfileButton.addEventListener('click', () => {
        const profileName = createProfileInput.value
        // A profile needs a name; ignore the click otherwise.
        if (!profileName) return
        createProfile(profileName, getCurrentSettings())
        createProfileInput.value = ''
    })

    // Callbacks handed to SpeechSDK.PushAudioOutputStream.create: `write` receives
    // each audio chunk, `close` is called when the stream is closed.
    const streamHandler = {
        // Accumulates incoming chunks until the stream reaches 1900800 bytes.
        write: function (dataBuffer) {
            streamSize += dataBuffer.byteLength
            // Chunks are kept (and the progress display updated) only while the
            // running stream size is within the cap.
            if (streamSize <= 1900800) {
                fileSize += dataBuffer.byteLength
                downloadSize.textContent = `${i18n.lang.received} ${fileSize / 1000} kb`
                // During auto-split, prefix the display with the number of queued segments.
                if (autoProcessing) downloadSize.textContent = `${i18n.lang.taskQueue} ${tasks.length} ` + downloadSize.textContent
                wavFragments.push(dataBuffer)
            }
            // Exactly at the cap: show the length warning and discard this stream's data.
            // NOTE(review): the exact-equality test only fires if the running total lands
            // precisely on 1900800; 1900800 = 1320 * 1440, so this presumably assumes
            // 1440-byte chunks — confirm for codecs other than the default.
            if (streamSize === 1900800) {
                downloadStatus.textContent = i18n.lang.lengthWarning
                if (!enableCollect) {
                    // Outside collection mode everything received so far is dropped.
                    fileSize = 0
                    wavFragments.length = 0
                } else {
                    // In collection mode only this stream's contribution is removed:
                    // 1900800 bytes and the last 1320 fragments.
                    fileSize -= 1900800
                    wavFragments.length -= 1320
                }
            }
        },
        // Decides what happens once a stream has finished.
        close: function () {
            downloadStatus.textContent = i18n.lang.downloaded
            // Plain download: save immediately and reset the collection state.
            if (!enableCollect) {
                downloadAndClean()
                return
            }
            // Collection mode without auto-split: just refresh the preview player.
            if (!autoProcessing) {
                syncAudioToPlayer()
                return
            }
            // Auto-split: synthesize the next queued segment, or finish up when the
            // queue is empty.
            if (tasks.length) {
                ttstext.value = tasks.shift()
                dispatchTextChange()
                downloadButton.click()
            } else {
                autoProcessing = false
                setButton(autoSplitButton, 'red', i18n.lang.split)
                ttstext.value = i18n.lang.splitedMsg
                syncAudioToPlayer()
            }
        }
    }

    // Push stream that forwards synthesized audio to streamHandler.
    const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler)

    // Replace AudioConfig.fromSpeakerOutput: while a download is being started the
    // audio is routed into outputStream instead of the speakers; otherwise the
    // original implementation is used.
    const originalFromSpeakerOutput = SpeechSDK.AudioConfig.fromSpeakerOutput
    SpeechSDK.AudioConfig.fromSpeakerOutput = function (audioDestination) {
        if (!enableDownload) {
            return originalFromSpeakerOutput(audioDestination)
        }
        // onAudioEnd() is invoked on the speaker destination first; its result is
        // returned only if truthy, otherwise the stream-backed config is returned.
        const endResult = audioDestination.onAudioEnd()
        return endResult || SpeechSDK.AudioConfig.fromStreamOutput(outputStream)
    }
})();