Azure Speech Download

为微软的文本转语音服务的 demo 页面添加下载按钮

目前為 2022-09-14 提交的版本,檢視 最新版本

  1. // ==UserScript==
  2. // @name Azure Speech Download
  3. // @namespace
  4. // @version 0.8
  5. // @description 为微软的文本转语音服务的 demo 页面添加下载按钮
  6. // @author Puteulanus
  7. // @homepage https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
  8. // @match https://azure.microsoft.com/*/services/cognitive-services/text-to-speech/*
  9. // @icon https://www.microsoft.com/favicon.ico
  10. // @require https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
  11. // @grant none
  12. // @run-at document-end
  13. // @namespace https://greasyfork.org/users/909438
  14. // ==/UserScript==
  15.  
  16. /* globals saveAs */
  17. /* jshint esversion: 6 */
  18. (function() {
  19. 'use strict';
  20.  
  // Fallback for saveAs in case the FileSaver.js @require did not load:
  // saves `blob` under the file name `name` through a temporary hidden anchor.
  if (!window.saveAs) {
    window.saveAs = (blob, name) => {
      const url = window.URL.createObjectURL(blob);
      const a = document.createElement('a');
      a.style.display = 'none';
      a.href = url;
      a.download = name;
      document.body.appendChild(a);
      a.click();
      // The anchor is only needed for the click; do not leave it in the page.
      a.remove();
      // Revoking right after click() can cancel the download in browsers that
      // start it asynchronously, so release the URL later (FileSaver.js itself
      // waits 40 seconds).
      setTimeout(() => window.URL.revokeObjectURL(url), 40000);
    };
  }
  35.  
  // Speech SDK instance exposed by the demo page.
  const SpeechSDK = window.SpeechSDK;

  // Mode flags toggled by the buttons below.
  let enableDownload = false; // route the next synthesis into the capture stream
  let enableCollect = false; // keep captured audio until collect mode is turned off
  let autoProcessing = false; // an auto-split run is in progress

  // Captured audio and its bookkeeping.
  let wavFragments = []; // raw audio chunks in arrival order
  let fileSize = 0; // total bytes currently held in wavFragments
  let streamSize = 0; // bytes received for the synthesis in progress

  // Text segments still waiting to be synthesized during an auto-split run.
  let tasks = [];

  // Extension used for the saved file; follows the selected output format.
  let fileExt = '.mp3';
  45.  
  /**
   * Create a toolbar button by deep-cloning the page's `#playli` element.
   *
   * @param {string} id - id assigned to the clone.
   * @param {string} color - CSS color used for the inner button's background and border.
   * @param {string} content - text written into the clone's last <span>.
   * @returns {Element} the configured clone (not yet attached to the page).
   */
  function createButton(id, color, content) {
    const clone = document.getElementById('playli').cloneNode(true);
    clone.id = id;

    const caption = clone.querySelector('span:last-of-type');
    caption.textContent = content;

    const { style } = clone.querySelector('button');
    style.backgroundColor = color;
    style.borderColor = color;

    return clone;
  }
  54.  
  /**
   * Update the caption and color of a button made by createButton.
   *
   * @param {Element} button - wrapper element containing a <span> caption and a <button>.
   * @param {string} color - CSS color for the inner button's background and border.
   * @param {string} content - new caption text.
   */
  function setButton(button, color, content) {
    const caption = button.querySelector('span:last-of-type');
    const { style } = button.querySelector('button');

    caption.textContent = content;
    style.backgroundColor = color;
    style.borderColor = color;
  }
  60.  
  /**
   * Merge every collected fragment into one file, reset the collection state
   * (fileSize, streamSize, wavFragments) and hand the result to saveAs.
   *
   * The file name is the current UTC time as `YYYY-MM-DD HH_MM_SS` followed
   * by the current file extension.
   */
  function downloadAndClean() {
    const merged = new window.Uint8Array(fileSize);
    let offset = 0;
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset);
      offset += fragment.byteLength;
    }

    fileSize = 0;
    streamSize = 0;
    wavFragments.length = 0;

    // Replace every colon: ':' is not allowed in Windows file names, and a
    // plain string pattern in replace() only substitutes the first match.
    const stamp = new Date()
      .toISOString()
      .split('.')[0]
      .replace('T', ' ')
      .replace(/:/g, '_');
    saveAs(new Blob([merged]), `${stamp}${fileExt}`);
  }
  72.  
  /**
   * Show the collect-mode controls (auto-split button, option inputs and
   * preview player) while collect mode is on, and hide them otherwise.
   */
  function switchOptionDisplay() {
    const visible = Boolean(enableCollect);
    autoSplitButton.style.display = visible ? 'block' : 'none';
    optionArea.style.display = visible ? 'block' : 'none';
    previewPlayer.style.display = visible ? 'inline-block' : 'none';
  }
  84.  
  /**
   * Load everything collected so far into the preview player without
   * clearing the collection state.
   */
  function syncAudioToPlayer() {
    const merged = new window.Uint8Array(fileSize);
    let offset = 0;
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset);
      offset += fragment.byteLength;
    }

    // Label the blob with the container that matches the selected output
    // format instead of always claiming Ogg.
    const mimeByExt = { '.mp3': 'audio/mpeg', '.ogg': 'audio/ogg', '.webm': 'audio/webm' };
    const audioBlob = new Blob([merged], { type: mimeByExt[fileExt] || 'audio/ogg' });

    const staleUrl = previewPlayer.src;
    previewPlayer.src = URL.createObjectURL(audioBlob);
    // Each call creates a new object URL; release the one it replaces so the
    // previous copy of the audio can be freed.
    if (typeof staleUrl === 'string' && staleUrl.startsWith('blob:')) {
      URL.revokeObjectURL(staleUrl);
    }
  }
  94.  
  /**
   * Fire a bubbling, cancelable `input` event on the text box so listeners
   * attached to it see a value that was set from script.
   */
  function dispatchTextChange() {
    // The Event constructor replaces the deprecated createEvent/initEvent pair.
    ttstext.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
  }
  100.  
  // Existing page elements the script hooks into.
  const ttstext = document.getElementById('ttstext'); // text input box
  const buttonArea = document.getElementById('playli').parentElement; // container of the play button
  const styleSelecter = document.getElementById('voicestyleselect').parentElement; // wrapper of the voice style <select>

  // New elements that display progress; attached to the page further down.
  const downloadStatus = document.createElement('div');
  const downloadSize = document.createElement('div');
  106.  
  // Dropping exactly one plain-text file on the text box loads its content.
  // Any other drop is left to the browser's default handling.
  ttstext.ondrop = async (event) => {
    const { files } = event.dataTransfer;
    const isSingleTextFile = files.length === 1 && files[0].type === 'text/plain';
    if (!isSingleTextFile) {
      return;
    }
    event.preventDefault();
    const [droppedFile] = files;
    ttstext.value = await droppedFile.text();
    dispatchTextChange();
  };

  // required by Firefox
  ttstext.ondragover = (event) => {
    event.preventDefault();
  };
  121.  
  // set document
  // Wrap the language selector's onchange handler once: the next time it
  // fires, run the page's handler, append the usage notes for this script to
  // the text box, then restore the page's handler.
  // NOTE(review): the two nested zero-delay timers presumably wait for the
  // page's own script to install its handler first - confirm.
  setTimeout(() => {
    setTimeout(() => {
      const languageselect = document.getElementById('languageselect');
      if (!languageselect) return;
      const onchange = languageselect.onchange;
      languageselect.onchange = (...args) => {
        // Call the page's handler with the element as `this`, exactly as the
        // browser would; a bare call would leave `this` undefined.
        if (typeof onchange === 'function') onchange.apply(languageselect, args);
        ttstext.value += "\n\n\n收集模式:\n\n打开之后,点击\“下载\”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载";
        ttstext.value += "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果";
        ttstext.value += "\n\n\n\n拖拽 txt 文件至此框可加载文本文件";
        languageselect.onchange = onchange;
      };
    }, 0);
  }, 0);
  136.  
  // set download button
  // A click flags the next synthesis as a download, then triggers the page's
  // own play button so the page starts the synthesis.
  const downloadButton = createButton('donwloadli', 'green', '下载');
  downloadButton.addEventListener('click', () => {
    downloadStatus.textContent = '下载中';
    streamSize = 0;
    enableDownload = true;
    try {
      document.getElementById('playbtn').click();
    } finally {
      // Always clear the flag: if the click throws, later plain "play"
      // clicks must not be redirected into the download stream.
      enableDownload = false;
    }
  });
  // NOTE(review): the margin is set on the status text, while the collect
  // button sets it on the button itself - confirm which element was intended.
  downloadStatus.style.marginRight = '10px';
  buttonArea.appendChild(downloadButton);
  // set collect button
  // Toggles collect mode. Turning it off downloads whatever has been
  // collected so far, if anything.
  const collectButton = createButton('collectli', 'red', '收集模式关');
  collectButton.addEventListener('click', () => {
    enableCollect = !enableCollect;
    switchOptionDisplay();

    if (enableCollect) {
      setButton(collectButton, 'green', '收集模式开');
      return;
    }

    setButton(collectButton, 'red', '收集模式关');
    if (fileSize) {
      downloadAndClean();
    }
  });
  collectButton.style.marginRight = '10px';
  buttonArea.appendChild(collectButton);
  // set options
  // Hidden container holding the auto-split settings: segment length and
  // the characters at which a segment may end.
  const optionArea = document.createElement('div');
  optionArea.id = 'optiondiv';
  optionArea.style.display = 'none';

  const maxSizeLabel = document.createElement('span');
  maxSizeLabel.textContent = '段落长度';
  const maxSizeInput = document.createElement('input');
  Object.assign(maxSizeInput.style, { width: '50px', margin: '10px' });
  maxSizeInput.value = '300';

  const delimiterLabel = document.createElement('span');
  delimiterLabel.textContent = '分隔符';
  const delimiterInput = document.createElement('input');
  Object.assign(delimiterInput.style, { width: '100px', margin: '10px' });
  delimiterInput.value = ',。?,.?';

  [maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput].forEach((node) => {
    optionArea.appendChild(node);
  });

  // The option area goes first, followed by the download status lines.
  const statusHost = buttonArea.parentElement;
  statusHost.appendChild(optionArea);
  // set download status
  statusHost.appendChild(downloadStatus);
  statusHost.appendChild(downloadSize);
  /**
   * Split `text` into segments of at most `maxSize` characters.
   *
   * When the pending segment reaches `maxSize` characters it is cut just
   * after the last delimiter it contains. If it contains no delimiter, it is
   * cut at `maxSize` so splitting can continue. Empty segments are never
   * produced, except a single '' for empty input.
   *
   * @param {string} text - text to split.
   * @param {number} maxSize - maximum segment length; the text is returned
   *   unsplit when this is not a number >= 1.
   * @param {string[]} delimiters - characters after which a cut is allowed.
   * @returns {string[]} the segments in order; always at least one entry.
   */
  function splitTextAtDelimiters(text, maxSize, delimiters) {
    const canSplit = Number.isFinite(maxSize) && maxSize >= 1;
    const delimiterSet = new Set(delimiters);
    const segments = [];
    const buffer = [];
    let lastDelimiterAt = -1; // index inside buffer, -1 when it holds none

    // for...of walks code points, so a cut never lands inside a surrogate pair.
    for (const char of text) {
      buffer.push(char);
      if (delimiterSet.has(char)) lastDelimiterAt = buffer.length - 1;
      if (canSplit && buffer.length >= maxSize) {
        const cutLength = lastDelimiterAt >= 0 ? lastDelimiterAt + 1 : buffer.length;
        segments.push(buffer.splice(0, cutLength).join(''));
        // Whatever remains came after the last delimiter.
        lastDelimiterAt = -1;
      }
    }
    if (buffer.length > 0) segments.push(buffer.join(''));
    return segments.length > 0 ? segments : [''];
  }

  // set auto split button
  // A click splits the text box content, puts the first segment back into
  // the text box, queues the rest in `tasks`, and starts the first download.
  const autoSplitButton = createButton('autosplit', 'red', '自动拆分');
  autoSplitButton.addEventListener('click', () => {
    setButton(autoSplitButton, 'green', '拆分中');
    autoProcessing = true;
    const segments = splitTextAtDelimiters(
      ttstext.value,
      Number(maxSizeInput.value),
      [...delimiterInput.value],
    );
    ttstext.value = segments.shift();
    tasks = segments;
    dispatchTextChange();
    downloadButton.click();
  });
  autoSplitButton.style.display = 'none';
  buttonArea.appendChild(autoSplitButton);
  // set preview player
  // Hidden <audio> element inserted right after the text box; it is shown
  // only while collect mode is on.
  const previewPlayer = document.createElement('audio');
  previewPlayer.controls = true;
  Object.assign(previewPlayer.style, {
    display: 'none',
    width: '100%',
    marginTop: '10px',
  });
  ttstext.after(previewPlayer);
  // set formatting options
  // Adds an "audio encoding" selector, cloned from the voice style selector,
  // listing the SDK's Mp3, Ogg and Webm output formats. The selected format
  // is applied by overwriting the Audio24Khz96KBitRateMonoMp3 enum slot.
  // NOTE(review): presumably that is the format the page itself requests -
  // confirm.
  // Best effort: any failure here is logged and the rest of the script
  // keeps working.
  try {
    const formats = SpeechSDK.SpeechSynthesisOutputFormat;
    const optionSelector = styleSelecter.cloneNode(true);
    const label = optionSelector.querySelector('label');
    label.textContent = '音频编码';
    label.htmlFor = 'voiceformatselect';
    const options = optionSelector.querySelector('select');
    options.id = 'voiceformatselect';
    options.innerHTML = '';

    // Remember each format's value now, before any slot is overwritten.
    // Reading the live enum later would return the overwritten value, so the
    // 24 kHz mp3 format could never be selected again.
    const formatValues = new Map();
    Object.entries(formats)
      // Keep the entries whose key is numeric; their value is the format
      // name. The coercing global isNaN is intended because keys are strings.
      .filter(([key]) => !isNaN(key))
      .map(([, formatName]) => formatName)
      .filter((formatName) => /(^Audio.+Mp3$)|(^Ogg)|(^Webm)/.test(formatName))
      .forEach((formatName) => {
        formatValues.set(formatName, formats[formatName]);
        const option = document.createElement('option');
        option.value = formatName;
        option.text = formatName;
        if (formatName === 'Audio24Khz96KBitRateMonoMp3') option.selected = true;
        options.appendChild(option);
      });
    styleSelecter.after(optionSelector);

    options.addEventListener('change', () => {
      formats.Audio24Khz96KBitRateMonoMp3 = formatValues.get(options.value);
      if (options.value.startsWith('Ogg')) {
        fileExt = '.ogg';
      } else if (options.value.startsWith('Webm')) {
        fileExt = '.webm';
      } else {
        fileExt = '.mp3';
      }
    });
  } catch (e) {
    console.log(e);
  }
  260.  
  // Stream size at which the script reports that the free quota was exceeded.
  const STREAM_BYTE_LIMIT = 1900800;
  // Number of fragments stored for the synthesis currently streaming in.
  let streamFragmentCount = 0;

  // Callbacks passed to the SDK's push output stream: write() receives each
  // audio chunk of a synthesis and close() runs when the stream ends.
  const streamHandler = {
    /**
     * Store one audio chunk and update the progress text.
     * @param {ArrayBuffer} dataBuffer - chunk of encoded audio.
     */
    write: function (dataBuffer) {
      // streamSize is reset to 0 before every synthesis starts, so a zero
      // here marks the first chunk of a new stream.
      if (streamSize === 0) streamFragmentCount = 0;
      streamSize += dataBuffer.byteLength;
      if (streamSize <= STREAM_BYTE_LIMIT) {
        fileSize += dataBuffer.byteLength;
        downloadSize.textContent = `已接收 ${fileSize / 1000} kb`;
        if (autoProcessing) downloadSize.textContent = `剩余分段 ${tasks.length} ` + downloadSize.textContent;
        wavFragments.push(dataBuffer);
        streamFragmentCount += 1;
      }
      if (streamSize === STREAM_BYTE_LIMIT) {
        downloadStatus.textContent = '下载长度超过免费限额,请分割文本后使用收集模式';
        if (!enableCollect) {
          fileSize = 0;
          wavFragments.length = 0;
        } else {
          // Drop only this synthesis. Counting the fragments actually stored
          // avoids assuming a fixed chunk size, which could remove the wrong
          // fragments or throw RangeError on a negative array length.
          fileSize -= STREAM_BYTE_LIMIT;
          wavFragments.length -= streamFragmentCount;
        }
        streamFragmentCount = 0;
      }
    },
    /**
     * Finish one synthesis: download at once outside collect mode, otherwise
     * refresh the preview or continue with the next auto-split segment.
     */
    close: function () {
      downloadStatus.textContent = '下载完成';
      if (!enableCollect) {
        downloadAndClean();
        return;
      }
      if (!autoProcessing) {
        syncAudioToPlayer();
        return;
      }
      if (tasks.length) {
        // Feed the next queued segment and trigger its synthesis.
        ttstext.value = tasks.shift();
        dispatchTextChange();
        downloadButton.click();
      } else {
        autoProcessing = false;
        setButton(autoSplitButton, 'red', '自动拆分');
        ttstext.value = "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件";
        syncAudioToPlayer();
      }
    }
  };
  303.  
  // Push stream that delivers synthesized audio to streamHandler.
  const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler);

  // Replace AudioConfig.fromSpeakerOutput so that, while a download is being
  // started, the audio goes to outputStream instead of the speakers. Outside
  // a download the original factory is used unchanged.
  SpeechSDK.AudioConfig.fromSpeakerOutput = (() => {
    const fromSpeakerOutput = SpeechSDK.AudioConfig.fromSpeakerOutput;
    return function (audioDestination) {
      if (!enableDownload) {
        // Keep the receiver the caller used, in case the factory reads `this`.
        return fromSpeakerOutput.call(this, audioDestination);
      }
      // Signal "audio ended" on the destination.
      // NOTE(review): presumably this resets the page's play button - confirm.
      // It is called as a statement so its return value cannot be returned in
      // place of the stream config.
      if (audioDestination && typeof audioDestination.onAudioEnd === 'function') {
        audioDestination.onAudioEnd();
      }
      return SpeechSDK.AudioConfig.fromStreamOutput(outputStream);
    };
  })();
  312. })();