Azure Speech Download

为微软的文本转语音服务的 demo 页面添加下载按钮

当前为 2022-09-16 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Azure Speech Download
  3. // @namespace
  4. // @version 0.9.0
  5. // @description 为微软的文本转语音服务的 demo 页面添加下载按钮
  6. // @author Puteulanus
  7. // @homepage https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
  8. // @match https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
  9. // @icon https://www.microsoft.com/favicon.ico
  10. // @require https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
  11. // @grant none
  12. // @run-at document-end
  13. // @namespace https://greasyfork.org/users/909438
  14. // ==/UserScript==
  15.  
  16. /* globals saveAs */
  17. /* jshint esversion: 6 */
  18. (function() {
  19. 'use strict';
  20.  
  21. // Your code here...
  // Fallback used only when the page has no global saveAs (normally provided by FileSaver.js).
  if (!window.saveAs) {
    /**
     * Minimal stand-in for FileSaver's saveAs: downloads `blob` under the file
     * name `name` by clicking a temporary, hidden <a download> element.
     *
     * @param {Blob} blob - data to save
     * @param {string} name - suggested file name
     */
    window.saveAs = (blob, name) => {
      const url = window.URL.createObjectURL(blob);
      const anchor = document.createElement('a');
      anchor.style.display = 'none';
      anchor.href = url;
      anchor.download = name;
      document.body.appendChild(anchor);
      anchor.click();
      // The link is only needed for the click; leaving it attached would add
      // one hidden element to the page per download.
      anchor.remove();
      // Revoking the URL synchronously can cancel the download in some
      // browsers, so release it later (FileSaver.js itself waits 40 seconds).
      setTimeout(() => window.URL.revokeObjectURL(url), 40000);
    };
  }
  35.  
  // Speech SDK object that the demo page exposes on window.
  const { SpeechSDK } = window;
  // Total bytes currently held in wavFragments (used to size the merged buffer).
  let fileSize = 0;
  // Bytes received for the stream in progress; reset before each download.
  let streamSize = 0;
  // Audio buffers collected so far, in arrival order.
  let wavFragments = [];
  // True only while the download button is triggering the page's play button.
  let enableDownload = false;
  // Collect mode: keep audio across downloads and save it as one file later.
  let enableCollect = false;
  // True while the auto-split queue is being worked through.
  let autoProcessing = false;
  // Text segments still waiting to be synthesized by auto-split.
  let tasks = [];
  // Extension of the saved file; follows the selected codec.
  let fileExt = '.mp3';
  // Whether option changes are persisted to localStorage.
  let enableSaveOptions = false;
  46.  
  /**
   * Create a toolbar entry by deep-cloning the page's "playli" element and
   * giving the copy its own id, caption and colour.
   *
   * @param {string} id - id assigned to the cloned element
   * @param {string} color - CSS colour used for background and border
   * @param {string} content - caption written into the last <span>
   * @returns {Element} the cloned, not yet attached element
   */
  function createButton(id, color, content) {
    const item = document.getElementById('playli').cloneNode(true);
    item.id = id;
    item.querySelector('span:last-of-type').textContent = content;
    const { style } = item.querySelector('button');
    style.backgroundColor = color;
    style.borderColor = color;
    return item;
  }
  55.  
  /**
   * Update the caption and colour of an entry produced by createButton.
   *
   * @param {Element} button - wrapper element containing a <button> and <span>s
   * @param {string} color - CSS colour used for background and border
   * @param {string} content - new caption for the last <span>
   */
  function setButton(button, color, content) {
    button.querySelector('span:last-of-type').textContent = content;
    const { style } = button.querySelector('button');
    style.backgroundColor = color;
    style.borderColor = color;
  }
  61.  
  /**
   * Merge every collected audio fragment into one buffer, reset the collection
   * state (fileSize, streamSize, wavFragments) and save the result through
   * saveAs. The file is named after the current UTC time, e.g.
   * "2022-09-16 12_34_56" followed by the current fileExt.
   */
  function downloadAndClean() {
    const merged = new window.Uint8Array(fileSize);
    let offset = 0;
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset);
      offset += fragment.byteLength;
    }
    fileSize = 0;
    streamSize = 0;
    wavFragments.length = 0;
    // Replace EVERY colon: a string pattern in replace() only swaps the first
    // match, which left "12_34:56" in the name, and ":" is not allowed in
    // Windows file names.
    const timestamp = new Date()
      .toISOString()
      .split('.')[0]
      .replace('T', ' ')
      .replace(/:/g, '_');
    saveAs(new Blob([merged]), timestamp + fileExt);
  }
  73.  
  /**
   * Show the auto-split button, the option inputs and the preview player while
   * collect mode is on, and hide all three while it is off.
   */
  function switchOptionDisplay() {
    const visible = Boolean(enableCollect);
    autoSplitButton.style.display = visible ? 'block' : 'none';
    optionArea.style.display = visible ? 'block' : 'none';
    previewPlayer.style.display = visible ? 'inline-block' : 'none';
  }
  85.  
  /**
   * Load everything collected so far into the preview player. The collected
   * fragments are left untouched so they can still be downloaded later.
   */
  function syncAudioToPlayer() {
    const merged = new window.Uint8Array(fileSize);
    let offset = 0;
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset);
      offset += fragment.byteLength;
    }
    // Label the blob with the container that matches the selected codec
    // instead of always claiming Ogg.
    const mimeTypes = { '.mp3': 'audio/mpeg', '.ogg': 'audio/ogg', '.webm': 'audio/webm' };
    const audioBlob = new Blob([merged], { type: mimeTypes[fileExt] || 'audio/ogg' });
    const previousUrl = previewPlayer.src;
    previewPlayer.src = URL.createObjectURL(audioBlob);
    // Each call creates a new object URL; release the one it replaces so the
    // old audio data can be garbage-collected.
    if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }
  }
  95.  
  /**
   * Fire a bubbling, cancelable "input" event on the text box so that
   * listeners on the page notice a value that was set from script.
   */
  function dispatchTextChange() {
    // The Event constructor replaces the deprecated createEvent/initEvent pair.
    ttstext.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
  }
  101.  
  /**
   * Persist the current option values to localStorage under "savedOptions".
   * Does nothing unless saving has been enabled.
   */
  function saveOptions() {
    if (!enableSaveOptions) return;
    const snapshot = {
      language: languageInput.value,
      voice: voiceInput.value,
      style: styleInput.value,
      // codecInput stays undefined when the codec selector could not be
      // built; JSON.stringify then simply omits the key instead of this
      // function throwing.
      codec: codecInput ? codecInput.value : undefined,
      speed: speedInput.value,
      pitch: pitchInput.value,
      splitLength: maxSizeInput.value,
      delimiter: delimiterInput.value
    };
    localStorage.setItem('savedOptions', JSON.stringify(snapshot));
  }
  115.  
  /**
   * Re-apply options previously stored by saveOptions. Each select/slider is
   * set and then sent a "change" event, in the order language, voice, style,
   * codec, speed, pitch; the sliders also get an "input" event afterwards.
   * Finally the "save" checkbox is ticked and saving is enabled.
   * Nothing happens when no usable data is stored.
   */
  function restoreOptions() {
    const optionsJSON = localStorage.getItem('savedOptions');
    if (!optionsJSON) return;
    let options;
    try {
      options = JSON.parse(optionsJSON);
    } catch (e) {
      // Corrupt data must not abort the rest of the page setup.
      console.log(e);
      return;
    }
    if (!options || typeof options !== 'object') return;

    // Skip inputs that do not exist (codecInput is undefined when the codec
    // selector could not be built) and keys missing from the stored data, so
    // no control is ever set to the string "undefined".
    const apply = (input, value, evt) => {
      if (!input || value === undefined || value === null) return;
      input.value = value;
      if (evt) input.dispatchEvent(evt);
    };

    const changeEvent = new Event('change', { bubbles: true, cancelable: true });
    apply(languageInput, options.language, changeEvent);
    apply(voiceInput, options.voice, changeEvent);
    apply(styleInput, options.style, changeEvent);
    apply(codecInput, options.codec, changeEvent);
    apply(speedInput, options.speed, changeEvent);
    apply(pitchInput, options.pitch, changeEvent);

    // The sliders additionally get an "input" event after their value is set.
    const inputEvent = new Event('input', { bubbles: true, cancelable: true });
    speedInput.dispatchEvent(inputEvent);
    pitchInput.dispatchEvent(inputEvent);

    apply(maxSizeInput, options.splitLength);
    apply(delimiterInput, options.delimiter);
    saveCheckBox.checked = true;
    enableSaveOptions = true;
  }
  143.  
  /**
   * Re-save the options whenever one of the option inputs fires "change".
   */
  function bindSaveOption() {
    const watched = [languageInput, voiceInput, styleInput];
    // codecInput is undefined when the codec selector could not be built;
    // skipping it keeps the remaining inputs from being left unbound.
    if (codecInput) watched.push(codecInput);
    watched.push(speedInput, pitchInput, maxSizeInput, delimiterInput);
    for (const input of watched) {
      input.addEventListener('change', saveOptions);
    }
  }
  154.  
  /**
   * Reset the speed and pitch sliders to '0' and send each an "input" event
   * so that listeners on the page pick up the new value.
   */
  function initSpeedAndPitch() {
    // The Event constructor replaces the deprecated createEvent/initEvent pair.
    const inputEvent = new Event('input', { bubbles: true, cancelable: true });
    for (const slider of [speedInput, pitchInput]) {
      slider.value = '0';
      slider.dispatchEvent(inputEvent);
    }
  }
  163.  
  // Elements created by this script: a status line and a byte counter.
  const downloadStatus = document.createElement('div');
  const downloadSize = document.createElement('div');

  // Elements that already exist on the demo page, looked up by id.
  const byId = (id) => document.getElementById(id);
  const buttonArea = byId('playli').parentElement;
  const ttstext = byId('ttstext');
  const styleSelecter = byId('voicestyleselect').parentElement;
  const languageInput = byId('languageselect');
  const voiceInput = byId('voiceselect');
  const styleInput = byId('voicestyleselect');
  const speedInput = byId('speed');
  const pitchInput = byId('pitch');
  174.  
  // Dropping exactly one plain-text file onto the text box replaces the box
  // content with the file's text; any other drop is left to the browser.
  ttstext.ondrop = async (event) => {
    const dropped = event.dataTransfer.files;
    if (dropped.length !== 1 || dropped[0].type !== 'text/plain') return;
    event.preventDefault();
    const [textFile] = dropped;
    ttstext.value = await textFile.text();
    dispatchTextChange();
  };

  // required by Firefox
  ttstext.ondragover = (event) => {
    event.preventDefault();
  };
  189.  
  // set document
  // Two nested zero-delay timers postpone the hook. Once it is installed, the
  // next call of languageInput.onchange runs the page's own handler, restores
  // that handler, initialises and restores the options, and appends the usage
  // notes to the text box.
  setTimeout(() => {
    setTimeout(() => {
      const pageHandler = languageInput.onchange;
      const usageNotes = [
        "\n\n\n收集模式:\n\n打开之后,点击\“下载\”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载",
        "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果",
        "\n\n\n\n拖拽 txt 文件至此框可加载文本文件"
      ];
      languageInput.onchange = (...eventArgs) => {
        pageHandler(...eventArgs);
        // One-shot: hand the property back to the page's handler.
        languageInput.onchange = pageHandler;
        initSpeedAndPitch();
        restoreOptions();
        bindSaveOption();
        for (const note of usageNotes) {
          ttstext.value += note;
        }
      };
    }, 0);
  }, 0);
  206.  
  // set download button
  // Clicking it presses the page's own play button while enableDownload is
  // true; the flag is cleared again as soon as that click returns.
  const downloadButton = createButton('donwloadli', 'green', '下载');
  downloadButton.addEventListener('click', function startDownload() {
    downloadStatus.textContent = '下载中';
    streamSize = 0;
    enableDownload = true;
    document.getElementById('playbtn').click();
    enableDownload = false;
  });
  downloadStatus.style.marginRight = '10px';
  buttonArea.appendChild(downloadButton);
  // set collect button
  // Toggles collect mode. Switching it off saves whatever has been collected,
  // if anything.
  const collectButton = createButton('collectli', 'red', '收集模式关');
  collectButton.addEventListener('click', () => {
    enableCollect = !enableCollect;
    switchOptionDisplay();
    if (enableCollect) {
      setButton(collectButton, 'green', '收集模式开');
      return;
    }
    setButton(collectButton, 'red', '收集模式关');
    if (fileSize) downloadAndClean();
  });
  collectButton.style.marginRight = '10px';
  buttonArea.appendChild(collectButton);
  // set options
  // Inputs for the auto-split feature (segment length and delimiter
  // characters); the container starts hidden.
  const optionArea = document.createElement('div');
  const maxSizeInput = document.createElement('input');
  const delimiterInput = document.createElement('input');
  const maxSizeLabel = document.createElement('span');
  const delimiterLabel = document.createElement('span');
  optionArea.id = 'optiondiv';
  optionArea.style.display = 'none';

  maxSizeLabel.textContent = '段落长度';
  Object.assign(maxSizeInput.style, { width: '50px', margin: '10px' });
  maxSizeInput.value = '300';

  delimiterLabel.textContent = '分隔符';
  Object.assign(delimiterInput.style, { width: '100px', margin: '10px' });
  delimiterInput.value = ',。?,.?';

  for (const child of [maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput]) {
    optionArea.appendChild(child);
  }
  // set download status
  // The option row, the status line and the byte counter go below the toolbar.
  for (const node of [optionArea, downloadStatus, downloadSize]) {
    buttonArea.parentElement.appendChild(node);
  }
  /**
   * Split `text` into consecutive segments of at most `maxSize` UTF-16 code
   * units. A segment ends right after the last delimiter seen inside it; when
   * a full-length run contains no delimiter it is cut at `maxSize` instead, so
   * splitting always continues. Joining the result gives back `text`; empty
   * segments are never produced.
   *
   * @param {string} text - text to split
   * @param {number} maxSize - maximum segment length; text is returned as one
   *   segment when this is not a finite number >= 1
   * @param {Iterable<string>} delimiters - single characters allowed to end a segment
   * @returns {string[]} the segments in order ([] for empty text)
   */
  function splitTextByDelimiters(text, maxSize, delimiters) {
    if (!text) return [];
    if (!Number.isFinite(maxSize) || maxSize < 1) return [text];
    const delimiterSet = new Set(delimiters);
    const segments = [];
    let start = 0;
    let lastDelimiter = -1;
    for (let index = 0; index < text.length; index += 1) {
      if (delimiterSet.has(text[index])) lastDelimiter = index;
      if (index - start + 1 >= maxSize) {
        // Only a delimiter inside the current segment counts; a stale one from
        // an earlier segment would otherwise yield an empty cut.
        const cut = lastDelimiter >= start ? lastDelimiter + 1 : index + 1;
        segments.push(text.slice(start, cut));
        start = cut;
      }
    }
    if (start < text.length) segments.push(text.slice(start));
    return segments;
  }

  // set auto split button
  // Splits the text box content, puts the first segment into the box, queues
  // the rest in `tasks`, and starts the first download.
  const autoSplitButton = createButton('autosplit', 'red', '自动拆分');
  autoSplitButton.addEventListener('click', () => {
    const segments = splitTextByDelimiters(
      ttstext.value,
      +maxSizeInput.value,
      delimiterInput.value.split('')
    );
    // Nothing to synthesize: leave the button and state untouched.
    if (segments.length === 0) return;
    setButton(autoSplitButton, 'green', '拆分中');
    autoProcessing = true;
    ttstext.value = segments.shift();
    tasks = segments;
    dispatchTextChange();
    downloadButton.click();
  });
  autoSplitButton.style.display = 'none';
  buttonArea.appendChild(autoSplitButton);
  // set preview player
  // Audio element placed right after the text box; hidden until collect mode
  // is switched on.
  const previewPlayer = document.createElement('audio');
  previewPlayer.controls = true;
  Object.assign(previewPlayer.style, {
    display: 'none',
    width: '100%',
    marginTop: '10px'
  });
  ttstext.after(previewPlayer);
  // set formatting options
  // Best-effort: builds an "audio codec" selector by cloning the voice-style
  // selector. Any failure is only logged, so the rest of the script keeps
  // running.
  let codecInput;
  try {
    const optionSelector = styleSelecter.cloneNode(true);
    const label = optionSelector.querySelector('label');
    label.textContent = '音频编码';
    label.htmlFor = 'voiceformatselect';
    codecInput = optionSelector.querySelector('select');
    codecInput.id = 'voiceformatselect';
    codecInput.innerHTML = '';
    // Keep only entries whose key is numeric and whose value names an MP3,
    // Ogg or WebM format.
    for (const [key, format] of Object.entries(SpeechSDK.SpeechSynthesisOutputFormat)) {
      if (Number.isNaN(Number(key))) continue;
      if (!/(^Audio.+Mp3$)|(^Ogg)|(^Webm)/.test(format)) continue;
      const option = document.createElement("option");
      option.value = format;
      option.text = format;
      if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true;
      codecInput.appendChild(option);
    }
    styleSelecter.after(optionSelector);
    // Remember the real value of the Audio24Khz96KBitRateMonoMp3 entry: the
    // listener below overwrites that entry with the selected format's value
    // and must be able to put the original back.
    const audio24Khz96KBitRateMonoMp3 = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3;
    codecInput.addEventListener('change', () => {
      const formats = SpeechSDK.SpeechSynthesisOutputFormat;
      const chosen = codecInput.value;
      formats.Audio24Khz96KBitRateMonoMp3 = chosen === 'Audio24Khz96KBitRateMonoMp3'
        ? audio24Khz96KBitRateMonoMp3
        : formats[chosen];
      if (chosen.startsWith('Ogg')) {
        fileExt = '.ogg';
        return;
      }
      fileExt = chosen.startsWith('Webm') ? '.webm' : '.mp3';
    });
  } catch (e) {
    console.log(e);
  }
  // set save options
  // Checkbox next to the pitch control: ticking it saves the options at once
  // and on later changes; unticking it deletes the saved options.
  const saveLabel = document.createElement("span");
  saveLabel.innerText = '保存配置';
  saveLabel.style.marginLeft = '5px';
  const saveCheckBox = document.createElement("input");
  saveCheckBox.type = 'checkbox';
  const pitchArea = document.getElementById('pitchlabel').parentElement;
  for (const node of [saveCheckBox, saveLabel]) {
    pitchArea.appendChild(node);
  }
  saveCheckBox.addEventListener('change', () => {
    if (!saveCheckBox.checked) {
      enableSaveOptions = false;
      localStorage.removeItem('savedOptions');
      return;
    }
    enableSaveOptions = true;
    saveOptions();
  });
  353.  
  // Callbacks handed to SpeechSDK.PushAudioOutputStream.create: `write`
  // receives each audio buffer, `close` runs when the stream ends.
  const streamHandler = {
    /**
     * Store one audio buffer and update the progress text.
     * @param {ArrayBuffer} dataBuffer - audio data (only byteLength is read here)
     */
    write(dataBuffer) {
      // NOTE(review): 1900800 bytes looks like the demo's per-request cap (the
      // status message below calls it the free quota) — confirm.
      const quotaBytes = 1900800;
      // NOTE(review): presumably the number of buffers in a full-quota stream
      // (1320 * 1440 = 1900800) — confirm against the SDK's chunk size.
      const quotaFragments = 1320;
      const chunkBytes = dataBuffer.byteLength;
      streamSize += chunkBytes;
      if (streamSize <= quotaBytes) {
        fileSize += chunkBytes;
        const received = `已接收 ${fileSize / 1000} kb`;
        downloadSize.textContent = autoProcessing ? `剩余分段 ${tasks.length} ` + received : received;
        wavFragments.push(dataBuffer);
      }
      if (streamSize !== quotaBytes) return;
      // The stream reached exactly the quota: discard what it contributed.
      downloadStatus.textContent = '下载长度超过免费限额,请分割文本后使用收集模式';
      if (enableCollect) {
        fileSize -= quotaBytes;
        wavFragments.length -= quotaFragments;
        return;
      }
      fileSize = 0;
      wavFragments.length = 0;
    },
    /**
     * Stream finished: save at once outside collect mode; otherwise refresh
     * the preview or continue with the next auto-split segment.
     */
    close() {
      downloadStatus.textContent = '下载完成';
      if (!enableCollect) {
        downloadAndClean();
        return;
      }
      if (autoProcessing && tasks.length) {
        ttstext.value = tasks.shift();
        dispatchTextChange();
        downloadButton.click();
        return;
      }
      if (autoProcessing) {
        // The queue is empty: auto-split is done.
        autoProcessing = false;
        setButton(autoSplitButton, 'red', '自动拆分');
        ttstext.value = "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件";
      }
      syncAudioToPlayer();
    }
  };
  396.  
  // Push stream that feeds synthesized audio into streamHandler.
  const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler);

  // Wrap the SDK's speaker-output factory. While enableDownload is set, the
  // destination's onAudioEnd() is called and, unless it returns something
  // truthy, the push stream is used as output. Otherwise the SDK's original
  // factory is used.
  const speakerOutputFactory = SpeechSDK.AudioConfig.fromSpeakerOutput;
  SpeechSDK.AudioConfig.fromSpeakerOutput = function (audioDestination) {
    if (!enableDownload) {
      return speakerOutputFactory(audioDestination);
    }
    return audioDestination.onAudioEnd() || SpeechSDK.AudioConfig.fromStreamOutput(outputStream);
  };
  405. })();