Azure Speech Download

为微软的文本转语音服务的 demo 页面添加下载按钮

  1. // ==UserScript==
  2. // @name Azure Speech Download
  3. // @namespace
  4. // @version 1.0.0
  5. // @description 为微软的文本转语音服务的 demo 页面添加下载按钮
  6. // @author Puteulanus
  7. // @homepage https://greasyfork.org/zh-CN/scripts/444347-azure-speech-download
  8. // @match https://azure.microsoft.com/*/products/cognitive-services/text-to-speech/*
  9. // @icon https://www.microsoft.com/favicon.ico
  10. // @require https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
  11. // @grant none
  12. // @run-at document-end
  13. // @namespace https://greasyfork.org/users/909438
  14. // ==/UserScript==
  15.  
  16. /* globals saveAs */
  17. /* jshint esversion: 6 */
  18. (function() {
  19. 'use strict';
  20.  
  21. // Your code here...
  // Fallback used only when the @require'd FileSaver.js did not leave a
  // `saveAs` function on the page's window object.
  if (!window.saveAs) {
    /**
     * Minimal stand-in for FileSaver's saveAs: downloads `blob` under the
     * file name `name` by clicking a temporary, hidden <a download> link.
     *
     * @param {Blob} blob - data to download
     * @param {string} name - suggested file name
     */
    window.saveAs = (blob, name) => {
      const url = window.URL.createObjectURL(blob)
      const anchor = document.createElement('a')
      anchor.style.display = 'none'
      anchor.href = url
      anchor.download = name
      document.body.appendChild(anchor)
      try {
        anchor.click()
      } finally {
        // Detach the helper link again; previously one hidden anchor was
        // left behind in <body> for every download.
        anchor.remove()
        window.URL.revokeObjectURL(url)
      }
    }
  }
  35.  
  // Speech SDK object taken from the page's window.
  const SpeechSDK = window.SpeechSDK

  // --- audio buffering ---
  // Data buffers kept from the output stream, in arrival order.
  let wavFragments = []
  // Total byte count of the kept buffers; used to size the merged audio array.
  let fileSize = 0
  // Bytes received for the current synthesis; reset when "download" is clicked.
  let streamSize = 0
  // File extension for saved audio; updated when the codec selection changes.
  let fileExt = '.mp3'

  // --- mode flags ---
  // True only while the download button forwards its click to the play button.
  let enableDownload = false
  // Collection mode, toggled by the collect button.
  let enableCollect = false
  // True while the auto-split queue is being worked through.
  let autoProcessing = false
  // Text segments still waiting to be synthesized by auto split.
  let tasks = []
  // Whether changes to the settings are persisted to localStorage.
  let enableSaveOptions = false
  // UI strings, one table per supported interface language. `i18n.lang` is
  // set below to whichever table matches the detected culture.
  const i18n = {
    zh: {
      document1: "\n\n\n收集模式:\n\n打开之后,点击“下载”按钮转换的音频会被收集,在收集模式关闭时合成一个音频下载",
      document2: "\n\n自动拆分:\n\n将长文本拆分为多个接近“段落长度”的片段,并只在“分隔符”处截断,避免句子被截断,影响阅读效果",
      document3: "\n\n\n\n拖拽 txt 文件至此框可加载文本文件",
      download: '下载',
      downloading: '下载中',
      downloaded: '下载完成',
      split: '自动拆分',
      spliting: '拆分中',
      codec: '音频编码',
      saveSetting: '保存设置',
      lengthWarning: '下载长度超过免费限额,请分割文本后使用收集模式',
      splitedMsg: "自动拆分完成\n\n使用下方播放器播放,或关闭收集模式下载音频文件",
      length: '段落长度',
      delimiter: '分隔符',
      collectionOn: '收集模式开',
      collectionOff: '收集模式关',
      received: '已接收',
      taskQueue: '剩余分段',
      profileName: '配置名',
      createProfile: '创建配置',
    },
    eng: {
      document1: "\n\n\nCollection:\n\nCollect audio files converted by clicking \"Download\" button, do the really download when it is turned off",
      document2: "\n\nSplit:\n\nSplit long text into segments close to the \"paragraph length\", which only truncate at \"delimiter\"",
      document3: "\n\n\n\nYou can drag .txt file to this text box to load a text file",
      download: 'Download',
      downloading: 'Downloading',
      downloaded: 'Download complete',
      split: 'Split',
      spliting: 'Spliting',
      codec: 'Codec',
      saveSetting: 'Save settings',
      lengthWarning: 'Text length exceeds the free limit, please split the text and use collection mode',
      splitedMsg: "Split finished\n\nUse the player below to play, or turn off collection mode to download the audio file",
      length: 'Paragraph length',
      delimiter: 'Delimiter',
      collectionOn: 'Collection On',
      collectionOff: 'Collection Off',
      received: 'Received:',
      taskQueue: 'Task queue:',
      profileName: 'Profile name',
      createProfile: 'Create profile',
    }
  }

  /**
   * Choose the string table for a culture code.
   *
   * @param {string|undefined|null} culture - culture code such as 'zh-cn' or 'en-us'
   * @returns {object} `i18n.zh` for any code starting with "zh" (compared
   *   case-insensitively), otherwise `i18n.eng`
   */
  function resolveLocale(culture) {
    const code = String(culture || '').toLowerCase()
    return code.startsWith('zh') ? i18n.zh : i18n.eng
  }

  /**
   * Read the culture from `window.Acom.currentCulture`; when that object is
   * not available, fall back to the browser language instead of throwing
   * (an exception here would abort the whole script).
   *
   * @returns {string} culture code, or '' when none can be determined
   */
  function detectCulture() {
    const acom = typeof window !== 'undefined' ? window.Acom : undefined
    if (acom && acom.currentCulture) return acom.currentCulture
    return (typeof navigator !== 'undefined' && navigator.language) || ''
  }

  const lang = detectCulture()
  i18n.lang = resolveLocale(lang)
  98.  
  /**
   * Create a new button item by deep-cloning the page's `#playli` element.
   *
   * @param {string} id - id given to the cloned element
   * @param {string} color - CSS colour for the inner button's background and border
   * @param {string} content - label text
   * @returns {Element} the cloned element (not yet attached to the document)
   */
  function createButton(id, color, content) {
    const button = document.getElementById('playli').cloneNode(true)
    button.id = id
    // Label and colours are applied by the same helper that later updates
    // them, so both code paths cannot drift apart.
    setButton(button, color, content)
    return button
  }

  /**
   * Update label and colours of an element produced by createButton().
   *
   * @param {Element} button - element containing a <button> and a label <span>
   * @param {string} color - CSS colour for the inner button's background and border
   * @param {string} content - label text written to the last <span> of its type
   */
  function setButton(button, color, content) {
    button.querySelector('span:last-of-type').textContent = content
    const inner = button.querySelector('button')
    inner.style.backgroundColor = color
    inner.style.borderColor = color
  }
  113.  
  /**
   * Merge all collected audio fragments into one file, hand it to saveAs()
   * and reset the collection state (fileSize, streamSize, wavFragments).
   *
   * The file name is the current UTC timestamp in the form
   * "YYYY-MM-DD HH_MM_SS" followed by the current file extension.
   */
  function downloadAndClean() {
    const merged = new window.Uint8Array(fileSize)
    fileSize = 0
    streamSize = 0
    let offset = 0
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset)
      offset += fragment.byteLength
    }
    wavFragments.length = 0
    // Replace every colon: a string pattern in replace() only touches the
    // first match, which used to leave a ':' in the file name.
    const timestamp = new Date().toISOString().split('.')[0].replace('T', ' ').replace(/:/g, '_')
    saveAs(new Blob([merged]), timestamp + fileExt)
  }
  125.  
  /**
   * Show the auto-split button, the option area and the preview player while
   * collection mode is on; hide all three while it is off.
   */
  function switchOptionDisplay() {
    const collecting = enableCollect
    autoSplitButton.style.display = collecting ? 'block' : 'none'
    optionArea.style.display = collecting ? 'block' : 'none'
    previewPlayer.style.display = collecting ? 'inline-block' : 'none'
  }
  137.  
  /**
   * Load everything collected so far into the preview player.
   *
   * The fragments are merged into one blob and exposed through an object URL.
   * The collection state (fileSize, wavFragments) is left untouched.
   */
  function syncAudioToPlayer() {
    const merged = new window.Uint8Array(fileSize)
    let offset = 0
    for (const fragment of wavFragments) {
      merged.set(new window.Uint8Array(fragment), offset)
      offset += fragment.byteLength
    }
    // NOTE(review): the blob is always labelled audio/ogg, even though the
    // default codec/extension in this script is mp3 — confirm that is intended.
    const audioBlob = new Blob([merged], {type : 'audio/ogg'})
    const previousUrl = previewPlayer.src
    previewPlayer.src = URL.createObjectURL(audioBlob)
    // Release the blob the player showed before; otherwise every refresh
    // keeps another full copy of the audio alive until the page is closed.
    if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl)
    }
  }
  147.  
  /**
   * Fire a bubbling, cancelable `input` event on the text box so that
   * listeners on it run after the script changed its value programmatically.
   */
  function dispatchTextChange() {
    // Event constructor instead of the deprecated createEvent()/initEvent() pair.
    ttstext.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }))
  }
  153.  
  /**
   * Persist the current settings under the localStorage key 'savedOptions'.
   * Does nothing unless saving has been enabled.
   */
  function saveOptions() {
    if (enableSaveOptions) {
      const snapshot = JSON.stringify(getCurrentSettings())
      localStorage.setItem('savedOptions', snapshot)
    }
  }
  158.  
  /**
   * Re-apply the settings stored under the localStorage key 'savedOptions',
   * tick the "save settings" checkbox and turn saving on.
   *
   * Does nothing when no settings are stored. An entry that cannot be parsed
   * into an object is discarded instead of throwing, because an exception
   * here would also skip the initialisation steps that run after this call.
   */
  function restoreOptions() {
    const optionsJSON = localStorage.getItem('savedOptions')
    if (!optionsJSON) return
    let options = null
    try {
      options = JSON.parse(optionsJSON)
    } catch (e) {
      console.warn('Azure Speech Download: discarding unreadable saved options', e)
    }
    if (!options || typeof options !== 'object') {
      localStorage.removeItem('savedOptions')
      return
    }
    setSettings(options)
    saveCheckBox.checked = true
    enableSaveOptions = true
  }
  167.  
  /**
   * Call saveOptions() whenever one of the setting controls fires `change`.
   *
   * Controls that do not exist are skipped: `codecInput` stays undefined when
   * building the codec selector failed (that failure is only logged), and
   * binding to it unconditionally would throw.
   */
  function bindSaveOption() {
    const controls = [
      languageInput,
      voiceInput,
      styleInput,
      codecInput,
      speedInput,
      pitchInput,
      maxSizeInput,
      delimiterInput
    ]
    for (const control of controls) {
      if (control) control.addEventListener('change', saveOptions)
    }
  }
  178.  
  /**
   * Reset the speed and pitch controls to '0' and fire an `input` event on
   * each (speed first, then pitch) so listeners see the new value.
   */
  function initSpeedAndPitch() {
    for (const slider of [speedInput, pitchInput]) {
      slider.value = '0'
      // A fresh event per control, built with the Event constructor instead
      // of the deprecated createEvent()/initEvent() pair.
      slider.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }))
    }
  }
  187.  
  /**
   * Store `profile` under `name` in the localStorage list 'savedProfiles'
   * (replacing an existing entry with the same name) and redraw the list.
   *
   * @param {string} name - profile name
   * @param {object} profile - settings object as returned by getCurrentSettings()
   */
  function createProfile(name, profile) {
    let stored = null
    try {
      stored = JSON.parse(localStorage.getItem('savedProfiles'))
    } catch (e) {
      // Unreadable list: start over from an empty one; it is rewritten below.
      stored = null
    }
    // A missing entry parses to null; treat anything that is not an array as
    // "no profiles yet" instead of throwing on .filter().
    const existing = Array.isArray(stored) ? stored : []
    const others = existing.filter(saved => saved && saved.name !== name)
    localStorage.setItem('savedProfiles', JSON.stringify([...others, {
      name,
      setting: profile
    }]))
    refreshProfile()
  }
  196.  
  /**
   * Delete the profile called `name` from the localStorage list
   * 'savedProfiles' and redraw the list.
   *
   * @param {string} name - name of the profile to remove
   */
  function removeProfile(name) {
    let stored = null
    try {
      stored = JSON.parse(localStorage.getItem('savedProfiles'))
    } catch (e) {
      // Unreadable list: treat it as empty; it is rewritten below.
      stored = null
    }
    // A missing entry parses to null; fall back to an empty list instead of
    // throwing on .filter().
    const existing = Array.isArray(stored) ? stored : []
    const remaining = existing.filter(saved => saved && saved.name !== name)
    localStorage.setItem('savedProfiles', JSON.stringify(remaining))
    refreshProfile()
  }
  202.  
  /**
   * Rebuild the list of saved profiles inside `profileContainer`.
   *
   * Reads the localStorage list 'savedProfiles' (creating an empty list when
   * the key is absent) and renders one entry per profile: clicking the name
   * applies the profile's settings while keeping the current text, clicking
   * the "X" removes the profile.
   */
  function refreshProfile() {
    const storedJSON = localStorage.getItem('savedProfiles')
    let savedProfiles
    if (storedJSON) {
      savedProfiles = JSON.parse(storedJSON)
    } else {
      savedProfiles = []
      localStorage.setItem('savedProfiles', JSON.stringify(savedProfiles))
    }

    profileContainer.innerHTML = ''

    for (const saved of savedProfiles) {
      const entry = document.createElement("div")
      const nameLabel = document.createElement("span")
      const deleteMark = document.createElement("span")

      Object.assign(entry.style, {
        display: 'inline-block',
        border: '1px solid',
        marginLeft: '5px',
        cursor: 'pointer'
      })
      nameLabel.innerText = saved.name
      nameLabel.style.padding = '5px'
      deleteMark.innerText = 'X'
      Object.assign(deleteMark.style, {
        backgroundColor: 'black',
        color: 'white',
        padding: '2px'
      })

      entry.append(nameLabel, deleteMark)
      profileContainer.append(entry)

      nameLabel.addEventListener('click', () => {
        // The text box content is saved before and written back after
        // applying the profile, then listeners are notified.
        const keptText = ttstext.value
        setSettings(saved.setting)
        ttstext.value = keptText
        dispatchTextChange()
      })
      deleteMark.addEventListener('click', () => {
        removeProfile(saved.name)
      })
    }
  }
  241.  
  /**
   * Snapshot the current values of all setting controls.
   *
   * @returns {{language, voice, style, codec, speed, pitch, splitLength, delimiter}}
   *   `codec` is undefined when the codec selector could not be created
   *   (`codecInput` is then undefined and reading `.value` from it would throw).
   */
  function getCurrentSettings() {
    const codec = codecInput ? codecInput.value : undefined
    return {
      language: languageInput.value,
      voice: voiceInput.value,
      style: styleInput.value,
      codec,
      speed: speedInput.value,
      pitch: pitchInput.value,
      splitLength: maxSizeInput.value,
      delimiter: delimiterInput.value
    }
  }
  254.  
  /**
   * Apply a settings object (as produced by getCurrentSettings()) to the
   * controls.
   *
   * Language, voice, style, codec, speed and pitch are written in that order,
   * each followed by a `change` event on the control; speed and pitch then
   * also receive an `input` event. Split length and delimiter are written
   * last, without events.
   *
   * Controls that do not exist (e.g. `codecInput` when the codec selector
   * could not be created) and fields missing from `setting` are skipped, so
   * the corresponding control keeps its current value.
   *
   * @param {object} setting - settings to apply
   */
  function setSettings(setting) {
    // Event constructor instead of the deprecated createEvent()/initEvent() pair.
    const fire = (control, type) => {
      control.dispatchEvent(new Event(type, { bubbles: true, cancelable: true }))
    }

    const eventedControls = [
      [languageInput, setting.language],
      [voiceInput, setting.voice],
      [styleInput, setting.style],
      [codecInput, setting.codec],
      [speedInput, setting.speed],
      [pitchInput, setting.pitch]
    ]
    for (const [control, value] of eventedControls) {
      if (!control || value === undefined) continue
      control.value = value
      fire(control, 'change')
    }

    fire(speedInput, 'input')
    fire(pitchInput, 'input')

    if (setting.splitLength !== undefined) maxSizeInput.value = setting.splitLength
    if (setting.delimiter !== undefined) delimiterInput.value = setting.delimiter
  }
  277.  
  // Shorthand for the id lookups below.
  const byId = (id) => document.getElementById(id)

  // Elements created by this script for status and progress text.
  const downloadStatus = document.createElement('div')
  const downloadSize = document.createElement('div')

  // Elements that already exist on the page.
  const ttstext = byId('ttstext')
  const languageInput = byId('languageselect')
  const voiceInput = byId('voiceselect')
  const styleInput = byId('voicestyleselect')
  const speedInput = byId('speed')
  const pitchInput = byId('pitch')
  // Parent of the play button item; the script's buttons are appended here.
  const buttonArea = byId('playli').parentElement
  // Parent of the style <select>; cloned later to build the codec selector.
  const styleSelecter = styleInput.parentElement
  288.  
  // Dropping exactly one plain-text file onto the text box loads its content.
  ttstext.ondrop = async (event) => {
    const dropped = event.dataTransfer.files
    const isSingleTextFile = dropped.length === 1 && dropped[0].type === 'text/plain'
    if (!isSingleTextFile) return
    event.preventDefault()
    ttstext.value = await dropped[0].text()
    dispatchTextChange()
  }

  // required by Firefox
  ttstext.ondragover = (event) => {
    event.preventDefault()
  }
  303.  
  // set document
  // Two nested zero-delay timeouts postpone the hook below — presumably until
  // the page's own scripts have assigned languageInput.onchange (TODO confirm).
  setTimeout(() => {
    setTimeout(() => {
      const pageHandler = languageInput.onchange
      // One-shot wrapper: on the first change it runs the page's handler,
      // puts that handler back, then initialises the script and appends the
      // usage notes to the text box.
      languageInput.onchange = (...args) => {
        pageHandler(...args)
        languageInput.onchange = pageHandler
        initSpeedAndPitch()
        restoreOptions()
        bindSaveOption()
        for (const key of ['document1', 'document2', 'document3']) {
          ttstext.value += i18n.lang[key]
        }
      }
    }, 0)
  }, 0)
  320.  
  // set download button
  const downloadButton = createButton('donwloadli', 'green', i18n.lang.download)
  downloadStatus.style.marginTop = '10px'
  buttonArea.appendChild(downloadButton)
  downloadButton.addEventListener('click', function () {
    downloadStatus.textContent = i18n.lang.downloading
    streamSize = 0
    // enableDownload is raised only for the duration of the forwarded click;
    // the patched AudioConfig.fromSpeakerOutput reads it to pick the output.
    enableDownload = true
    document.getElementById('playbtn').click()
    enableDownload = false
  })
  // set collect button
  const collectButton = createButton('collectli', 'red', i18n.lang.collectionOff)
  collectButton.style.marginRight = '10px'
  buttonArea.appendChild(collectButton)
  collectButton.addEventListener('click', function () {
    // Toggle collection mode and update the dependent controls.
    enableCollect = !enableCollect
    switchOptionDisplay()
    if (enableCollect) {
      setButton(collectButton, 'green', i18n.lang.collectionOn)
      return
    }
    setButton(collectButton, 'red', i18n.lang.collectionOff)
    // Leaving collection mode: save whatever was collected, if anything.
    if (fileSize) downloadAndClean()
  })
  // set options
  const optionArea = document.createElement('div')
  optionArea.id = 'optiondiv'
  optionArea.style.display = 'none'

  // Segment length used by auto split.
  const maxSizeLabel = document.createElement('span')
  maxSizeLabel.textContent = i18n.lang.length
  const maxSizeInput = document.createElement('input')
  Object.assign(maxSizeInput.style, { width: '50px', margin: '10px' })
  maxSizeInput.value = '300'

  // Characters at which auto split may cut the text.
  const delimiterLabel = document.createElement('span')
  delimiterLabel.textContent = i18n.lang.delimiter
  const delimiterInput = document.createElement('input')
  Object.assign(delimiterInput.style, { width: '100px', margin: '10px' })
  delimiterInput.value = ',。?,.?'

  optionArea.append(maxSizeLabel, maxSizeInput, delimiterLabel, delimiterInput)

  const controlsHost = buttonArea.parentElement
  controlsHost.appendChild(optionArea)
  // set download status
  controlsHost.appendChild(downloadStatus)
  controlsHost.appendChild(downloadSize)
  /**
   * Cut `text` into segments of at most `maxSize` characters.
   *
   * Characters are accumulated until `maxSize` of them are buffered; the
   * buffer is then cut after the last delimiter it contains and the remainder
   * is carried over into the next segment. When the buffer holds no delimiter
   * at all it is cut at `maxSize` (previously that case queued an empty
   * segment and stopped splitting the rest of the text). Empty segments are
   * never returned.
   *
   * @param {string} text - text to split
   * @param {number} maxSize - segment length; values that are not >= 1
   *   (including NaN) disable splitting
   * @param {string} delimiters - every character of this string is a delimiter
   * @returns {string[]} segments in order; [] for empty text
   */
  function splitTextIntoSegments(text, maxSize, delimiters) {
    // Array.from iterates by code point, so a cut never lands inside a
    // surrogate pair.
    const chars = Array.from(text)
    if (!chars.length) return []
    const limit = Math.floor(maxSize)
    if (!(limit >= 1)) return [text]

    const delimiterSet = new Set(delimiters)
    const segments = []
    let buffer = []
    let lastDelimiter = -1 // index inside `buffer`, -1 when it holds none
    for (const char of chars) {
      buffer.push(char)
      if (delimiterSet.has(char)) lastDelimiter = buffer.length - 1
      if (buffer.length === limit) {
        const cut = lastDelimiter >= 0 ? lastDelimiter + 1 : buffer.length
        segments.push(buffer.slice(0, cut).join(''))
        // Whatever follows the last delimiter contains no delimiter itself.
        buffer = buffer.slice(cut)
        lastDelimiter = -1
      }
    }
    if (buffer.length) segments.push(buffer.join(''))
    return segments
  }

  // set auto split button
  const autoSplitButton = createButton('autosplit', 'red', i18n.lang.split)
  autoSplitButton.addEventListener('click', () => {
    const segments = splitTextIntoSegments(ttstext.value, +maxSizeInput.value, delimiterInput.value)
    // Nothing to synthesize: leave the button and the queue untouched.
    if (!segments.length) return
    setButton(autoSplitButton, 'green', i18n.lang.spliting)
    autoProcessing = true
    // The first segment is synthesized right away; the rest is queued and
    // picked up one by one when the output stream closes.
    ttstext.value = segments.shift()
    tasks = segments
    dispatchTextChange()
    downloadButton.click()
  })
  autoSplitButton.style.display = 'none'
  buttonArea.appendChild(autoSplitButton)
  // set preview player
  // Hidden <audio> element placed right after the text box; it is shown
  // while collection mode is on.
  const previewPlayer = document.createElement('audio')
  previewPlayer.controls = true
  Object.assign(previewPlayer.style, {
    display: 'none',
    width: '100%',
    marginTop: '10px'
  })
  ttstext.after(previewPlayer)
  // set formatting options
  // Builds a codec <select> by cloning the style selector. Any failure is only
  // logged; `codecInput` may then be undefined or never attached to the page.
  let codecInput
  try {
    const codecSelector = styleSelecter.cloneNode(true)
    const codecLabel = codecSelector.querySelector('label')
    codecLabel.textContent = i18n.lang.codec
    codecLabel.htmlFor = 'voiceformatselect'
    codecInput = codecSelector.querySelector('select')
    codecInput.id = 'voiceformatselect'
    codecInput.innerHTML = ''

    // Entries with numeric keys map a number to the format's name; only the
    // Mp3, Ogg and Webm formats are offered.
    for (const [key, format] of Object.entries(SpeechSDK.SpeechSynthesisOutputFormat)) {
      if (isNaN(key)) continue
      if (!/(^Audio.+Mp3$)|(^Ogg)|(^Webm)/.test(format)) continue
      const option = document.createElement("option")
      option.value = format
      option.text = format
      if (format === 'Audio24Khz96KBitRateMonoMp3') option.selected = true
      codecInput.appendChild(option)
    }
    styleSelecter.after(codecSelector)

    // The selection is applied by overwriting the SDK's
    // Audio24Khz96KBitRateMonoMp3 entry with the chosen format's value; the
    // original value is remembered so the default can be selected again.
    const originalMp3Format = SpeechSDK.SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
    codecInput.addEventListener('change', () => {
      const formats = SpeechSDK.SpeechSynthesisOutputFormat
      const chosen = codecInput.value
      formats.Audio24Khz96KBitRateMonoMp3 = chosen === 'Audio24Khz96KBitRateMonoMp3'
        ? originalMp3Format
        : formats[chosen]

      const extensionByPrefix = [['Ogg', '.ogg'], ['Webm', '.webm']]
      const matched = extensionByPrefix.find(([prefix]) => chosen.startsWith(prefix))
      fileExt = matched ? matched[1] : '.mp3'
    })
  } catch (e) {
    console.log(e)
  }
  // set save options
  // Checkbox plus label appended to the parent of the pitch label.
  const saveCheckBox = document.createElement("input")
  saveCheckBox.type = 'checkbox'
  const saveLabel = document.createElement("span")
  saveLabel.innerText = i18n.lang.saveSetting
  saveLabel.style.marginLeft = '5px'
  const pitchArea = document.getElementById('pitchlabel').parentElement
  pitchArea.appendChild(saveCheckBox)
  pitchArea.appendChild(saveLabel)
  saveCheckBox.addEventListener('change', function () {
    enableSaveOptions = saveCheckBox.checked
    if (enableSaveOptions) {
      // Ticking the box stores the current settings immediately.
      saveOptions()
    } else {
      // Unticking it forgets the stored settings.
      localStorage.removeItem('savedOptions')
    }
  })
  // set profile manage
  // Name input, "create" button and the list of saved profiles, placed right
  // after the preview player.
  const profileArea = document.createElement("div")
  profileArea.style.marginTop = '10px'

  const createProfileInput = document.createElement("input")
  createProfileInput.placeholder = i18n.lang.profileName
  createProfileInput.style.width = '120px'

  const createProfileButton = document.createElement("button")
  createProfileButton.innerText = i18n.lang.createProfile
  Object.assign(createProfileButton.style, {
    border: '1px solid',
    marginLeft: '5px',
    padding: '2px'
  })

  const profileContainer = document.createElement("div")
  profileContainer.style.display = 'inline-block'

  profileArea.append(createProfileInput, createProfileButton, profileContainer)
  previewPlayer.after(profileArea)
  refreshProfile()

  createProfileButton.addEventListener('click', function () {
    const profileName = createProfileInput.value
    // An empty name is ignored.
    if (!profileName) return
    createProfile(profileName, getCurrentSettings())
    createProfileInput.value = ''
  })
  491.  
  // Byte count at which a single synthesis triggers the length warning.
  const STREAM_LIMIT_BYTES = 1900800
  // Number of fragments removed again in collection mode when the limit is hit
  // (presumably 1440-byte fragments: 1320 * 1440 = 1900800 — TODO confirm).
  const STREAM_LIMIT_FRAGMENTS = 1320

  // Callbacks handed to SpeechSDK.PushAudioOutputStream.create().
  const streamHandler = {
    /**
     * Receives one chunk of synthesized audio.
     * Chunks are kept while the current stream stays within the limit; when
     * the stream size equals the limit exactly, a warning is shown and this
     * stream's data is dropped again.
     */
    write(dataBuffer) {
      const chunkBytes = dataBuffer.byteLength
      streamSize += chunkBytes

      if (streamSize <= STREAM_LIMIT_BYTES) {
        fileSize += chunkBytes
        let progress = `${i18n.lang.received} ${fileSize / 1000} kb`
        if (autoProcessing) progress = `${i18n.lang.taskQueue} ${tasks.length} ` + progress
        downloadSize.textContent = progress
        wavFragments.push(dataBuffer)
      }

      if (streamSize !== STREAM_LIMIT_BYTES) return

      downloadStatus.textContent = i18n.lang.lengthWarning
      if (enableCollect) {
        // Keep what was collected earlier, drop only this stream's share.
        fileSize -= STREAM_LIMIT_BYTES
        wavFragments.length -= STREAM_LIMIT_FRAGMENTS
      } else {
        fileSize = 0
        wavFragments.length = 0
      }
    },

    /**
     * Called when the stream ends: saves the file (collection off), refreshes
     * the preview (collection on, no auto split), or advances the auto-split
     * queue.
     */
    close() {
      downloadStatus.textContent = i18n.lang.downloaded

      if (!enableCollect) {
        downloadAndClean()
        return
      }
      if (!autoProcessing) {
        syncAudioToPlayer()
        return
      }
      if (tasks.length === 0) {
        // Queue finished: restore the button and show the completion message.
        autoProcessing = false
        setButton(autoSplitButton, 'red', i18n.lang.split)
        ttstext.value = i18n.lang.splitedMsg
        syncAudioToPlayer()
        return
      }
      // Synthesize the next queued segment.
      ttstext.value = tasks.shift()
      dispatchTextChange()
      downloadButton.click()
    }
  }
  534.  
  // Push stream that feeds synthesized audio into streamHandler.
  const outputStream = SpeechSDK.PushAudioOutputStream.create(streamHandler)

  // Replace AudioConfig.fromSpeakerOutput: while a download is being started
  // the audio goes to the push stream, otherwise the SDK's own function is
  // called as before.
  const sdkFromSpeakerOutput = SpeechSDK.AudioConfig.fromSpeakerOutput
  SpeechSDK.AudioConfig.fromSpeakerOutput = function (audioDestination) {
    if (!enableDownload) {
      return sdkFromSpeakerOutput(audioDestination)
    }
    // onAudioEnd() is invoked on the destination first; its return value is
    // used when truthy, otherwise the stream-backed config is returned.
    return audioDestination.onAudioEnd() || SpeechSDK.AudioConfig.fromStreamOutput(outputStream)
  }
  543. })();