YT Subtitle Downloader v40

Download Subtitles

当前为 2024-12-09 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name YT Subtitle Downloader v40
  3. // @description Download Subtitles
  4. // @include https://*youtube.com/*
  5. // @author InternetNinja
  6. // @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2023 Cheng Zheng; 2024 InternetNinja
  7. // @license http://www.gnu.org/copyleft/gpl.html
  8. // @require https://code.jquery.com/jquery-1.12.4.min.js
  9. // @version 40
  10. // @grant GM_xmlhttpRequest
  11. // @grant unsafeWindow
  12. // @namespace https://greasyfork.org/en/users/1408474-internetninjo
  13. // ==/UserScript==
  14.  
  15. /*
  16. [What is this?]
  17. This Tampermonkey script allows you to download Youtube "Automatic subtitle" and "closed subtitle".
  18.  
  19. [Note]
  20. If it doesn't work (rarely), try to refresh the page.
  21. If problem still exists after refreshing, send an email to guokrfans@gmail.com.
  22.  
  23. [Who built this?]
  24. Author : InternetNinja
  25. Email : IntNinj4@proton.me
  26. Github : Coming Soon.
  27.  
  28. [Note for Developers]
  29. 1. Some comments are written in Chinese.
  30. 2. This code handles both "Auto" and "Closed" subtitles.
  31.  
  32. [Test Video]
  33. https://www.youtube.com/watch?v=bkVsus8Ehxs
  34. This video only has a closed English subtitle, with no auto subtitles.
  35.  
  36. https://www.youtube.com/watch?v=-WEqFzyrbbs
  37. no subtitle at all
  38.  
  39. https://www.youtube.com/watch?v=9AzNEG1GB-k
  40. have a lot of subtitles
  41.  
  42. https://www.youtube.com/watch?v=tqGkOvrKGfY
  43. 1:36:33 super long subtitle
  44.  
  45. [How does it work?]
  46. The code can be roughly divided into three parts:
  47. 1. Add a button on the page. (UI)
  48. 2. Detect if subtitle exists.
  49. 3. Convert subtitle format, then download.
  50.  
  51. [Test Environment]
  52. Works best on Chrome + Tampermonkey.
  53. There are plenty of Chromium-based browsers; I do not guarantee this works on all of them.
  54.  
  55. Note:
  56. Sometimes it may fail to work if the jQuery CDN can't be loaded.
  57. // @require https://code.jquery.com/jquery-1.12.4.min.js
  58. The solution is to modify this line ⬆️ to a different
  59. jQuery URL such as these ⬇️
  60. https://cdn.bootcdn.net/ajax/libs/jquery/1.12.4/jquery.js
  61. https://cdn.staticfile.org/jquery/1.12.4/jquery.min.js
  62.  
  63. Update log:
  64.  
  65. ## December 12, 2024:Update v37
  66. Fixed an issue where the subtitles don't download at all.
  67. */
  68.  
  69. ;(function () {
  70.  
  // ---- Config: labels shown in the dropdown and the DOM id of the injected container ----
  const NO_SUBTITLE = 'No Subtitle'
  const HAVE_SUBTITLE = 'Download Subtitles'
  const TEXT_LOADING = 'Loading...'
  const BUTTON_ID = 'youtube-subtitle-downloader-by-1c7-latest-update-2022-decemeber-23'

  // Selector form ("#id") of BUTTON_ID, used with jQuery.
  const HASH_BUTTON_ID = '#' + BUTTON_ID

  // ---- Mutable state ----
  // true until init_UI() has run once on this page load
  let first_load = true
  // playerResponse taken from the latest "yt-navigate-finish" event (for auto subtitle)
  let youtube_playerResponse_1c7 = null
  // one entry per <option> after the first; stores every subtitle on offer
  unsafeWindow.caption_array = []

  // Kick off the UI once the DOM is ready.
  $(document).ready(() => {
    make_sure_it_load_properly_before_continue()
  })
  89.  
  /**
   * Polls the document until an element matching the selector exists.
   *
   * The selector is checked every 330 ms. The polling timer is always
   * stopped before the promise settles, so no interval keeps running after
   * the element was found, the retry limit was hit, or the lookup threw.
   *
   * @param {string} element_identifier - CSS selector handed to document.querySelector.
   * @returns {Promise<boolean>} Resolves with `true` once the element exists;
   *   rejects with `false` after more than 50 failed checks or when the
   *   lookup throws (e.g. an invalid selector).
   */
  async function wait_until_element_exists(element_identifier) {
    const RETRY_LIMIT = 50
    const POLL_INTERVAL_MS = 330
    let retry_count = 0

    return new Promise(function (resolve, reject) {
      const intervalID = setInterval(function () {
        try {
          if (document.querySelector(element_identifier) != null) {
            clearInterval(intervalID)
            resolve(true)
            return
          }

          retry_count = retry_count + 1
          if (retry_count > RETRY_LIMIT) {
            clearInterval(intervalID)
            reject(false)
          }
        } catch (error) {
          // A throwing lookup will keep throwing; stop polling instead of retrying forever.
          clearInterval(intervalID)
          reject(false)
        }
      }, POLL_INTERVAL_MS)
    })
  }
  113.  
  /**
   * Waits for the watch-page anchor element to appear, then builds the UI.
   *
   * wait_until_element_exists() rejects (with `false`) when the element never
   * shows up; that case is handled here so it does not surface as an
   * unhandled promise rejection. Errors thrown by init_UI() are not caught.
   *
   * @returns {Promise<void>}
   */
  async function make_sure_it_load_properly_before_continue() {
    const id = new_Youtube_2022_UI_element_identifier()

    let found = false
    try {
      found = await wait_until_element_exists(id)
    } catch (error) {
      console.warn('[YT Subtitle Downloader] page element "' + id + '" did not appear; UI not added')
      return
    }

    if (found) {
      init_UI()
    }
  }
  121.  
  // Runs whenever the page dispatches "yt-navigate-finish" on <body>.
  // According to the original author's notes, the Material-design YouTube UI
  // fires this event (the old UI does not), and it also fires on the very
  // first load - which is why the re-init below is skipped while first_load
  // is still true.
  var body = document.getElementsByTagName('body')[0]
  body.addEventListener('yt-navigate-finish', (event) => {
    if (!current_page_is_video_page()) {
      return
    }

    // for auto subtitle
    youtube_playerResponse_1c7 = event.detail.response.playerResponse
    // clean up (important: otherwise entries from the previous video pile up and cause errors)
    unsafeWindow.caption_array = []

    if (first_load) {
      return
    }

    // the user navigated to another page: rebuild the UI to get the correct subtitles
    remove_subtitle_download_button()
    init_UI()
  })
  140.  
  141.  
  /**
   * Looks up the element addressed by new_Youtube_2022_UI_element_identifier().
   * @returns {Element|null} whatever document.querySelector returns for that selector.
   */
  function new_Youtube_2022_UI_element() {
    const selector = new_Youtube_2022_UI_element_identifier()
    return document.querySelector(selector)
  }
  146.  
  /**
   * @returns {string} CSS selector of the page element whose presence is used
   *   as the "page has loaded" signal before the UI is built.
   */
  function new_Youtube_2022_UI_element_identifier() {
    return '#owner.item.style-scope.ytd-watch-metadata'
  }
  151.  
  /**
   * Tells the old YouTube UI apart from the Material-design one by checking
   * for the old layout's "#watch7-headline" element.
   * @returns {boolean} false when "#watch7-headline" exists, true otherwise.
   */
  function new_material_design_version() {
    return document.getElementById('watch7-headline') ? false : true
  }
  163.  
  /**
   * @returns {boolean} true when the current URL carries a video id ("v" parameter).
   */
  function current_page_is_video_page() {
    const video_id = get_url_video_id()
    return video_id !== null
  }
  168.  
  /**
   * Reads the "v" query parameter of the current URL.
   * @returns {string|null} e.g. "RW1ChiWyiZQ" for
   *   "https://www.youtube.com/watch?v=RW1ChiWyiZQ", or null when absent.
   */
  function get_url_video_id() {
    const VIDEO_ID_PARAMETER = 'v'
    return getURLParameter(VIDEO_ID_PARAMETER)
  }
  174.  
  /**
   * Returns the value of a query-string parameter of the current page.
   *
   * Uses URLSearchParams instead of a hand-built RegExp, so the parameter
   * name is matched literally (no regex metacharacters) and malformed
   * percent-escapes no longer throw a URIError.
   *
   * @param {string} name - Parameter name, e.g. "v".
   * @returns {string|null} The decoded value of the first occurrence ("+" is
   *   decoded as a space), or null when the parameter is missing or empty.
   */
  function getURLParameter(name) {
    const value = new URLSearchParams(location.search).get(name)
    // Missing parameter (null) and empty value ('') both mean "not present" to callers.
    return value || null
  }
  185.  
  /** Removes the injected download container (looked up by HASH_BUTTON_ID) from the page. */
  function remove_subtitle_download_button() {
    const $button = $(HASH_BUTTON_ID)
    $button.remove()
  }
  189.  
  190.  
  /**
   * Builds the download container and attaches it to the page.
   *
   * Two anchors are tried in order: the old layout's "#watch7-headline" and
   * then "#above-the-fold #title". The container is appended to each one
   * that exists, so when both exist it ends up in the latter. Afterwards
   * first_load is set to false.
   */
  function init_UI() {
    const container = get_main_UI_element()

    const anchor_lookups = [
      () => document.getElementById('watch7-headline'),
      () => document.querySelector('#above-the-fold #title'),
    ]
    for (const find_anchor of anchor_lookups) {
      const anchor = find_anchor()
      if (anchor) {
        anchor.appendChild(container)
      }
    }

    first_load = false
  }
  208.  
  /**
   * Creates the download widget: a styled <div> (id = BUTTON_ID) wrapping a
   * disabled <select id="captions_selector"> whose only option reads
   * TEXT_LOADING. load_language_list() is started to fill in the languages,
   * and choosing an option triggers download_subtitle().
   *
   * Side effects on the document:
   *  - if an element matching ".title.style-scope.ytd-video-primary-info-renderer"
   *    exists, the <div> is inserted right after it;
   *  - a hidden <a id="ForSubtitleDownload"> is appended to <body>, but only
   *    if none exists yet, so repeated calls (one per navigation) no longer
   *    pile up duplicate elements with the same id.
   *
   * @returns {HTMLDivElement} the container, for the caller to attach.
   */
  function get_main_UI_element() {
    const div = document.createElement('div')
    div.id = BUTTON_ID
    div.setAttribute(
      'style',
      [
        'display: table;',
        'margin-top:4px;',
        'border: 1px solid rgb(0, 183, 90);',
        'cursor: pointer; color: rgb(255, 255, 255);',
        'border-top-left-radius: 3px;',
        'border-top-right-radius: 3px;',
        'border-bottom-right-radius: 3px;',
        'border-bottom-left-radius: 3px;',
        'background-color: #00B75A;',
      ].join('\n')
    )

    const select = document.createElement('select')
    select.id = 'captions_selector'
    select.disabled = true // enabled by load_language_list() once subtitles are found
    select.setAttribute(
      'style',
      [
        'display:block;',
        'border: 1px solid rgb(0, 183, 90);',
        'cursor: pointer;',
        'color: rgb(255, 255, 255);',
        'background-color: #00B75A;',
        'padding: 4px;',
      ].join('\n')
    )

    const option = document.createElement('option')
    option.textContent = TEXT_LOADING
    option.selected = true
    select.appendChild(option)

    select.addEventListener(
      'change',
      function () {
        download_subtitle(this)
      },
      false
    )

    div.appendChild(select) // put <select> into <div>

    // Layout using ytd-video-primary-info-renderer: place the div right after the title.
    // querySelector returns null when there is no such element, unlike the
    // always-truthy list returned by querySelectorAll.
    const title_element = document.querySelector(
      '.title.style-scope.ytd-video-primary-info-renderer'
    )
    if (title_element) {
      $(title_element).after(div)
    }

    // load_language_list is async; report a failure instead of leaving an
    // unhandled promise rejection behind.
    Promise.resolve(load_language_list(select)).catch(function (error) {
      console.error('[YT Subtitle Downloader] could not load the subtitle list', error)
    })

    // Hidden <a> kept for backward compatibility; nothing in this script
    // reads it. Create it only once.
    if (!document.getElementById('ForSubtitleDownload')) {
      const a = document.createElement('a')
      a.style.cssText = 'display:none;'
      a.id = 'ForSubtitleDownload'
      document.body.appendChild(a)
    }

    return div
  }
  273.  
  /**
   * "change" handler of the language <select>: fetches the chosen subtitle,
   * converts it to SRT and starts the download.
   *
   * Index 0 is the label option and is ignored. Option N (N >= 1) maps to
   * caption_array[N - 1]; an entry whose lang_code is 'AUTO' is the
   * auto-generated track.
   *
   * Failures (player data missing, unknown option, fetch failed, nothing to
   * convert) are logged and abort the download instead of throwing or saving
   * a bogus file. In every case the <select> is switched back to its first
   * option, so the same language can be picked again.
   *
   * @param {HTMLSelectElement} selector - the language dropdown.
   * @returns {Promise<void>}
   */
  async function download_subtitle(selector) {
    // first <option> (or nothing) selected: do nothing
    if (selector.selectedIndex <= 0) {
      return
    }

    try {
      if (
        typeof unsafeWindow.ytplayer === 'undefined' ||
        !unsafeWindow.ytplayer.bootstrapPlayerResponse
      ) {
        console.error('ytplayer.bootstrapPlayerResponse is not defined')
        return
      }

      const caption = caption_array[selector.selectedIndex - 1]
      if (!caption) {
        console.error('[YT Subtitle Downloader] no subtitle entry for option ' + selector.selectedIndex)
        return
      }

      const is_auto = caption.lang_code == 'AUTO'
      const result = is_auto
        ? await get_auto_subtitle()
        : await get_closed_subtitle(caption.lang_code)

      // both getters return false when no matching track / URL was found
      if (!result) {
        console.error('[YT Subtitle Downloader] could not fetch subtitle "' + caption.lang_name + '"')
        return
      }

      const srt = parse_youtube_XML_to_SRT(result)
      if (srt === false) {
        console.error('[YT Subtitle Downloader] subtitle "' + caption.lang_name + '" could not be converted')
        return
      }

      const filename = get_file_name(is_auto ? get_auto_subtitle_name() : caption.lang_name)
      downloadString(srt, 'text/plain', filename)
    } catch (error) {
      // e.g. the network request was rejected
      console.error('[YT Subtitle Downloader] download failed', error)
    } finally {
      // back to the label option, whether or not the download worked
      selector.options[0].selected = true
    }
  }
  308.  
  309.  
  /**
   * Builds the download file name from a language label, the video title and
   * the video id.
   *
   * document.title is deliberately not used: according to the original
   * author it also carries the notification count, which would end up in
   * the file name.
   *
   * @param {string} x - language label, e.g. "English".
   * @returns {string} "(<x>)<title>_video_id_<id>.srt"
   */
  function get_file_name(x) {
    const title = get_title()
    const video_id = get_video_id()
    return ['(', String(x), ')', String(title), '_video_id_', String(video_id), '.srt'].join('')
  }
  317.  
  318. // Get the full subtitles XML
  319. // async function get_closed_subtitles() {
  320. // var list_url = 'https://video.google.com/timedtext?hl=en&v=' + get_url_video_id() + '&type=list';
  321. // // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list
  322. // return new Promise(function (resolve, reject) {
  323. // GM_xmlhttpRequest({
  324. // method: 'GET',
  325. // url: list_url,
  326. // onload: function (xhr) {
  327. // resolve(xhr.responseText)
  328. // }
  329. // })
  330. // })
  331. // }
  332.  
  /**
   * Detects which subtitles exist ("auto" and "closed") and fills the
   * dropdown: one <option> per subtitle is appended to `select`, and a
   * matching { lang_code, lang_name } entry is pushed to caption_array, so
   * that option N maps to caption_array[N - 1]. The auto-generated track
   * (if any) comes first and gets lang_code 'AUTO'.
   *
   * When there is no subtitle at all, the first option shows NO_SUBTITLE
   * and the widget is greyed out.
   *
   * @param {HTMLSelectElement} select - dropdown created by get_main_UI_element().
   * @returns {Promise<false|undefined>} false when the video has no subtitle,
   *   undefined otherwise.
   */
  async function load_language_list(select) {
    const auto_subtitle_exist = get_auto_subtitle_xml_url() != false

    const captionTracks = get_captionTracks()
    const closed_subtitle_exist = Array.isArray(captionTracks) && captionTracks.length > 0

    // if no subtitle at all, just say no and stop
    if (!auto_subtitle_exist && !closed_subtitle_exist) {
      select.options[0].textContent = NO_SUBTITLE
      // get_main_UI_element() calls this function before its caller has
      // attached the container to the document, and disable_download_button()
      // finds its targets by id. Yield once so the synchronous caller can
      // finish attaching the container first.
      await Promise.resolve()
      disable_download_button()
      return false
    }

    // at least one type of subtitle exists
    select.options[0].textContent = HAVE_SUBTITLE
    select.disabled = false

    // registers one subtitle: bookkeeping entry + visible <option>
    const add_caption = function (caption_info) {
      caption_array.push(caption_info)
      const option = document.createElement('option')
      option.textContent = caption_info.lang_name
      select.appendChild(option)
    }

    if (auto_subtitle_exist) {
      add_caption({
        lang_code: 'AUTO', // later we use this to know if it's the auto subtitle
        lang_name: get_auto_subtitle_name(), // for display only
      })
    }

    if (closed_subtitle_exist) {
      for (const caption of captionTracks) {
        // the auto-generated track was already handled above
        if (caption.kind == 'asr') {
          continue
        }
        const lang_code = caption.languageCode
        // a track without a display name falls back to its language code
        // instead of throwing
        const lang_translated = caption.name?.simpleText ?? lang_code
        const lang_name =
          lang_code_to_local_name(lang_code, lang_translated) || lang_translated
        add_caption({
          lang_code: lang_code,
          lang_name: lang_name,
        })
      }
    }
  }
  406.  
  /**
   * Greys out the download widget: the container (HASH_BUTTON_ID) and the
   * "#captions_selector" dropdown get a grey border/background and a
   * "not-allowed" cursor; the container's padding depends on the UI version.
   */
  function disable_download_button() {
    const greyed_out_styles = [
      ['border', '#95a5a6'],
      ['cursor', 'not-allowed'],
      ['background-color', '#95a5a6'],
    ]

    for (const target of [HASH_BUTTON_ID, '#captions_selector']) {
      const $target = $(target)
      for (const [property, value] of greyed_out_styles) {
        $target.css(property, value)
      }
    }

    const padding = new_material_design_version() ? '6px' : '5px'
    $(HASH_BUTTON_ID).css('padding', padding)
  }
  423.  
  424.  
  /**
   * Formats a time offset given in seconds as an SRT timestamp.
   *
   * @param {number} s - offset in seconds, e.g. 671.33
   * @returns {string} "HH:MM:SS,mmm", e.g. "00:11:11,330"
   */
  function process_time(s) {
    // toFixed(3) always yields three fractional digits:
    // 671.33 -> "671.330", 671 -> "671.000"
    const [whole_seconds, MilliSecond] = s.toFixed(3).split('.')

    // prefixes values below 10 with a zero
    const pad = (value) => (value < 10 ? '0' + value : value)

    let Hour = 0
    let Minute = 0
    let Second = whole_seconds // e.g. "671"

    if (Second >= 60) {
      Minute = Math.floor(Second / 60)
      Second = Second - Minute * 60

      Hour = Math.floor(Minute / 60)
      Minute = Minute - Hour * 60
    }

    return `${pad(Hour)}:${pad(Minute)}:${pad(Second)},${MilliSecond}`
  }
  464.  
  /**
   * Offers `text` to the user as a file download.
   *
   * A Blob URL is created for the text, a temporary hidden <a download> is
   * added to <body>, clicked and removed again; the Blob URL is revoked
   * 1500 ms later.
   *
   * @param {string} text - file content.
   * @param {string} fileType - MIME type, e.g. "text/plain".
   * @param {string} fileName - suggested file name.
   */
  function downloadString(text, fileType, fileName) {
    const blob = new Blob([text], { type: fileType })

    const link = document.createElement('a')
    link.download = fileName
    link.href = URL.createObjectURL(blob)
    link.dataset.downloadurl = [fileType, link.download, link.href].join(':')
    link.style.display = 'none'

    const page_body = document.body
    page_body.appendChild(link)
    link.click()
    page_body.removeChild(link)

    setTimeout(() => {
      URL.revokeObjectURL(link.href)
    }, 1500)
  }
  481.  
  /**
   * Turns HTML entities back into text, e.g. "&quot;" becomes '"'.
   *
   * A <textarea> is used as the decoder: its content is parsed as plain text
   * with character references decoded, so markup inside `input` is never
   * turned into real elements (which an innerHTML assignment on a <div>
   * would do with text that comes from the network). The whole decoded
   * string is returned, not just the first node.
   *
   * @param {string} input - text that may contain HTML entities.
   * @returns {string} the decoded text; '' for empty input.
   */
  function htmlDecode(input) {
    const decoder = document.createElement('textarea')
    decoder.innerHTML = input
    return decoder.value
  }
  491.  
  /**
   * Finds the URL of the auto-generated subtitle track.
   *
   * A track with kind === 'asr' is one generated using automatic speech
   * recognition (https://developers.google.com/youtube/v3/docs/captions).
   * The URL can later be requested to get the XML subtitle.
   *
   * @returns {string|false} baseUrl of the first 'asr' track; false when
   *   there is none or the track list cannot be read.
   */
  function get_auto_subtitle_xml_url() {
    try {
      const asr_track = Object.values(get_captionTracks()).find(
        (track) => track.kind === 'asr'
      )
      return asr_track ? asr_track.baseUrl : false
    } catch (error) {
      return false
    }
  }
  510.  
  /**
   * Downloads the auto-generated subtitle.
   * @returns {Promise<*|false>} whatever get() yields for the track URL, or
   *   false when get_auto_subtitle_xml_url() found no URL.
   */
  async function get_auto_subtitle() {
    const url = get_auto_subtitle_xml_url()
    return url == false ? false : await get(url)
  }
  519.  
  /**
   * Downloads the closed (not auto-generated) subtitle of one language.
   *
   * @param {string} lang_code - languageCode of the wanted track.
   * @returns {Promise<*|false>} whatever get() yields for the first
   *   non-'asr' track with that language code; false when there is no such
   *   track or anything throws (including a failed request).
   */
  async function get_closed_subtitle(lang_code) {
    try {
      const wanted = Object.values(get_captionTracks()).find(
        (track) => track.languageCode === lang_code && track.kind != 'asr'
      )
      if (wanted === undefined) {
        return false
      }
      // awaited inside the try block so a rejected request also yields false
      return await get(wanted.baseUrl)
    } catch (error) {
      return false
    }
  }
  537.  
  /**
   * Converts YouTube's XML subtitle into SRT text.
   *
   * Every <text start="..." dur="..."> element becomes one SRT cue:
   *
   *   1
   *   00:00:01,939 --> 00:00:04,350
   *   everybody Craig Adams here I'm a
   *
   * @param {Document|string} youtube_xml_string - the subtitle XML. Despite
   *   the parameter name, a parsed XML document is expected (anything with
   *   getElementsByTagName); a string is parsed first.
   * @returns {string|false} SRT text starting with a byte-order mark, or
   *   false when the input is empty or is not XML that can be read.
   */
  function parse_youtube_XML_to_SRT(youtube_xml_string) {
    // '', false (failed download), null, undefined: nothing to convert
    if (!youtube_xml_string) {
      return false
    }

    let xml = youtube_xml_string
    if (typeof xml === 'string') {
      xml = new DOMParser().parseFromString(xml, 'text/xml')
    }
    if (typeof xml.getElementsByTagName !== 'function') {
      return false
    }

    const cues = xml.getElementsByTagName('text')
    const new_line = '\n'
    let result = '\uFEFF' // byte-order mark, then the cues are appended

    for (let i = 0; i < cues.length; i++) {
      const cue = cues[i]

      const start = parseFloat(cue.getAttribute('start'))
      let end = start + parseFloat(cue.getAttribute('dur'))
      if (Number.isNaN(end)) {
        // no usable "dur": show the cue until the next one starts
        // (or give it zero length if there is no next cue)
        const next_start =
          i + 1 < cues.length ? parseFloat(cues[i + 1].getAttribute('start')) : NaN
        end = Number.isNaN(next_start) ? start : next_start
      }

      // remove all html tags, then turn HTML entities back into text,
      // e.g. &#39; back to an apostrophe (')
      const content = htmlDecode(cue.textContent.toString().replace(/(<([^>]+)>)/gi, ''))

      result += i + 1 + new_line // 1
      result += process_time(start) + ' --> ' + process_time(end) + new_line // 00:00:01,939 --> 00:00:04,350
      result += content + new_line + new_line // everybody Craig Adams here I'm a
    }

    return result
  }
  595.  
  /**
   * Display name of the auto-generated subtitle track.
   * @returns {string} name.simpleText of the first 'asr' track, e.g.
   *   "English (auto-generated)"; 'Auto Subtitle' when there is no such
   *   track or the track list cannot be read.
   */
  function get_auto_subtitle_name() {
    const DEFAULT_NAME = 'Auto Subtitle'
    try {
      const asr_track = Object.values(get_captionTracks()).find(
        (track) => typeof track.kind === 'string' && track.kind == 'asr'
      )
      return asr_track ? asr_track.name.simpleText : DEFAULT_NAME
    } catch (error) {
      return DEFAULT_NAME
    }
  }
  611.  
  /**
   * Reads the player response that the page keeps on its <ytd-app> element.
   * @returns {object|undefined} `data.playerResponse` of the first <ytd-app>;
   *   undefined (instead of a TypeError) when the element or its data is
   *   not there.
   */
  function get_youtube_data() {
    return document.getElementsByTagName('ytd-app')[0]?.data?.playerResponse
  }
  615.  
  /**
   * @returns {Array|undefined} `captions.playerCaptionsTracklistRenderer.captionTracks`
   *   of the current player response; undefined when any part of that path
   *   is missing.
   */
  function get_captionTracks() {
    const tracklist = get_youtube_data()?.captions?.playerCaptionsTracklistRenderer
    return tracklist?.captionTracks
  }
  622.  
  /**
   * Looks up the display name of a language among the caption tracks.
   * Example from the original notes: with an English (US) locale, "de"
   * gives the name the page shows for German.
   *
   * @param {string} languageCode - e.g. "de".
   * @param {string} fallback_name - returned whenever no name can be found.
   * @returns {string} name.simpleText of the first track with that language
   *   code; fallback_name when that name is empty, when no track matches,
   *   or when the track list cannot be read.
   */
  function lang_code_to_local_name(languageCode, fallback_name) {
    try {
      const captionTracks = get_captionTracks()
      if (Array.isArray(captionTracks)) {
        for (const caption of captionTracks) {
          if (caption.languageCode === languageCode) {
            return caption.name.simpleText || fallback_name
          }
        }
      }
    } catch (error) {
      // best effort: an unreadable track list simply means "use the fallback"
    }
    return fallback_name
  }
  643.  
  644.  
  /**
   * Returns the title of the current video.
   *
   * The visible title element is preferred. When it is missing or empty,
   * the title from ytplayer.bootstrapPlayerResponse is used; according to
   * the original author that data lags behind after an in-page navigation,
   * so it may still describe the previous video. If even that has no title,
   * the video id is returned so the result is never empty.
   *
   * @returns {string}
   */
  function get_title() {
    const title_element = document.querySelector(
      'h1.title.style-scope.ytd-video-primary-info-renderer'
    )
    const title = title_element?.innerText
    if (title) {
      return title
    }

    const details = ytplayer.bootstrapPlayerResponse.videoDetails
    return details.title || details.videoId
  }
  660.  
  /**
   * Returns the id of the video currently shown.
   *
   * The id from the page URL is preferred because, per the original author's
   * note on this data, ytplayer.bootstrapPlayerResponse lags behind after an
   * in-page navigation. It is only used when the URL has no "v" parameter.
   *
   * @returns {string}
   */
  function get_video_id() {
    return get_url_video_id() || ytplayer.bootstrapPlayerResponse.videoDetails.videoId
  }
  664.  
  /**
   * Sends a GET request with jQuery.
   *
   * Usage: var result = await get(url)
   *
   * The object returned by $.ajax is returned as is, so the caller can
   * await it. The former `success` / `fail` entries were dropped: `fail` is
   * not a $.ajax setting, and the value returned from a `success` callback
   * is ignored, so neither had any effect.
   *
   * @param {string} url - address to request.
   * @returns {object} the jqXHR object returned by $.ajax.
   */
  function get(url) {
    return $.ajax({
      url: url,
      type: 'get',
    })
  }
  678.  
  /**
   * Promise-based delay.
   * @param {number} ms - delay in milliseconds.
   * @returns {Promise<void>} resolves once the timeout fires.
   */
  const wait = function (ms) {
    return new Promise(function (done) {
      setTimeout(done, ms)
    })
  }
  680.  
  /**
   * Resolves with the first element matching `selector`, waiting for it to
   * be added to the document if necessary.
   * Source: https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-exists
   *
   * @param {string} selector - CSS selector.
   * @returns {Promise<Element>} never rejects; stays pending while nothing matches.
   */
  function waitForElm(selector) {
    return new Promise((resolve) => {
      const already_there = document.querySelector(selector)
      if (already_there) {
        resolve(already_there)
        return
      }

      // re-check after every change below <body>
      const observer = new MutationObserver(() => {
        const match = document.querySelector(selector)
        if (match) {
          resolve(match)
          observer.disconnect()
        }
      })

      observer.observe(document.body, {
        childList: true,
        subtree: true,
      })
    })
  }
  701.  
  702. })()