Youtube Subtitle Downloader v22

Download Subtitles

当前为 2020-12-03 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Youtube Subtitle Downloader v22
  3. // @include https://*youtube.com/*
  4. // @author Cheng Zheng
  5. // @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2021 Cheng Zheng;
  6. // @license GNU GPL v3.0 or later. http://www.gnu.org/copyleft/gpl.html
  7. // @require http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js
  8. // @version 22
  9. // @grant GM_xmlhttpRequest
  10. // @namespace https://greasyfork.org/users/5711
  11. // @description Download Subtitles
  12. // ==/UserScript==
  13.  
  14. /*
  15. [What is this?]
  16. This "Tampermonkey script" lets you download YouTube "automatic subtitles" and "closed subtitles".
  17.  
  18. [Note]
  19. If it doesn't work (this is rare), try refreshing the page.
  20. If the problem still exists, email: guokrfans@gmail.com
  21.  
  22. [Who build this]
  23. Author : Cheng Zheng
  24. Email : guokrfans@gmail.com
  25. Github : https://github.com/1c7/Youtube-Auto-Subtitle-Download
  26. If you want to improve the script, GitHub pull requests are welcome.
  27.  
  28. [Note]
  29. Few things before you read the code:
  30. 0. Some code comments are written in Chinese
  31. 1. Youtube have 2 UI: Material design and The old design
  32. 2. Code need handle both Auto & Closed subtitle
  33.  
  34. (Explanation: "Tampermonkey script" means
  35. you have to install a Chrome extension called "Tampermonkey", and then install this script.)
  36.  
  37. [Test Video]
  38. https://www.youtube.com/watch?v=bkVsus8Ehxs
  39. only have English closed subtitle, nothing else (no auto subtitle)
  40.  
  41. https://www.youtube.com/watch?v=-WEqFzyrbbs
  42. no subtitle at all
  43.  
  44. https://www.youtube.com/watch?v=9AzNEG1GB-k
  45. have a lot subtitle
  46.  
  47. https://www.youtube.com/watch?v=tqGkOvrKGfY
  48. 1:36:33 super long subtitle
  49.  
  50. [Code Explain]
  51. mainly three part
  52. 1. UI specific (add button on page etc)
  53. 2. detect if subtitle exists
  54. 3. transform subtitle format & download
  55.  
  56. [Changelog]
  57. v1~v15: I forgot, and I am too lazy to check git log
  58.  
  59. v16: add support for auto subtitle
  60.  
  61. v17: fix few minor issue in v16, to make sure all user get update, bump up 1 version
  62.  
  63. v18: fix https://greasyfork.org/zh-CN/forum/discussion/38299/x?locale=zh-CN video too long issue
  64. (for example 1:36:33) and cause subtitle error
  65. reason is the 'downloadFile' function
  66. using a <a> element 'href' attribute to download .srt file.
  67. and this 'href' can't handle string that's too long
  68.  
  69. v19: fix HTML html entity problem, for example: apostrophe as &#39;
  70.  
  71. v20: 2018-June-13 seem like Youtube change their URL format, now URL must have something like '&name=en'
  72. v20 test with: https://www.youtube.com/watch?v=tqGkOvrKGfY https://www.youtube.com/watch?time_continue=5&v=36tggrpRoTI
  73.  
  74. v21&v22: improve code logic
  75. */
  76.  
  77. (function () {
  78.  
  // ---- Configuration ----------------------------------------------------
  // Labels shown in the first <option> of the dropdown.
  const NO_SUBTITLE = 'No Subtitle';
  const HAVE_SUBTITLE = 'Download Subtitles';
  // DOM id of the wrapper <div> that holds the dropdown.
  const BUTTON_ID = 'youtube-subtitle-downloader-by-1c7-last-update-2020-12-3';
  // The same id written as a selector, for jQuery lookups.
  const HASH_BUTTON_ID = '#' + BUTTON_ID;

  // ---- Mutable state ----------------------------------------------------
  // true until init() has run once on this page load.
  let first_load = true;
  // Player response captured from "yt-navigate-finish"; used for auto subtitles.
  let youtube_playerResponse_1c7 = null;
  // Every subtitle entry currently listed in the dropdown.
  unsafeWindow.caption_array = [];

  // Kick off the polling for the video title once the DOM is ready.
  $(document).ready(() => {
    start();
  });
  96.  
  /**
   * Poll the page until the video title element shows up, then call init().
   *
   * "$(document).ready()" fires too early for YouTube, so the title is probed
   * every 330 ms. Which element is probed depends on the UI flavour detected
   * when start() is called. Polling stops once the retry counter exceeds
   * RETRY_LIMIT.
   */
  function start() {
    const RETRY_LIMIT = 20;
    const POLL_INTERVAL_MS = 330; // 330 was found to work in practice

    // Pick the probe once, up front, exactly like the original if/else did.
    const title_is_present = new_material_design_version()
      ? function () {
        return document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer').length;
      }
      : function () {
        return $('#watch7-headline').length;
      };

    let attempts = 0;
    const timer = setInterval(function () {
      if (title_is_present()) {
        init();
        clearInterval(timer);
      }
      attempts = attempts + 1;
      if (attempts > RETRY_LIMIT) {
        clearInterval(timer);
      }
    }, POLL_INTERVAL_MS);
  }
  129.  
  // Fired by the Material design UI whenever a page finishes loading.
  // It also fires on the very first load, which start() already handles;
  // that is why the re-initialisation below is gated on first_load === false.
  // (The old UI does not emit "yt-navigate-finish".)
  var body = document.getElementsByTagName("body")[0];
  body.addEventListener("yt-navigate-finish", function (event) {
    if (current_page_is_video_page() === false) {
      return;
    }

    // Read the player response defensively: a missing detail/response used to
    // throw here, which aborted the handler and left the previous video's
    // button and caption list on the page.
    var detail = event.detail;
    var response = detail && detail.response;
    youtube_playerResponse_1c7 = (response && response.playerResponse) || null; // for auto subtitle

    // Reset the list, otherwise entries pile up across navigations and the
    // dropdown indices no longer match.
    unsafeWindow.caption_array = [];

    // The user navigated to another video: rebuild the button for it.
    if (first_load === false) {
      remove_subtitle_download_button();
      init();
    }
  });
  147.  
  // Fired by the old (pre-Material) UI when an SPF navigation finishes.
  // Waits for the old title container and then rebuilds the button.
  window.addEventListener("spfdone", function (e) {
    if (!current_page_is_video_page()) {
      return;
    }
    remove_subtitle_download_button();

    // Same budget as start(): without a limit this timer would poll forever
    // whenever '#watch7-headline' never appears.
    var RETRY_LIMIT = 20;
    var retry_count = 0;
    var checkExist = setInterval(function () {
      if ($('#watch7-headline').length) {
        // Stop the timer first so a throwing init() cannot leave it running.
        clearInterval(checkExist);
        init();
        return;
      }
      retry_count = retry_count + 1;
      if (retry_count > RETRY_LIMIT) {
        clearInterval(checkExist);
      }
    }, 330);
  });
  161.  
  /**
   * Tell the two YouTube UIs apart.
   *
   * The old UI is recognised by its '#watch7-headline' title container;
   * anything else is treated as the Material design UI.
   *
   * @returns {boolean} true for Material design, false for the old UI.
   */
  function new_material_design_version() {
    return !document.getElementById('watch7-headline');
  }
  173.  
  /**
   * @returns {boolean} true when the current URL carries a video id.
   */
  function current_page_is_video_page() {
    const video_id = get_video_id();
    return video_id !== null;
  }
  178.  
  /**
   * Read the "v" query parameter of the current URL.
   * Example: "https://www.youtube.com/watch?v=RW1ChiWyiZQ" gives "RW1ChiWyiZQ".
   *
   * @returns {string|null} the video id, or null when the URL has none.
   */
  function get_video_id() {
    const video_id = getURLParameter('v');
    return video_id;
  }
  184.  
  /**
   * Read one query-string parameter from location.search.
   * Based on https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513
   *
   * Fixes over the original one-liner:
   *  - the separator class was "[?|&]", which also accepted a literal "|";
   *  - `name` went into the RegExp unescaped, so "." or "+" in it acted as
   *    regex operators;
   *  - malformed percent-encoding (e.g. "?v=100%") made decodeURIComponent
   *    throw a URIError; it is now treated as "parameter not usable".
   *
   * @param {string} name - parameter name to look up.
   * @returns {string|null} decoded value ("+" means space), or null when the
   *   parameter is absent, empty, or cannot be decoded.
   */
  function getURLParameter(name) {
    var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var pattern = new RegExp('[?&]' + escaped_name + '=' + '([^&;]+?)(&|#|;|$)');
    var match = pattern.exec(location.search);
    if (match === null) {
      return null;
    }
    try {
      return decodeURIComponent(match[1].replace(/\+/g, '%20')) || null;
    } catch (error) {
      // URIError: the value is not valid percent-encoding.
      return null;
    }
  }
  189.  
  /**
   * Remove the download button (wrapper <div> and dropdown) from the page,
   * if it is there.
   */
  function remove_subtitle_download_button() {
    const $button = $(HASH_BUTTON_ID);
    $button.remove();
  }
  193.  
  /**
   * Build the download button for the current video and record that the
   * first initialisation has happened.
   */
  function init() {
    inject_our_script();
    // Set only after injection returns: if injection throws, the flag stays true.
    first_load = false;
  }
  198.  
  /**
   * Create the green "download subtitles" dropdown, insert it next to the
   * video title (Material design) or into '#watch7-headline' (old UI), and
   * start loading the subtitle list.
   *
   * Fixes over the previous version:
   *  - the hover tooltip still said "v16";
   *  - `if (title_element)` tested a NodeList, which is always truthy; the
   *    length is checked instead;
   *  - the hidden <a id="ForSubtitleDownload"> was appended on every call,
   *    leaving one more duplicate-id node after each navigation. It is now
   *    created only when missing.
   */
  function inject_our_script() {
    var div = document.createElement('div');
    var select = document.createElement('select');
    var option = document.createElement('option');
    var controls = document.getElementById('watch7-headline'); // old UI: video title DIV

    div.setAttribute('style', `display: table;
margin-top:4px;
border: 1px solid rgb(0, 183, 90);
cursor: pointer; color: rgb(255, 255, 255);
border-top-left-radius: 3px;
border-top-right-radius: 3px;
border-bottom-right-radius: 3px;
border-bottom-left-radius: 3px;
background-color: #00B75A;
`);

    div.id = BUTTON_ID;
    div.title = 'Youtube Subtitle Downloader v22'; // shown on hover

    select.id = 'captions_selector';
    select.disabled = true; // enabled by load_language_list() once subtitles are known
    select.setAttribute('style', `display:block;
border: 1px solid rgb(0, 183, 90);
cursor: pointer;
color: rgb(255, 255, 255);
background-color: #00B75A;
padding: 4px;
`);

    option.textContent = 'Loading...';
    option.selected = true;
    select.appendChild(option);

    // Choosing an entry in the dropdown triggers the download.
    select.addEventListener('change', function () {
      download_subtitle(this);
    }, false);

    div.appendChild(select); // put <select> into <div>

    // Insert into the page: Material design
    var title_elements = document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer');
    if (title_elements.length > 0) {
      $(title_elements[0]).after(div);
    }
    // Insert into the page: old UI
    if (controls) {
      controls.appendChild(div);
    }

    load_language_list(select);

    // Hidden <a> element kept for download purposes; create it only once.
    if (!document.getElementById('ForSubtitleDownload')) {
      var a = document.createElement('a');
      a.style.cssText = 'display:none;';
      a.setAttribute("id", "ForSubtitleDownload");
      document.getElementsByTagName('body')[0].appendChild(a);
    }
  }
  259.  
  /**
   * Handle a selection in the dropdown: fetch the chosen subtitle, convert it
   * to SRT and offer it as a download.
   *
   * @param {HTMLSelectElement} selector - the dropdown that changed.
   */
  function download_subtitle(selector) {
    // The first <option> is only a label; selecting it does nothing.
    if (selector.selectedIndex == 0) {
      return;
    }

    // caption_array has no entry for that label, hence the "- 1".
    const caption = caption_array[selector.selectedIndex - 1];

    // Builds the response handler; `label` becomes the "(...)" file-name prefix.
    const save_as_srt = function (label) {
      return function (response) {
        if (response != false) {
          const srt = parse_youtube_XML_to_SRT(response);
          const title = get_file_name(label);
          downloadString(srt, "text/plain", title);
        }
      };
    };

    if (caption.lang_code == 'AUTO') {
      // auto-generated subtitle
      get_auto_subtitle(save_as_srt('auto'));
    } else {
      // closed subtitle
      get_closed_subtitle(caption.lang_code, save_as_srt(caption.lang_name));
    }

    // Put the dropdown back on its label entry.
    selector.options[0].selected = true;
  }
  296.  
  297.  
  /**
   * Build the download file name from a label and the page title.
   * Example: "(English)How Did Python Become A Data Science Powerhouse?.srt"
   *
   * @param {string} x - label placed in parentheses (language name or 'auto').
   * @returns {string} file name ending in ".srt".
   */
  function get_file_name(x) {
    var prefix = '(' + x + ')';
    return prefix + document.title + '.srt';
  }
  302.  
  /**
   * Find out which subtitles exist (auto-generated and closed) and add one
   * <option> per subtitle to the dropdown. Each added option gets a matching
   * entry in the shared caption_array, which download_subtitle() indexes.
   *
   * Fixes over the previous version:
   *  - the list request had no onerror/ontimeout handler, so a failed request
   *    left the dropdown on "Loading..." forever. A failure is now treated as
   *    "no closed subtitles", so an existing auto subtitle stays downloadable;
   *  - a response arriving after the dropdown was removed (the user navigated
   *    away) is ignored, so it cannot push stale entries into caption_array.
   *
   * @param {HTMLSelectElement} select - the dropdown created by inject_our_script().
   */
  function load_language_list(select) {
    // auto subtitle: known synchronously from the player response
    var auto_subtitle_exist = get_auto_subtitle_xml_url() != false;

    // Register one subtitle in caption_array and in the dropdown.
    function add_entry(caption_info) {
      caption_array.push(caption_info);
      var option = document.createElement('option');
      option.textContent = caption_info.lang_name;
      select.appendChild(option);
    }

    // Fill the dropdown; `tracks` holds the <track> elements (may be empty).
    function show_subtitles(tracks) {
      if (!document.contains(select)) {
        return; // dropdown already removed from the page
      }

      var closed_subtitle_exist = tracks.length != 0;

      // No subtitle at all: say so and stop.
      if (auto_subtitle_exist == false && closed_subtitle_exist == false) {
        select.options[0].textContent = NO_SUBTITLE;
        disable_download_button();
        return;
      }

      // At least one kind of subtitle exists.
      select.options[0].textContent = HAVE_SUBTITLE;
      select.disabled = false;

      if (auto_subtitle_exist) {
        add_entry({
          lang_code: 'AUTO', // marks the auto subtitle for download_subtitle()
          lang_name: get_auto_subtitle_name() // for display only
        });
      }

      for (var i = 0, il = tracks.length; i < il; i++) {
        add_entry({
          lang_code: tracks[i].getAttribute('lang_code'), // for the AJAX request
          lang_name: tracks[i].getAttribute('lang_translated') // for display only
        });
      }
    }

    // closed subtitles: ask the timedtext service for the track list
    // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list
    var list_url = 'https://video.google.com/timedtext?hl=en&v=' + get_video_id() + '&type=list';
    GM_xmlhttpRequest({
      method: 'GET',
      url: list_url,
      onload: function (xhr) {
        var tracks = new DOMParser()
          .parseFromString(xhr.responseText, "text/xml")
          .getElementsByTagName('track');
        show_subtitles(tracks);
      },
      onerror: function () {
        show_subtitles([]);
      },
      ontimeout: function () {
        show_subtitles([]);
      }
    });
  }
  377.  
  /**
   * Give the button and its dropdown the greyed-out "nothing to download"
   * look, and set the padding that fits the detected UI.
   */
  function disable_download_button() {
    const disabled_look = {
      'border': '#95a5a6',
      'cursor': 'not-allowed',
      'background-color': '#95a5a6'
    };
    $(HASH_BUTTON_ID).css(disabled_look);
    $('#captions_selector').css(disabled_look);

    const padding = new_material_design_version() ? '6px' : '5px';
    $(HASH_BUTTON_ID).css('padding', padding);
  }
  394.  
  /**
   * Convert a time in seconds into an SRT timestamp "HH:MM:SS,mmm".
   *
   * Examples: 671.33 -> "00:11:11,330", 0.09 -> "00:00:00,090",
   *           5793 -> "01:36:33,000"
   *
   * Fixes over the previous version:
   *  - numeric strings such as "23.029" (the form the XML attributes have)
   *    are accepted; they used to throw because strings have no toFixed();
   *  - NaN, Infinity and negative values are clamped to 0 instead of yielding
   *    output like "00:00:NaN,undefined" or "00:00:0-5,000".
   *
   * @param {number|string} s - time in seconds, fractional part allowed.
   * @returns {string} SRT timestamp; the value is rounded to milliseconds.
   */
  function process_time(s) {
    var total = Number(s);
    if (!isFinite(total) || total < 0) {
      total = 0;
    }

    // toFixed(3) rounds to milliseconds and always yields three decimals,
    // e.g. 671.33 -> "671.330", 671 -> "671.000". Rounding happens before the
    // split, so 59.9996 correctly becomes one full minute.
    var parts = total.toFixed(3).split('.');
    var whole_seconds = parseInt(parts[0], 10);
    var milliseconds = parts[1];

    var hours = Math.floor(whole_seconds / 3600);
    var minutes = Math.floor((whole_seconds % 3600) / 60);
    var seconds = whole_seconds % 60;

    // Left-pad to at least two digits.
    function two_digits(value) {
      return value < 10 ? '0' + value : String(value);
    }

    return two_digits(hours) + ':' + two_digits(minutes) + ':' + two_digits(seconds) + ',' + milliseconds;
  }
  439.  
  /**
   * Offer `text` to the user as a file download.
   * Adapted from https://gist.github.com/danallison/3ec9d5314788b337b682
   * (thanks to https://github.com/danallison). Tested in Chrome 66, 2018-5-19.
   *
   * The text is wrapped in a Blob and a temporary hidden <a download> pointing
   * at it is clicked. The blob URL is revoked 1.5 s later.
   *
   * @param {string} text - file content.
   * @param {string} fileType - MIME type, e.g. "text/plain".
   * @param {string} fileName - suggested file name.
   */
  function downloadString(text, fileType, fileName) {
    const blob = new Blob([text], { type: fileType });

    const link = document.createElement('a');
    link.download = fileName;
    link.href = URL.createObjectURL(blob);
    link.dataset.downloadurl = [fileType, link.download, link.href].join(':');
    link.style.display = "none";

    // The link has to be in the document while it is clicked.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    setTimeout(() => {
      URL.revokeObjectURL(link.href);
    }, 1500);
  }
  460.  
  /**
   * Turn HTML entities back into text, e.g. "&quot;" -> '"', "&#39;" -> "'".
   * Idea from https://css-tricks.com/snippets/javascript/unescape-html-in-js/
   *
   * Fixes over the previous version:
   *  - the markup is parsed inside a separate document created with
   *    document.implementation.createHTMLDocument() instead of an element of
   *    the live page, so caption text is never parsed by the page's own
   *    document (where e.g. an <img onerror> could fire);
   *  - the full text content is returned. The old code returned only the
   *    first child node's value, which truncated the text, or yielded null
   *    when the first node was an element;
   *  - the ineffective `e.class = ...` assignment is gone.
   *
   * @param {string} input - text that may contain HTML entities.
   * @returns {string} decoded text ("" for empty input).
   */
  function htmlDecode(input) {
    var inert_document = document.implementation.createHTMLDocument('');
    var container = inert_document.createElement('div');
    container.innerHTML = input;
    return container.textContent;
  }
  469.  
  /**
   * Look up the URL of the auto-generated (ASR) subtitle track.
   * ASR = a caption track generated using automatic speech recognition,
   * see https://developers.google.com/youtube/v3/docs/captions
   *
   * @returns {string|false} baseUrl of the ASR track; false when there is
   *   none or the caption track list cannot be read.
   */
  function get_auto_subtitle_xml_url() {
    try {
      const tracks = get_captionTracks();
      for (let i = 0; i < tracks.length; i++) {
        const track = tracks[i];
        if (track.kind === 'asr') {
          return track.baseUrl;
        }
      }
    } catch (error) {
      return false;
    }
    return false;
  }
  488.  
  /**
   * Fetch the auto-generated subtitle.
   *
   * Fix: when no auto subtitle URL is available the lookup returns false,
   * which used to be passed to the AJAX helper as the URL. The callback now
   * receives false straight away instead.
   *
   * @param {function} callback - called with the response, or with false on failure.
   */
  function get_auto_subtitle(callback) {
    var url = get_auto_subtitle_xml_url();
    if (!url) {
      callback(false);
      return;
    }
    get_from_url(url, callback);
  }
  493.  
  /**
   * Fetch the closed (non auto-generated) subtitle for one language.
   *
   * The `kind != 'asr'` test matters: a language code such as "en" can match
   * two tracks, the auto-generated one (kind "asr") and the real one (no
   * `kind` property). Only the latter is wanted here.
   *
   * Fixes over the previous version:
   *  - only the FIRST matching track is requested; the old loop started one
   *    download per matching track;
   *  - the callback is called with false when no track matches or the track
   *    list cannot be read; previously it was never called in those cases.
   *
   * @param {string} lang_code - language code, e.g. "en".
   * @param {function} callback - called with the response, or with false on failure.
   * @returns {boolean} true when a request was started, false otherwise.
   */
  function get_closed_subtitle(lang_code, callback) {
    var url = null;
    try {
      var captionTracks = get_captionTracks();
      for (var i = 0; i < captionTracks.length; i++) {
        var caption = captionTracks[i];
        if (caption.languageCode === lang_code && caption.kind != 'asr') {
          url = caption.baseUrl;
          break;
        }
      }
    } catch (error) {
      // Player response missing or in an unexpected shape.
      url = null;
    }

    if (!url) {
      callback(false);
      return false;
    }
    get_from_url(url, callback);
    return true;
  }
  515.  
  /**
   * GET `url` with jQuery and hand the result to `callback`.
   *
   * Fix: the failure handler was registered under the key `fail`, which is not
   * a $.ajax setting (the setting is `error`; `.fail()` is a method of the
   * returned jqXHR). Failed requests therefore never reached the callback.
   *
   * @param {string} url - address to fetch.
   * @param {function} callback - called with the response on success, or with
   *   false when the request fails.
   */
  function get_from_url(url, callback) {
    $.ajax({
      url: url,
      type: 'get',
      success: function (r) {
        callback(r);
      },
      error: function () {
        callback(false);
      }
    });
  }
  528.  
  /**
   * Convert YouTube's timed-text XML into SRT.
   *
   * Target format:
   *
   *   1
   *   00:00:01,939 --> 00:00:04,350
   *   everybody Craig Adams here I'm a
   *
   *   2
   *   00:00:04,350 --> 00:00:06,720
   *   filmmaker on YouTube who's digging
   *
   * Fixes over the previous version:
   *  - a cue without a usable "dur" no longer gets a NaN end time; it ends
   *    where the next cue starts (or at its own start if it is the last one);
   *  - a cue without a usable "start" is skipped, and the numbering stays
   *    consecutive;
   *  - a non-empty XML *string* is parsed first instead of crashing on
   *    `.getElementsByTagName`;
   *  - the commented-out dead code was removed.
   *
   * @param {Document|string} youtube_xml_string - parsed XML document (what
   *   the AJAX helper delivers) or the raw XML text.
   * @returns {string|false} SRT text starting with a BOM, or false when the
   *   input is the empty string.
   */
  function parse_youtube_XML_to_SRT(youtube_xml_string) {
    if (youtube_xml_string === '') {
      return false;
    }

    var xml = youtube_xml_string;
    if (typeof xml === 'string') {
      xml = new DOMParser().parseFromString(xml, 'text/xml');
    }

    var cues = xml.getElementsByTagName('text');
    var len = cues.length;
    var new_line = "\n";
    var BOM = '\uFEFF';
    var result = BOM; // final SRT text
    var cue_number = 0; // counts emitted cues only

    for (var i = 0; i < len; i++) {
      var start = parseFloat(cues[i].getAttribute('start'));
      if (!isFinite(start)) {
        continue; // no usable start time: cannot be placed on the timeline
      }

      var duration = parseFloat(cues[i].getAttribute('dur'));
      var end;
      if (isFinite(duration)) {
        end = start + duration;
      } else {
        var next_start = i + 1 < len ? parseFloat(cues[i + 1].getAttribute('start')) : NaN;
        end = isFinite(next_start) ? next_start : start;
      }

      var content = cues[i].textContent.toString();
      content = content.replace(/(<([^>]+)>)/ig, ""); // remove all html tags
      content = htmlDecode(content); // e.g. &#39; back to apostrophe (')

      cue_number = cue_number + 1;
      result = result + cue_number + new_line;
      result = result + process_time(start) + ' --> ' + process_time(end) + new_line;
      result = result + content + new_line + new_line;
    }
    return result;
  }
  583.  
  /**
   * Display name of the auto-generated subtitle track.
   *
   * @returns {string} the ASR track's `name.simpleText` (for example
   *   "English (auto-generated)"), or 'Auto Subtitle' when there is no such
   *   track or the player response cannot be read.
   */
  function get_auto_subtitle_name() {
    const DEFAULT_NAME = 'Auto Subtitle';
    try {
      const player_response = get_json();
      if (typeof player_response.captions === "undefined") {
        return DEFAULT_NAME;
      }
      const tracks = player_response.captions.playerCaptionsTracklistRenderer.captionTracks;
      for (let i = 0; i < tracks.length; i++) {
        if (tracks[i].kind === 'asr') {
          return tracks[i].name.simpleText;
        }
      }
      return DEFAULT_NAME;
    } catch (error) {
      return DEFAULT_NAME;
    }
  }
  602.  
  /**
   * Get the player response object describing the current video.
   *
   * Sources, later ones overriding earlier ones (same order as before):
   *   1. the response captured from the "yt-navigate-finish" event,
   *   2. ytplayer.config.args.player_response (a JSON string),
   *   3. ytplayer.config.args.raw_player_response (an object).
   *
   * Fix: the `ytplayer` lookups used to run unguarded inside one try block, so
   * a missing `ytplayer` / `config` / `args`, or malformed JSON in
   * `player_response`, threw into the catch and made the function return null
   * even though source 1 was available. Each source is now optional.
   *
   * @returns {Object|null} the player response, or null when no source has one.
   */
  function get_json() {
    var json = null;

    if (typeof youtube_playerResponse_1c7 !== "undefined" && youtube_playerResponse_1c7 !== null && youtube_playerResponse_1c7 !== '') {
      json = youtube_playerResponse_1c7;
    }

    // `typeof` keeps an undeclared `ytplayer` from raising a ReferenceError.
    var args = (typeof ytplayer !== "undefined" && ytplayer && ytplayer.config && ytplayer.config.args) || null;
    if (args) {
      if (args.player_response) {
        try {
          json = JSON.parse(args.player_response);
        } catch (error) {
          // Unparsable string: keep whatever was found so far.
        }
      }
      if (args.raw_player_response) {
        json = args.raw_player_response;
      }
    }

    return json;
  }
  623.  
  /**
   * Read the caption track list out of the player response.
   * Throws a TypeError when the player response or its caption section is
   * missing; callers wrap this call in try/catch.
   *
   * @returns {Array|undefined} the `captionTracks` value of the player response.
   */
  function get_captionTracks() {
    const player_response = get_json();
    const renderer = player_response.captions.playerCaptionsTracklistRenderer;
    return renderer.captionTracks;
  }
  629.  
  630. })();