Youtube Subtitle Downloader v18

v17 support both automatic and closed subtitle

当前为 2018-05-19 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Youtube Subtitle Downloader v18
  3. // @include https://*youtube.com/*
  4. // @author Cheng Zheng
  5. // @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2018 Cheng Zheng;
  6. // @license GNU GPL v3.0 or later. http://www.gnu.org/copyleft/gpl.html
  7. // @require http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js
  8. // @version 18
  9. // @grant GM_xmlhttpRequest
  10. // @namespace https://greasyfork.org/users/5711
  11. // @description v17 support both automatic and closed subtitle
  12. // ==/UserScript==
  13.  
  14. /*
  15. [What is this?]
  16. This "Tampermonkey script" allows you to download YouTube "Automatic subtitle" AND "closed subtitle"
  17. [Note]
  18. If not working(rarely), Refresh!!
  19. if problem still exist. Email me: guokrfans@gmail.com
  20.  
  21. [Who build this]
  22. Author : Cheng Zheng
  23. Email : guokrfans@gmail.com
  24. Github : https://github.com/1c7/Youtube-Auto-Subtitle-Download
  25. If you want improve the script, Github Pull Request are welcome
  26.  
  27. [Note]
  28. Few things before you read the code:
  29. 0. Some code comments are written in Chinese
  30. 1. Youtube have 2 UI: Material design and The old design
  31. 2. Code need handle both Auto & Closed subtitle
  32.  
  33. (Explain: "Tampermonkey script" mean
  34. you have to install a Chrome extension call "Tampermonkey", and then install this script)
  35.  
  36. [Test Video]
  37. https://www.youtube.com/watch?v=bkVsus8Ehxs
  38. only have English closed subtitle, nothing else (no auto subtitle)
  39.  
  40. https://www.youtube.com/watch?v=-WEqFzyrbbs
  41. no subtitle at all
  42.  
  43. https://www.youtube.com/watch?v=9AzNEG1GB-k
  44. have a lot subtitle
  45.  
  46. https://www.youtube.com/watch?v=tqGkOvrKGfY
  47. 1:36:33 super long subtitle
  48.  
  49. [Code Explain]
  50. mainly three part
  51. 1. UI specific (add button on page etc)
  52. 2. detech if subtitle exists
  53. 3. transform subtitle format & download
  54.  
  55. [Changelog]
  56. v1~v15: I forgot, and I am too lazy to check git log
  57.  
  58. v16: add support for auto subtitle
  59. v17: fix few minor issue in v16, to make sure all user get update, bump up 1 version
  60. v18: fix https://greasyfork.org/zh-CN/forum/discussion/38299/x?locale=zh-CN video too long issue
  61. (for example 1:36:33) and cause subtitle error
  62. reason is the 'downloadFile' function
  63. using a <a> element 'href' attribute to download .srt file.
  64. and this 'href' can't handle string that's too long
  65. */
  66.  
  // Labels written into the first <option> of the download dropdown.
  var NO_SUBTITLE = 'No subtitle';
  var HAVE_SUBTITLE = 'Download subtitle';

  // Script-wide state.
  var first_load = true; // flipped to false by init() once the button has been injected
  var youtube_playerResponse_1c7 = null; // player response captured on "yt-navigate-finish" (auto subtitle lookup)
  unsafeWindow.caption_array = []; // one entry per dropdown <option> after the first

  // Begin polling for the video title element once the DOM is ready.
  $(document).ready(start);
  80.  
  /**
   * Poll the page until the video title element exists, then call init().
   *
   * $(document).ready() fires too early for the title to be present, so the
   * check is repeated every 330 ms. Polling is abandoned once more than
   * RETRY_LIMIT attempts have been made.
   */
  function start() {
    var POLL_INTERVAL_MS = 330;
    var RETRY_LIMIT = 20;
    var attempts = 0;

    // Pick, once, the presence check that matches the detected layout.
    var title_is_present = new_material_design_version()
      ? function () {
          return document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer').length;
        }
      : function () {
          return $('#watch7-headline').length;
        };

    var timer = setInterval(function () {
      if (title_is_present()) {
        init();
        clearInterval(timer);
      }
      attempts += 1;
      if (attempts > RETRY_LIMIT) {
        clearInterval(timer);
      }
    }, POLL_INTERVAL_MS);
  }
  113.  
  // Runs whenever the page finishes an in-page navigation ("yt-navigate-finish").
  // The event also fires on the very first load, where start() already handles
  // the injection; that is why re-initialisation is skipped while first_load is
  // still true. (Per the original author: the Material design layout emits this
  // event, the old layout does not.)
  var body = document.getElementsByTagName("body")[0];
  body.addEventListener("yt-navigate-finish", function (event) {
    if (!current_page_is_video_page()) {
      return;
    }

    // Keep the newest player response for the auto subtitle lookup.
    youtube_playerResponse_1c7 = event.detail.response.playerResponse;
    // Drop the previous video's entries, otherwise the list keeps growing
    // and no longer lines up with the dropdown.
    unsafeWindow.caption_array = [];

    if (first_load !== false) {
      return;
    }
    // The user moved to another video: rebuild the button for it.
    remove_subtitle_download_button();
    init();
  });
  131.  
  // Runs when the old layout finishes an in-page navigation ("spfdone").
  // The previous button is removed, then the page is polled every 330 ms for
  // the old-layout title element before init() is called. Polling is bounded
  // by the same retry limit start() uses, so the interval cannot run forever
  // on a page where '#watch7-headline' never appears.
  window.addEventListener("spfdone", function (e) {
    if (!current_page_is_video_page()) {
      return;
    }
    remove_subtitle_download_button();

    var RETRY_LIMIT = 20;
    var retry_count = 0;
    var checkExist = setInterval(function () {
      if ($('#watch7-headline').length) {
        init();
        clearInterval(checkExist);
        return;
      }
      retry_count = retry_count + 1;
      if (retry_count > RETRY_LIMIT) {
        clearInterval(checkExist); // give up: the title element never showed up
      }
    }, 330);
  });
  145.  
  /**
   * Tell the two YouTube layouts apart.
   *
   * @returns {boolean} false when the old layout's '#watch7-headline' element
   *   is in the document, true otherwise.
   */
  function new_material_design_version() {
    return !document.getElementById('watch7-headline');
  }
  157.  
  /**
   * @returns {boolean} true when get_video_id() yields a video id, false when it yields null.
   */
  function current_page_is_video_page() {
    var video_id = get_video_id();
    return video_id !== null;
  }
  162.  
  /**
   * Read the "v" query parameter of the current URL.
   *
   * @returns {string|null} e.g. "RW1ChiWyiZQ" for
   *   "https://www.youtube.com/watch?v=RW1ChiWyiZQ", or null when absent.
   */
  function get_video_id() {
    var video_id = getURLParameter('v');
    return video_id;
  }
  168.  
  /**
   * Look up one query-string parameter of the current page URL.
   *
   * Uses URLSearchParams instead of a hand-built RegExp, so `name` is matched
   * literally (no regex metacharacters) and a malformed percent-escape in the
   * URL no longer throws URIError.
   *
   * @param {string} name - parameter name, e.g. "v".
   * @returns {string|null} the decoded value ('+' decodes to a space), or
   *   null when the parameter is missing or empty.
   */
  function getURLParameter(name) {
    var value = new URLSearchParams(location.search).get(name);
    return value || null;
  }
  173.  
  /**
   * Remove the injected download button (the wrapper <div>) from the page.
   */
  function remove_subtitle_download_button() {
    var button = $('#youtube-subtitle-downloader-by-1c7');
    button.remove();
  }
  177.  
  /**
   * Inject the download button, then record that the first initialisation
   * has happened so later navigations know they must rebuild it.
   */
  function init() {
    inject_our_script();

    first_load = false;
  }
  182.  
  /**
   * Build the green "download subtitle" dropdown, insert it next to the video
   * title, and start loading the list of available subtitles.
   *
   * Differences from the previous revision:
   *  - the hidden '#ForSubtitleDownload' anchor is created only once; it used
   *    to be appended again on every call, piling up duplicate ids in <body>;
   *  - the Material-design insertion is attempted only when the title element
   *    was actually found (a NodeList is truthy even when empty);
   *  - the tooltip shows the current script version.
   */
  function inject_our_script() {
    var div = document.createElement('div');
    var select = document.createElement('select');
    var option = document.createElement('option');
    var controls = document.getElementById('watch7-headline'); // old layout: video title DIV

    div.setAttribute('style', `display: table;
  margin-top:4px;
  border: 1px solid rgb(0, 183, 90);
  cursor: pointer; color: rgb(255, 255, 255);
  border-top-left-radius: 3px;
  border-top-right-radius: 3px;
  border-bottom-right-radius: 3px;
  border-bottom-left-radius: 3px;
  background-color: #00B75A;
  `);

    div.id = 'youtube-subtitle-downloader-by-1c7';
    div.title = 'Youtube Subtitle Download v18'; // shown when the cursor hovers

    select.id = 'captions_selector';
    select.disabled = true; // enabled by load_language_list() once subtitles are known
    select.setAttribute('style', `display:block;
  border: 1px solid rgb(0, 183, 90);
  cursor: pointer;
  color: rgb(255, 255, 255);
  background-color: #00B75A;
  padding: 4px;
  `);

    option.textContent = 'Loading...';
    option.selected = true;
    select.appendChild(option);

    // Choosing an entry in the dropdown triggers the download.
    select.addEventListener('change', function () {
      download_subtitle(this);
    }, false);

    div.appendChild(select); // put <select> into <div>

    // Material design layout: place the button right after the title.
    var title_element = document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer');
    if (title_element.length) {
      $(title_element[0]).after(div);
    }
    // Old layout: place the button inside the headline container.
    if (controls) {
      controls.appendChild(div);
    }

    load_language_list(select);

    // Hidden <a> kept for download purposes; add it only if it is not there yet.
    if (!document.getElementById('ForSubtitleDownload')) {
      var a = document.createElement('a');
      a.style.cssText = 'display:none;';
      a.setAttribute("id", "ForSubtitleDownload");
      var body = document.getElementsByTagName('body')[0];
      body.appendChild(a);
    }
  }
  243.  
  /**
   * Handle a change of the dropdown: fetch the chosen subtitle, convert it to
   * SRT and hand it to the browser as a download.
   *
   * @param {HTMLSelectElement} selector - the dropdown that fired the change.
   */
  function download_subtitle(selector) {
    var picked = selector.selectedIndex;
    // The first <option> is only a label; selecting it does nothing.
    if (picked == 0) {
      return;
    }

    // caption_array has no entry for the label option, hence the -1.
    var caption = caption_array[picked - 1];
    var is_auto = caption.lang_code == 'AUTO';
    var file_label = is_auto ? 'auto' : caption.lang_name;

    // Shared by both subtitle kinds: convert the response and save it.
    var save_response = function (r) {
      if (r == false) {
        return;
      }
      var srt = parse_youtube_XML_to_SRT(r);
      var title = get_file_name(file_label);
      downloadString(srt, "text/plain", title);
    };

    if (is_auto) {
      get_auto_subtitle(save_response);
    } else {
      get_closed_subtitle(caption.lang_code, save_response);
    }

    // Put the dropdown back on its label entry.
    selector.options[0].selected = true;
  }
  282.  
  283.  
  /**
   * Build the download file name from a label and the page title.
   *
   * @param {string} x - label placed in parentheses, e.g. "English".
   * @returns {string} e.g. "(English)How Did Python Become A Data Science Powerhouse?.srt"
   */
  function get_file_name(x) {
    return `(${x})${document.title}.srt`;
  }
  288.  
  /**
   * Find out which subtitles exist for the current video and fill the
   * dropdown with one <option> per subtitle.
   *
   * The auto subtitle is detected through get_auto_subtitle_xml_url(); closed
   * subtitles come from the timedtext "list" endpoint. Every option added
   * here also gets a matching entry in the global caption_array, which
   * download_subtitle() later indexes by dropdown position.
   *
   * Differences from the previous revision:
   *  - only a non-empty URL string counts as "auto subtitle exists"
   *    (`undefined != false` used to count as a hit);
   *  - a failed or timed-out list request no longer leaves the dropdown stuck
   *    on "Loading..."; it is handled as "no closed subtitles";
   *  - a missing auto-subtitle name falls back to 'Auto Subtitle' rather than
   *    producing a blank option.
   *
   * @param {HTMLSelectElement} select - the dropdown to populate.
   */
  function load_language_list(select) {
    var auto_subtitle_url = get_auto_subtitle_xml_url();
    var auto_subtitle_exist = typeof auto_subtitle_url === 'string' && auto_subtitle_url !== '';

    // Append one subtitle to both the global list and the dropdown.
    function add_entry(caption_info) {
      caption_array.push(caption_info);
      var option = document.createElement('option');
      option.textContent = caption_info.lang_name;
      select.appendChild(option);
    }

    // Render the dropdown for the given closed-subtitle <track> elements.
    function show_result(tracks) {
      // No subtitle of either kind: say so and stop.
      if (!auto_subtitle_exist && tracks.length === 0) {
        select.options[0].textContent = NO_SUBTITLE;
        disable_download_button();
        return;
      }

      select.options[0].textContent = HAVE_SUBTITLE;
      select.disabled = false;

      if (auto_subtitle_exist) {
        add_entry({
          lang_code: 'AUTO', // download_subtitle() recognises the auto subtitle by this marker
          lang_name: get_auto_subtitle_name() || 'Auto Subtitle' // for display only
        });
      }

      for (var i = 0, il = tracks.length; i < il; i++) {
        add_entry({
          lang_code: tracks[i].getAttribute('lang_code'), // for the AJAX request
          lang_name: tracks[i].getAttribute('lang_translated') // for display only
        });
      }
    }

    // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list
    var list_url = 'https://video.google.com/timedtext?hl=en&v=' + get_video_id() + '&type=list';
    GM_xmlhttpRequest({
      method: 'GET',
      url: list_url,
      onload: function (xhr) {
        var tracks = new DOMParser()
          .parseFromString(xhr.responseText, "text/xml")
          .getElementsByTagName('track');
        show_result(tracks);
      },
      onerror: function () {
        show_result([]);
      },
      ontimeout: function () {
        show_result([]);
      }
    });
  }
  363.  
  /**
   * Grey out the download button and its dropdown and show a "not-allowed"
   * cursor. The wrapper's padding depends on which layout is in use.
   */
  function disable_download_button() {
    var GREY = '#95a5a6';
    var WRAPPER = '#youtube-subtitle-downloader-by-1c7';
    var targets = [WRAPPER, '#captions_selector'];

    for (var i = 0; i < targets.length; i++) {
      $(targets[i])
        .css('border', GREY)
        .css('cursor', 'not-allowed')
        .css('background-color', GREY);
    }

    var padding = new_material_design_version() ? '6px' : '5px';
    $(WRAPPER).css('padding', padding);
  }
  380.  
  /**
   * Convert a time in seconds into an SRT timestamp.
   *
   * Examples: 671.33 -> "00:11:11,330", 12 -> "00:00:12,000",
   * 0.09 -> "00:00:00,090".
   *
   * @param {number} s - time in seconds (e.g. 671.33, 37.64, 12, 23.029).
   * @returns {string} "HH:MM:SS,mmm".
   */
  function process_time(s) {
    // toFixed(3) always yields three decimals, rounding if needed:
    // 671.33 -> "671.330", 671 -> "671.000". Splitting on the dot separates
    // whole seconds from milliseconds.
    var parts = s.toFixed(3).split('.');
    var millis = parts[1];
    var seconds = parts[0]; // still a string at this point
    var minutes = 0;
    var hours = 0;

    if (seconds >= 60) {
      // e.g. 121 seconds -> 2 minutes and 1 second
      minutes = Math.floor(seconds / 60);
      seconds = seconds - minutes * 60;
      // e.g. 700 minutes -> 11 hours and 40 minutes
      hours = Math.floor(minutes / 60);
      minutes = minutes - hours * 60;
    }

    // Left-pad a single-digit field with a zero.
    var two_digits = function (value) {
      return value < 10 ? '0' + value : value;
    };

    return two_digits(hours) + ':' + two_digits(minutes) + ':' + two_digits(seconds) + ',' + millis;
  }
  425.  
  /**
   * Offer a string to the user as a file download through a temporary
   * Blob URL and a hidden <a download> element.
   *
   * Adapted from https://gist.github.com/danallison/3ec9d5314788b337b682
   * (thanks, https://github.com/danallison). The original author reports it
   * working in Chrome 66 (tested 2018-5-19).
   *
   * @param {string} text - file content.
   * @param {string} fileType - MIME type of the Blob, e.g. "text/plain".
   * @param {string} fileName - suggested file name.
   */
  function downloadString(text, fileType, fileName) {
    var link = document.createElement('a');
    link.style.display = "none";
    link.download = fileName;
    link.href = URL.createObjectURL(new Blob([text], { type: fileType }));
    link.dataset.downloadurl = [fileType, link.download, link.href].join(':');

    // The link is attached only for the duration of the click.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the Blob URL a little later.
    setTimeout(function () {
      URL.revokeObjectURL(link.href);
    }, 1500);
  }
  442.  
  /**
   * Turn HTML entities back into plain text, e.g. `&quot;` becomes `"`.
   *
   * The input is assigned to a detached <textarea> and read back through
   * `.value`. Markup inside a <textarea> is kept as text rather than turned
   * into elements, so the whole decoded string is returned. The earlier
   * <div>-based version returned only the first child node, which truncated
   * the result (or gave null) as soon as the input produced an element.
   *
   * @param {string} input - text that may contain HTML entities.
   * @returns {string} decoded text; "" for empty input.
   */
  function htmlDecode(input) {
    var decoder = document.createElement('textarea');
    decoder.innerHTML = input;
    return decoder.value;
  }
  451.  
  /**
   * Find the URL of the automatic (speech-recognition) subtitle track.
   *
   * The player response captured on "yt-navigate-finish"
   * (youtube_playerResponse_1c7) is preferred; otherwise the JSON embedded in
   * `ytplayer.config.args.player_response` is parsed.
   *
   * The result is now always a URL string or `false`. Previously the function
   * fell off the end and returned `undefined` when caption tracks existed but
   * none was of kind "asr", which callers testing `!= false` read as "found".
   *
   * ASR = a caption track generated using automatic speech recognition,
   * see https://developers.google.com/youtube/v3/docs/captions
   *
   * @returns {string|false} the track's baseUrl, or false when the video has
   *   no automatic subtitle or the player data cannot be read.
   */
  function get_auto_subtitle_xml_url() {
    try {
      var json;
      if (typeof youtube_playerResponse_1c7 !== "undefined" && youtube_playerResponse_1c7 !== null && youtube_playerResponse_1c7 !== '') {
        json = youtube_playerResponse_1c7;
      } else {
        json = JSON.parse(ytplayer.config.args.player_response);
      }

      var captionTracks = json.captions.playerCaptionsTracklistRenderer.captionTracks;
      for (var i = 0; i < captionTracks.length; i++) {
        if (captionTracks[i].kind === 'asr') {
          return captionTracks[i].baseUrl;
        }
      }
    } catch (error) {
      // Missing captions data or unreadable player config: no auto subtitle.
      return false;
    }
    return false;
  }
  479.  
  /**
   * Fetch the automatic subtitle of the current video.
   *
   * When no auto-subtitle URL is available, `callback(false)` is invoked
   * directly; a non-URL value is never passed on as the request URL.
   *
   * @param {function} callback - receives the response, or false on failure.
   */
  function get_auto_subtitle(callback) {
    var url = get_auto_subtitle_xml_url();
    if (!url) {
      callback(false);
      return;
    }
    get_from_url(url, callback);
  }
  484.  
  /**
   * Fetch one closed subtitle of the current video from the timedtext endpoint.
   *
   * @param {string} lang_code - language code, used for both "hl" and "lang".
   * @param {function} callback - forwarded to get_from_url().
   */
  function get_closed_subtitle(lang_code, callback) {
    // example: https://video.google.com/timedtext?hl=en&lang=en&v=FWuwq8HTLQo
    var url = `https://video.google.com/timedtext?hl=${lang_code}&lang=${lang_code}&v=${get_video_id()}`;
    console.log('地址是:' + url); // logs the request address
    get_from_url(url, callback);
  }
  491.  
  /**
   * GET a URL with jQuery and report the outcome through a callback.
   *
   * The failure handler is registered under `error`, the setting `$.ajax`
   * uses for failed requests. The previous `fail` key is not an ajax setting
   * (it is a method of the returned jqXHR), so `callback(false)` was never
   * invoked when a request failed.
   *
   * @param {string} url - address to request.
   * @param {function} callback - called with the response on success, or
   *   with false on failure.
   */
  function get_from_url(url, callback) {
    $.ajax({
      url: url,
      type: 'get',
      success: function (r) {
        callback(r);
      },
      error: function () {
        callback(false);
      }
    });
  }
  504.  
  /**
   * Convert YouTube's timedtext XML into SRT text.
   *
   * Target format:
   *
   *   1
   *   00:00:01,939 --> 00:00:04,350
   *   everybody Craig Adams here I'm a
   *
   *   2
   *   00:00:04,350 --> 00:00:06,720
   *   filmmaker on YouTube who's digging
   *
   * Each cue ends where the next one starts; the last cue ends at its own
   * start + "dur".
   *
   * Differences from the previous revision:
   *  - HTML entities still present in the cue text after XML parsing (e.g.
   *    `&#39;`) are decoded rather than written literally into the file;
   *  - a last cue without a usable "dur" attribute ends at its start time
   *    instead of producing a NaN timestamp;
   *  - raw XML text is accepted as well as an already parsed document.
   *
   * @param {Document|string} youtube_xml_string - parsed XML document, or
   *   the XML source text.
   * @returns {string|false} SRT text starting with a BOM, or false for ''.
   */
  function parse_youtube_XML_to_SRT(youtube_xml_string) {
    if (youtube_xml_string === '') {
      return false;
    }

    var xml = youtube_xml_string;
    if (typeof xml === 'string') {
      xml = new DOMParser().parseFromString(xml, 'text/xml');
    }

    // A detached <textarea> decodes entities while keeping any markup as text.
    var decoder = document.createElement('textarea');
    function decode_entities(value) {
      decoder.innerHTML = value;
      return decoder.value;
    }

    var text = xml.getElementsByTagName('text');
    var BOM = '\uFEFF';
    var new_line = "\n";
    var cues = [];
    var len = text.length;

    for (var i = 0; i < len; i++) {
      var content = text[i].textContent.toString();
      content = content.replace(/(<([^>]+)>)/ig, ""); // remove all html tags
      content = decode_entities(content);

      var start = parseFloat(text[i].getAttribute('start'));
      var end;
      if (i + 1 >= len) {
        var dur = parseFloat(text[i].getAttribute('dur'));
        end = start + (isNaN(dur) ? 0 : dur);
      } else {
        end = parseFloat(text[i + 1].getAttribute('start'));
      }

      cues.push(
        (i + 1) + new_line +
        process_time(start) + ' --> ' + process_time(end) + new_line +
        content + new_line + new_line
      );
    }

    return BOM + cues.join('');
  }
  557.  
  /**
   * Display name of the automatic subtitle track.
   *
   * Reads the same data source as get_auto_subtitle_xml_url(): the player
   * response captured on "yt-navigate-finish" when there is one, otherwise
   * `ytplayer.config.args.player_response`. Reading only the latter would
   * ignore the response captured for the video just navigated to.
   *
   * The default name is now returned whenever no ASR track name can be
   * found; previously the function returned `undefined` in that case and the
   * default applied only when an exception was thrown.
   *
   * @returns {string} e.g. "English (auto-generated)", or 'Auto Subtitle'.
   */
  function get_auto_subtitle_name() {
    var DEFAULT_NAME = 'Auto Subtitle';
    try {
      var json;
      if (typeof youtube_playerResponse_1c7 !== "undefined" && youtube_playerResponse_1c7 !== null && youtube_playerResponse_1c7 !== '') {
        json = youtube_playerResponse_1c7;
      } else {
        json = JSON.parse(ytplayer.config.args.player_response);
      }

      if (typeof json.captions !== "undefined") {
        var captionTracks = json.captions.playerCaptionsTracklistRenderer.captionTracks;
        for (var i = 0; i < captionTracks.length; i++) {
          var caption = captionTracks[i];
          if (caption.kind === 'asr') {
            var name = caption.name && caption.name.simpleText;
            if (typeof name === 'string' && name !== '') {
              return name;
            }
          }
        }
      }
    } catch (error) {
      return DEFAULT_NAME;
    }
    return DEFAULT_NAME;
  }
  576. }