Youtube 翻译中文字幕下载 v14

Youtube 播放器右下角有个 Auto-translate,可以把视频字幕翻成中文。这个脚本是下载这个中文字幕

  1. // ==UserScript==
  2. // @name Youtube 翻译中文字幕下载 v14
  3. // @description Youtube 播放器右下角有个 Auto-tranlsate,可以把视频字幕翻成中文。这个脚本是下载这个中文字幕
  4. // @include https://*youtube.com/*
  5. // @author Cheng Zheng
  6. // @license MIT
  7. // @require https://code.jquery.com/jquery-1.12.4.min.js
  8. // @version 14
  9. // @grant GM_xmlhttpRequest
  10. // @grant unsafeWindow
  11. // @namespace https://greasyfork.org/users/5711
  12. // ==/UserScript==
  13.  
  14. /*
  15. 作者: 郑诚
  16. 邮箱: guokrfans@gmail.com
  17. Github: https://github.com/1c7/Youtube-Auto-Subtitle-Download
  18.  
  19. 测试视频:
  20. https://www.youtube.com/watch?v=nGlQkaoIfBI 1门语言
  21. https://www.youtube.com/watch?v=O5nskjZ_GoI 13门语言
  22. https://www.youtube.com/watch?v=VfEz3DIbkvo 测试自动字幕(西班牙语)
  23. https://www.youtube.com/watch?v=WSnKbcfsT1E
  24.  
  25. 更新日志:
  26. 2022-12-23 v13 -> v14
  27. 修复按钮不出现的问题。
  28. */
  29.  
  30. ;
  31. (function () {
  32.  
  // ---- Configuration ----
  // Label shown when the video has no subtitles at all.
  const NO_SUBTITLE = '无字幕';
  // Label of the first <option> once subtitles are available.
  const HAVE_SUBTITLE = '下载翻译的中文字幕';
  // Placeholder label while the language list is loading.
  const TEXT_LOADING = '载入中...';
  // DOM id of the injected download button container.
  const BUTTON_ID = 'youtube-translate-to-chinese-subtitle-downloader-by-1c7';
  // CSS selector of the element the button is appended to.
  const anchor_element = '#above-the-fold #title';
  // ---- End of configuration ----

  // "#<BUTTON_ID>", ready to be used as a selector.
  const HASH_BUTTON_ID = `#${BUTTON_ID}`;
  // True until init() has run once; the navigation listener ignores events before that.
  let first_load = true;
  43.  
  // Fired by the Material-design YouTube UI after an in-page navigation.
  // It also fires on the very first load, which main() already handles, so
  // events are ignored until init() has flipped first_load to false.
  // Non-video pages are skipped, matching the "spfdone" handler below.
  document.body.addEventListener('yt-navigate-finish', function () {
    if (first_load === false && current_page_is_video_page()) {
      remove_subtitle_download_button();
      init();
    }
  });

  // Fired by the older (pre-Material) YouTube UI after an in-page navigation.
  window.addEventListener('spfdone', function () {
    if (current_page_is_video_page()) {
      remove_subtitle_download_button();
      init();
    }
  });
  63.  
  /**
   * Tells whether the current URL points at a video.
   * @returns {boolean} true when get_video_id() yields a non-null id.
   */
  function current_page_is_video_page() {
    return get_video_id() !== null;
  }
  68.  
  /**
   * Reads the "v" query parameter of the current URL.
   * @returns {string|null} e.g. "RW1ChiWyiZQ" for
   *   "https://www.youtube.com/watch?v=RW1ChiWyiZQ", or null when absent.
   */
  function get_video_id() {
    return getURLParameter('v');
  }
  74.  
  /**
   * Returns the decoded value of a query-string parameter of the current page.
   * Uses URLSearchParams instead of a hand-built regex, so the parameter name
   * needs no escaping and malformed percent-escapes do not throw.
   * @param {string} name - parameter name, e.g. "v".
   * @returns {string|null} the value, or null when missing or empty.
   */
  function getURLParameter(name) {
    const value = new URLSearchParams(location.search).get(name);
    // An empty value is reported as null, same as a missing parameter.
    return value || null;
  }
  79.  
  /**
   * Removes the injected download button (if present) from the page.
   */
  function remove_subtitle_download_button() {
    $(HASH_BUTTON_ID).remove();
  }
  83.  
  84.  
  85.  
  /**
   * Builds the green "download subtitle" <div><select> widget, attaches it to
   * the page and starts loading the language list into the <select>.
   *
   * The widget is appended to `anchor_element`; if that is not in the DOM the
   * legacy title element is used instead. When neither exists nothing is
   * injected (previously this threw on a null anchor).
   */
  function inject_our_script() {
    const div = document.createElement('div');
    const select = document.createElement('select');
    const option = document.createElement('option');

    div.setAttribute('style', `display: table;
      margin-top:4px;
      border: 1px solid rgb(0, 183, 90);
      cursor: pointer; color: rgb(255, 255, 255);
      border-top-left-radius: 3px;
      border-top-right-radius: 3px;
      border-bottom-right-radius: 3px;
      border-bottom-left-radius: 3px;
      background-color: #00B75A;
      padding: 3px;
      padding-right: 8px;
      `);
    div.id = BUTTON_ID;

    select.id = 'captions_selector';
    select.disabled = true; // enabled by load_language_list() once subtitles are found
    select.setAttribute('style', 'display:block; border: 1px solid rgb(0, 183, 90); cursor: pointer; color: rgb(255, 255, 255); background-color: #00B75A;');

    option.textContent = TEXT_LOADING;
    option.selected = true;
    select.appendChild(option);

    // Picking an entry in the drop-down triggers the download.
    select.addEventListener('change', function () {
      download_subtitle(this);
    }, false);

    div.appendChild(select);

    // Attach the widget. The div must be in the DOM before the language list
    // is loaded, because disable_download_button() looks it up by id.
    const anchor = document.querySelector(anchor_element);
    const legacy_title = document.querySelector('.title.style-scope.ytd-video-primary-info-renderer');
    if (anchor) {
      anchor.appendChild(div);
    } else if (legacy_title) {
      legacy_title.after(div);
    } else {
      console.log('subtitle downloader: no element found to attach the button to');
      return;
    }

    load_language_list(select);

    // Hidden <a> kept for download purposes; created only once so repeated
    // navigations do not pile up duplicate elements with the same id.
    if (!document.getElementById('ForSubtitleDownload')) {
      const a = document.createElement('a');
      a.style.cssText = 'display:none;';
      a.setAttribute('id', 'ForSubtitleDownload');
      document.body.appendChild(a);
    }
  }
  140.  
  /**
   * Handles a selection in the language <select>: downloads the chosen
   * subtitle translated to Simplified Chinese as an .srt file.
   *
   * The first <option> is only a label, so index 0 is ignored and the
   * caption_array index is `selectedIndex - 1`. Whatever happens, the
   * selection is moved back to the first <option> afterwards.
   *
   * @param {HTMLSelectElement} selector - the language drop-down.
   * @returns {Promise<void>}
   */
  async function download_subtitle(selector) {
    if (selector.selectedIndex === 0) {
      return;
    }

    const caption = caption_array[selector.selectedIndex - 1];
    if (!caption) {
      return;
    }

    const { lang_code, lang_name } = caption;
    const file_name = get_file_name(lang_name);

    try {
      // Auto-generated (speech recognition) subtitle.
      if (lang_code === 'AUTO') {
        await download_auto_subtitle(file_name);
        return;
      }

      // Closed (uploaded) subtitle: take the original track URL and ask
      // YouTube to translate it with the tlang parameter.
      const sub_original_url = await get_closed_subtitle_url(lang_code);
      if (!sub_original_url) {
        console.log(`subtitle downloader: no subtitle URL for language "${lang_code}"`);
        return;
      }
      const sub_translated_xml = await get(`${sub_original_url}&tlang=zh-Hans`);

      const sub_translated_srt = parse_youtube_XML_to_object_list(sub_translated_xml);
      if (!sub_translated_srt) {
        console.log('subtitle downloader: translated subtitle response was empty');
        return;
      }

      const srt_string = object_array_to_SRT_string(sub_translated_srt);
      downloadString(srt_string, 'text/plain', file_name);
    } catch (error) {
      // Called from an event listener, so nothing upstream would handle a rejection.
      console.log(error);
    } finally {
      selector.options[0].selected = true;
    }
  }
  180.  
  /**
   * Builds the download file name from a language label and the video title.
   * @param {string} x - language label, e.g. "English".
   * @returns {string} e.g. "(English)How Did Python Become A Data Science Powerhouse?.srt"
   */
  function get_file_name(x) {
    return `(${x})${get_title()}.srt`;
  }
  185.  
  /**
   * Discovers the available subtitle tracks and fills the <select> with one
   * <option> per downloadable track. Each added option has a matching entry
   * pushed to the global caption_array (same order), which download_subtitle()
   * later uses to resolve the selection.
   *
   * @param {HTMLSelectElement} select - the drop-down to populate; its first
   *   <option> is used as a status label.
   * @returns {false|undefined} false when the video has no subtitles at all.
   */
  function load_language_list(select) {
    const captionTracks = get_captionTracks();
    const auto_subtitle_exist = Boolean(get_auto_subtitle_xml_url());
    const closed_subtitle_exist = Array.isArray(captionTracks) && captionTracks.length > 0;

    // No subtitle of any kind: show that and grey the button out.
    if (!auto_subtitle_exist && !closed_subtitle_exist) {
      select.options[0].textContent = NO_SUBTITLE;
      disable_download_button();
      return false;
    }

    select.options[0].textContent = HAVE_SUBTITLE;
    select.disabled = false;

    // Keeps caption_array and the <option> list in lock-step.
    const add_caption = (caption_info) => {
      caption_array.push(caption_info);
      const option = document.createElement('option');
      option.textContent = caption_info.lang_name;
      select.appendChild(option);
    };

    // Auto-generated subtitle comes first.
    if (auto_subtitle_exist) {
      add_caption({
        lang_code: 'AUTO', // marker checked by download_subtitle()
        lang_name: `${get_auto_subtitle_name()} 翻译成 中文`, // display only
      });
    }

    // Closed (uploaded) subtitles; the ASR track was handled above.
    if (closed_subtitle_exist) {
      for (const caption of captionTracks) {
        if (caption.kind === 'asr') {
          continue;
        }
        const lang_code = caption.languageCode;
        const lang_translated = caption.name?.simpleText;
        add_caption({
          lang_code: lang_code, // used to look the track URL up again
          lang_name: `${lang_code_to_local_name(lang_code, lang_translated)} 翻译成 中文`,
        });
      }
    }
  }
  261.  
  /**
   * Converts a time in seconds into an SRT timestamp.
   * Examples: 671.33 -> "00:11:11,330", 12 -> "00:00:12,000".
   *
   * @param {number} s - seconds, integer or fractional.
   * @returns {string} "HH:MM:SS,mmm".
   */
  function process_time(s) {
    // toFixed(3) always yields exactly three decimals (and rounds), so the
    // part after the dot is already the zero-padded millisecond field.
    const [whole_part, millisecond] = s.toFixed(3).split('.');
    const total_seconds = Number(whole_part);

    const hour = Math.floor(total_seconds / 3600);
    const minute = Math.floor((total_seconds % 3600) / 60);
    const second = total_seconds % 60;

    const two_digits = (value) => String(value).padStart(2, '0');
    return `${two_digits(hour)}:${two_digits(minute)}:${two_digits(second)},${millisecond}`;
  }
  306.  
  /**
   * Turns HTML entities back into text, e.g. "&quot;" becomes '"'.
   *
   * A <textarea> is used as the decoder: its content is parsed as text with
   * entities resolved, so markup in the input is not turned into elements and
   * the whole decoded string is returned (not only the first child node).
   *
   * @param {string} input - text that may contain HTML entities.
   * @returns {string} decoded text; "" for empty input.
   */
  function htmlDecode(input) {
    const decoder = document.createElement('textarea');
    decoder.innerHTML = input;
    return decoder.value;
  }
  315.  
  /**
   * Finds the URL of the auto-generated (ASR, automatic speech recognition)
   * caption track; the subtitle can later be fetched from it.
   *
   * @returns {string|false} the track's baseUrl, or false when there is no
   *   ASR track, the track has no URL, or the track list cannot be read.
   */
  function get_auto_subtitle_xml_url() {
    try {
      const captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return false;
      }
      const asr_track = captionTracks.find((track) => track?.kind === 'asr');
      return asr_track?.baseUrl || false;
    } catch (error) {
      return false;
    }
  }
  334.  
  /**
   * Greys out the download button and its <select> to show that nothing can
   * be downloaded.
   *
   * The former call to new_material_design_version() was removed: that
   * function is not defined in this script, so the call threw a
   * ReferenceError. The padding of its "material design" branch is applied.
   */
  function disable_download_button() {
    $(HASH_BUTTON_ID)
      .css('border', '#95a5a6')
      .css('cursor', 'not-allowed')
      .css('background-color', '#95a5a6')
      .css('padding', '6px');
    $('#captions_selector')
      .css('border', '#95a5a6')
      .css('cursor', 'not-allowed')
      .css('background-color', '#95a5a6');
  }
  351.  
  /**
   * Downloads the auto-generated subtitle translated to Simplified Chinese.
   * Triggers a browser file download; does nothing (besides logging) when
   * the video has no auto-generated track.
   *
   * @param {string} file_name - name of the file to save.
   * @returns {Promise<void>}
   */
  async function download_auto_subtitle(file_name) {
    const auto_sub_url = get_auto_subtitle_xml_url();
    if (!auto_sub_url) {
      console.log('subtitle downloader: no auto-generated subtitle URL');
      return;
    }

    // fmt=json3 selects the JSON format; tlang requests the translation.
    const cn_url = `${auto_sub_url}&fmt=json3&tlang=zh-Hans`;

    const cn_srt = await auto_sub_in_chinese_fmt_json3_to_srt(cn_url);
    const srt_string = to_srt(cn_srt);

    downloadString(srt_string, 'text/plain', file_name);
  }
  365.  
  /**
   * Serialises subtitle items into SRT text.
   *
   * Items may carry extra fields (e.g. tStartMs, dDurationMs); only
   * startTime, endTime and text are passed on to the serialiser.
   *
   * @param {Array<{startTime: string, endTime: string, text: string}>} srt_array
   * @returns {string} SRT file content.
   */
  function to_srt(srt_array) {
    const result_array = srt_array.map((line) => ({
      startTime: line.startTime,
      endTime: line.endTime,
      text: line.text,
    }));
    return object_array_to_SRT_string(result_array);
  }
  390.  
  /**
   * Returns the display name of the auto-generated (ASR) caption track,
   * e.g. "English (auto-generated)".
   *
   * @returns {string} the track name, or the default "自动字幕" when there is
   *   no ASR track, it has no name, or the track list cannot be read.
   */
  function get_auto_subtitle_name() {
    const name = '自动字幕';
    try {
      const captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return name;
      }
      const asr_track = captionTracks.find((track) => track?.kind === 'asr');
      return asr_track?.name?.simpleText || name;
    } catch (error) {
      console.log(error);
      return name;
    }
  }
  408.  
  /**
   * Issues a GET request through jQuery.
   * Usage: `const result = await get(url)`.
   *
   * The previous `success` / `fail` options were dropped: their return values
   * were ignored by jQuery and `fail` is not a $.ajax setting. Callers await
   * the returned jqXHR object, which resolves with the response data.
   *
   * @param {string} url - address to fetch.
   * @returns {object} the jqXHR thenable returned by $.ajax.
   */
  function get(url) {
    return $.ajax({
      url: url,
      type: 'get',
    });
  }
  422.  
  423.  
  /**
   * Fetches a json3-format subtitle and converts its events into subtitle
   * items.
   *
   * Events without `segs`, and events whose only segment is a line break,
   * are skipped. A response without `events` yields an empty list.
   *
   * @param {string} url - json3 subtitle URL.
   * @returns {Promise<Array<{startTime: string, endTime: string, text: string,
   *   tStartMs: number, dDurationMs: number}>>}
   */
  async function auto_sub_in_chinese_fmt_json3_to_srt(url) {
    const json = await get(url);
    const events = json?.events ?? [];
    const srt_array = [];

    for (const event of events) {
      const segs = event.segs;
      if (segs === undefined) {
        continue;
      }
      if (segs.length === 1 && segs[0].utf8 === '\n') {
        continue;
      }

      const tStartMs = event.tStartMs;
      // Without a duration the end time would be NaN; fall back to a
      // zero-length cue instead.
      const dDurationMs = event.dDurationMs ?? 0;

      srt_array.push({
        startTime: ms_to_srt(tStartMs),
        endTime: ms_to_srt(tStartMs + dDurationMs),
        text: segs.map((seg) => seg.utf8).join(''),

        tStartMs: tStartMs,
        dDurationMs: dDurationMs,
      });
    }
    return srt_array;
  }
  458.  
  /**
   * Converts milliseconds into an SRT timestamp.
   * Example: 6640 -> "00:00:06,640".
   *
   * @param {number} $milliseconds - time in milliseconds; a fractional part
   *   is dropped so the millisecond field is always three digits.
   * @returns {string} "HH:MM:SS,mmm".
   */
  function ms_to_srt($milliseconds) {
    const total_ms = Math.floor($milliseconds);
    const pad = (value, width) => String(value).padStart(width, '0');

    const hours = Math.floor(total_ms / 3600000);
    const minutes = Math.floor(total_ms / 60000) % 60;
    const seconds = Math.floor(total_ms / 1000) % 60;
    const millis = total_ms % 1000;

    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
  }
  472.  
  /**
   * Serialises subtitle items into the text of an .srt file.
   *
   * The output starts with a UTF-8 byte-order mark, then one numbered cue
   * per item (numbering starts at 1), each followed by an empty line.
   *
   * @param {Array<{startTime: string, endTime: string, text: string}>} object_array
   *   Anything that is not an array (e.g. the `false` returned by the XML
   *   parser for empty input) produces a file with only the BOM.
   * @returns {string} SRT content.
   */
  function object_array_to_SRT_string(object_array) {
    const BOM = '\uFEFF';
    if (!Array.isArray(object_array)) {
      return BOM;
    }

    const cues = object_array.map(
      (item, i) => `${i + 1}\n${item.startTime} --> ${item.endTime}\n${item.text}\n\n`
    );
    return BOM + cues.join('');
  }
  501.  
  /**
   * Makes the browser download a string as a file.
   * Adapted from https://gist.github.com/danallison/3ec9d5314788b337b682
   *
   * A temporary hidden <a download> pointing at a Blob URL is clicked and
   * removed again; the Blob URL is released after 1.5 s.
   *
   * @param {string} text - file content.
   * @param {string} fileType - MIME type, e.g. "text/plain".
   * @param {string} fileName - suggested file name.
   */
  function downloadString(text, fileType, fileName) {
    const blob = new Blob([text], { type: fileType });
    const blob_url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.download = fileName;
    a.href = blob_url;
    a.dataset.downloadurl = [fileType, a.download, a.href].join(':');
    a.style.display = 'none';

    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);

    setTimeout(function () {
      URL.revokeObjectURL(blob_url);
    }, 1500);
  }
  522.  
  /**
   * Looks up the URL of the closed (non-ASR) caption track for a language.
   *
   * @param {string} lang_code - language code such as "en".
   * @returns {Promise<string|false>} the first matching track's baseUrl, or
   *   false when no such track exists or the track list cannot be read.
   */
  async function get_closed_subtitle_url(lang_code) {
    try {
      const captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return false;
      }
      const match = captionTracks.find(
        (track) => track.languageCode === lang_code && track.kind !== 'asr'
      );
      return match?.baseUrl || false;
    } catch (error) {
      console.log(error);
      return false;
    }
  }
  540.  
  /**
   * Converts YouTube's subtitle XML into the intermediate representation used
   * for SRT output: `[{ startTime: "", endTime: "", text: "" }, ...]`.
   *
   * @param {Document|string} youtube_xml_string - parsed XML document, or the
   *   raw XML text (parsed here with DOMParser).
   * @returns {Array<{startTime: string, endTime: string, text: string}>|false}
   *   false for empty / null / undefined input.
   */
  function parse_youtube_XML_to_object_list(youtube_xml_string) {
    if (youtube_xml_string === '' || youtube_xml_string === undefined || youtube_xml_string === null) {
      return false;
    }

    const xml = typeof youtube_xml_string === 'string'
      ? new DOMParser().parseFromString(youtube_xml_string, 'text/xml')
      : youtube_xml_string;

    return Array.from(xml.getElementsByTagName('text'), (node) => {
      // Strip inline markup, then resolve the remaining HTML entities.
      const stripped = node.textContent.toString().replace(/(<([^>]+)>)/ig, '');
      const text = htmlDecode(stripped);

      const start = parseFloat(node.getAttribute('start'));
      const duration = parseFloat(node.getAttribute('dur'));
      // NOTE(review): a missing "dur" is treated as zero length so the end
      // time is not NaN — confirm whether YouTube can omit it.
      const end = start + (Number.isNaN(duration) ? 0 : duration);

      return {
        startTime: process_time(start),
        endTime: process_time(end),
        text: text,
      };
    });
  }
  586.  
  /**
   * Reads the player response object that the page keeps on its <ytd-app>
   * element.
   *
   * @returns {object|undefined} `data.playerResponse` of the first <ytd-app>,
   *   or undefined when the element or its data is not available.
   */
  function get_youtube_data() {
    return document.getElementsByTagName('ytd-app')[0]?.data?.playerResponse;
  }
  590.  
  /**
   * Returns the caption track list of the current video.
   *
   * @returns {Array<object>|undefined} the value at
   *   `captions.playerCaptionsTracklistRenderer.captionTracks` of the player
   *   response, or undefined when any part of that path is missing.
   */
  function get_captionTracks() {
    return get_youtube_data()?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
  }
  596.  
  /**
   * Maps a language code to the language name as shown in the current
   * YouTube locale, taken from the caption track list.
   * With a Simplified Chinese locale "de" gives "德语"; with English (US)
   * it gives "German".
   *
   * @param {string} languageCode - e.g. "de".
   * @param {string} fallback_name - returned when no track with that code
   *   exists, the track has no name, or the track list cannot be read.
   * @returns {string}
   */
  function lang_code_to_local_name(languageCode, fallback_name) {
    try {
      const captionTracks = get_captionTracks();
      if (Array.isArray(captionTracks)) {
        const match = captionTracks.find((track) => track.languageCode === languageCode);
        const simpleText = match?.name?.simpleText;
        if (simpleText) {
          return simpleText;
        }
      }
    } catch (error) {
      console.log(error);
    }
    return fallback_name;
  }
  618.  
  /**
   * Returns the title of the current video, used for the download file name.
   *
   * The title is read from `ytplayer.config.args.title`. When that object
   * (or any part of the path) is missing, the page title is used instead,
   * without a trailing " - YouTube".
   *
   * @returns {string}
   */
  function get_title() {
    // typeof guard: referencing an undeclared global would throw.
    const player_title = typeof ytplayer !== 'undefined'
      ? ytplayer?.config?.args?.title
      : undefined;
    if (player_title) {
      return player_title;
    }
    return document.title.replace(/ - YouTube$/, '');
  }
  622.  
  // Resolves (with no value) after `ms` milliseconds. Usage: `await wait(500)`.
  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  624.  
  /**
   * Waits until an element matching `selector` exists in the document.
   * Based on https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-exists
   *
   * Resolves immediately when the element is already present; otherwise a
   * MutationObserver on document.body re-checks after every DOM change and
   * is disconnected once the element shows up.
   *
   * @param {string} selector - CSS selector.
   * @returns {Promise<Element>} the first matching element.
   */
  function waitForElm(selector) {
    return new Promise((resolve) => {
      const existing = document.querySelector(selector);
      if (existing) {
        resolve(existing);
        return;
      }

      const observer = new MutationObserver(() => {
        const found = document.querySelector(selector);
        if (found) {
          observer.disconnect();
          resolve(found);
        }
      });

      observer.observe(document.body, {
        childList: true,
        subtree: true,
      });
    });
  }
  646.  
  /**
   * (Re)initialises the script for the current page: resets the global
   * caption list and injects the download button.
   *
   * first_load is cleared before injecting, so that a failure while
   * injecting does not stop later navigations from re-initialising.
   */
  function init() {
    console.log('进入 init');
    unsafeWindow.caption_array = [];
    first_load = false;
    inject_our_script();
  }
  653.  
  /**
   * Entry point: waits for the anchor element to exist, then initialises.
   * @returns {Promise<void>}
   */
  async function main() {
    console.log('进入 main');
    await waitForElm(anchor_element);
    init();
  }
  659.  
  // Start the script 2 seconds after it is loaded.
  setTimeout(main, 2000);
  661. })();