Youtube Subtitle Downloader v25

Download Subtitles

2020-12-03 일자. 최신 버전을 확인하세요.

질문, 리뷰하거나, 이 스크립트를 신고하세요.
  1. // ==UserScript==
  2. // @name Youtube Subtitle Downloader v25
  3. // @include https://*youtube.com/*
  4. // @author Cheng Zheng
  5. // @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2021 Cheng Zheng;
  6. // @license GNU GPL v3.0 or later. http://www.gnu.org/copyleft/gpl.html
  7. // @require http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js
  8. // @version 25
  9. // @grant GM_xmlhttpRequest
  10. // @namespace https://greasyfork.org/users/5711
  11. // @description Download Subtitles
  12. // ==/UserScript==
  13.  
  14. /*
  15. [What is this?]
  16. This "Tampermonkey script" allows you to download YouTube "automatic subtitles" and "closed subtitles".
  17.  
  18. [Note]
  19. If it doesn't work (this is rare), try refreshing the page.
  20. If the problem still exists, email: guokrfans@gmail.com
  21.  
  22. [Who build this]
  23. Author : Cheng Zheng
  24. Email : guokrfans@gmail.com
  25. Github : https://github.com/1c7/Youtube-Auto-Subtitle-Download
  26. If you want to improve the script, GitHub pull requests are welcome.
  27.  
  28. [Note]
  29. Few things before you read the code:
  30. 0. Some code comments are written in Chinese
  31. 1. Youtube have 2 UI: Material design and The old design
  32. 2. Code need handle both Auto & Closed subtitle
  33.  
  34. (Explanation: "Tampermonkey script" means
  35. you have to install a Chrome extension called "Tampermonkey", and then install this script.)
  36.  
  37. [Test Video]
  38. https://www.youtube.com/watch?v=bkVsus8Ehxs
  39. only have English closed subtitle, nothing else (no auto subtitle)
  40.  
  41. https://www.youtube.com/watch?v=-WEqFzyrbbs
  42. no subtitle at all
  43.  
  44. https://www.youtube.com/watch?v=9AzNEG1GB-k
  45. have a lot subtitle
  46.  
  47. https://www.youtube.com/watch?v=tqGkOvrKGfY
  48. 1:36:33 super long subtitle
  49.  
  50. [Code Explain]
  51. Three part
  52. 1. UI specific (add button on page)
  53. 2. Detect if subtitle exists
  54. 3. Transform subtitle format & download
  55. */
  56.  
  57. (function () {
  58.  
  // ---- Configuration ----
  // Labels shown in the first <option> of the dropdown, and the DOM id of the injected button.
  var NO_SUBTITLE = 'No Subtitle';
  var HAVE_SUBTITLE = 'Download Subtitles';
  var TEXT_LOADING = 'Loading...';
  const BUTTON_ID = 'youtube-subtitle-downloader-by-1c7-last-update-2020-12-3'

  // jQuery id selector for the injected button.
  var HASH_BUTTON_ID = '#' + BUTTON_ID

  // ---- Mutable state ----
  // true until init() has run once on this page load
  var first_load = true;
  // player response captured from the "yt-navigate-finish" event (used for auto subtitles)
  var youtube_playerResponse_1c7 = null;
  // every subtitle offered in the dropdown, in the same order as the <option> elements
  unsafeWindow.caption_array = [];

  // Entry point for the first page load.
  $(document).ready(function () {
    start();
  });
  77.  
  // Poll the page until the video title element shows up, then call init().
  // "$(document).ready()" alone fires too early, so we retry on a timer and give up
  // once the retry limit is exceeded.
  function start() {
    var POLL_INTERVAL_MS = 330; // 330 works for the original author
    var RETRY_LIMIT = 30;
    var attempts = 0;

    // The UI flavour is detected once, when start() is called; it decides which
    // element we wait for on every tick.
    var title_is_present = new_material_design_version()
      ? function () {
        return document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer').length;
      }
      : function () {
        return $('#watch7-headline').length;
      };

    var timer = setInterval(function () {
      if (title_is_present()) {
        init();
        clearInterval(timer);
      }
      attempts += 1;
      if (attempts > RETRY_LIMIT) {
        clearInterval(timer);
      }
    }, POLL_INTERVAL_MS);
  }
  110.  
  // Re-initialise when YouTube finishes an in-page navigation.
  // Author's notes: the Material-design UI fires "yt-navigate-finish" (the old UI does not),
  // and the event also fires on the very first load -- start() already covers that case,
  // which is why the first_load flag is checked below.
  var body = document.getElementsByTagName("body")[0];
  body.addEventListener("yt-navigate-finish", function (event) {
    if (!current_page_is_video_page()) {
      return;
    }

    // Keep the player response delivered with the event (read later for auto subtitles)
    // and drop the previous page's captions, otherwise entries pile up and cause errors.
    youtube_playerResponse_1c7 = event.detail.response.playerResponse;
    unsafeWindow.caption_array = [];

    if (first_load) {
      return;
    }

    // The user moved to another page: rebuild the button so it lists the right subtitles.
    remove_subtitle_download_button();
    init();
  });
  128.  
  // Re-initialise after an "spfdone" event.
  // Author's note: the old YouTube UI fires this event; whether the Material UI does is unknown.
  //
  // The title element may not exist yet when the event fires, so we poll for it.
  // Polling is bounded (same interval and retry limit as start()) so the timer cannot
  // run forever on a page where '#watch7-headline' never appears.
  window.addEventListener("spfdone", function (e) {
    if (!current_page_is_video_page()) {
      return;
    }
    remove_subtitle_download_button();

    var RETRY_LIMIT = 30;
    var retry_count = 0;
    var checkExist = setInterval(function () {
      if ($('#watch7-headline').length) {
        // Stop the timer first so a throwing init() cannot leave it running.
        clearInterval(checkExist);
        init();
        return;
      }
      retry_count = retry_count + 1;
      if (retry_count > RETRY_LIMIT) {
        clearInterval(checkExist);
      }
    }, 330);
  });
  142.  
  // Tell the two YouTube layouts apart.
  // Returns false when the old layout's '#watch7-headline' element is in the document,
  // true otherwise (treated as the new Material-design layout).
  function new_material_design_version() {
    return !document.getElementById('watch7-headline');
  }
  154.  
  // Returns true when the current URL carries a video id (see get_url_video_id), else false.
  function current_page_is_video_page() {
    var video_id = get_url_video_id();
    return video_id !== null;
  }
  159.  
  // Read the video id from the page URL's "v" query parameter.
  // Example: "https://www.youtube.com/watch?v=RW1ChiWyiZQ" -> "RW1ChiWyiZQ".
  // Returns whatever getURLParameter returns for a missing parameter (null).
  function get_url_video_id() {
    var VIDEO_ID_PARAM = 'v';
    return getURLParameter(VIDEO_ID_PARAM);
  }
  165.  
  // Look up one query-string parameter of the current page (location.search).
  // Based on https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513
  //
  // name: parameter name; it is matched literally (regex metacharacters are escaped).
  // Returns the decoded value ("+" is treated as a space), or null when the parameter
  // is missing, empty, or not valid percent-encoding.
  function getURLParameter(name) {
    var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var pattern = new RegExp('[?&]' + escaped_name + '=' + '([^&;]+?)(&|#|;|$)');
    var match = pattern.exec(location.search);
    if (match === null) {
      return null;
    }
    try {
      return decodeURIComponent(match[1].replace(/\+/g, '%20')) || null;
    } catch (error) {
      // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A";
      // treat such a value as absent rather than crashing the caller.
      return null;
    }
  }
  170.  
  // Remove the injected download button (looked up by HASH_BUTTON_ID) from the page, if present.
  function remove_subtitle_download_button() {
    var button = $(HASH_BUTTON_ID);
    button.remove();
  }
  174.  
  // Build the download button for the current page, then record that the first
  // initialisation has happened (first_load is consulted by the navigation listener).
  function init() {
    inject_our_script();

    // Only flipped after the injection call returns.
    first_load = false;
  }
  179.  
  // Build the green "download subtitles" button (a <div> wrapping a <select>), insert it
  // next to the video title, and start loading the list of available subtitles.
  //
  // Changes from the previous version:
  // - the Material-design title is looked up with querySelector, so the existence check
  //   is meaningful (querySelectorAll always returns a truthy list);
  // - the tooltip matches the script name/version from the userscript header;
  // - the hidden <a id="ForSubtitleDownload"> is only created once, instead of adding a
  //   duplicate-id element to <body> on every navigation.
  function inject_our_script() {
    var div = document.createElement('div');
    var select = document.createElement('select');
    var option = document.createElement('option');

    var css_div = [
      'display: table;',
      'margin-top:4px;',
      'border: 1px solid rgb(0, 183, 90);',
      'cursor: pointer; color: rgb(255, 255, 255);',
      'border-top-left-radius: 3px;',
      'border-top-right-radius: 3px;',
      'border-bottom-right-radius: 3px;',
      'border-bottom-left-radius: 3px;',
      'background-color: #00B75A;'
    ].join('\n');
    div.setAttribute('style', css_div);

    div.id = BUTTON_ID;
    div.title = 'Youtube Subtitle Downloader v25'; // displayed when the cursor hovers

    select.id = 'captions_selector';
    select.disabled = true; // enabled by load_language_list once subtitles are known
    var css_select = [
      'display:block;',
      'border: 1px solid rgb(0, 183, 90);',
      'cursor: pointer;',
      'color: rgb(255, 255, 255);',
      'background-color: #00B75A;',
      'padding: 4px;'
    ].join('\n');
    select.setAttribute('style', css_select);

    // First <option> is a label only ("Loading..." for now), never a download target.
    option.textContent = TEXT_LOADING;
    option.selected = true;
    select.appendChild(option);

    // Choosing an entry in the dropdown triggers the download.
    select.addEventListener('change', function () {
      download_subtitle(this);
    }, false);

    div.appendChild(select); // put <select> into <div>

    // put the div into page: new material design (right after the title element)
    var title_element = document.querySelector('.title.style-scope.ytd-video-primary-info-renderer');
    if (title_element) {
      $(title_element).after(div);
    }
    // put the div into page: old version (inside the video title DIV)
    var controls = document.getElementById('watch7-headline');
    if (controls) {
      controls.appendChild(div);
    }

    load_language_list(select);

    // Hidden <a> element kept for download purposes; create it at most once per document.
    if (!document.getElementById('ForSubtitleDownload')) {
      var a = document.createElement('a');
      a.style.cssText = 'display:none;';
      a.setAttribute("id", "ForSubtitleDownload");
      document.getElementsByTagName('body')[0].appendChild(a);
    }
  }
  242.  
  // Handle a selection in the subtitle dropdown: fetch the chosen subtitle, convert it
  // to SRT and hand it to the browser as a file download.
  //
  // selector: the <select> element; option 0 is the label, option i (i >= 1) maps to
  //           caption_array[i - 1].
  //
  // Failures (unknown entry, fetch returning false, conversion returning false, or any
  // thrown error) are logged to the console and no file is produced. In every case the
  // dropdown is reset to its first option afterwards.
  async function download_subtitle(selector) {
    // if user select first <option>, we just return, do nothing.
    if (selector.selectedIndex == 0) {
      return;
    }

    try {
      // because first <option> is for display, so index - 1
      var caption = caption_array[selector.selectedIndex - 1];
      if (!caption) {
        console.error('Youtube Subtitle Downloader: no caption for option index ' + selector.selectedIndex);
        return;
      }

      var result = null;
      var filename = null; // name of the file to save

      if (caption.lang_code == 'AUTO') {
        // auto subtitle
        result = await get_auto_subtitle();
        filename = get_file_name(get_auto_subtitle_name());
      } else {
        // closed subtitle
        result = await get_closed_subtitle(caption.lang_code);
        filename = get_file_name(caption.lang_name);
      }

      // Both fetch helpers return false when no subtitle could be retrieved.
      if (!result) {
        console.error('Youtube Subtitle Downloader: could not fetch subtitle "' + caption.lang_name + '"');
        return;
      }

      var srt = parse_youtube_XML_to_SRT(result);
      if (srt === false) {
        console.error('Youtube Subtitle Downloader: subtitle "' + caption.lang_name + '" is empty');
        return;
      }
      downloadString(srt, "text/plain", filename);
    } catch (error) {
      console.error('Youtube Subtitle Downloader: download failed', error);
    } finally {
      // After download (or failure), select first <option> again
      selector.options[0].selected = true;
    }
  }
  274.  
  275.  
  // Build the download file name for a subtitle.
  // x: language label, e.g. "English".
  // Result shape: "(<x>)<video title>_video_id_<video id>.srt"
  // e.g. "(English)How Did Python Become A Data Science Powerhouse?_video_id_<id>.srt".
  // Author's note: document.title is deliberately not used, because it would also carry
  // the notification count into the file name.
  function get_file_name(x) {
    var title = get_title();
    var video_id = get_video_id();
    return '(' + x + ')' + title + '_video_id_' + video_id + '.srt';
  }
  283.  
  // Detect whether an "auto subtitle" and/or "closed subtitles" exist for the current
  // video and fill the dropdown accordingly.
  //
  // select: the <select> element created by inject_our_script; its first <option> is
  //         used as a status label (NO_SUBTITLE / HAVE_SUBTITLE).
  //
  // Side effects: appends one entry per subtitle to the global caption_array (the order
  // matches the added <option> elements; download_subtitle relies on that), and enables
  // or greys out the dropdown.
  //
  // If the closed-subtitle list request fails, times out or is aborted, the dropdown is
  // still resolved using only the auto subtitle, instead of staying on "Loading...".
  function load_language_list(select) {
    // get_auto_subtitle_xml_url() returns the URL, or false when there is none.
    var auto_subtitle_exist = Boolean(get_auto_subtitle_xml_url());

    // Register one subtitle: remember it for download and show it in the dropdown.
    function add_caption(caption_info) {
      caption_array.push(caption_info);
      var option = document.createElement('option');
      option.textContent = caption_info.lang_name;
      select.appendChild(option);
    }

    // Resolve the dropdown from the list of closed-subtitle <track> elements
    // (an empty list means "no closed subtitles").
    function populate(captions) {
      var closed_subtitle_exist = captions.length != 0;

      // if no subtitle at all, just say no and stop
      if (!auto_subtitle_exist && !closed_subtitle_exist) {
        select.options[0].textContent = NO_SUBTITLE;
        disable_download_button();
        return;
      }

      // at least one type of subtitle exists
      select.options[0].textContent = HAVE_SUBTITLE;
      select.disabled = false;

      if (auto_subtitle_exist) {
        add_caption({
          lang_code: 'AUTO', // later we use this to know if it's auto subtitle
          lang_name: get_auto_subtitle_name() // for display only
        });
      }

      for (var i = 0, il = captions.length; i < il; i++) {
        add_caption({
          lang_code: captions[i].getAttribute('lang_code'), // used to fetch the subtitle
          lang_name: captions[i].getAttribute('lang_translated') // for display only
        });
      }
    }

    function populate_without_closed_subtitles() {
      populate([]);
    }

    // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list
    var list_url = 'https://video.google.com/timedtext?hl=en&v=' + encodeURIComponent(get_url_video_id()) + '&type=list';
    GM_xmlhttpRequest({
      method: 'GET',
      url: list_url,
      onload: function (xhr) {
        var captions = new DOMParser().parseFromString(xhr.responseText, "text/xml").getElementsByTagName('track');
        populate(captions);
      },
      onerror: populate_without_closed_subtitles,
      ontimeout: populate_without_closed_subtitles,
      onabort: populate_without_closed_subtitles
    });
  }
  358.  
  // Grey out the download button and its dropdown (used when the video has no subtitles),
  // and give the button a padding that depends on which YouTube layout is active.
  function disable_download_button() {
    var greyed_out = {
      'border': '#95a5a6',
      'cursor': 'not-allowed',
      'background-color': '#95a5a6'
    };
    $(HASH_BUTTON_ID).css(greyed_out);
    $('#captions_selector').css(greyed_out);

    var padding = new_material_design_version() ? '6px' : '5px';
    $(HASH_BUTTON_ID).css('padding', padding);
  }
  375.  
  // Convert a time in seconds (a number, e.g. 671.33, 37.64, 12, 23.029) into an SRT
  // timestamp "HH:MM:SS,mmm" (e.g. 00:00:00,090 / 00:00:08,460 / 00:10:29,350).
  function process_time(s) {
    // toFixed(3) always yields exactly three decimals (671.33 -> "671.330", 671 -> "671.000");
    // note that it rounds.
    var parts = s.toFixed(3).split('.');
    var millis = parts[1]; // "330"

    var hours = 0;
    var minutes = 0;
    var seconds = parts[0]; // "671"

    // Carve minutes and hours out of the whole seconds, e.g. 121 s -> 2 min 1 s,
    // 700 min -> 11 h 40 min.
    if (seconds >= 60) {
      minutes = Math.floor(seconds / 60);
      seconds = seconds - minutes * 60;
      hours = Math.floor(minutes / 60);
      minutes = minutes - hours * 60;
    }

    // Left-pad single-digit fields with a zero so each one is at least two characters wide.
    var two_digits = function (value) {
      return value < 10 ? '0' + value : value;
    };

    return [two_digits(hours), two_digits(minutes), two_digits(seconds)].join(':') + ',' + millis;
  }
  420.  
  // Offer a string to the user as a file download.
  // Adapted from https://gist.github.com/danallison/3ec9d5314788b337b682 (thanks, https://github.com/danallison);
  // author's note: worked in Chrome 66, tested 2018-5-19.
  //
  // text:     file contents
  // fileType: MIME type used for the Blob
  // fileName: suggested name for the saved file
  function downloadString(text, fileType, fileName) {
    var link = document.createElement('a');
    var blob = new Blob([text], {
      type: fileType
    });

    link.download = fileName;
    link.href = URL.createObjectURL(blob);
    link.dataset.downloadurl = [fileType, link.download, link.href].join(':');
    link.style.display = "none";

    // The link has to be in the document while it is clicked; it is removed right after.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the object URL a little later, once the click has been issued.
    setTimeout(function () {
      URL.revokeObjectURL(link.href);
    }, 1500);
  }
  441.  
  // Turn HTML entities back into text, example: &quot; becomes "
  //
  // input: subtitle text that may contain HTML entities.
  // Returns the decoded text; "" for empty / null / undefined input.
  //
  // The text is parsed with DOMParser into a separate document instead of being assigned
  // to innerHTML of an element created by the live page document, so subtitle content is
  // never turned into elements of the page. The result is the text content of everything
  // parsed, so it is always a string (the previous version returned null whenever the
  // first parsed node was not a text node).
  function htmlDecode(input) {
    if (input === undefined || input === null || input === '') {
      return "";
    }
    // An explicit <body> start keeps leading whitespace of the input as text.
    var parsed = new DOMParser().parseFromString('<!DOCTYPE html><body>' + input, 'text/html');
    return parsed.body.textContent;
  }
  450.  
  // Find the URL of the automatic (speech-recognition) subtitle track.
  // Returns the track's baseUrl, or false when there is no such track or the caption
  // track list cannot be read. (Callers compare the result against false.)
  // Later we can send an AJAX request to that URL and get the XML subtitle.
  //
  // ASR – A caption track generated using automatic speech recognition.
  // https://developers.google.com/youtube/v3/docs/captions
  function get_auto_subtitle_xml_url() {
    try {
      var captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return false;
      }
      for (var i = 0; i < captionTracks.length; i++) {
        if (captionTracks[i].kind === 'asr') {
          return captionTracks[i].baseUrl;
        }
      }
      return false;
    } catch (error) {
      // get_captionTracks() throws when the player response carries no caption data;
      // that simply means "no auto subtitle".
      return false;
    }
  }
  469.  
  // Fetch the automatic subtitle.
  // Resolves to false when get_auto_subtitle_xml_url() reports no URL, otherwise to
  // whatever get(url) resolves to.
  async function get_auto_subtitle() {
    var auto_url = get_auto_subtitle_xml_url();
    if (auto_url == false) {
      return false;
    }
    return await get(auto_url);
  }
  478.  
  // Fetch the closed (non-automatic) subtitle for one language.
  //
  // lang_code: language code to look for, e.g. "en".
  // Resolves to whatever get(url) resolves to for the first matching track, or to false
  // when no track matches or anything goes wrong (the error is logged).
  async function get_closed_subtitle(lang_code) {
    try {
      var captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return false;
      }
      for (var i = 0; i < captionTracks.length; i++) {
        var caption = captionTracks[i];
        // The "kind != 'asr'" test is required: a language code such as "en" can match
        // two records -- the automatic subtitle (kind=asr) and the full subtitle (which
        // has no kind attribute) -- and we only want the latter here.
        if (caption.languageCode === lang_code && caption.kind != 'asr') {
          return await get(caption.baseUrl);
        }
      }
      return false;
    } catch (error) {
      console.error('Youtube Subtitle Downloader: fetching closed subtitle "' + lang_code + '" failed', error);
      return false;
    }
  }
  501.  
  // Youtube returns XML, we want SRT.
  //
  // youtube_xml_string: the subtitle XML, either as an already parsed document (anything
  //                     exposing getElementsByTagName) or as an XML string, which is
  //                     parsed with DOMParser first.
  // Returns the SRT text (prefixed with a BOM), or false for empty input
  // ('' / false / null / undefined).
  //
  // Each <text start=".." dur=".."> element becomes one SRT cue:
  //
  //   1
  //   00:00:01,939 --> 00:00:04,350
  //   everybody Craig Adams here I'm a
  //
  //   2
  //   00:00:04,350 --> 00:00:06,720
  //   filmmaker on YouTube who's digging
  //
  // A cue ends at start + dur. (To make a cue end exactly where the next one starts,
  // use the next element's "start" as the end time instead.) When "dur" is missing or
  // not a number, the cue ends at its own start time rather than producing NaN fields.
  function parse_youtube_XML_to_SRT(youtube_xml_string) {
    if (!youtube_xml_string) {
      return false;
    }

    var xml = youtube_xml_string;
    if (typeof xml === 'string') {
      xml = new DOMParser().parseFromString(xml, 'text/xml');
    }

    var text = xml.getElementsByTagName('text');
    var BOM = '\uFEFF';
    var new_line = "\n";
    var cues = [];

    for (var i = 0; i < text.length; i++) {
      var content = text[i].textContent.toString();
      content = content.replace(/(<([^>]+)>)/ig, ""); // remove all html tag.
      // turn HTML entity back to text. example: &#39; back to apostrophe (')
      content = htmlDecode(content);

      var start = parseFloat(text[i].getAttribute('start'));
      var dur = parseFloat(text[i].getAttribute('dur'));
      var end = isNaN(dur) ? start : start + dur;

      cues.push(
        (i + 1) + new_line +
        process_time(start) + ' --> ' + process_time(end) + new_line +
        content + new_line + new_line
      );
    }

    return BOM + cues.join('');
  }
  559.  
  // Display name of the automatic subtitle track.
  // Returns the track's name.simpleText (something like "English (auto-generated)"),
  // or the default 'Auto Subtitle' when there is no such track or its name cannot be read.
  function get_auto_subtitle_name() {
    var DEFAULT_NAME = 'Auto Subtitle';
    try {
      var captionTracks = get_captionTracks();
      if (!Array.isArray(captionTracks)) {
        return DEFAULT_NAME;
      }
      for (var i = 0; i < captionTracks.length; i++) {
        if (captionTracks[i].kind === 'asr') {
          return captionTracks[i].name.simpleText;
        }
      }
      return DEFAULT_NAME;
    } catch (error) {
      // Caption data missing or shaped differently than expected: fall back to the default.
      return DEFAULT_NAME;
    }
  }
  575.  
  // Return the player response object, or null when none is available.
  //
  // Sources, later ones taking precedence when present:
  //   1. youtube_playerResponse_1c7 (captured from the "yt-navigate-finish" event)
  //   2. ytplayer.config.args.player_response (a JSON string)
  //   3. ytplayer.config.args.raw_player_response (already an object)
  //
  // Each source is read defensively: a missing ytplayer / config / args, or an
  // unparsable player_response string, no longer discards a value obtained from
  // another source.
  function get_json() {
    var json = null;
    if (typeof youtube_playerResponse_1c7 !== "undefined" && youtube_playerResponse_1c7 !== null && youtube_playerResponse_1c7 !== '') {
      json = youtube_playerResponse_1c7;
    }

    var args = null;
    try {
      args = ytplayer.config.args;
    } catch (error) {
      // ytplayer, ytplayer.config or its args are not available on this page;
      // keep whatever was found so far.
    }

    if (args) {
      if (args.player_response) {
        try {
          json = JSON.parse(args.player_response);
        } catch (error) {
          // Not valid JSON: ignore this source.
        }
      }
      if (args.raw_player_response) {
        json = args.raw_player_response;
      }
    }
    return json;
  }
  596.  
  // Return the caption track list found at
  // captions.playerCaptionsTracklistRenderer.captionTracks of the player response.
  // Throws a TypeError when get_json() returns null or an intermediate property is missing.
  function get_captionTracks() {
    const { captionTracks } = get_json().captions.playerCaptionsTracklistRenderer;
    return captionTracks
  }
  602.  
  // Return the title of the current video, used for the download file name.
  //
  // Sources, in order:
  //   1. ytplayer.config.args.title
  //   2. videoDetails.title of the player response from get_json()
  //      (assumes the player response exposes videoDetails.title -- TODO confirm)
  //   3. document.title, as a last resort
  // The fallbacks exist because reading ytplayer.config.args throws when ytplayer or its
  // config is not available, which previously made the whole download fail.
  function get_title() {
    try {
      var title = ytplayer.config.args.title;
      if (title) {
        return title;
      }
    } catch (error) {
      // ytplayer / config / args not available: try the next source.
    }

    try {
      var json = get_json();
      if (json && json.videoDetails && json.videoDetails.title) {
        return json.videoDetails.title;
      }
    } catch (error) {
      // No usable player response: fall through to document.title.
    }

    return document.title;
  }
  606.  
  // Return the id of the current video, used for the download file name.
  // Reads ytplayer.config.args.video_id; when that is unavailable (reading it throws if
  // ytplayer or its config is missing) or empty, falls back to the "v" parameter of the
  // page URL via get_url_video_id().
  function get_video_id() {
    try {
      var video_id = ytplayer.config.args.video_id;
      if (video_id) {
        return video_id;
      }
    } catch (error) {
      // ytplayer / config / args not available: use the URL instead.
    }
    return get_url_video_id();
  }
  610.  
  // Issue a GET request with jQuery and return the object $.ajax returns, which can be awaited.
  // Usage: var result = await get(url)
  //
  // The awaited value is the response as delivered by $.ajax; a failed request makes
  // the await throw. The former `success` / `fail` options were removed: `fail` is not
  // a $.ajax setting, and the value returned from a `success` callback is not used.
  function get(url) {
    return $.ajax({
      url: url,
      type: 'get'
    });
  }
  624.  
  625. })();