MP3 to Transcript with Auto-Detection and Persistent Storage

Automatically scans the last conversation message for MP3 links, transcribes them, and avoids duplicate requests by caching transcripts in local storage. The cache is trimmed once it holds more than 10 transcripts.

  1. // ==UserScript==
  2. // @name MP3 to Transcript with Auto-Detection and Persistent Storage
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.8
  5. // @description Automatically scan the last conversation message for MP3 links, transcribe them, and avoid duplicates using local storage. Limits to 3 entries.
  6. // @author Vishanka
  7. // @match https://discord.com/channels/*
  8. // @grant GM_addStyle
  9. // @grant GM_xmlhttpRequest
  10. // @grant unsafeWindow
  11. // @run-at document-idle
  12. // ==/UserScript==
  13.  
  14. (function () {
  15. 'use strict';
  16.  
  // Returns the Google Cloud API key used for speech recognition.
  // Reads the key cached under 'google_cloud_api_key' in localStorage; when none
  // is cached, prompts the user once and caches the answer.
  // Returns: the key as a string, or null when the user supplies nothing usable
  // (an alert is shown in that case).
  function getApiKey() {
    const STORAGE_KEY = 'google_cloud_api_key';

    // Trim so a blank or whitespace-padded value is never treated as a real key.
    const cachedKey = localStorage.getItem(STORAGE_KEY)?.trim();
    if (cachedKey) {
      return cachedKey;
    }

    // prompt() yields null on cancel; optional chaining folds that into "no key".
    const enteredKey = prompt("Please enter your Google Cloud API key for speech recognition:")?.trim();
    if (!enteredKey) {
      alert("API key is required to proceed.");
      return null;
    }

    localStorage.setItem(STORAGE_KEY, enteredKey);
    return enteredKey;
  }
  31.  
  32. // Fetch the API key
  33. let API_KEY = getApiKey();
  34. if (!API_KEY) {
  35. return; // Exit if no API key provided
  36. }
  37.  
  38. const API_URL = `https://speech.googleapis.com/v1/speech:recognize?key=${API_KEY}`;
  39.  
  // Create a button to toggle the transcription panel. It is wrapped in a div
  // so the wrapper can be attached to whichever host element is available.
  const toggleButton = document.createElement('div');
  toggleButton.innerHTML = `
<button id="toggle-transcription-panel" style="position: relative; top: 10px; right: 0px; left: 10px; padding: 10px; background: #007bff; color: white; border: none; border-radius: 3px; cursor: pointer; z-index: 1001;">Show MP3 Transcription Tool</button>
`;

  // Append the button to DCstoragePanel if available, otherwise to the body.
  // DCstoragePanel is not declared in this script (presumably another userscript
  // exposes it - verify), so it is probed with typeof: a bare reference to an
  // undeclared name would throw and abort everything that follows.
  const toggleHost =
    typeof DCstoragePanel !== 'undefined' && DCstoragePanel ? DCstoragePanel : document.body;
  toggleHost.appendChild(toggleButton);
  48.  
  // Markup for the transcription panel: URL input, trigger button and a
  // read-only result area. The panel starts hidden (display: none) and is
  // revealed by the toggle button.
  const panelMarkup = `
<div id="transcription-panel" style="display: none;">
<h3>MP3 Transcription Tool</h3>
<input type="text" id="mp3-url" placeholder="Enter MP3 URL here" />
<button id="transcribe-button">Transcribe</button>
<textarea id="transcription-result" placeholder="Transcript will appear here..." readonly></textarea>
</div>
`;

  // Insert the panel as the last child of <body>.
  document.body.insertAdjacentHTML('beforeend', panelMarkup);
  60.  
  // Stylesheet for the floating transcription panel and for the
  // "Transcript done!" notice (#transcription-done-message), which is hidden
  // by default and shown from script.
  const panelStyles = `
#transcription-panel {
position: fixed;
bottom: 50px;
right: 10px;
width: 300px;
background: #f8f9fa;
border: 1px solid #ccc;
padding: 10px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
z-index: 9999;
font-family: Arial, sans-serif;
}
#transcription-panel h3 {
margin: 0 0 10px;
font-size: 16px;
}
#transcription-panel input, #transcription-panel textarea {
width: 100%;
margin-bottom: 10px;
padding: 5px;
box-sizing: border-box;
}
#transcription-panel button {
width: 100%;
padding: 5px;
cursor: pointer;
background: #007bff;
color: white;
border: none;
border-radius: 3px;
}
#transcription-done-message {
position: fixed;
bottom: 100px;
left: 50%;
transform: translateX(-50%);
background: #28a745;
color: white;
padding: 10px;
border-radius: 3px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
z-index: 10000;
font-family: Arial, sans-serif;
display: none;
}
`;

  // Register the rules with the page through the userscript manager.
  GM_addStyle(panelStyles);
  109.  
  // Clicking the toggle button flips the panel between hidden and visible and
  // relabels the button to describe the next action.
  document.getElementById('toggle-transcription-panel').addEventListener('click', () => {
    const panel = document.getElementById('transcription-panel');
    const button = document.getElementById('toggle-transcription-panel');
    const isHidden = panel.style.display === 'none';

    panel.style.display = isHidden ? 'block' : 'none';
    button.innerText = isHidden
      ? 'Hide MP3 Transcription Tool'
      : 'Show MP3 Transcription Tool';
  });

  // Clicking "Transcribe" runs the transcription for the URL in the input box.
  const transcribeButton = document.getElementById('transcribe-button');
  transcribeButton.addEventListener('click', transcribe);
  // Transcribes the MP3 whose URL is currently in the #mp3-url input.
  // - Alerts and stops when the input is empty.
  // - When a transcript for this URL is already cached in localStorage (keyed by
  //   the URL itself), shows it and makes no network request.
  // - Otherwise downloads the file and hands it, Base64-encoded, to
  //   sendToGoogleCloud.
  function transcribe() {
    const mp3Url = document.getElementById('mp3-url').value.trim();
    if (!mp3Url) {
      alert('Please enter a valid MP3 URL.');
      return;
    }

    // Reuse a cached transcript instead of paying for another recognition call.
    const storedTranscript = localStorage.getItem(mp3Url);
    if (storedTranscript) {
      document.getElementById('transcription-result').value = storedTranscript;
      return;
    }

    // Fetch MP3 file using GM_xmlhttpRequest and process it
    GM_xmlhttpRequest({
      method: 'GET',
      url: mp3Url,
      responseType: 'arraybuffer', // Required for audio data
      onload: (response) => {
        // onload also fires for HTTP error responses; without this check an
        // error page would be encoded and submitted as if it were audio.
        if (response.status < 200 || response.status >= 300) {
          alert('Failed to fetch the MP3 file.');
          console.error(`MP3 download failed with HTTP status ${response.status}`, response);
          return;
        }

        // Convert the audio to Base64 and send it to the Speech-to-Text API.
        sendToGoogleCloud(arrayBufferToBase64(response.response), mp3Url);
      },
      onerror: (err) => {
        alert('Failed to fetch the MP3 file.');
        console.error(err);
      },
    });
  }
  157.  
  // Sends Base64-encoded MP3 audio to the Speech-to-Text endpoint (API_URL) and
  // publishes the outcome.
  // Parameters:
  //   audioBase64 - Base64 text of the audio file.
  //   mp3Url      - URL the audio came from; used as the localStorage cache key.
  // On success the transcript (or 'No transcript found.') is written to the
  // #transcription-result field and cached, the cache is trimmed, and the
  // "Transcript done!" notice is shown. API and transport errors are reported
  // with alert().
  function sendToGoogleCloud(audioBase64, mp3Url) {
    GM_xmlhttpRequest({
      method: 'POST',
      url: API_URL,
      headers: { 'Content-Type': 'application/json' },
      data: JSON.stringify({
        config: {
          encoding: 'MP3',
          sampleRateHertz: 16000,
          languageCode: 'en-US',
        },
        audio: {
          content: audioBase64,
        },
      }),
      onload: (response) => {
        // The body is not guaranteed to be JSON (e.g. an HTML error page);
        // report that instead of letting the parse error escape the callback.
        let result;
        try {
          result = JSON.parse(response.responseText);
        } catch (err) {
          alert('Failed to process the transcription.');
          console.error('Speech API returned a non-JSON response', err);
          return;
        }

        if (result?.error) {
          alert(`Error: ${result.error.message}`);
          return;
        }

        // Take the first alternative of each result; results that carry no
        // usable alternative are skipped rather than throwing.
        const transcript =
          (result?.results ?? [])
            .map((r) => r.alternatives?.[0]?.transcript)
            .filter(Boolean)
            .join('\n') || 'No transcript found.';

        document.getElementById('transcription-result').value = transcript;

        // Store the transcript in local storage, keyed by the MP3 URL
        localStorage.setItem(mp3Url, transcript);

        // Evict surplus cached transcripts
        manageLocalStorageLimit();

        // Show "Transcript done!" message
        showTranscriptionDoneMessage();
      },
      onerror: (err) => {
        alert('Failed to process the transcription.');
        console.error(err);
      },
    });
  }
  201.  
  // Encodes the bytes of an ArrayBuffer as a Base64 string.
  // Parameters:
  //   buffer - the ArrayBuffer to encode.
  // Returns: the Base64 text ('' for an empty buffer).
  function arrayBufferToBase64(buffer) {
    // Bytes are turned into a binary string one slice at a time so that no
    // single String.fromCharCode call receives an oversized argument list.
    const SLICE_LENGTH = 0x8000;
    const bytes = new Uint8Array(buffer);

    let binaryText = '';
    for (let start = 0; start < bytes.length; start += SLICE_LENGTH) {
      binaryText += String.fromCharCode(...bytes.subarray(start, start + SLICE_LENGTH));
    }
    return btoa(binaryText);
  }
  212.  
  // Matches a single MP3 URL hosted on files.shapes.inc. The lazy \S*? stops at
  // the first ".mp3" and never crosses whitespace, so a message holding two
  // links (or text after a link) cannot be captured as one bogus URL.
  const MP3_LINK_PATTERN = /https:\/\/files\.shapes\.inc\/\S*?\.mp3/;

  // Observe the conversation for changes and detect MP3 links.
  // On every DOM change the last element whose class contains "messageContent_"
  // is inspected; when it holds an MP3 link different from the one recorded
  // under 'lastMp3Link', the link is recorded, copied into the input box and
  // transcribed.
  const observer = new MutationObserver(() => {
    const messageItems = document.querySelectorAll('div[class*="messageContent_"]');
    const lastMessage = messageItems[messageItems.length - 1];

    const mp3Link = lastMessage?.innerText.match(MP3_LINK_PATTERN)?.[0];
    if (!mp3Link) {
      return;
    }

    // Skip links that were already handled; the marker lives in localStorage,
    // so it is still there the next time the script runs.
    if (mp3Link === localStorage.getItem('lastMp3Link')) {
      return;
    }

    localStorage.setItem('lastMp3Link', mp3Link); // Store the new link
    document.getElementById('mp3-url').value = mp3Link; // Populate the input
    transcribe(); // Automatically transcribe the new link
  });

  // Start observing the document body for new messages
  observer.observe(document.body, { childList: true, subtree: true });
  236.  
  // Caps the number of cached transcripts kept in localStorage.
  // Cached transcripts are the entries whose key starts with 'http' (they are
  // keyed by MP3 URL); other entries are left untouched.
  // Parameters:
  //   maxEntries - how many cached transcripts may remain (default 10).
  // NOTE(review): entries are removed in the order Object.keys(localStorage)
  // reports them. That order is not guaranteed to be insertion order, so
  // "oldest first" is best-effort only - confirm whether strict ordering matters.
  function manageLocalStorageLimit(maxEntries = 10) {
    const transcriptKeys = Object.keys(localStorage).filter((key) => key.startsWith('http'));
    const surplus = Math.max(transcriptKeys.length - maxEntries, 0);

    for (const key of transcriptKeys.slice(0, surplus)) {
      localStorage.removeItem(key);
    }
  }
  247.  
  // Shows the "Transcript done!" notice for three seconds.
  // The notice element (#transcription-done-message) is created and appended to
  // <body> on first use and reused afterwards.
  function showTranscriptionDoneMessage() {
    let messageDiv = document.getElementById('transcription-done-message');
    if (!messageDiv) {
      messageDiv = document.createElement('div');
      messageDiv.id = 'transcription-done-message';
      messageDiv.innerText = 'Transcript done!';
      document.body.appendChild(messageDiv);
    }

    messageDiv.style.display = 'block';

    // Restart the countdown: a timer left over from an earlier call would
    // otherwise hide this fresh notice before its three seconds are up. The
    // handle is kept on the function itself so it survives between calls.
    clearTimeout(showTranscriptionDoneMessage.hideTimer);
    showTranscriptionDoneMessage.hideTimer = setTimeout(() => {
      messageDiv.style.display = 'none';
    }, 3000);
  }
  263. })();