Google Cloud TTS Downloader

Adds a Download button, language flags, and voice gender labels to Google Cloud Text-to-Speech AI.

// ==UserScript==
// @name         Google Cloud TTS Downloader
// @description  Add a Download button, language flags, voice gender for Google Cloud Text-to-Speech AI.
// @icon         https://www.google.com/s2/favicons?sz=64&domain=cloud.google.com
// @version      1.6
// @author       afkarxyz
// @namespace    https://github.com/afkarxyz/userscripts/
// @supportURL   https://github.com/afkarxyz/userscripts/issues
// @license      MIT
// @match        https://www.gstatic.com/cloud-site-ux/text_to_speech/text_to_speech.min.html
// ==/UserScript==

;(() => {
  // Base URL of the 4x3 SVG country flags from the lipis/flag-icons GitHub repository, served
  // through jsDelivr. enhanceItem appends "<country code>.svg" to it.
  // NOTE(review): the original version pin had been mangled into "[email protected]" (an e-mail
  // obfuscation artefact), which made every flag URL invalid. It is pinned to the 7.x major range
  // here; confirm the exact release the script was meant to use.
  const FLAG_BASE_URL = "https://cdn.jsdelivr.net/gh/lipis/flag-icons@7/flags/4x3/";

  // Labels of the audio-device-profile entries. enhanceItem marks a list item carrying one of
  // these labels as processed without changing how it is rendered.
  const AUDIO_DEVICE_PROFILES = [
    "Default",
    "Smart watch or wearable",
    "Smartphone",
    "Headphones or earbuds",
    "Small home speaker",
    "Smart home speaker",
    "Home entertainment system or smart TV",
    "Car speaker",
    "Interactive Voice Response (IVR) system",
  ];

  // Lookup from the language label shown in the dropdown to the flag file name (`code`, appended
  // to FLAG_BASE_URL as "<code>.svg") and the English label (`text`) rendered in its place.
  const languageMap = {
    textMap: {
      "Arabic, multi-region": { code: "sa", text: "Arabic" },
      "Bahasa Indonesia (Indonesia)": { code: "id", text: "Indonesian (Indonesia)" },
      "Deutsch (Deutschland)": { code: "de", text: "German (Germany)" },
      "English (Australia)": { code: "au", text: "English (Australia)" },
      "English (Great Britain)": { code: "gb", text: "English (Great Britain)" },
      "English (India)": { code: "in", text: "English (India)" },
      "English (United States)": { code: "us", text: "English (United States)" },
      "Español (España)": { code: "es", text: "Spanish (Spain)" },
      "Español (Estados Unidos)": { code: "us", text: "Spanish (United States)" },
      "Français (Canada)": { code: "ca", text: "French (Canada)" },
      "Français (France)": { code: "fr", text: "French (France)" },
      "Italiano (Italia)": { code: "it", text: "Italian (Italy)" },
      "Nederlands (Nederland)": { code: "nl", text: "Dutch (Netherlands)" },
      "Polski (Polska)": { code: "pl", text: "Polish (Poland)" },
      "Português (Brasil)": { code: "br", text: "Portuguese (Brazil)" },
      "Swahili (Kenya)": { code: "ke", text: "Swahili (Kenya)" },
      "Tiếng Việt (Việt Nam)": { code: "vn", text: "Vietnamese (Vietnam)" },
      "Türkçe (Türkiye)": { code: "tr", text: "Turkish (Turkey)" },
      "Русский (Россия)": { code: "ru", text: "Russian (Russia)" },
      "Українська (Україна)": { code: "ua", text: "Ukrainian (Ukraine)" },
      "اردو (بھارت)": { code: "in", text: "Urdu (India)" },
      "मराठी (भारत)": { code: "in", text: "Marathi (India)" },
      "हिन्दी (भारत)": { code: "in", text: "Hindi (India)" },
      "বাংলা (ভারত)": { code: "in", text: "Bengali (India)" },
      "ગુજરાતી (ભારત)": { code: "in", text: "Gujarati (India)" },
      "தமிழ் (இந்தியா)": { code: "in", text: "Tamil (India)" },
      "తెలుగు (భారతదేశం)": { code: "in", text: "Telugu (India)" },
      "ಕನ್ನಡ (ಭಾರತ)": { code: "in", text: "Kannada (India)" },
      "മലയാളം (ഇന്ത്യ)": { code: "in", text: "Malayalam (India)" },
      "ไทย (ประเทศไทย)": { code: "th", text: "Thai (Thailand)" },
      "日本語(日本)": { code: "jp", text: "Japanese (Japan)" },
      "普通话 (中国大陆)": { code: "cn", text: "Mandarin (China)" },
      "한국어 (대한민국)": { code: "kr", text: "Korean (South Korea)" },
    },
  };

  // Voice model names grouped by gender; the keys are capitalised by getVoiceGender to produce
  // the "Female" / "Male" labels.
  const voiceModelMap = {
    female: [
      "Aoede", "Kore", "Leda", "Zephyr", "Achernar", "Autonoe", "Callirrhoe",
      "Despina", "Erinome", "Gacrux", "Laomedeia", "Pulcherrima", "Sulafat", "Vindemiatrix",
    ],
    male: [
      "Charon", "Fenrir", "Orus", "Puck", "Achird", "Algenib", "Algieba", "Alnilam",
      "Enceladus", "Iapetus", "Rasalgethi", "Sadachbia", "Sadaltager", "Schedar", "Umbriel",
      "Zubenelgenubi",
    ],
  };

  // Flat list of every known voice name (female entries first, then male); enhanceItem uses it
  // to recognise list items whose text is a bare voice name.
  const ALL_VOICE_NAMES = Object.values(voiceModelMap).flat();
  // Fill colour applied to the gender icon, keyed by the label returned from getVoiceGender.
  const GENDER_COLORS = { "Male": "dodgerblue", "Female": "hotpink", "Unknown": "inherit" };
  // Inline SVG markup for the gender icon, keyed the same way; "Unknown" has no icon.
  const GENDER_ICONS = {
    "Male": `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" width="1em" height="1em" style="vertical-align: middle;"><path d="M289.8 46.8c3.7-9 12.5-14.8 22.2-14.8l112 0c13.3 0 24 10.7 24 24l0 112c0 9.7-5.8 18.5-14.8 22.2s-19.3 1.7-26.2-5.2l-33.4-33.4L321 204.2c19.5 28.4 31 62.7 31 99.8c0 97.2-78.8 176-176 176S0 401.2 0 304s78.8-176 176-176c37 0 71.4 11.4 99.8 31l52.6-52.6L295 73c-6.9-6.9-8.9-17.2-5.2-26.2zM400 80s0 0 0 0s0 0 0 0s0 0 0 0zM176 416a112 112 0 1 0 0-224 112 112 0 1 0 0 224z"/></svg>`,
    "Female": `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 384 512" width="1em" height="1em" style="vertical-align: middle;"><path d="M80 176a112 112 0 1 1 224 0A112 112 0 1 1 80 176zM224 349.1c81.9-15 144-86.8 144-173.1C368 78.8 289.2 0 192 0S16 78.8 16 176c0 86.3 62.1 158.1 144 173.1l0 34.9-32 0c-17.7 0-32 14.3-32 32s14.3 32 32 32l32 0 0 32c0 17.7 14.3 32 32 32s32-14.3 32-32l0-32 32 0c17.7 0 32-14.3 32-32s-14.3-32-32-32l-32 0 0-34.9z"/></svg>`,
    "Unknown": ""
  };

  // `audioContent` of the most recent successful synthesize response (written by the
  // XMLHttpRequest open hook); read when building the player and the download.
  let lastResponse = null;
  // Body of the most recent synthesize request (written by the XMLHttpRequest send hook);
  // downloadAudio reads it to name the file.
  let lastPayload = null;
  // MutationObserver instances created by setupObservers; kept so a repeated setup can
  // disconnect the previous ones first.
  let generalObserver = null;
  let voiceListObserver = null;
  // Element id of the injected player/download container; used to find and remove old copies.
  const CUSTOM_AUDIO_CONTAINER_ID = "gctts-custom-audio-container";

  /**
   * Looks up which gender list in voiceModelMap contains the given voice name.
   *
   * @param {string} voiceName - Voice model name, e.g. "Kore".
   * @returns {string} The matching map key with its first letter upper-cased ("Female" or
   *   "Male" for the current map), or "Unknown" when no list contains the name.
   */
  function getVoiceGender(voiceName) {
    const matchingEntry = Object.entries(voiceModelMap).find(([, names]) => names.includes(voiceName));
    if (!matchingEntry) {
      return "Unknown";
    }
    const [genderKey] = matchingEntry;
    return `${genderKey.charAt(0).toUpperCase()}${genderKey.slice(1)}`;
  }

  // Hook XMLHttpRequest#open: remember the request URL on the instance (the send hook reads
  // `customURL`) and, for synthesize calls, capture the audio from the response once it has
  // completed successfully.
  const originalOpen = XMLHttpRequest.prototype.open;
  XMLHttpRequest.prototype.open = function (_method, url) {
    // open() may be handed a URL object (or anything else the page passes). Normalise to a
    // string so the includes() checks cannot throw and break the page's own request.
    const requestUrl = String(url);
    this.customURL = requestUrl;
    if (requestUrl.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")) {
      this.addEventListener("readystatechange", function () {
        if (this.readyState !== 4 || this.status !== 200) return;
        try {
          // responseText throws unless responseType is "" or "text"; with "json" the parsed
          // body is already available on `response`.
          const response = this.responseType === "json" ? this.response : JSON.parse(this.responseText);
          lastResponse = response.audioContent;
          updateAudioPlayerAndDownload();
        } catch (error) {
          // Best effort: never let the hook disturb the page, but leave a trace for debugging.
          console.warn("[Google Cloud TTS Downloader] Could not process synthesize response:", error);
        }
      });
    }
    // Forward the untouched argument list: open() behaves differently depending on how many
    // arguments it receives, so `arguments` is passed through as-is.
    return originalOpen.apply(this, arguments);
  };

  // Hook XMLHttpRequest#send: for synthesize calls (recognised through the URL stored on the
  // instance as `customURL`), keep the request body so downloadAudio can name the file after it.
  const originalSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.send = function (data) {
    // `customURL` is absent when open() was not called through the hook, and may not be a string.
    const requestUrl = this.customURL == null ? "" : String(this.customURL);
    if (requestUrl.includes("texttospeech.googleapis.com/v1beta1/text:synthesize")) {
      try {
        lastPayload = typeof data === "string" ? JSON.parse(data) : data;
      } catch (error) {
        // Best effort: keep the previous payload, but report the problem instead of hiding it.
        console.warn("[Google Cloud TTS Downloader] Could not parse synthesize request body:", error);
      }
    }
    return originalSend.apply(this, arguments);
  };

  /**
   * Decodes a base64 string into raw bytes.
   *
   * @param {string} base64 - Base64-encoded data.
   * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
   */
  const base64ToArrayBuffer = (base64) => {
    // atob yields a "binary string" in which every character code is one byte value.
    const decoded = atob(base64);
    const bytes = Uint8Array.from(decoded, (character) => character.charCodeAt(0));
    return bytes.buffer;
  };

  /**
   * Click handler of the Download button: saves the most recent synthesised audio as
   * "<YYYYMMDD_HHMMSS>_<voice name>_<first 25 characters of the text>....wav".
   *
   * Does nothing until both a response (lastResponse) and its request payload (lastPayload)
   * have been captured. Falls back to "tts_output" / "unknown_voice" when the payload carries
   * no usable text or voice name (for example when the request body was not JSON text).
   */
  const downloadAudio = () => {
    if (!lastResponse || !lastPayload) return;

    const pad = (value) => String(value).padStart(2, "0");
    const now = new Date();
    const datePart = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}`;
    const timePart = `${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
    const timestamp = `${datePart}_${timePart}`;

    // The payload shape is not guaranteed, so every level is read defensively.
    const inputText = lastPayload.input?.text;
    const textSnippet = typeof inputText === "string"
      ? inputText
          .substring(0, 25)
          .replace(/[^\w\s.-]/gi, "_")
          // Collapse newlines/tabs/runs of spaces so the file name stays on one line.
          .replace(/\s+/g, " ")
          .trim()
      : "";
    const truncatedText = textSnippet ? `${textSnippet}...` : "tts_output";

    const voiceName = lastPayload.voice?.name;
    const voiceNameStr = typeof voiceName === "string" && voiceName
      ? voiceName.replace(/[^\w-]/gi, "_")
      : "unknown_voice";

    const filename = `${timestamp}_${voiceNameStr}_${truncatedText}.wav`;
    const blobForDownload = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" });
    const objectUrl = URL.createObjectURL(blobForDownload);

    const link = document.createElement("a");
    link.href = objectUrl;
    link.download = filename;
    link.click();

    // Revoke later rather than synchronously, so the browser has certainly started reading
    // the blob before its URL disappears.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 10000);
  };

  /**
   * Builds the injected widget: a container holding an <audio> player and a "Download"
   * paper-button wired to downloadAudio. When the page's Speak button can be found inside the
   * ts-app shadow root, its computed height and font settings are copied onto the new button.
   *
   * The container is returned detached; the caller decides where to insert it.
   *
   * @returns {{playerContainer: HTMLElement, audioPlayerElement: HTMLAudioElement}}
   */
  const createAudioPlayerContainer = () => {
    const wrapper = document.createElement("div");
    wrapper.id = CUSTOM_AUDIO_CONTAINER_ID;
    wrapper.style.cssText = `display: flex; flex-direction: column; align-items: center; justify-content: center; width: 100%; margin-top: 15px; padding: 10px; border-radius: 8px;`;

    const player = document.createElement("audio");
    player.id = "custom-audio-player";
    player.controls = true;
    player.style.cssText = `width: 100%; max-width: 500px; margin-bottom: 10px;`;

    const button = document.createElement("paper-button");
    const buttonAttributes = [
      ["role", "button"],
      ["tabindex", "0"],
      ["animated", ""],
      ["elevation", "0"],
    ];
    for (const [attributeName, attributeValue] of buttonAttributes) {
      button.setAttribute(attributeName, attributeValue);
    }
    Object.assign(button.style, {
      backgroundColor: "var(--google-blue-500)",
      color: "#fff",
      padding: "0.7em 1em",
      minWidth: "5.14em",
      margin: "0 0.29em",
      textTransform: "uppercase",
      borderRadius: "3px",
      boxSizing: "border-box",
    });

    // Mirror the Speak button's metrics so both buttons look alike.
    const speakButton = document.querySelector("ts-app")?.shadowRoot?.querySelector("#button > paper-button");
    if (speakButton) {
      const { height, fontFamily, fontSize, fontWeight, letterSpacing } = getComputedStyle(speakButton);
      Object.assign(button.style, {
        height,
        lineHeight: height,
        fontFamily,
        fontSize,
        fontWeight,
        letterSpacing,
      });
    }

    const iconMarkup = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" width="18px" height="18px" fill="currentColor" style="margin-right: 8px; vertical-align: middle;"><path d="M369 217L241 345c-9.4 9.4-24.6 9.4-33.9 0L79 217c-9.4-9.4-9.4-24.6 0-33.9s24.6-9.4 33.9 0l87 87L200 24c0-13.3 10.7-24 24-24s24 10.7 24 24l0 246.1 87-87c9.4-9.4 24.6-9.4 33.9 0s9.4 24.6 0 33.9zM48 344l0 80c0 22.1 17.9 40 40 40l272 0c22.1 0 40-17.9 40-40l0-80c0-13.3 10.7-24 24-24s24 10.7 24 24l0 80c0 48.6-39.4 88-88 88L88 512c-48.6 0-88-39.4-88-88l0-80c0-13.3 10.7-24 24-24s24 10.7 24 24z"/></svg>`;
    button.innerHTML = `<span class="button-inner" style="display: flex; align-items: center; justify-content: center;">${iconMarkup}<span class="label" style="display: inline-block; line-height: normal;"><span class="ready">Download</span></span></span>`;
    button.addEventListener("click", downloadAudio);

    wrapper.append(player, button);
    return { playerContainer: wrapper, audioPlayerElement: player };
  };

  /**
   * Replaces any previously injected player with a fresh one for the audio in lastResponse.
   *
   * Old containers are removed from both the document and the ts-app shadow root, and the blob
   * URL of each old player is revoked. If lastResponse is empty nothing new is inserted.
   * Placement: right after `.control-playback` in the shadow root, else appended to
   * `.synth-panel`, else appended to the shadow root. Without a shadow root the player goes
   * after the Speak button's container, or at the end of document.body as a last resort.
   */
  const updateAudioPlayerAndDownload = () => {
    const containerSelector = `#${CUSTOM_AUDIO_CONTAINER_ID}`;

    // Removes every injected container below `root`, releasing its player's blob URL first.
    const discardPlayersIn = (root) => {
      root.querySelectorAll(containerSelector).forEach((container) => {
        const audio = container.querySelector("audio#custom-audio-player");
        if (audio && audio.src) {
          URL.revokeObjectURL(audio.src);
        }
        container.remove();
      });
    };

    const app = document.querySelector("ts-app");
    const shadowRoot = app && app.shadowRoot ? app.shadowRoot : null;

    discardPlayersIn(document);
    if (shadowRoot) {
      discardPlayersIn(shadowRoot);
    }

    if (!lastResponse) return;

    const blobForPlayer = new Blob([base64ToArrayBuffer(lastResponse)], { type: "audio/wav" });
    const { playerContainer, audioPlayerElement } = createAudioPlayerContainer();
    audioPlayerElement.src = URL.createObjectURL(blobForPlayer);

    if (shadowRoot) {
      const playbackControls = shadowRoot.querySelector(".control-playback");
      if (playbackControls) {
        playbackControls.insertAdjacentElement("afterend", playerContainer);
        return;
      }
      const synthPanel = shadowRoot.querySelector(".synth-panel");
      (synthPanel || shadowRoot).appendChild(playerContainer);
      return;
    }

    // No shadow root available: look for the Speak button in the light DOM instead.
    const speakButtonElement = document.querySelector('ts-button#button > paper-button') || document.querySelector('paper-button[aria-label="Speak text"]');
    const anchor = speakButtonElement
      ? speakButtonElement.closest('div.control-playback') || speakButtonElement.parentElement
      : null;
    if (anchor && anchor !== document.body) {
      anchor.insertAdjacentElement('afterend', playerContainer);
    } else {
      document.body.appendChild(playerContainer);
    }
  };

  /**
   * Decorates one dropdown list item, based on its trimmed text content:
   *  - a language label found in languageMap.textMap is replaced by a flag image plus the
   *    English label;
   *  - a voice (a full voice id ending in a model name, or a bare name from ALL_VOICE_NAMES)
   *    whose gender is known is replaced by a coloured gender icon plus the model name;
   *  - an audio device profile label is only marked as processed.
   * Processed items get `data-enhanced="true"` and `data-lookup-text`; voices additionally get
   * `data-voice-text`.
   *
   * @param {HTMLElement} item - The list item to decorate.
   * @returns {boolean} true when the item was recognised and marked, false otherwise
   *   (missing item, already enhanced, empty text, or unrecognised text).
   */
  function enhanceItem(item) {
    if (!item || item.dataset.enhanced === "true") return false;
    const label = item.textContent ? item.textContent.trim() : "";
    if (!label) return false;

    // Swaps the item's content for `node`.
    const replaceContent = (node) => {
      item.innerHTML = "";
      item.appendChild(node);
    };
    // Flags the item as processed so later passes skip it.
    const markEnhanced = (lookupText) => {
      item.dataset.enhanced = "true";
      item.dataset.lookupText = lookupText;
    };

    // --- Language entries -------------------------------------------------------------------
    const langInfo = languageMap.textMap[label];
    if (langInfo) {
      const row = document.createElement("div");
      row.style.cssText = "display: flex; align-items: center; gap: 8px;";

      const flag = document.createElement("img");
      flag.src = `${FLAG_BASE_URL}${langInfo.code}.svg`;
      flag.alt = `${langInfo.code} flag`;
      flag.style.cssText = "width: 24px; height: 18px; margin-right: 5px;";

      const caption = document.createElement("span");
      caption.textContent = langInfo.text;

      row.appendChild(flag);
      row.appendChild(caption);
      replaceContent(row);
      markEnhanced(langInfo.text);
      return true;
    }

    // --- Voice entries ----------------------------------------------------------------------
    const voiceIdMatch =
      label.match(/^[a-z]{2,3}(?:-[A-Z]{1,2})?(?:-[A-Za-z]+)*-([A-Za-z0-9]+(?:-[A-Z])?)$/) ||
      label.match(/^[a-z]{2,3}(?:-[A-Z]{1,2})?-Chirp3-HD-([A-Za-z0-9]+)$/);
    let voiceModelName = null;
    if (voiceIdMatch && voiceIdMatch[1]) {
      voiceModelName = voiceIdMatch[1];
    } else if (ALL_VOICE_NAMES.includes(label)) {
      voiceModelName = label;
    }

    if (voiceModelName) {
      const voiceGender = getVoiceGender(voiceModelName);
      if (voiceGender !== "Unknown") {
        const displayVoiceText = `${voiceModelName} (${voiceGender})`;

        const row = document.createElement("div");
        Object.assign(row.style, { display: "flex", alignItems: "center", gap: "5px" });

        const iconHolder = document.createElement("span");
        iconHolder.innerHTML = GENDER_ICONS[voiceGender] || "";
        const icon = iconHolder.querySelector('svg');
        if (icon) {
          icon.style.fill = GENDER_COLORS[voiceGender];
        }

        const nameLabel = document.createElement("span");
        nameLabel.textContent = voiceModelName;

        row.appendChild(iconHolder);
        row.appendChild(nameLabel);
        replaceContent(row);
        markEnhanced(displayVoiceText);
        item.dataset.voiceText = displayVoiceText;
        return true;
      }
    }

    // --- Audio device profiles: mark only, keep the original rendering ----------------------
    if (!AUDIO_DEVICE_PROFILES.includes(label)) return false;
    markEnhanced(label);
    return true;
  }

  /**
   * Runs enhanceItem over every not-yet-enhanced paper-item below `root`.
   *
   * @param {ParentNode|null|undefined} root - Document, element or shadow root to search.
   * @returns {number} How many items enhanceItem reported as enhanced. Errors thrown while
   *   querying or enhancing are ignored; the count reached so far is returned.
   */
  function enhanceAllItemsInRoot(root) {
    let enhancedTotal = 0;
    if (!root) return enhancedTotal;
    try {
      for (const candidate of root.querySelectorAll("paper-item:not([data-enhanced='true'])")) {
        if (enhanceItem(candidate)) {
          enhancedTotal += 1;
        }
      }
    } catch (e) {}
    return enhancedTotal;
  }

  /**
   * Enhances all pending list items, first inside the ts-app shadow root (when present) and
   * then in document.body.
   *
   * @returns {number} Combined count of items enhanced in both places.
   */
  function enhanceDynamically() {
    const shadowRoot = document.querySelector("ts-app")?.shadowRoot;
    const enhancedInShadow = shadowRoot ? enhanceAllItemsInRoot(shadowRoot) : 0;
    const enhancedInBody = enhanceAllItemsInRoot(document.body);
    return enhancedInShadow + enhancedInBody;
  }

  /**
   * (Re)installs everything that keeps the enhancements applied as the page changes:
   *  - a MutationObserver on document.body that re-runs enhanceDynamically whenever an added
   *    element is, or contains, a paper-listbox or paper-item;
   *  - a MutationObserver on the voice listbox (#variantListbox) that re-runs it whenever
   *    children are added;
   *  - a capturing document click listener (handleGlobalClick).
   * Observers and the listener from an earlier call are removed first, so calling this more
   * than once does not stack handlers.
   */
  function setupObservers() {
    const LIST_SELECTOR = 'paper-listbox, paper-item';

    // True when the mutation added at least one element that is, or contains, list content.
    const addsListContent = (mutation) =>
      mutation.type === 'childList' &&
      [...mutation.addedNodes].some((node) =>
        node.nodeType === Node.ELEMENT_NODE &&
        (node.matches(LIST_SELECTOR) || Boolean(node.querySelector(LIST_SELECTOR))));

    if (generalObserver) generalObserver.disconnect();
    generalObserver = new MutationObserver((mutationsList) => {
      if (mutationsList.some(addsListContent)) {
        enhanceDynamically();
      }
    });
    generalObserver.observe(document.body, { childList: true, subtree: true });

    const voiceListbox = document.querySelector("ts-app")?.shadowRoot
      ?.querySelector('.control-variant paper-dropdown-menu')
      ?.querySelector('paper-listbox#variantListbox');
    if (voiceListbox) {
      if (voiceListObserver) voiceListObserver.disconnect();
      // Deliberately a plain (non-async) callback: nothing is awaited, and an async wrapper
      // would turn any thrown error into an unhandled promise rejection.
      voiceListObserver = new MutationObserver((mutations) => {
        const itemsAdded = mutations.some(
          (mutation) => mutation.type === 'childList' && mutation.addedNodes.length > 0);
        if (itemsAdded) {
          enhanceDynamically();
        }
      });
      voiceListObserver.observe(voiceListbox, { childList: true });
    }

    document.removeEventListener("click", handleGlobalClick, true);
    document.addEventListener("click", handleGlobalClick, true);
  }

  /**
   * Capturing click listener: schedules another enhancement pass shortly after any click.
   * The pass runs after 500 ms when the click landed on the voice dropdown's trigger or inside
   * its opened content, and after 350 ms for every other click.
   *
   * @param {Event} event - The click event.
   */
  function handleGlobalClick(event) {
    const voiceMenu = document.querySelector("ts-app")?.shadowRoot?.querySelector('.control-variant paper-dropdown-menu');
    const clickTarget = event.target;

    let insideVoiceDropdown = false;
    if (voiceMenu && clickTarget instanceof Node) {
      const trigger = voiceMenu.querySelector('[slot="dropdown-trigger"]');
      const content = voiceMenu.dropdownContent;
      const onTrigger = Boolean(trigger && trigger.contains(clickTarget));
      insideVoiceDropdown =
        onTrigger || Boolean(voiceMenu.opened && content && content.contains(clickTarget));
    }

    const delayMs = insideVoiceDropdown ? 500 : 350;
    setTimeout(enhanceDynamically, delayMs);
  }

  // Entry point, scheduled by waitForApp once the dropdown listboxes exist: decorates the
  // items already on the page, then installs the observers and click listener that keep
  // later-added items decorated.
  function main() {
    enhanceDynamically();
    setupObservers();
  }

  /**
   * Polls once per animation frame until the ts-app element, its shadow root, and both the
   * language and voice listboxes exist, then schedules main() to run 1200 ms later.
   */
  function waitForApp() {
    const app = document.querySelector("ts-app");
    const shadowRoot = app?.shadowRoot;
    const languageListbox = shadowRoot?.querySelector('.control-language paper-dropdown-menu paper-listbox');
    const voiceListbox = shadowRoot?.querySelector('.control-variant paper-dropdown-menu paper-listbox#variantListbox');

    const isReady = Boolean(app && shadowRoot && languageListbox && voiceListbox);
    if (!isReady) {
      // Not rendered yet: check again on the next frame.
      requestAnimationFrame(waitForApp);
      return;
    }
    setTimeout(main, 1200);
  }

  // Start polling for the app right away when the DOM has already been parsed; otherwise wait
  // for DOMContentLoaded first.
  const domAlreadyParsed = ["complete", "interactive"].includes(document.readyState);
  if (!domAlreadyParsed) {
    document.addEventListener("DOMContentLoaded", waitForApp);
  } else {
    waitForApp();
  }

})();