您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Adds OpenAI text-to-speech and speech-to-text to T3Chat
- // ==UserScript==
- // @name T3Chat OpenAI TTS & STT
- // @namespace https://github.com/cameron/t3chat-userscripts
- // @version 0.1.2
- // @description Adds OpenAI text-to-speech and speech-to-text to T3Chat
- // @match https://t3.chat/*
- // @match https://*.t3.chat/*
- // @run-at document-idle
- // @grant none
- // @license MIT
- // ==/UserScript==
- (() => {
- 'use strict';
/** Names of the localStorage entries this script reads and writes. */
const STORAGE_KEYS = {
  t3chatApiKey: 'apikey:openai',
  ttsEnabled: 't3chat-tts-enabled',
  sttEnabled: 't3chat-stt-enabled',
  ttsVoice: 't3chat-tts-voice',
  sttMethod: 't3chat-stt-method',
  version: 't3chat-tts-stt-version'
};

/**
 * Static script configuration: API base URL, default models and voice,
 * the recording time limit in milliseconds, the script version, and the
 * storage key names.
 */
const CONFIG = {
  apiBaseUrl: 'https://api.openai.com/v1',
  ttsModel: 'tts-1',
  ttsVoice: 'alloy',
  sttModel: 'whisper-1',
  maxRecordingTime: 60000,
  currentVersion: '0.1.2',
  storageKeys: STORAGE_KEYS
};
// Version migration: when the version recorded in localStorage differs from
// the running script version, forget the persisted STT method and record the
// current version.
{
  const recordedVersion = localStorage.getItem(CONFIG.storageKeys.version);
  const isCurrent = recordedVersion === CONFIG.currentVersion;
  if (!isCurrent) {
    localStorage.removeItem(CONFIG.storageKeys.sttMethod);
    localStorage.setItem(CONFIG.storageKeys.version, CONFIG.currentVersion);
  }
}
/**
 * CSS selectors used to locate page elements. `chatInput` is an ordered list
 * of candidates; the other entries are comma-separated selector lists.
 */
const SELECTORS = {
  chatInput: [
    '#chat-input',
    'textarea[aria-describedby="chat-input-description"]',
    'textarea[placeholder*="message"]',
    'textarea[data-testid="chat-input"]'
  ],
  messageContainer: ['[role="article"]', '.message', 'div[class*="message"]'].join(', '),
  messageContent: [
    '.prose',
    '.message-content',
    'div[class*="prose"]',
    'p',
    'div[class*="text"]'
  ].join(', '),
  messageActionsContainer: [
    'div[class*="absolute"][class*="flex"][class*="items-center"][class*="gap"]',
    'div.absolute.left-0[class*="-ml-0"][class*="mt-2"]',
    'div.absolute.right-0[class*="mt-"]'
  ].join(', '),
  sendButton: [
    'button[type="submit"][aria-label*="Message"]',
    'button[aria-label*="send" i]'
  ].join(', ')
};
/**
 * Reads the API key stored under CONFIG.storageKeys.t3chatApiKey.
 *
 * @returns {string|null} The stored value when it starts with "sk-",
 *   otherwise null (including when nothing is stored).
 */
const getT3ChatApiKey = () => {
  const stored = localStorage.getItem(CONFIG.storageKeys.t3chatApiKey);
  if (stored === null || stored === undefined) return null;
  return stored.startsWith('sk-') ? stored : null;
};
/**
 * Mutable runtime state. Persisted preferences are read from localStorage
 * once, when this object is created; `apiKey` is a getter that re-reads the
 * stored key on every access.
 */
const state = (() => {
  const { storageKeys } = CONFIG;
  // A flag counts as enabled unless the stored value is exactly 'false'.
  const isFlagOn = (key) => localStorage.getItem(key) !== 'false';
  // Falls back when the stored value is missing or empty.
  const readOr = (key, fallback) => localStorage.getItem(key) || fallback;

  return {
    get apiKey() {
      return getT3ChatApiKey();
    },
    ttsEnabled: isFlagOn(storageKeys.ttsEnabled),
    sttEnabled: isFlagOn(storageKeys.sttEnabled),
    sttMethod: readOr(storageKeys.sttMethod, 'openai'),
    ttsVoice: readOr(storageKeys.ttsVoice, CONFIG.ttsVoice),
    isRecording: false,
    mediaRecorder: null,
    audioChunks: [],
    currentAudio: null,
    recordingMimeType: '',
    speechRecognition: null
  };
})();
// First run: when an enable flag has never been stored, persist 'true' for it
// and mirror that in the in-memory state (TTS first, then STT).
for (const flag of ['ttsEnabled', 'sttEnabled']) {
  const storageKey = CONFIG.storageKeys[flag];
  if (localStorage.getItem(storageKey) === null) {
    localStorage.setItem(storageKey, 'true');
    state[flag] = true;
  }
}
/**
 * Finds the chat input by trying each selector in SELECTORS.chatInput in order.
 *
 * @returns {Element|undefined} The first match whose tag is TEXTAREA, or
 *   undefined when no selector yields one.
 */
const findChatInput = () => {
  for (const selector of SELECTORS.chatInput) {
    const candidate = document.querySelector(selector);
    if (candidate && candidate.tagName === 'TEXTAREA') return candidate;
  }
  return undefined;
};
/**
 * Locates the element that the extra controls should be inserted into.
 *
 * Looks for a send button (document-wide via SELECTORS.sendButton, then inside
 * the input's parent) and returns that button's parent. Without a send button
 * it falls back to the input's nearest `div[class*="flex"]` ancestor, then to
 * the input's parent.
 *
 * @returns {Element|null} The container, or null when no chat input exists.
 */
const findInputContainer = () => {
  const input = findChatInput();
  if (!input) return null;

  const parent = input.parentElement;
  let sendBtn = document.querySelector(SELECTORS.sendButton);
  if (!sendBtn) sendBtn = parent?.querySelector('button[type="submit"]');
  if (!sendBtn) sendBtn = parent?.querySelector('button[aria-label*="send" i]');

  if (sendBtn) return sendBtn.parentElement;
  return input.closest('div[class*="flex"]') || parent;
};
/**
 * Injects the script's stylesheet into <head> once; later calls are no-ops
 * because the <style> element is looked up by its id first.
 *
 * The recording animation uses a script-specific keyframes name so it cannot
 * replace, or be replaced by, a page-defined `pulse` animation. The tooltip
 * hover rule covers all three control button classes.
 */
const injectStyles = () => {
  if (document.querySelector('#t3chat-tts-stt-styles')) return;
  const style = document.createElement('style');
  style.id = 't3chat-tts-stt-styles';
  style.textContent = `
    .t3-tts-btn,.t3-stt-btn,.t3-settings-btn{
      display:flex;align-items:center;justify-content:center;width:32px;height:32px;border:1px solid hsl(var(--border));
      border-radius:6px;background:hsl(var(--background));color:hsl(var(--foreground));cursor:pointer;
      transition:all .2s ease;position:relative;flex-shrink:0
    }
    .t3-tts-btn:hover,.t3-stt-btn:hover,.t3-settings-btn:hover{background:hsl(var(--muted));border-color:hsl(var(--ring))}
    .t3-stt-btn.recording{background:#ef4444;color:#fff;animation:t3-tts-stt-pulse 1s infinite}
    .t3-tts-btn.speaking{background:#3b82f6;color:#fff}
    .t3-tts-btn.disabled,.t3-stt-btn.disabled{opacity:.5;cursor:not-allowed}
    @keyframes t3-tts-stt-pulse{0%,100%{opacity:1}50%{opacity:.7}}
    .t3-tooltip{position:absolute;bottom:100%;left:50%;transform:translateX(-50%);background:hsl(var(--foreground));
      color:hsl(var(--background));padding:4px 8px;border-radius:4px;font-size:12px;white-space:nowrap;opacity:0;
      pointer-events:none;transition:opacity .2s ease;margin-bottom:4px;z-index:1000}
    .t3-tts-btn:hover .t3-tooltip,.t3-stt-btn:hover .t3-tooltip,.t3-settings-btn:hover .t3-tooltip{opacity:1}
    button[aria-label="Speak message"].speaking{background:#3b82f6!important;color:#fff!important}
    button[aria-label="Speak message"]{width:32px!important;height:32px!important;min-width:32px!important;min-height:32px!important;
      display:flex!important;align-items:center!important;justify-content:center!important}
    button[aria-label="Speak message"] .relative,button[aria-label="Speak message"] svg{width:24px!important;height:24px!important}
  `;
  document.head.appendChild(style);
};
/**
 * Sends a JSON POST request to an OpenAI endpoint under CONFIG.apiBaseUrl.
 *
 * @param {string} endpoint - Path appended to CONFIG.apiBaseUrl.
 * @param {*} data - Request payload; serialized with JSON.stringify.
 * @param {object} [options] - Extra fetch options. `options.headers` is merged
 *   on top of the default Authorization / Content-Type headers instead of
 *   replacing them; other options may override `method` and `body`.
 * @returns {Promise<Response>} The successful response.
 * @throws {Error} When no API key is configured, or when the response is not
 *   ok (message taken from the error payload, else "HTTP <status>").
 */
const callOpenAI = async (endpoint, data, options = {}) => {
  // Read the key once: state.apiKey is a getter.
  const apiKey = state.apiKey;
  if (!apiKey) throw new Error('OpenAI API key not configured');

  // Split headers off so spreading the remaining options cannot clobber the
  // merged header object (which would drop the Authorization header).
  const { headers: extraHeaders, ...fetchOptions } = options;
  const res = await fetch(`${CONFIG.apiBaseUrl}${endpoint}`, {
    method: 'POST',
    body: JSON.stringify(data),
    ...fetchOptions,
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      ...extraHeaders
    }
  });

  if (!res.ok) {
    // The error body may not be JSON; fall back to the status code.
    const err = await res.json().catch(() => null);
    throw new Error(err?.error?.message || `HTTP ${res.status}`);
  }
  return res;
};
/**
 * Requests speech audio for `text` and wraps it in an Audio element.
 *
 * The input is cut to its first 4096 characters. Any audio previously held in
 * state.currentAudio is paused and its object URL revoked before the new
 * element takes its place.
 *
 * @param {string} text - Text to synthesize.
 * @returns {Promise<HTMLAudioElement>} The new, not yet playing, audio element.
 */
const textToSpeech = async (text) => {
  const payload = {
    model: CONFIG.ttsModel,
    voice: state.ttsVoice,
    input: text.slice(0, 4096)
  };
  const response = await callOpenAI('/audio/speech', payload);
  const objectUrl = URL.createObjectURL(await response.blob());

  const previous = state.currentAudio;
  if (previous) {
    previous.pause();
    URL.revokeObjectURL(previous.src);
  }

  const player = new Audio(objectUrl);
  state.currentAudio = player;
  return player;
};
/**
 * Uploads recorded audio to the transcription endpoint and returns the text.
 *
 * @param {Blob} blob - Recorded audio; its MIME type selects the upload
 *   file extension (wav, mp4, mp3, ogg, defaulting to webm).
 * @returns {Promise<string>} The transcript, or '' when the response carries
 *   no string `text` field.
 * @throws {Error} When no API key is configured (no request is sent), or
 *   "STT failed: <body>" when the response is not ok.
 */
const speechToText = async (blob) => {
  // Fail fast like callOpenAI instead of sending "Bearer null".
  const apiKey = state.apiKey;
  if (!apiKey) throw new Error('OpenAI API key not configured');

  const mime = blob.type.toLowerCase();
  // Ordered [substring, extension] pairs; the first match wins. "mpeg" covers
  // the audio/mpeg MIME type, which would otherwise be mislabeled as webm.
  const extensionByHint = [
    ['wav', 'wav'],
    ['mp4', 'mp4'],
    ['mp3', 'mp3'],
    ['mpeg', 'mp3'],
    ['ogg', 'ogg']
  ];
  const hit = extensionByHint.find(([hint]) => mime.includes(hint));
  const ext = hit ? hit[1] : 'webm';

  const form = new FormData();
  form.append('file', blob, `audio.${ext}`);
  form.append('model', CONFIG.sttModel);

  // No Content-Type header: fetch sets the multipart boundary for FormData.
  const res = await fetch(`${CONFIG.apiBaseUrl}/audio/transcriptions`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form
  });
  if (!res.ok) {
    const txt = await res.text();
    throw new Error(`STT failed: ${txt}`);
  }
  const json = await res.json();
  // Callers invoke .trim() on the result, so never hand back undefined.
  return typeof json?.text === 'string' ? json.text : '';
};
/**
 * Creates a browser speech recognizer wired to the chat input.
 *
 * The recognizer is single-shot (not continuous, final results only, one
 * alternative) with language 'en-US'. A recognized phrase is appended to the
 * chat input, followed by a bubbling `input` event and focus. Start, end and
 * error events keep state.isRecording and the STT button in sync.
 *
 * @returns {object|null} The recognizer, or null when the browser exposes
 *   neither SpeechRecognition nor webkitSpeechRecognition.
 */
const initSpeechRecognition = () => {
  const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!Recognition) return null;

  const setRecording = (flag) => {
    state.isRecording = flag;
    updateSTTButton();
  };
  // One shared handler for both the end and error events.
  const markStopped = () => setRecording(false);

  const recognizer = new Recognition();
  Object.assign(recognizer, {
    continuous: false,
    interimResults: false,
    maxAlternatives: 1,
    lang: 'en-US'
  });

  recognizer.onstart = () => setRecording(true);
  recognizer.onresult = (event) => {
    const transcript = event.results[0][0].transcript;
    const target = findChatInput();
    if (!target || !transcript.trim()) return;
    target.value = (target.value + ' ' + transcript).trim();
    target.dispatchEvent(new Event('input', { bubbles: true }));
    target.focus();
  };
  recognizer.onend = markStopped;
  recognizer.onerror = markStopped;
  return recognizer;
};
/**
 * Starts speech capture.
 *
 * With the 'browser' STT method this delegates to the browser recognizer.
 * Otherwise it records from the microphone with MediaRecorder; when the
 * recording stops, the audio is transcribed and appended to the chat input.
 * Recording stops automatically after CONFIG.maxRecordingTime milliseconds.
 *
 * Failures (permission denied, no supported MIME type, transcription errors)
 * are logged to the console rather than thrown, and the microphone stream is
 * always released.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (state.sttMethod === 'browser') return startBrowserSpeechRecognition();

  let stream = null;
  const releaseStream = () => {
    if (stream) stream.getTracks().forEach((track) => track.stop());
  };

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const candidateTypes = [
      'audio/wav',
      'audio/mp4',
      'audio/webm;codecs=opus',
      'audio/webm',
      'audio/ogg;codecs=opus',
      'audio/mp3'
    ];
    const mimeType = candidateTypes.find((t) => MediaRecorder.isTypeSupported(t));
    if (!mimeType) throw new Error('No supported audio MIME type found');

    const recorder = new MediaRecorder(stream, { mimeType });
    let autoStopTimer = null;
    state.mediaRecorder = recorder;
    state.audioChunks = [];
    state.recordingMimeType = mimeType;

    recorder.ondataavailable = (e) => {
      if (e.data.size) state.audioChunks.push(e.data);
    };
    recorder.onstop = async () => {
      // Cancel the pending auto-stop so it cannot fire during a later recording.
      clearTimeout(autoStopTimer);
      const blob = new Blob(state.audioChunks, { type: state.recordingMimeType });
      try {
        const txt = await speechToText(blob);
        const input = findChatInput();
        if (input && txt.trim()) {
          input.value = (input.value + ' ' + txt).trim();
          input.dispatchEvent(new Event('input', { bubbles: true }));
          input.focus();
        }
      } catch (err) {
        // An event handler has no caller to reject to; report instead.
        console.error('[T3Chat TTS/STT] Transcription failed:', err);
      } finally {
        releaseStream();
        state.isRecording = false;
        updateSTTButton();
      }
    };

    recorder.start();
    state.isRecording = true;
    updateSTTButton();
    // Stop this specific recorder, and only if it is still recording.
    autoStopTimer = setTimeout(() => {
      if (recorder.state === 'recording') recorder.stop();
    }, CONFIG.maxRecordingTime);
  } catch (err) {
    // Setup failed after the microphone may already have been opened.
    releaseStream();
    console.error('[T3Chat TTS/STT] Could not start recording:', err);
  }
};
/**
 * Starts the browser speech recognizer, creating it on first use and caching
 * it in state.speechRecognition. Does nothing when no recognizer is available.
 */
const startBrowserSpeechRecognition = () => {
  state.speechRecognition = state.speechRecognition || initSpeechRecognition();
  const recognizer = state.speechRecognition;
  if (recognizer !== null && recognizer !== undefined) recognizer.start();
};
/**
 * Stops the active capture: the browser recognizer when the STT method is
 * 'browser', otherwise the MediaRecorder. A missing target is ignored.
 */
const stopRecording = () => {
  const usesBrowser = state.sttMethod === 'browser';
  const target = usesBrowser ? state.speechRecognition : state.mediaRecorder;
  target?.stop();
};
/**
 * Builds a control button containing an icon and a tooltip element.
 *
 * @param {string} cls - Value for the button's className.
 * @param {string} svg - Icon markup, inserted as HTML.
 * @param {string} tooltip - Tooltip text, inserted as HTML inside
 *   `<div class="t3-tooltip">`.
 * @returns {HTMLButtonElement} The new, detached button.
 */
const createButton = (cls, svg, tooltip) => {
  const markup = `${svg}<div class="t3-tooltip">${tooltip}</div>`;
  return Object.assign(document.createElement('button'), {
    className: cls,
    innerHTML: markup
  });
};
/**
 * Builds the "Text to Speech" control. Clicking it speaks the trimmed content
 * of the chat input; an absent input or blank content does nothing.
 *
 * @returns {HTMLButtonElement} The new, detached button.
 */
const createTTSButton = () => {
  const speakerIcon =
    '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="11 5,6 9,2 9,2 15,6 15,11 19,11 5"></polygon><path d="M15.54 8.46a5 5 0 0 1 0 7.07"></path><path d="M19.07 4.93a10 10 0 0 1 0 14.14"></path></svg>';
  const speakDraft = async () => {
    const draft = findChatInput()?.value.trim();
    if (draft) await speakText(draft);
  };

  const ttsButton = createButton('t3-tts-btn', speakerIcon, 'Text to Speech');
  ttsButton.addEventListener('click', speakDraft);
  return ttsButton;
};
/**
 * Builds the "Speech to Text" control. Clicking it toggles capture: it stops
 * the recording when one is in progress and starts one otherwise.
 *
 * @returns {HTMLButtonElement} The new, detached button.
 */
const createSTTButton = () => {
  const micIcon =
    '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" x2="12" y1="19" y2="22"></line><line x1="8" x2="16" y1="22" y2="22"></line></svg>';
  const toggleCapture = () => {
    if (state.isRecording) return stopRecording();
    return startRecording();
  };

  const sttButton = createButton('t3-stt-btn', micIcon, 'Speech to Text');
  sttButton.addEventListener('click', toggleCapture);
  return sttButton;
};
/**
 * Builds the gear control that opens the settings modal when clicked.
 *
 * @returns {HTMLButtonElement} The new, detached button.
 */
const createSettingsButton = () => {
  const gearIcon =
    '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg>';
  const settingsButton = createButton('t3-settings-btn', gearIcon, 'TTS/STT Settings');
  // The modal opener is registered directly as the click listener.
  settingsButton.addEventListener('click', showSettingsModal);
  return settingsButton;
};
/**
 * Builds a per-message "Speak message" button.
 *
 * On click the trimmed text content of `msg` is spoken; the button carries the
 * `speaking` class from the click until the speakText promise settles. Blank
 * content does nothing.
 *
 * @param {Element} msg - Element whose text content is read at click time.
 * @returns {HTMLButtonElement} The new, detached button.
 */
const createMessageSpeakButton = (msg) => {
  const speakBtn = document.createElement('button');
  const clearIndicator = () => speakBtn.classList.remove('speaking');
  const onClick = () => {
    const spoken = msg.textContent.trim();
    if (spoken) {
      speakBtn.classList.add('speaking');
      speakText(spoken).finally(clearIndicator);
    }
  };

  speakBtn.className =
    'inline-flex items-center justify-center text-xs rounded-lg p-0 hover:bg-muted/40';
  speakBtn.setAttribute('aria-label', 'Speak message');
  speakBtn.innerHTML =
    '<div class="relative" style="width:24px;height:24px"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><polygon points="11 5,6 9,2 9,2 15,6 15,11 19,11 5"></polygon><path d="M15.54 8.46a5 5 0 0 1 0 7.07"></path></svg></div>';
  speakBtn.addEventListener('click', onClick);
  return speakBtn;
};
/**
 * Synthesizes `txt` and plays it.
 *
 * The returned promise settles once playback has finished, been paused, or
 * errored, so callers can keep a "speaking" indicator up for the whole
 * playback. Failures are logged to the console; the promise never rejects.
 *
 * @param {string} txt - Text to speak.
 * @returns {Promise<void>}
 */
const speakText = async (txt) => {
  try {
    const audio = await textToSpeech(txt);
    // Subscribe before play() so no terminal event can be missed. 'pause'
    // is included because the audio may be paused from outside.
    const finished = new Promise((resolve) => {
      for (const type of ['ended', 'pause', 'error']) {
        audio.addEventListener(type, resolve, { once: true });
      }
    });
    await audio.play();
    await finished;
  } catch (err) {
    // Best-effort feature: report the failure instead of hiding it.
    console.error('[T3Chat TTS/STT] Text-to-speech failed:', err);
  }
};
/**
 * Syncs the STT button with state.isRecording: toggles its `recording` class
 * and switches the tooltip between "Stop Recording" and "Speech to Text".
 * Does nothing when the button is not on the page.
 */
const updateSTTButton = () => {
  const sttButton = document.querySelector('.t3-stt-btn');
  if (!sttButton) return;

  const { isRecording } = state;
  sttButton.classList.toggle('recording', isRecording);

  const tooltip = sttButton.querySelector('.t3-tooltip');
  if (tooltip) {
    tooltip.textContent = isRecording ? 'Stop Recording' : 'Speech to Text';
  }
};
/**
 * Opens the settings dialog (API key status, STT method, TTS voice and the
 * two enable switches).
 *
 * Saving copies the form values into `state`, persists them to localStorage
 * as strings, refreshes control visibility and closes the dialog. Cancel, or
 * a click on the backdrop, closes it without saving. Only one dialog can be
 * open at a time.
 */
const showSettingsModal = () => {
  // Avoid stacking a second dialog (e.g. keyboard activation of the button).
  if (document.querySelector('.t3-settings-modal')) return;

  const hasKey = !!state.apiKey;
  const voiceOptions = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
    .map((v) => `<option value="${v}" ${state.ttsVoice === v ? 'selected' : ''}>${v[0].toUpperCase() + v.slice(1)}</option>`)
    .join('');

  const modal = document.createElement('div');
  modal.className = 't3-settings-modal';
  modal.innerHTML = `
    <style>
      .t3-settings-modal{position:fixed;inset:0;background:rgba(0,0,0,.5);display:flex;align-items:center;justify-content:center;z-index:10000}
      .t3-settings-content{background:hsl(var(--background));border:1px solid hsl(var(--border));border-radius:8px;padding:24px;min-width:400px;max-width:500px}
      .t3-settings-title{font-size:18px;font-weight:600;margin-bottom:16px;color:hsl(var(--foreground))}
      .t3-form-group{margin-bottom:16px}
      .t3-form-label{display:block;font-size:14px;font-weight:500;margin-bottom:4px;color:hsl(var(--foreground))}
      .t3-form-select,.t3-form-input{width:100%;padding:8px 12px;border:1px solid hsl(var(--border));border-radius:6px;background:hsl(var(--background));color:hsl(var(--foreground));font-size:14px}
      .t3-form-checkbox{display:flex;align-items:center;gap:8px}
      .t3-button-group{display:flex;gap:8px;justify-content:flex-end;margin-top:20px}
      .t3-btn{padding:8px 16px;border-radius:6px;border:1px solid hsl(var(--border));background:hsl(var(--background));color:hsl(var(--foreground));cursor:pointer;font-size:14px;transition:all .2s ease}
      .t3-btn:hover{background:hsl(var(--muted))}
      .t3-btn.primary{background:hsl(var(--primary));color:hsl(var(--primary-foreground));border-color:hsl(var(--primary))}
      .t3-btn.primary:hover{opacity:.9}
      .t3-api-key-status{padding:12px;border-radius:6px;background:hsl(var(--muted));border:1px solid hsl(var(--border))}
      .t3-api-status{font-weight:500;margin-top:4px}
      .t3-api-status.connected{color:#22c55e}
      .t3-api-status.disconnected{color:#ef4444}
      .t3-form-help{font-size:12px;color:hsl(var(--muted-foreground));margin-top:8px}
    </style>
    <div class="t3-settings-content">
      <div class="t3-settings-title">TTS &amp; STT Settings</div>
      <div class="t3-form-group">
        <div class="t3-api-key-status">
          <div class="t3-form-label">OpenAI API Key Status</div>
          <div class="t3-api-status ${hasKey ? 'connected' : 'disconnected'}">
            ${hasKey ? '✅ Connected' : '❌ Not configured'}
          </div>
          ${hasKey ? '' : '<p class="t3-form-help">Add your OpenAI key in T3Chat settings.</p>'}
        </div>
      </div>
      <div class="t3-form-group">
        <label class="t3-form-label">STT Method</label>
        <select class="t3-form-select" id="stt-method-select">
          <option value="browser" ${state.sttMethod === 'browser' ? 'selected' : ''}>Browser</option>
          <option value="openai" ${state.sttMethod === 'openai' ? 'selected' : ''} ${!hasKey ? 'disabled' : ''}>OpenAI Whisper</option>
        </select>
      </div>
      <div class="t3-form-group">
        <label class="t3-form-label">TTS Voice</label>
        <select class="t3-form-select" id="voice-select" ${!hasKey ? 'disabled' : ''}>
          ${voiceOptions}
        </select>
      </div>
      <div class="t3-form-group">
        <label class="t3-form-checkbox"><input type="checkbox" id="tts-enabled" ${state.ttsEnabled ? 'checked' : ''}><span>Enable Text-to-Speech</span></label>
      </div>
      <div class="t3-form-group">
        <label class="t3-form-checkbox"><input type="checkbox" id="stt-enabled" ${state.sttEnabled ? 'checked' : ''}><span>Enable Speech-to-Text</span></label>
      </div>
      <div class="t3-button-group">
        <button class="t3-btn" id="cancel-settings">Cancel</button>
        <button class="t3-btn primary" id="save-settings">Save</button>
      </div>
    </div>`;

  // A click on the backdrop itself (not on the dialog content) closes the modal.
  modal.addEventListener('click', (e) => {
    if (e.target === modal) modal.remove();
  });
  modal.querySelector('#cancel-settings').addEventListener('click', () => modal.remove());
  modal.querySelector('#save-settings').addEventListener('click', () => {
    const voice = modal.querySelector('#voice-select').value;
    const ttsEnabled = modal.querySelector('#tts-enabled').checked;
    const sttEnabled = modal.querySelector('#stt-enabled').checked;
    const method = modal.querySelector('#stt-method-select').value;

    Object.assign(state, { ttsVoice: voice, ttsEnabled, sttEnabled, sttMethod: method });

    // localStorage only holds strings; convert the booleans explicitly.
    localStorage.setItem(CONFIG.storageKeys.ttsVoice, voice);
    localStorage.setItem(CONFIG.storageKeys.ttsEnabled, String(ttsEnabled));
    localStorage.setItem(CONFIG.storageKeys.sttEnabled, String(sttEnabled));
    localStorage.setItem(CONFIG.storageKeys.sttMethod, method);

    updateControlsVisibility();
    modal.remove();
  });
  document.body.appendChild(modal);
};
/**
 * Applies the current settings to the STT button: shows or hides it according
 * to state.sttEnabled, and marks it `disabled` only when the selected STT
 * method needs an API key that is missing. The 'browser' method uses the
 * browser's own recognizer and needs no key.
 * Does nothing when the button is not on the page.
 */
const updateControlsVisibility = () => {
  const stt = document.querySelector('.t3-stt-btn');
  if (!stt) return;
  stt.style.display = state.sttEnabled ? 'flex' : 'none';
  const needsApiKey = state.sttMethod !== 'browser';
  stt.classList.toggle('disabled', needsApiKey && !state.apiKey);
};
/**
 * Inserts the settings and STT buttons next to the chat input, once.
 *
 * The STT button is always created; updateControlsVisibility() hides it while
 * STT is disabled, so enabling STT later in the settings makes it appear
 * without a reload. Buttons go before the send button when that button is a
 * direct child of the container, otherwise at the end of the container.
 */
const addControlsToInput = () => {
  const container = findInputContainer();
  if (!container || container.querySelector('.t3-settings-btn')) return;

  const sendBtn =
    container.querySelector(SELECTORS.sendButton) ||
    container.querySelector('button[type="submit"]') ||
    container.querySelector('button[aria-label*="send" i]');
  // insertBefore() throws for a reference node that is not a direct child;
  // a null reference appends instead.
  const anchor = sendBtn && sendBtn.parentNode === container ? sendBtn : null;

  container.insertBefore(createSettingsButton(), anchor);
  container.insertBefore(createSTTButton(), anchor);
  updateControlsVisibility();
};
/**
 * Adds a "Speak message" button to one message's action bar.
 *
 * Skipped when TTS is disabled, the message has no non-blank content element,
 * no action bar can be found, or the bar already holds a speak button. The
 * button goes before the bar's `span[class*="select-none"]` when present,
 * else right after the bar's first button, else at the end. On success the
 * message is tagged with `data-tts-added="true"`.
 *
 * @param {Element} msg - Message container element.
 */
const processMessage = (msg) => {
  const content = msg.querySelector(SELECTORS.messageContent);
  const hasText = Boolean(content) && Boolean(content.textContent.trim());
  if (!hasText || !state.ttsEnabled) return;

  // Search order: parent with the specific selector, the message itself, then
  // the parent again with a looser selector.
  const wrapper = msg.parentElement;
  const actions =
    wrapper?.querySelector(SELECTORS.messageActionsContainer) ||
    msg.querySelector(SELECTORS.messageActionsContainer) ||
    wrapper?.querySelector('div[class*="absolute"][class*="flex"]');
  if (!actions) return;
  if (actions.querySelector('button[aria-label="Speak message"]')) return;

  const speakBtn = createMessageSpeakButton(content);
  const label = actions.querySelector('span[class*="select-none"]');
  if (label) {
    actions.insertBefore(speakBtn, label);
  } else {
    const afterFirstButton = actions.querySelector('button')?.nextSibling;
    if (afterFirstButton) {
      actions.insertBefore(speakBtn, afterFirstButton);
    } else {
      actions.appendChild(speakBtn);
    }
  }
  msg.setAttribute('data-tts-added', 'true');
};
/**
 * Runs processMessage on every message container that has not been tagged
 * with `data-tts-added` yet.
 *
 * The tag is checked in JavaScript rather than by appending `:not(...)` to
 * SELECTORS.messageContainer: that value is a comma-separated selector list,
 * so a suffix would only constrain its last alternative.
 */
const addTTSToMessages = () => {
  for (const msg of document.querySelectorAll(SELECTORS.messageContainer)) {
    if (!msg.hasAttribute('data-tts-added')) processMessage(msg);
  }
};
/**
 * Boots the script: injects the styles, adds the controls and message speak
 * buttons, then repeats the last two on every DOM subtree change under the
 * document element. One extra message pass is scheduled after 2000 ms.
 */
const initialize = () => {
  const refresh = () => {
    addControlsToInput();
    addTTSToMessages();
  };

  injectStyles();
  refresh();

  const observer = new MutationObserver(refresh);
  observer.observe(document.documentElement, { childList: true, subtree: true });

  setTimeout(addTTSToMessages, 2000);
};
// Run now when the document has already been parsed; otherwise wait for
// DOMContentLoaded.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', initialize);
} else {
  initialize();
}
- })();