T3Chat OpenAI TTS & STT

Adds OpenAI text-to-speech and speech-to-text to T3Chat

За да инсталирате този скрипт, трябва да имате инсталирано разширение като Tampermonkey, Greasemonkey или Violentmonkey.

За да инсталирате този скрипт, трябва да имате инсталирано разширение като Tampermonkey или Violentmonkey.

За да инсталирате този скрипт, трябва да имате инсталирано разширение като Tampermonkey или Violentmonkey.

За да инсталирате този скрипт, трябва да имате инсталирано разширение като Tampermonkey или Userscripts.

За да инсталирате скрипта, трябва да инсталирате разширение като Tampermonkey.

За да инсталирате този скрипт, трябва да имате инсталиран скриптов мениджър.

(Вече имам скриптов мениджър, искам да го инсталирам!)

За да инсталирате този стил, трябва да инсталирате разширение като Stylus.

За да инсталирате този стил, трябва да инсталирате разширение като Stylus.

За да инсталирате този стил, трябва да инсталирате разширение като Stylus.

За да инсталирате този стил, трябва да имате инсталиран мениджър на потребителски стилове.

За да инсталирате този стил, трябва да имате инсталиран мениджър на потребителски стилове.

За да инсталирате този стил, трябва да имате инсталиран мениджър на потребителски стилове.

(Вече имам инсталиран мениджър на стиловете, искам да го инсталирам!)

// ==UserScript==
// @name         T3Chat OpenAI TTS & STT
// @namespace    https://github.com/cameron/t3chat-userscripts
// @version      0.1.2
// @description  Adds OpenAI text-to-speech and speech-to-text to T3Chat
// @match        https://t3.chat/*
// @match        https://*.t3.chat/*
// @run-at       document-idle
// @grant        none
// @license      MIT
// ==/UserScript==

(() => {
  'use strict';

  // Static configuration: OpenAI endpoint and model defaults, the recording
  // limit, and the localStorage key names this script reads and writes.
  const CONFIG = {
    apiBaseUrl: 'https://api.openai.com/v1', // prefix of every OpenAI request URL built below
    ttsModel: 'tts-1', // model sent with /audio/speech requests
    ttsVoice: 'alloy', // default voice when none is stored
    sttModel: 'whisper-1', // model sent with /audio/transcriptions requests
    maxRecordingTime: 60000, // passed to setTimeout as the auto-stop delay (milliseconds)
    currentVersion: '0.1.2', // compared with the stored version to trigger the settings migration
    storageKeys: {
      // Only read by this script, never written.
      // NOTE(review): presumably the key T3Chat itself stores the user's OpenAI key under - confirm.
      t3chatApiKey: 'apikey:openai',
      ttsEnabled: 't3chat-tts-enabled',
      sttEnabled: 't3chat-stt-enabled',
      ttsVoice: 't3chat-tts-voice',
      sttMethod: 't3chat-stt-method',
      version: 't3chat-tts-stt-version'
    }
  };

  // Settings migration: when the stored script version differs from the current
  // one, forget the saved STT method (the default is used on the next read) and
  // record the current version.
  {
    const { storageKeys, currentVersion } = CONFIG;
    const isUpToDate = localStorage.getItem(storageKeys.version) === currentVersion;
    if (!isUpToDate) {
      localStorage.removeItem(storageKeys.sttMethod);
      localStorage.setItem(storageKeys.version, currentVersion);
    }
  }

  // CSS selectors used to locate page elements.
  // NOTE(review): these presumably mirror T3Chat's current markup - verify against the live DOM.
  const SELECTORS = {
    // Candidate selectors for the chat textarea, tried in order by findChatInput.
    chatInput: [
      '#chat-input',
      'textarea[aria-describedby="chat-input-description"]',
      'textarea[placeholder*="message"]',
      'textarea[data-testid="chat-input"]'
    ],
    // Elements treated as one chat message.
    messageContainer: '[role="article"], .message, div[class*="message"]',
    // First match inside a message is used as the text to speak.
    messageContent: '.prose, .message-content, div[class*="prose"], p, div[class*="text"]',
    // Toolbar into which the per-message speak button is inserted.
    messageActionsContainer:
      'div[class*="absolute"][class*="flex"][class*="items-center"][class*="gap"], div.absolute.left-0[class*="-ml-0"][class*="mt-2"], div.absolute.right-0[class*="mt-"]',
    // Send button; the input controls are inserted before it.
    sendButton: 'button[type="submit"][aria-label*="Message"], button[aria-label*="send" i]'
  };

  /**
   * Reads the OpenAI API key from localStorage.
   *
   * @returns {string|null} The stored value when it starts with 'sk-', otherwise null.
   */
  const getT3ChatApiKey = () => {
    const stored = localStorage.getItem(CONFIG.storageKeys.t3chatApiKey);
    if (stored === null || stored === undefined) return null;
    return stored.startsWith('sk-') ? stored : null;
  };

  // Mutable runtime state shared by all handlers in this script.
  const state = {
    // Re-read from localStorage on every access, so a key added later is picked up.
    get apiKey() {
      return getT3ChatApiKey();
    },
    // Both flags count as enabled unless the stored value is exactly the string 'false'.
    ttsEnabled: localStorage.getItem(CONFIG.storageKeys.ttsEnabled) !== 'false',
    sttEnabled: localStorage.getItem(CONFIG.storageKeys.sttEnabled) !== 'false',
    // 'openai' or 'browser'; falls back to 'openai' when nothing is stored.
    sttMethod: localStorage.getItem(CONFIG.storageKeys.sttMethod) || 'openai',
    ttsVoice: localStorage.getItem(CONFIG.storageKeys.ttsVoice) || CONFIG.ttsVoice,
    isRecording: false, // drives the STT button's "recording" appearance
    mediaRecorder: null, // MediaRecorder of the current/last OpenAI-mode recording
    audioChunks: [], // data chunks collected from the recorder
    currentAudio: null, // Audio element created by the most recent TTS request
    recordingMimeType: '', // MIME type chosen for the current recording
    speechRecognition: null // lazily created browser SpeechRecognition instance
  };

  // First run: when a feature flag has never been stored, persist it as 'true'
  // and make sure the in-memory state agrees.
  for (const flag of ['ttsEnabled', 'sttEnabled']) {
    const storageKey = CONFIG.storageKeys[flag];
    if (localStorage.getItem(storageKey) !== null) continue;
    localStorage.setItem(storageKey, 'true');
    state[flag] = true;
  }

  /**
   * Locates the chat textarea by trying each candidate selector in order.
   *
   * @returns {Element|undefined} The first match that is a TEXTAREA, or undefined.
   */
  const findChatInput = () => {
    for (const selector of SELECTORS.chatInput) {
      const candidate = document.querySelector(selector);
      if (candidate && candidate.tagName === 'TEXTAREA') return candidate;
    }
    return undefined;
  };

  /**
   * Finds the element that should host the injected input controls.
   *
   * Preference order: the parent of the send button (looked up document-wide,
   * then next to the textarea), the closest flex div around the textarea, and
   * finally the textarea's own parent.
   *
   * @returns {Element|null} The container, or null when no chat input exists.
   */
  const findInputContainer = () => {
    const input = findChatInput();
    if (!input) return null;

    const wrapper = input.parentElement;
    let sendBtn = document.querySelector(SELECTORS.sendButton);
    if (!sendBtn) sendBtn = wrapper?.querySelector('button[type="submit"]');
    if (!sendBtn) sendBtn = wrapper?.querySelector('button[aria-label*="send" i]');
    if (sendBtn) return sendBtn.parentElement;

    return input.closest('div[class*="flex"]') || wrapper;
  };

  const injectStyles = () => {
    if (document.querySelector('#t3chat-tts-stt-styles')) return;
    const style = document.createElement('style');
    style.id = 't3chat-tts-stt-styles';
    style.textContent = `
      .t3-tts-btn,.t3-stt-btn,.t3-settings-btn{
        display:flex;align-items:center;justify-content:center;width:32px;height:32px;border:1px solid hsl(var(--border));
        border-radius:6px;background:hsl(var(--background));color:hsl(var(--foreground));cursor:pointer;
        transition:all .2s ease;position:relative;flex-shrink:0
      }
      .t3-tts-btn:hover,.t3-stt-btn:hover,.t3-settings-btn:hover{background:hsl(var(--muted));border-color:hsl(var(--ring))}
      .t3-stt-btn.recording{background:#ef4444;color:#fff;animation:pulse 1s infinite}
      .t3-tts-btn.speaking{background:#3b82f6;color:#fff}
      .t3-tts-btn.disabled,.t3-stt-btn.disabled{opacity:.5;cursor:not-allowed}
      @keyframes pulse{0%,100%{opacity:1}50%{opacity:.7}}
      .t3-tooltip{position:absolute;bottom:100%;left:50%;transform:translateX(-50%);background:hsl(var(--foreground));
        color:hsl(var(--background));padding:4px 8px;border-radius:4px;font-size:12px;white-space:nowrap;opacity:0;
        pointer-events:none;transition:opacity .2s ease;margin-bottom:4px;z-index:1000}
      .t3-stt-btn:hover .t3-tooltip,.t3-settings-btn:hover .t3-tooltip{opacity:1}
      button[aria-label="Speak message"].speaking{background:#3b82f6!important;color:#fff!important}
      button[aria-label="Speak message"]{width:32px!important;height:32px!important;min-width:32px!important;min-height:32px!important;
        display:flex!important;align-items:center!important;justify-content:center!important}
      button[aria-label="Speak message"] .relative,button[aria-label="Speak message"] svg{width:24px!important;height:24px!important}
    `;
    document.head.appendChild(style);
  };

  /**
   * POSTs a JSON payload to an OpenAI endpoint using the stored API key.
   *
   * @param {string} endpoint - Path appended to CONFIG.apiBaseUrl, e.g. '/audio/speech'.
   * @param {*} data - Request payload; serialized with JSON.stringify.
   * @param {object} [options] - Extra fetch() init fields. `headers` are merged
   *   on top of the default Authorization/Content-Type headers; any other field
   *   (including `method` or `body`) overrides the default, as before.
   * @returns {Promise<Response>} The response, only when `res.ok` is true.
   * @throws {Error} When no API key is configured, or when the response is not
   *   OK (message taken from the API's `error.message`, else `HTTP <status>`).
   */
  const callOpenAI = async (endpoint, data, options = {}) => {
    // Read the key once: the getter hits localStorage on every access.
    const apiKey = state.apiKey;
    if (!apiKey) throw new Error('OpenAI API key not configured');

    const { headers: extraHeaders, ...fetchOptions } = options;
    const res = await fetch(`${CONFIG.apiBaseUrl}${endpoint}`, {
      method: 'POST',
      body: JSON.stringify(data),
      ...fetchOptions,
      // Headers go last so that caller-supplied headers extend the defaults
      // instead of replacing them and dropping the Authorization header.
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        ...extraHeaders
      }
    });

    if (!res.ok) {
      const fallback = `HTTP ${res.status}`;
      // The error body may be missing, non-JSON, or not an object.
      const payload = await res.json().catch(() => null);
      throw new Error(payload?.error?.message || fallback);
    }
    return res;
  };

  /**
   * Requests speech audio for `text` and wraps it in an Audio element.
   *
   * Only the first 4096 characters are sent. Any previously created audio is
   * paused and its object URL revoked before the new one replaces it in
   * `state.currentAudio`.
   *
   * @param {string} text - Text to synthesize.
   * @returns {Promise<HTMLAudioElement>} The new, not-yet-playing Audio element.
   */
  const textToSpeech = async (text) => {
    const payload = {
      model: CONFIG.ttsModel,
      voice: state.ttsVoice,
      input: text.slice(0, 4096)
    };
    const response = await callOpenAI('/audio/speech', payload);
    const objectUrl = URL.createObjectURL(await response.blob());

    const previous = state.currentAudio;
    if (previous) {
      previous.pause();
      URL.revokeObjectURL(previous.src);
    }

    const player = new Audio(objectUrl);
    state.currentAudio = player;
    return player;
  };

  /**
   * Uploads recorded audio to the OpenAI transcription endpoint.
   *
   * @param {Blob} blob - Recorded audio; its MIME type selects the upload file extension.
   * @returns {Promise<string>} The transcript, or '' when the response has no text.
   * @throws {Error} When no API key is configured or the request fails
   *   ('STT failed: <response body>').
   */
  const speechToText = async (blob) => {
    // Fail early instead of sending "Bearer null" to the API.
    const apiKey = state.apiKey;
    if (!apiKey) throw new Error('OpenAI API key not configured');

    const mime = (blob.type || '').toLowerCase();
    // [token looked for in the MIME type, file extension]; first match wins.
    // 'mpeg' is listed because the registered MP3 MIME type is audio/mpeg.
    const extensionByToken = [
      ['wav', 'wav'],
      ['mp4', 'mp4'],
      ['mp3', 'mp3'],
      ['mpeg', 'mp3'],
      ['ogg', 'ogg']
    ];
    const match = extensionByToken.find(([token]) => mime.includes(token));
    const ext = match ? match[1] : 'webm';

    const form = new FormData();
    form.append('file', blob, `audio.${ext}`);
    form.append('model', CONFIG.sttModel);

    // No Content-Type header here: fetch sets the multipart boundary itself.
    const res = await fetch(`${CONFIG.apiBaseUrl}/audio/transcriptions`, {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}` },
      body: form
    });
    if (!res.ok) {
      const txt = await res.text();
      throw new Error(`STT failed: ${txt}`);
    }
    const json = await res.json();
    // Callers call .trim() on the result, so never hand back undefined.
    return typeof json?.text === 'string' ? json.text : '';
  };

  /**
   * Creates a browser SpeechRecognition instance wired to the chat input.
   *
   * The recognizer is single-shot (non-continuous, final results only, one
   * alternative, 'en-US'). A result is appended to the chat textarea and an
   * 'input' event is dispatched; start/end/error keep `state.isRecording` and
   * the STT button in sync.
   *
   * @returns {object|null} The recognizer, or null when the API is unavailable.
   */
  const initSpeechRecognition = () => {
    const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Recognition) return null;

    const setRecording = (flag) => {
      state.isRecording = flag;
      updateSTTButton();
    };
    const handleStopped = () => setRecording(false);

    const recognizer = Object.assign(new Recognition(), {
      continuous: false,
      interimResults: false,
      maxAlternatives: 1,
      lang: 'en-US'
    });

    recognizer.onstart = () => setRecording(true);
    recognizer.onresult = (event) => {
      const transcript = event.results[0][0].transcript;
      const input = findChatInput();
      if (!input || !transcript.trim()) return;
      input.value = `${input.value} ${transcript}`.trim();
      input.dispatchEvent(new Event('input', { bubbles: true }));
      input.focus();
    };
    // End and error share one handler: both mean recording is over.
    recognizer.onend = handleStopped;
    recognizer.onerror = handleStopped;
    return recognizer;
  };

  /**
   * Starts capturing speech with the configured STT method.
   *
   * In 'browser' mode this delegates to the browser SpeechRecognition API.
   * Otherwise it records from the microphone with MediaRecorder; when the
   * recorder stops, the audio is transcribed and the text is appended to the
   * chat input. Recording stops automatically after CONFIG.maxRecordingTime ms.
   *
   * Failures (permission denied, no supported MIME type, transcription error)
   * are logged to the console and never rejected to the caller; the microphone
   * stream is always released.
   *
   * @returns {Promise<void>}
   */
  const startRecording = async () => {
    if (state.sttMethod === 'browser') return startBrowserSpeechRecognition();
    // A recording (or its pending transcription) is still in progress.
    if (state.isRecording) return;

    let stream = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const activeStream = stream;

      const candidateTypes = [
        'audio/wav',
        'audio/mp4',
        'audio/webm;codecs=opus',
        'audio/webm',
        'audio/ogg;codecs=opus',
        'audio/mp3'
      ];
      const mimeType = candidateTypes.find((t) => MediaRecorder.isTypeSupported(t));
      if (!mimeType) throw new Error('No supported audio MIME type found');

      const recorder = new MediaRecorder(activeStream, { mimeType });
      const chunks = [];
      let autoStopTimer = null;

      state.mediaRecorder = recorder;
      state.audioChunks = chunks;
      state.recordingMimeType = mimeType;

      recorder.ondataavailable = (e) => {
        if (e.data.size) chunks.push(e.data);
      };
      recorder.onstop = async () => {
        // Prevent a stale timer from touching a later recording.
        clearTimeout(autoStopTimer);
        const blob = new Blob(chunks, { type: mimeType });
        try {
          const txt = await speechToText(blob);
          const input = findChatInput();
          if (input && txt?.trim()) {
            input.value = `${input.value} ${txt}`.trim();
            input.dispatchEvent(new Event('input', { bubbles: true }));
            input.focus();
          }
        } catch (err) {
          // This runs as an event handler, so a rejection would otherwise go unhandled.
          console.error('[T3Chat TTS/STT] Transcription failed:', err);
        } finally {
          activeStream.getTracks().forEach((t) => t.stop());
          state.isRecording = false;
          updateSTTButton();
        }
      };

      recorder.start();
      state.isRecording = true;
      updateSTTButton();

      // Stop this specific recorder, and only if it is still recording.
      autoStopTimer = setTimeout(() => {
        if (recorder.state === 'recording') recorder.stop();
      }, CONFIG.maxRecordingTime);
    } catch (err) {
      // Release the microphone if setup failed after the stream was acquired.
      stream?.getTracks().forEach((t) => t.stop());
      state.isRecording = false;
      console.error('[T3Chat TTS/STT] Could not start recording:', err);
    }
  };

  /**
   * Starts browser speech recognition, creating the recognizer on first use and
   * caching it in `state.speechRecognition`. Does nothing when the browser has
   * no SpeechRecognition support.
   */
  const startBrowserSpeechRecognition = () => {
    let recognizer = state.speechRecognition;
    if (!recognizer) {
      recognizer = initSpeechRecognition();
      state.speechRecognition = recognizer;
    }
    if (recognizer !== null && recognizer !== undefined) recognizer.start();
  };

  /**
   * Stops the capture in progress for the configured STT method.
   *
   * In MediaRecorder mode the recorder is only stopped while it is active:
   * `MediaRecorder.stop()` throws InvalidStateError on an inactive recorder,
   * which would otherwise happen when this is called while the previous
   * recording is still being transcribed (`state.isRecording` stays true
   * until transcription finishes).
   */
  const stopRecording = () => {
    if (state.sttMethod === 'browser') {
      state.speechRecognition?.stop();
      return;
    }
    const recorder = state.mediaRecorder;
    if (recorder && recorder.state !== 'inactive') recorder.stop();
  };

  /**
   * Builds an icon button with a hover tooltip.
   *
   * @param {string} cls - Value for the button's class attribute.
   * @param {string} svg - Icon markup, inserted as HTML.
   * @param {string} tooltip - Tooltip content, inserted as HTML inside `.t3-tooltip`.
   * @returns {HTMLButtonElement} The new, unattached button.
   */
  const createButton = (cls, svg, tooltip) => {
    const markup = [svg, '<div class="t3-tooltip">', tooltip, '</div>'].join('');
    return Object.assign(document.createElement('button'), {
      className: cls,
      innerHTML: markup
    });
  };

  /**
   * Builds the "Text to Speech" button, which reads the current (trimmed)
   * contents of the chat input aloud when clicked; empty input does nothing.
   *
   * @returns {HTMLButtonElement}
   */
  const createTTSButton = () => {
    const icon =
      '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="11 5,6 9,2 9,2 15,6 15,11 19,11 5"></polygon><path d="M15.54 8.46a5 5 0 0 1 0 7.07"></path><path d="M19.07 4.93a10 10 0 0 1 0 14.14"></path></svg>';
    const button = createButton('t3-tts-btn', icon, 'Text to Speech');

    const speakDraft = async () => {
      const draft = findChatInput()?.value.trim();
      if (draft) await speakText(draft);
    };
    button.addEventListener('click', speakDraft);
    return button;
  };

  /**
   * Builds the microphone button; clicking it toggles between starting and
   * stopping a recording based on `state.isRecording`.
   *
   * @returns {HTMLButtonElement}
   */
  const createSTTButton = () => {
    const icon =
      '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" x2="12" y1="19" y2="22"></line><line x1="8" x2="16" y1="22" y2="22"></line></svg>';
    const button = createButton('t3-stt-btn', icon, 'Speech to Text');

    const toggleRecording = () => {
      if (state.isRecording) {
        stopRecording();
      } else {
        startRecording();
      }
    };
    button.addEventListener('click', toggleRecording);
    return button;
  };

  /**
   * Builds the gear button that opens the TTS/STT settings dialog.
   *
   * @returns {HTMLButtonElement}
   */
  const createSettingsButton = () => {
    const gearIcon =
      '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg>';
    const button = createButton('t3-settings-btn', gearIcon, 'TTS/STT Settings');
    button.addEventListener('click', showSettingsModal);
    return button;
  };

  /**
   * Builds the per-message "Speak message" button.
   *
   * On click the element's trimmed text is spoken; the button carries the
   * `speaking` class until the speakText promise settles.
   *
   * @param {Element} msg - Element whose textContent is read at click time.
   * @returns {HTMLButtonElement}
   */
  const createMessageSpeakButton = (msg) => {
    const button = document.createElement('button');
    button.className =
      'inline-flex items-center justify-center text-xs rounded-lg p-0 hover:bg-muted/40';
    button.setAttribute('aria-label', 'Speak message');
    button.innerHTML =
      '<div class="relative" style="width:24px;height:24px"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><polygon points="11 5,6 9,2 9,2 15,6 15,11 19,11 5"></polygon><path d="M15.54 8.46a5 5 0 0 1 0 7.07"></path></svg></div>';

    const clearSpeaking = () => button.classList.remove('speaking');
    const handleClick = () => {
      const spokenText = msg.textContent.trim();
      if (spokenText) {
        button.classList.add('speaking');
        speakText(spokenText).finally(clearSpeaking);
      }
    };
    button.addEventListener('click', handleClick);
    return button;
  };

  /**
   * Synthesizes `txt` and starts playback.
   *
   * Best effort: failures never reject to the caller, but they are logged so a
   * missing API key, an API error or a blocked playback can be diagnosed.
   *
   * @param {string} txt - Text to speak.
   * @returns {Promise<void>} Resolves once playback has started or the attempt failed.
   */
  const speakText = async (txt) => {
    try {
      const audio = await textToSpeech(txt);
      await audio.play();
    } catch (err) {
      // play() rejects with AbortError when playback is interrupted by pause()
      // (e.g. a newer speak request); that is expected, not a failure.
      if (err?.name === 'AbortError') return;
      console.error('[T3Chat TTS/STT] Text-to-speech failed:', err);
    }
  };

  /**
   * Syncs the microphone button with `state.isRecording`: toggles the
   * `recording` class and swaps the tooltip text. No-op when the button is
   * not on the page.
   */
  const updateSTTButton = () => {
    const button = document.querySelector('.t3-stt-btn');
    if (!button) return;

    const recording = state.isRecording;
    button.classList.toggle('recording', recording);

    const tooltip = button.querySelector('.t3-tooltip');
    if (!tooltip) return;
    tooltip.textContent = recording ? 'Stop Recording' : 'Speech to Text';
  };

  /**
   * Builds the settings dialog and appends it to <body>.
   *
   * The dialog shows whether an OpenAI key was found and lets the user pick the
   * STT method and TTS voice and toggle both features. Save copies the form
   * values into `state` and localStorage, refreshes the input controls and
   * closes the dialog; Cancel or a click on the backdrop closes it unchanged.
   */
  const showSettingsModal = () => {
    // Snapshot of key availability; used to disable the OpenAI-only controls below.
    const hasKey = !!state.apiKey;
    const modal = document.createElement('div');
    modal.className = 't3-settings-modal';
    // The dialog's CSS is embedded in the markup, so it is removed together with the modal.
    modal.innerHTML = `
      <style>
        .t3-settings-modal{position:fixed;inset:0;background:rgba(0,0,0,.5);display:flex;align-items:center;justify-content:center;z-index:10000}
        .t3-settings-content{background:hsl(var(--background));border:1px solid hsl(var(--border));border-radius:8px;padding:24px;min-width:400px;max-width:500px}
        .t3-settings-title{font-size:18px;font-weight:600;margin-bottom:16px;color:hsl(var(--foreground))}
        .t3-form-group{margin-bottom:16px}
        .t3-form-label{display:block;font-size:14px;font-weight:500;margin-bottom:4px;color:hsl(var(--foreground))}
        .t3-form-select,.t3-form-input{width:100%;padding:8px 12px;border:1px solid hsl(var(--border));border-radius:6px;background:hsl(var(--background));color:hsl(var(--foreground));font-size:14px}
        .t3-form-checkbox{display:flex;align-items:center;gap:8px}
        .t3-button-group{display:flex;gap:8px;justify-content:flex-end;margin-top:20px}
        .t3-btn{padding:8px 16px;border-radius:6px;border:1px solid hsl(var(--border));background:hsl(var(--background));color:hsl(var(--foreground));cursor:pointer;font-size:14px;transition:all .2s ease}
        .t3-btn:hover{background:hsl(var(--muted))}
        .t3-btn.primary{background:hsl(var(--primary));color:hsl(var(--primary-foreground));border-color:hsl(var(--primary))}
        .t3-btn.primary:hover{opacity:.9}
        .t3-api-key-status{padding:12px;border-radius:6px;background:hsl(var(--muted));border:1px solid hsl(var(--border))}
        .t3-api-status{font-weight:500;margin-top:4px}
        .t3-api-status.connected{color:#22c55e}
        .t3-api-status.disconnected{color:#ef4444}
        .t3-form-help{font-size:12px;color:hsl(var(--muted-foreground));margin-top:8px}
      </style>
      <div class="t3-settings-content">
        <div class="t3-settings-title">TTS & STT Settings</div>
        <div class="t3-form-group">
          <div class="t3-api-key-status">
            <div class="t3-form-label">OpenAI API Key Status</div>
            <div class="t3-api-status ${hasKey ? 'connected' : 'disconnected'}">
              ${hasKey ? '✅ Connected' : '❌ Not configured'}
            </div>
            ${hasKey ? '' : '<p class="t3-form-help">Add your OpenAI key in T3Chat settings.</p>'}
          </div>
        </div>
        <div class="t3-form-group">
          <label class="t3-form-label">STT Method</label>
          <select class="t3-form-select" id="stt-method-select">
            <option value="browser" ${state.sttMethod === 'browser' ? 'selected' : ''}>Browser</option>
            <option value="openai" ${state.sttMethod === 'openai' ? 'selected' : ''} ${!hasKey ? 'disabled' : ''}>OpenAI Whisper</option>
          </select>
        </div>
        <div class="t3-form-group">
          <label class="t3-form-label">TTS Voice</label>
          <select class="t3-form-select" id="voice-select" ${!hasKey ? 'disabled' : ''}>
            ${['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
              .map((v) => `<option value="${v}" ${state.ttsVoice === v ? 'selected' : ''}>${v[0].toUpperCase() + v.slice(1)}</option>`)
              .join('')}
          </select>
        </div>
        <div class="t3-form-group">
          <label class="t3-form-checkbox"><input type="checkbox" id="tts-enabled" ${state.ttsEnabled ? 'checked' : ''}><span>Enable Text-to-Speech</span></label>
        </div>
        <div class="t3-form-group">
          <label class="t3-form-checkbox"><input type="checkbox" id="stt-enabled" ${state.sttEnabled ? 'checked' : ''}><span>Enable Speech-to-Text</span></label>
        </div>
        <div class="t3-button-group">
          <button class="t3-btn" id="cancel-settings">Cancel</button>
          <button class="t3-btn primary" id="save-settings">Save</button>
        </div>
      </div>`;
    // Close only when the backdrop itself is clicked, not the dialog content.
    modal.addEventListener('click', (e) => e.target === modal && modal.remove());
    modal.querySelector('#cancel-settings').addEventListener('click', () => modal.remove());
    modal.querySelector('#save-settings').addEventListener('click', () => {
      // Read the form values.
      const voice = modal.querySelector('#voice-select').value;
      const ttsEnabled = modal.querySelector('#tts-enabled').checked;
      const sttEnabled = modal.querySelector('#stt-enabled').checked;
      const method = modal.querySelector('#stt-method-select').value;
      // Apply them to the in-memory state first...
      state.ttsVoice = voice;
      state.ttsEnabled = ttsEnabled;
      state.sttEnabled = sttEnabled;
      state.sttMethod = method;
      // ...then persist. localStorage stores the booleans as the strings
      // 'true'/'false', which is what the startup code compares against.
      localStorage.setItem(CONFIG.storageKeys.ttsVoice, voice);
      localStorage.setItem(CONFIG.storageKeys.ttsEnabled, ttsEnabled);
      localStorage.setItem(CONFIG.storageKeys.sttEnabled, sttEnabled);
      localStorage.setItem(CONFIG.storageKeys.sttMethod, method);
      updateControlsVisibility();
      modal.remove();
    });
    document.body.appendChild(modal);
  };

  /**
   * Shows or hides the microphone button according to `state.sttEnabled` and
   * marks it `disabled` when the selected STT method cannot work.
   *
   * Only the OpenAI method needs an API key; browser speech recognition works
   * without one, so the button is not marked disabled in that mode.
   */
  const updateControlsVisibility = () => {
    const stt = document.querySelector('.t3-stt-btn');
    if (!stt) return;
    stt.style.display = state.sttEnabled ? 'flex' : 'none';
    const needsApiKey = state.sttMethod !== 'browser';
    stt.classList.toggle('disabled', needsApiKey && !state.apiKey);
  };

  /**
   * Injects the settings and microphone buttons next to the chat input.
   *
   * Runs once per container: it returns early when the settings button is
   * already present. The microphone button is always created, and
   * updateControlsVisibility() hides it while STT is disabled, so enabling STT
   * in the settings dialog takes effect without a page reload.
   */
  const addControlsToInput = () => {
    const container = findInputContainer();
    if (!container || container.querySelector('.t3-settings-btn')) return;

    const sendBtn =
      container.querySelector(SELECTORS.sendButton) ||
      container.querySelector('button[type="submit"]') ||
      container.querySelector('button[aria-label*="send" i]');
    // insertBefore() throws unless the reference node is a direct child of the
    // container; a null reference appends at the end instead.
    const anchor = sendBtn && sendBtn.parentNode === container ? sendBtn : null;

    container.insertBefore(createSettingsButton(), anchor);
    container.insertBefore(createSTTButton(), anchor);
    updateControlsVisibility();
  };

  /**
   * Adds a "Speak message" button to one chat message.
   *
   * Skipped when TTS is disabled, the message has no text, no actions toolbar
   * can be found, or the toolbar already has a speak button. The button goes
   * before the toolbar's `select-none` span when there is one, otherwise right
   * after the first button (or at the end). The message is marked with
   * `data-tts-added` only when a button was inserted.
   *
   * @param {Element} msg - Message container element.
   */
  const processMessage = (msg) => {
    const content = msg.querySelector(SELECTORS.messageContent);
    const hasText = Boolean(content && content.textContent.trim());
    if (!hasText || !state.ttsEnabled) return;

    const parent = msg.parentElement;
    const actions =
      parent?.querySelector(SELECTORS.messageActionsContainer) ||
      msg.querySelector(SELECTORS.messageActionsContainer) ||
      parent?.querySelector('div[class*="absolute"][class*="flex"]');
    if (!actions) return;
    if (actions.querySelector('button[aria-label="Speak message"]')) return;

    const speakBtn = createMessageSpeakButton(content);
    const anchor = actions.querySelector('span[class*="select-none"]');
    if (anchor) {
      actions.insertBefore(speakBtn, anchor);
    } else {
      const afterFirstButton = actions.querySelector('button')?.nextSibling;
      if (afterFirstButton) {
        actions.insertBefore(speakBtn, afterFirstButton);
      } else {
        actions.appendChild(speakBtn);
      }
    }
    msg.setAttribute('data-tts-added', 'true');
  };

  /**
   * Runs processMessage on every message container not yet marked with
   * `data-tts-added`.
   *
   * SELECTORS.messageContainer is a comma-separated selector list, so the
   * `:not([data-tts-added])` filter is appended to each alternative; appending
   * it once to the whole string would only filter the last alternative.
   * NOTE: the split is a plain comma split, so the selector list must not
   * contain commas inside attribute values or functional pseudo-classes.
   */
  const addTTSToMessages = () => {
    const pendingSelector = SELECTORS.messageContainer
      .split(',')
      .map((part) => part.trim())
      .filter(Boolean)
      .map((part) => `${part}:not([data-tts-added])`)
      .join(', ');
    document.querySelectorAll(pendingSelector).forEach((msg) => processMessage(msg));
  };

  /**
   * Boots the script: injects styles, adds the controls and speak buttons for
   * what is already on the page, then repeats the injection on every DOM
   * mutation under <html> and once more after two seconds.
   */
  const initialize = () => {
    const refreshUi = () => {
      addControlsToInput();
      addTTSToMessages();
    };

    injectStyles();
    refreshUi();

    const observer = new MutationObserver(refreshUi);
    observer.observe(document.documentElement, { childList: true, subtree: true });

    setTimeout(addTTSToMessages, 2000);
  };

  // Run immediately when the DOM is already parsed; otherwise wait for it.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initialize);
  } else {
    initialize();
  }
})();