Add model field parameter for generic OpenAI TTS (#4171)

resolves #4165
2026-04-25 17:15:37 +02:00 · 2025-07-17 10:28:51 -07:00
parent ff34c8cefc
commit 043cb1d085
6 changed files with 40 additions and 4 deletions
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -255,6 +255,7 @@ GID='1000'

 # TTS_PROVIDER="generic-openai"
 # TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
 # TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
 # TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

--- a/frontend/src/components/TextToSpeech/OpenAiGenericOptions/index.jsx
+++ b/frontend/src/components/TextToSpeech/OpenAiGenericOptions/index.jsx
@@ -5,7 +5,7 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
    <div className="w-full flex flex-col gap-y-7">
      <div className="flex gap-x-4">
        <div className="flex flex-col w-60">
-          <div className="flex justify-between items-center mb-2">
+          <div className="flex justify-between items-start mb-2">
            <label className="text-white text-sm font-semibold">Base URL</label>
          </div>
          <input
@@ -23,9 +23,8 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
            will generate TTS responses from.
          </p>
        </div>
-
        <div className="flex flex-col w-60">
-          <label className="text-white text-sm font-semibold block mb-3">
+          <label className="text-white text-sm font-semibold block mb-2">
            API Key
          </label>
          <input
@@ -44,6 +43,28 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
            this is optional if your service does not require one.
          </p>
        </div>
+      </div>
+      <div className="flex gap-x-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            TTS Model
+          </label>
+          <input
+            type="text"
+            name="TTSOpenAICompatibleModel"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="Your TTS model identifier"
+            defaultValue={settings?.TTSOpenAICompatibleModel}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Most TTS services will have several models available. This is the{" "}
+            <code>model</code> parameter you will use to select the model you
+            want to use. Note: This is not the same as the voice model.
+          </p>
+        </div>
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-3">
            Voice Model
--- a/server/.env.example
+++ b/server/.env.example
@@ -257,6 +257,7 @@ TTS_PROVIDER="native"

 # TTS_PROVIDER="generic-openai"
 # TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
 # TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
 # TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -256,6 +256,7 @@ const SystemSettings = {
        process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
      // OpenAI Generic TTS
      TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
+      TTSOpenAICompatibleModel: process.env.TTS_OPEN_AI_COMPATIBLE_MODEL,
      TTSOpenAICompatibleVoiceModel:
        process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
      TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
--- a/server/utils/TextToSpeech/openAiGeneric/index.js
+++ b/server/utils/TextToSpeech/openAiGeneric/index.js
@@ -4,6 +4,10 @@ class GenericOpenAiTTS {
      this.#log(
        "No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
      );
+    if (!process.env.TTS_OPEN_AI_COMPATIBLE_MODEL)
+      this.#log(
+        "No OpenAI compatible TTS model was set. We will use the default model 'tts-1'. This may not exist or be valid for your selected endpoint."
+      );
    if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
      this.#log(
        "No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
@@ -18,7 +22,11 @@ class GenericOpenAiTTS {
      apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
      baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
    });
+    this.model = process.env.TTS_OPEN_AI_COMPATIBLE_MODEL ?? "tts-1";
    this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
+    this.#log(
+      `Service (${process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT}) with model: ${this.model} and voice: ${this.voice}`
+    );
  }

  #log(text, ...args) {
@@ -33,7 +41,7 @@ class GenericOpenAiTTS {
  async ttsBuffer(textInput) {
    try {
      const result = await this.openai.audio.speech.create({
-        model: "tts-1",
+        model: this.model,
        voice: this.voice,
        input: textInput,
      });
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -601,6 +601,10 @@ const KEY_MAPPING = {
    envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
    checks: [],
  },
+  TTSOpenAICompatibleModel: {
+    envKey: "TTS_OPEN_AI_COMPATIBLE_MODEL",
+    checks: [],
+  },
  TTSOpenAICompatibleVoiceModel: {
    envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
    checks: [isNotEmpty],