Add model field parameter for generic OpenAI TTS (#4171)

resolves #4165
This commit is contained in:
Timothy Carambat
2025-07-17 10:28:51 -07:00
committed by GitHub
parent ff34c8cefc
commit 043cb1d085
6 changed files with 40 additions and 4 deletions

View File

@@ -255,6 +255,7 @@ GID='1000'
# TTS_PROVIDER="generic-openai"
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

View File

@@ -5,7 +5,7 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
<div className="w-full flex flex-col gap-y-7">
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<div className="flex justify-between items-start mb-2">
<label className="text-white text-sm font-semibold">Base URL</label>
</div>
<input
@@ -23,9 +23,8 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
will generate TTS responses from.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
<label className="text-white text-sm font-semibold block mb-2">
API Key
</label>
<input
@@ -44,6 +43,28 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
this is optional if your service does not require one.
</p>
</div>
</div>
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
TTS Model
</label>
<input
type="text"
name="TTSOpenAICompatibleModel"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Your TTS model identifier"
defaultValue={settings?.TTSOpenAICompatibleModel}
required={true}
autoComplete="off"
spellCheck={false}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Most TTS services will have several models available. This is the{" "}
<code>model</code> parameter you will use to select the model you
want to use. Note: This is not the same as the voice model.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Voice Model

View File

@@ -257,6 +257,7 @@ TTS_PROVIDER="native"
# TTS_PROVIDER="generic-openai"
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

View File

@@ -256,6 +256,7 @@ const SystemSettings = {
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
// OpenAI Generic TTS
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
TTSOpenAICompatibleModel: process.env.TTS_OPEN_AI_COMPATIBLE_MODEL,
TTSOpenAICompatibleVoiceModel:
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,

View File

@@ -4,6 +4,10 @@ class GenericOpenAiTTS {
this.#log(
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
);
if (!process.env.TTS_OPEN_AI_COMPATIBLE_MODEL)
this.#log(
"No OpenAI compatible TTS model was set. We will use the default model 'tts-1'. This may not exist or be valid for your selected endpoint."
);
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
this.#log(
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
@@ -18,7 +22,11 @@ class GenericOpenAiTTS {
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
});
this.model = process.env.TTS_OPEN_AI_COMPATIBLE_MODEL ?? "tts-1";
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
this.#log(
`Service (${process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT}) with model: ${this.model} and voice: ${this.voice}`
);
}
#log(text, ...args) {
@@ -33,7 +41,7 @@ class GenericOpenAiTTS {
async ttsBuffer(textInput) {
try {
const result = await this.openai.audio.speech.create({
model: "tts-1",
model: this.model,
voice: this.voice,
input: textInput,
});

View File

@@ -601,6 +601,10 @@ const KEY_MAPPING = {
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
checks: [],
},
TTSOpenAICompatibleModel: {
envKey: "TTS_OPEN_AI_COMPATIBLE_MODEL",
checks: [],
},
TTSOpenAICompatibleVoiceModel: {
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
checks: [isNotEmpty],