diff --git a/extras/translator/index.mjs b/extras/translator/index.mjs index f3111ce07..478384c29 100644 --- a/extras/translator/index.mjs +++ b/extras/translator/index.mjs @@ -51,6 +51,36 @@ function restorePlaceholders(text, placeholders) { }); } +/** + * Extract Trans component tags like , , , , etc. + * These are used by react-i18next Trans component for rich text formatting. + * @param {string} text + * @returns {{ text: string, tags: string[] }} + */ +function extractTransTags(text) { + const tags = []; + // Match opening tags and closing tags + // Also matches self-closing tags + const modifiedText = text.replace(/<\/?([a-zA-Z][a-zA-Z0-9]*)\s*\/?>/g, (match) => { + const index = tags.length; + tags.push(match); + return `__TAG_${index}__`; + }); + return { text: modifiedText, tags }; +} + +/** + * Restore original Trans component tags from tokens. + * @param {string} text + * @param {string[]} tags + * @returns {string} + */ +function restoreTransTags(text, tags) { + return text.replace(/__TAG_(\d+)__/g, (_, index) => { + return tags[parseInt(index, 10)] || `__TAG_${index}__`; + }); +} + /** * Validate that all placeholders from source exist in translated text. * @param {string} sourceText @@ -64,6 +94,19 @@ function validatePlaceholders(sourceText, translatedText) { return { valid: missing.length === 0, missing }; } +/** + * Validate that all Trans component tags from source exist in translated text. + * @param {string} sourceText + * @param {string} translatedText + * @returns {{ valid: boolean, missing: string[] }} + */ +function validateTransTags(sourceText, translatedText) { + const sourceMatches = sourceText.match(/<\/?([a-zA-Z][a-zA-Z0-9]*)\s*\/?>/g) || []; + const translatedMatches = translatedText.match(/<\/?([a-zA-Z][a-zA-Z0-9]*)\s*\/?>/g) || []; + const missing = sourceMatches.filter(t => !translatedMatches.includes(t)); + return { valid: missing.length === 0, missing }; +} + class Translator { static modelTag = 'translategemma:4b' constructor() { @@ -87,13 +130,19 @@ class Translator { console.log(`\x1b[32m[Translator]\x1b[0m ${text}`, ...args); } - buildPrompt(text, sourceLangCode, targetLangCode, hasPlaceholders = false) { + buildPrompt(text, sourceLangCode, targetLangCode, { hasPlaceholders = false, hasTags = false } = {}) { const sourceLanguage = this.getLanguageName(sourceLangCode); const targetLanguage = this.getLanguageName(targetLangCode); - const placeholderInstruction = hasPlaceholders - ? `\nIMPORTANT: The text contains placeholders like __PLACEHOLDER_0__, __PLACEHOLDER_1__, etc. You MUST keep these placeholders exactly as they are in the translation - do not translate, modify, or remove them.` - : ''; - return `You are a professional ${sourceLanguage} (${sourceLangCode.toLowerCase()}) to ${targetLanguage} (${targetLangCode.toLowerCase()}) translator. Your goal is to accurately convey the meaning and nuances of the original ${sourceLanguage} text while adhering to ${targetLanguage} grammar, vocabulary, and cultural sensitivities.${placeholderInstruction} + + let specialInstructions = ''; + if (hasPlaceholders || hasTags) { + const items = []; + if (hasPlaceholders) items.push('__PLACEHOLDER_0__, __PLACEHOLDER_1__'); + if (hasTags) items.push('__TAG_0__, __TAG_1__'); + specialInstructions = `\nIMPORTANT: The text contains tokens like ${items.join(', ')}, etc. You MUST keep these tokens exactly as they are in the translation - do not translate, modify, or remove them.`; + } + + return `You are a professional ${sourceLanguage} (${sourceLangCode.toLowerCase()}) to ${targetLanguage} (${targetLangCode.toLowerCase()}) translator. Your goal is to accurately convey the meaning and nuances of the original ${sourceLanguage} text while adhering to ${targetLanguage} grammar, vocabulary, and cultural sensitivities.${specialInstructions} Produce only the ${targetLanguage} translation, without any additional explanations or commentary. Please translate the following ${sourceLanguage} text into ${targetLanguage}: @@ -113,11 +162,15 @@ ${text}` async translate(text, sourceLangCode, targetLangCode) { // Extract placeholders like {{variableName}} and replace with tokens - const { text: textWithTokens, placeholders } = extractPlaceholders(text); + const { text: textWithPlaceholders, placeholders } = extractPlaceholders(text); const hasPlaceholders = placeholders.length > 0; - const prompt = this.buildPrompt(textWithTokens, sourceLangCode, targetLangCode, hasPlaceholders); - const response = await fetch(`http://localhost:11434/api/chat`, { + // Extract Trans component tags like , , etc. + const { text: textWithTokens, tags } = extractTransTags(textWithPlaceholders); + const hasTags = tags.length > 0; + + const prompt = this.buildPrompt(textWithTokens, sourceLangCode, targetLangCode, { hasPlaceholders, hasTags }); + const response = await fetch(`http://127.0.0.1:11434/api/chat`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ @@ -132,6 +185,22 @@ ${text}` const data = await response.json(); let translatedText = this.cleanOutputText(data.message.content); + // Restore Trans component tags first (order matters since tags may contain placeholders) + if (hasTags) { + translatedText = restoreTransTags(translatedText, tags); + + // Validate all tags were preserved + const tagValidation = validateTransTags(text, translatedText); + if (!tagValidation.valid) { + console.warn(`Warning: Missing Trans tags in translation: ${tagValidation.missing.join(', ')}`); + for (let i = 0; i < tags.length; i++) { + if (!translatedText.includes(tags[i])) { + console.warn(` Tag ${tags[i]} was lost in translation`); + } + } + } + } + // Restore original placeholders if (hasPlaceholders) { translatedText = restorePlaceholders(translatedText, placeholders);