const Anthropic = require("@anthropic-ai/sdk");
const { AnthropicLLM } = require("../../../AiProviders/anthropic");
const { RetryError } = require("../error.js");
const Provider = require("./ai-provider.js");
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../http");
const { getAnythingLLMUserAgent } = require("../../../../endpoints/utils");

/**
 * The agent provider for the Anthropic API.
 * By default, the model is set to 'claude-sonnet-4-6'.
 */
class AnthropicProvider extends Provider {
  model;
  maxTokens = null;

  constructor(config = {}) {
    const {
      options = {
        apiKey: process.env.ANTHROPIC_API_KEY,
        maxRetries: 3,
        defaultHeaders: {
          "User-Agent": getAnythingLLMUserAgent(),
        },
      },
      model = "claude-sonnet-4-6",
    } = config;

    const client = new Anthropic(options);

    super(client);
    this.model = model;
  }

  /**
   * Whether this provider supports native OpenAI-compatible tool calling.
   * - Anthropic always supports tool calling.
   * @returns {boolean}
   */
  supportsNativeToolCalling() {
    return true;
  }

  /**
   * Fetches the maximum number of tokens the model should generate in its response.
   * This varies per model but will fall back to 4096 if the model is not found.
   * @returns {Promise<number>} The maximum output tokens limit for API calls.
   */
  async assertModelMaxTokens() {
    if (this.maxTokens) return this.maxTokens;
    this.maxTokens = await AnthropicLLM.fetchModelMaxTokens(this.model);
    return this.maxTokens;
  }

  /**
   * Parses the ANTHROPIC_CACHE_CONTROL ENV variable ("5m" or "1h").
   *
   * If caching is enabled, we can pass content shorter than 1024 tokens and
   * Anthropic will simply ignore the cache_control block unless the content
   * meets the model's minimum. Since this feature is opt-in, we can safely
   * assume that if caching is enabled we should pass the content as is.
   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
   *
   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
   */
  get cacheControl() {
    // Store the result in an instance variable to avoid recalculating.
    if (this._cacheControl) return this._cacheControl;

    if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
    else {
      const normalized =
        process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
      if (["5m", "1h"].includes(normalized))
        this._cacheControl = { type: "ephemeral", ttl: normalized };
      else this._cacheControl = null;
    }
    return this._cacheControl;
  }
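
  // Illustrative sketch (not part of the class API): the getter above maps
  // the ENV value to Anthropic's cache_control shape, e.g.
  //   ANTHROPIC_CACHE_CONTROL="5m"  -> { type: "ephemeral", ttl: "5m" }
  //   ANTHROPIC_CACHE_CONTROL="1h"  -> { type: "ephemeral", ttl: "1h" }
  //   unset or any other value      -> null (caching disabled)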

  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Builds the system parameter with cache control if applicable.
   * @param {string} systemContent - The system prompt content
   * @returns {string|array} System parameter for API call
   */
  #buildSystemPrompt(systemContent) {
    if (!systemContent || !this.cacheControl) return systemContent;
    return [
      {
        type: "text",
        text: systemContent,
        cache_control: this.cacheControl,
      },
    ];
  }
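
  // Illustrative sketch: with caching enabled (e.g. ttl "5m"), a plain system
  // string is wrapped into Anthropic's block form:
  //   #buildSystemPrompt("You are helpful.") ->
  //   [{ type: "text", text: "You are helpful.",
  //      cache_control: { type: "ephemeral", ttl: "5m" } }]
  // With caching disabled, the string is passed through unchanged.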

  /**
   * Parse a data URL into media type and base64 data.
   * @param {string} dataUrl - Data URL like "data:image/jpeg;base64,/9j/..."
   * @returns {{mediaType: string, data: string}|null}
   */
  #parseDataUrl(dataUrl) {
    if (!dataUrl || !dataUrl.startsWith("data:")) return null;
    const matches = dataUrl.match(/^data:([^;]+);base64,(.+)$/);
    if (!matches) return null;
    return { mediaType: matches[1], data: matches[2] };
  }
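
  // Illustrative sketch of the parse:
  //   #parseDataUrl("data:image/png;base64,iVBORw0...") ->
  //   { mediaType: "image/png", data: "iVBORw0..." }
  //   #parseDataUrl("https://example.com/image.png") -> null (not a data URL)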

  #prepareMessages(messages = []) {
    // Extract the system prompt and filter out any system messages from the main chat.
    let systemPrompt =
      "You are a helpful AI assistant who can use the tools available to help answer the user's prompts and questions.";
    const chatMessages = messages.filter((msg) => {
      if (msg.role === "system") {
        systemPrompt = msg.content;
        return false;
      }
      return true;
    });

    const processedMessages = chatMessages.reduce(
      (processedMessages, message, index) => {
        // Normalize the `function` role to Anthropic's `tool_result` format.
        if (message.role === "function") {
          const prevMessage = chatMessages[index - 1];
          if (prevMessage?.role === "assistant") {
            const toolUse = prevMessage.content.find(
              (item) => item.type === "tool_use"
            );
            if (toolUse) {
              processedMessages.push({
                role: "user",
                content: [
                  {
                    type: "tool_result",
                    tool_use_id: toolUse.id,
                    content: message.content
                      ? String(message.content)
                      : "Tool executed successfully.",
                  },
                ],
              });
            }
          }
          return processedMessages;
        }

        // Ensure message content is in array format and filter out empty text blocks.
        let content = Array.isArray(message.content)
          ? message.content
          : [{ type: "text", text: message.content }];
        content = content.filter(
          (item) =>
            item.type !== "text" || (item.text && item.text.trim().length > 0)
        );

        // Add image attachments if present (for vision/multimodal support).
        if (message.attachments && message.attachments.length > 0) {
          for (const attachment of message.attachments) {
            const parsed = this.#parseDataUrl(attachment.contentString);
            if (parsed) {
              content.push({
                type: "image",
                source: {
                  type: "base64",
                  media_type: parsed.mediaType,
                  data: parsed.data,
                },
              });
            }
          }
        }

        if (content.length === 0) return processedMessages;

        // Add a text block to assistant messages with tool use if one doesn't exist.
        if (
          message.role === "assistant" &&
          content.some((item) => item.type === "tool_use") &&
          !content.some((item) => item.type === "text")
        ) {
          content.unshift({
            type: "text",
            text: "I'll use a tool to help answer this question.",
          });
        }

        const lastMessage = processedMessages[processedMessages.length - 1];
        if (lastMessage && lastMessage.role === message.role) {
          // Merge consecutive messages from the same role.
          lastMessage.content.push(...content);
        } else {
          // Don't pass attachments to the final message object.
          const { attachments: _, ...restOfMessage } = message;
          processedMessages.push({ ...restOfMessage, content });
        }

        return processedMessages;
      },
      []
    );

    // The first message must be from the user.
    if (processedMessages.length > 0 && processedMessages[0].role !== "user") {
      processedMessages.shift();
    }

    return [systemPrompt, processedMessages];
  }
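
  // Illustrative sketch of the normalization above: an OpenAI-style `function`
  // message such as
  //   { role: "function", content: "42" }
  // following an assistant message containing a tool_use block becomes
  //   { role: "user",
  //     content: [{ type: "tool_result", tool_use_id: "<tool_use.id>", content: "42" }] }
  // and consecutive same-role messages are merged into one content array.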

  // Anthropic does not use the regular schema for functions, so here we need
  // to convert them to its specific format so that the call can run correctly.
  #formatFunctions(functions = []) {
    return functions.map((func) => {
      const { name, description, parameters, required } = func;
      const { type, properties } = parameters;
      return {
        name,
        description,
        input_schema: {
          type,
          properties,
          required,
        },
      };
    });
  }
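
  // Illustrative sketch of the conversion, assuming an OpenAI-style definition:
  //   { name: "get_weather", description: "...",
  //     parameters: { type: "object", properties: { city: { type: "string" } } },
  //     required: ["city"] }
  // becomes Anthropic's tool shape:
  //   { name: "get_weather", description: "...",
  //     input_schema: { type: "object",
  //       properties: { city: { type: "string" } }, required: ["city"] } }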

  /**
   * Stream a chat completion from the LLM with tool calling.
   * Note: This uses the Anthropic API SDK and its implementation is specific to Anthropic.
   *
   * @param {any[]} messages - The messages to send to the LLM.
   * @param {any[]} functions - The functions to use in the LLM.
   * @param {function} eventHandler - The event handler to use to report stream events.
   * @returns {Promise<{ functionCall: any, textResponse: string, uuid: string }>} - The result of the chat completion.
   */
  async stream(messages, functions = [], eventHandler = null) {
    await this.assertModelMaxTokens();
    this.resetUsage();

    try {
      const msgUUID = v4();
      const [systemPrompt, chats] = this.#prepareMessages(messages);
      const response = await this.client.messages.create(
        {
          model: this.model,
          max_tokens: this.maxTokens,
          system: this.#buildSystemPrompt(systemPrompt),
          messages: chats,
          stream: true,
          ...(Array.isArray(functions) && functions?.length > 0
            ? { tools: this.#formatFunctions(functions) }
            : {}),
        },
        { headers: { "anthropic-beta": "tools-2024-04-04" } } // Required so we can use tools.
      );

      const result = {
        functionCall: null,
        textResponse: "",
      };

      // Track usage from streaming events.
      const usage = { input_tokens: 0, output_tokens: 0 };

      for await (const chunk of response) {
        // Capture input tokens from the message_start event.
        if (chunk.type === "message_start" && chunk.message?.usage) {
          usage.input_tokens = chunk.message.usage.input_tokens || 0;
        }

        // Capture output tokens from the message_delta event.
        if (chunk.type === "message_delta" && chunk.usage) {
          usage.output_tokens = chunk.usage.output_tokens || 0;
        }

        if (chunk.type === "content_block_start") {
          if (chunk.content_block.type === "text") {
            result.textResponse += chunk.content_block.text;
            eventHandler?.("reportStreamEvent", {
              type: "textResponseChunk",
              uuid: msgUUID,
              content: chunk.content_block.text,
            });
          }

          if (chunk.content_block.type === "tool_use") {
            result.functionCall = {
              id: chunk.content_block.id,
              name: chunk.content_block.name,
              // The initial arguments are an empty {} (object), so we need to set it to an empty string.
              // It is unclear if this is ALWAYS empty on the tool_use block or if it can possibly be populated.
              // This is a workaround to ensure the tool call is valid.
              arguments: "",
            };
            eventHandler?.("reportStreamEvent", {
              type: "toolCallInvocation",
              uuid: `${msgUUID}:tool_call_invocation`,
              content: `Assembling Tool Call: ${result.functionCall.name}(${result.functionCall.arguments})`,
            });
          }
        }

        if (chunk.type === "content_block_delta") {
          if (chunk.delta.type === "text_delta") {
            result.textResponse += chunk.delta.text;
            eventHandler?.("reportStreamEvent", {
              type: "textResponseChunk",
              uuid: msgUUID,
              content: chunk.delta.text,
            });
          }

          if (chunk.delta.type === "input_json_delta") {
            result.functionCall.arguments += chunk.delta.partial_json;
            eventHandler?.("reportStreamEvent", {
              type: "toolCallInvocation",
              uuid: `${msgUUID}:tool_call_invocation`,
              content: `Assembling Tool Call: ${result.functionCall.name}(${result.functionCall.arguments})`,
            });
          }
        }
      }

      // Record accumulated usage.
      this.recordUsage(usage);
      if (result.functionCall) {
        result.functionCall.arguments = safeJsonParse(
          result.functionCall.arguments,
          {}
        );
        messages.push({
          role: "assistant",
          content: [
            { type: "text", text: result.textResponse },
            {
              type: "tool_use",
              id: result.functionCall.id,
              name: result.functionCall.name,
              input: result.functionCall.arguments,
            },
          ],
        });
        return {
          textResponse: result.textResponse,
          functionCall: {
            name: result.functionCall.name,
            arguments: result.functionCall.arguments,
          },
          cost: 0,
          uuid: msgUUID,
        };
      }

      return {
        textResponse: result.textResponse,
        functionCall: null,
        cost: 0,
        uuid: msgUUID,
      };
    } catch (error) {
      // If we hit an invalid-auth error we need to abort because no amount of
      // waiting will make auth better.
      if (error instanceof Anthropic.AuthenticationError) throw error;

      if (
        error instanceof Anthropic.RateLimitError ||
        error instanceof Anthropic.InternalServerError ||
        error instanceof Anthropic.APIError // Also catches AuthenticationError, hence the check above.
      ) {
        throw new RetryError(error.message);
      }

      throw error;
    }
  }
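
  // Illustrative sketch of the Anthropic stream event order the loop above
  // relies on (per the Messages streaming API):
  //   message_start        -> carries input token usage
  //   content_block_start  -> begins a "text" or "tool_use" block
  //   content_block_delta  -> "text_delta" or "input_json_delta" payloads
  //   content_block_stop   -> closes a block (not needed here)
  //   message_delta        -> carries output token usage and stop_reason
  //   message_stop         -> stream end (the for-await loop simply exits)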

  /**
   * Create a completion based on the received messages.
   *
   * @param messages A list of messages to send to the Anthropic API.
   * @param functions A list of function definitions the model may call.
   * @returns The completion.
   */
  async complete(messages, functions = []) {
    await this.assertModelMaxTokens();
    this.resetUsage();

    try {
      const [systemPrompt, chats] = this.#prepareMessages(messages);
      const response = await this.client.messages.create(
        {
          model: this.model,
          max_tokens: this.maxTokens,
          system: this.#buildSystemPrompt(systemPrompt),
          messages: chats,
          stream: false,
          ...(Array.isArray(functions) && functions?.length > 0
            ? { tools: this.#formatFunctions(functions) }
            : {}),
        },
        { headers: { "anthropic-beta": "tools-2024-04-04" } } // Required so we can use tools.
      );

      // Record usage from the response (Anthropic uses input_tokens/output_tokens).
      if (response.usage) this.recordUsage(response.usage);

      // We know that we need to call a tool, so we are about to recurse through completions/handleExecution.
      // https://docs.anthropic.com/claude/docs/tool-use#how-tool-use-works
      if (response.stop_reason === "tool_use") {
        // Get the tool call explicitly.
        const toolCall = response.content.find(
          (res) => res.type === "tool_use"
        );

        // Here we need the chain of thought the model may or may not have generated alongside the call.
        // This needs to be in a very specific format, so we always ensure there is a 2-item content array
        // so that the tool_call content is correct. For Anthropic, all text items must not be empty,
        // but the API will still return empty text, so we need to make 100% sure text is not empty
        // or the tool call will fail.
        let thought = response.content.find((res) => res.type === "text");
        thought =
          thought?.text?.length > 0
            ? {
                role: "assistant",
                content: [
                  { type: "text", text: thought.text },
                  { ...toolCall },
                ],
              }
            : {
                role: "assistant",
                content: [
                  {
                    type: "text",
                    text: `Okay, I'm going to use ${toolCall.name} to help me.`,
                  },
                  { ...toolCall },
                ],
              };

        // Modify messages forcefully by adding the assistant thought so that tool_use/tool_result
        // messaging works with Anthropic's tool calling API.
        messages.push(thought);

        const functionArgs = toolCall.input;
        return {
          result: null,
          functionCall: {
            name: toolCall.name,
            arguments: functionArgs,
          },
          cost: 0,
          usage: this.getUsage(),
        };
      }

      const completion = response.content.find((msg) => msg.type === "text");
      return {
        textResponse:
          completion?.text ??
          "The model failed to complete the task and return a valid response.",
        cost: 0,
        usage: this.getUsage(),
      };
    } catch (error) {
      // If we hit an invalid-auth error we need to abort because no amount of
      // waiting will make auth better.
      if (error instanceof Anthropic.AuthenticationError) throw error;

      if (
        error instanceof Anthropic.RateLimitError ||
        error instanceof Anthropic.InternalServerError ||
        error instanceof Anthropic.APIError // Also catches AuthenticationError, hence the check above.
      ) {
        throw new RetryError(error.message);
      }

      throw error;
    }
  }
}
module.exports = AnthropicProvider;
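
/*
 * Minimal usage sketch (illustrative only; assumes ANTHROPIC_API_KEY is set
 * and that the Provider base class supplies `resetUsage`, `recordUsage`,
 * and `getUsage`):
 *
 *   const AnthropicProvider = require("./anthropic");
 *   const provider = new AnthropicProvider({ model: "claude-sonnet-4-6" });
 *
 *   const { textResponse, functionCall } = await provider.complete([
 *     { role: "system", content: "You are a concise assistant." },
 *     { role: "user", content: "What is 2 + 2?" },
 *   ]);
 *   // `functionCall` is present only when stop_reason was "tool_use";
 *   // otherwise `textResponse` holds the model's answer.
 */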