anything-llm/server/utils/agents/aibitat/providers/anthropic.js

const Anthropic = require("@anthropic-ai/sdk");
const { AnthropicLLM } = require("../../../AiProviders/anthropic");
const { RetryError } = require("../error.js");
const Provider = require("./ai-provider.js");
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../http");
const { getAnythingLLMUserAgent } = require("../../../../endpoints/utils");

/**
 * The agent provider for the Anthropic API.
 * By default, the model is set to 'claude-sonnet-4-6'.
 *
 * Translates the agent framework's message/function format into Anthropic's
 * Messages API format (system prompt passed separately, `tool_use` /
 * `tool_result` content blocks) and exposes both a streaming and a
 * non-streaming completion entry point.
 */
class AnthropicProvider extends Provider {
  model;
  maxTokens = null;

  /**
   * @param {Object} [config]
   * @param {Object} [config.options] - Options passed verbatim to the Anthropic SDK client.
   *   When provided, it REPLACES the defaults below entirely (no merging).
   * @param {string} [config.model] - The model identifier to use for requests.
   */
  constructor(config = {}) {
    const {
      options = {
        apiKey: process.env.ANTHROPIC_API_KEY,
        maxRetries: 3,
        defaultHeaders: {
          "User-Agent": getAnythingLLMUserAgent(),
        },
      },
      model = "claude-sonnet-4-6",
    } = config;

    const client = new Anthropic(options);

    super(client);
    this.model = model;
  }

  /**
   * Whether this provider supports native OpenAI-compatible tool calling.
   * - Anthropic always supports tool calling.
   * @returns {boolean}
   */
  supportsNativeToolCalling() {
    return true;
  }

  /**
   * Fetches the maximum number of tokens the model should generate in its response
   * and memoizes it on the instance so the lookup only happens once.
   * The lookup itself (and any fallback value for unknown models) is handled by
   * `AnthropicLLM.fetchModelMaxTokens`, which is defined outside this file.
   * @returns {Promise<number>} The maximum output tokens limit for API calls.
   */
  async assertModelMaxTokens() {
    if (this.maxTokens) return this.maxTokens;
    this.maxTokens = await AnthropicLLM.fetchModelMaxTokens(this.model);
    return this.maxTokens;
  }

  /**
   * Cache control configuration derived from the ANTHROPIC_CACHE_CONTROL ENV variable.
   * Accepted values (case-insensitive, surrounding whitespace ignored) are `5m` and `1h`;
   * anything else, or an unset variable, disables caching.
   *
   * If caching is enabled, we can pass less than 1024 tokens and Anthropic will just
   * ignore it unless it is above the model's minimum. Since this feature is opt-in
   * we can safely assume that if caching is enabled that we should just pass the content as is.
   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
   *
   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
   */
  get cacheControl() {
    // Only a valid (truthy) configuration is memoized; a `null` result is
    // re-derived from the ENV on the next read.
    if (this._cacheControl) return this._cacheControl;

    const ttl = process.env.ANTHROPIC_CACHE_CONTROL?.toLowerCase().trim();
    this._cacheControl = ["5m", "1h"].includes(ttl)
      ? { type: "ephemeral", ttl }
      : null;
    return this._cacheControl;
  }

  /**
   * Whether this provider can stream agent responses.
   * @returns {boolean}
   */
  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Builds system parameter with cache control if applicable
   * @param {string} systemContent - The system prompt content
   * @returns {string|array} The prompt unchanged when it is empty or caching is
   *   disabled, otherwise a single text block carrying the cache control settings.
   */
  #buildSystemPrompt(systemContent) {
    if (!systemContent || !this.cacheControl) return systemContent;
    return [
      {
        type: "text",
        text: systemContent,
        cache_control: this.cacheControl,
      },
    ];
  }

  /**
   * Parse a data URL into media type and base64 data
   * @param {string} dataUrl - Data URL like "data:image/jpeg;base64,/9j/..."
   * @returns {{mediaType: string, data: string}|null} `null` when the input is not
   *   a string or is not a base64 data URL.
   */
  #parseDataUrl(dataUrl) {
    // Guard against non-string attachments so a malformed attachment is skipped
    // instead of throwing on `.startsWith`.
    if (typeof dataUrl !== "string" || !dataUrl.startsWith("data:"))
      return null;
    const matches = dataUrl.match(/^data:([^;]+);base64,(.+)$/);
    if (!matches) return null;
    return { mediaType: matches[1], data: matches[2] };
  }

  /**
   * Converts the agent's message history into the shape the Anthropic Messages API expects.
   * - System messages are removed from the chat; the last one seen becomes the system prompt.
   * - `function` role messages become `user` messages holding a `tool_result` block that
   *   references the `tool_use` block of the immediately preceding assistant message.
   *   A function message without such a preceding tool call is dropped.
   * - Empty text blocks are removed and messages left without content are skipped.
   * - Image attachments given as base64 data URLs are appended as `image` blocks.
   * - Consecutive messages with the same role are merged into one.
   * - A leading non-user message is removed since the first message must be from the user.
   *
   * The caller's message objects and their content arrays are not mutated.
   *
   * @param {any[]} messages - The raw message history.
   * @returns {[string, any[]]} Tuple of the system prompt and the processed chat messages.
   */
  #prepareMessages(messages = []) {
    // Extract system prompt and filter out any system messages from the main chat.
    let systemPrompt =
      "You are a helpful ai assistant who can assist the user and use tools available to help answer the users prompts and questions.";
    const chatMessages = [];
    for (const msg of messages) {
      if (msg.role === "system") systemPrompt = msg.content;
      else chatMessages.push(msg);
    }

    const processedMessages = [];
    for (const [index, message] of chatMessages.entries()) {
      // Normalize `function` role to Anthropic's `tool_result` format.
      if (message.role === "function") {
        const prevMessage = chatMessages[index - 1];
        // The previous assistant message may carry plain string content (no tool
        // call), in which case there is nothing to attach this result to.
        const toolUse =
          prevMessage?.role === "assistant" &&
          Array.isArray(prevMessage.content)
            ? prevMessage.content.find((item) => item.type === "tool_use")
            : undefined;

        if (toolUse) {
          processedMessages.push({
            role: "user",
            content: [
              {
                type: "tool_result",
                tool_use_id: toolUse.id,
                content: message.content
                  ? String(message.content)
                  : "Tool executed successfully.",
              },
            ],
          });
        }
        continue;
      }

      // Ensure message content is in array format and filter out empty text blocks.
      // `filter` always yields a fresh array, so later pushes never touch the caller's data.
      const rawContent = Array.isArray(message.content)
        ? message.content
        : [{ type: "text", text: message.content }];
      const content = rawContent.filter(
        (item) =>
          item.type !== "text" || (item.text && item.text.trim().length > 0)
      );

      // Add image attachments if present (for vision/multimodal support)
      if (message.attachments && message.attachments.length > 0) {
        for (const attachment of message.attachments) {
          const parsed = this.#parseDataUrl(attachment.contentString);
          if (!parsed) continue;
          content.push({
            type: "image",
            source: {
              type: "base64",
              media_type: parsed.mediaType,
              data: parsed.data,
            },
          });
        }
      }

      if (content.length === 0) continue;

      // Add a text block to assistant messages with tool use if one doesn't exist.
      if (
        message.role === "assistant" &&
        content.some((item) => item.type === "tool_use") &&
        !content.some((item) => item.type === "text")
      ) {
        content.unshift({
          type: "text",
          text: "I'll use a tool to help answer this question.",
        });
      }

      const lastMessage = processedMessages[processedMessages.length - 1];
      if (lastMessage && lastMessage.role === message.role) {
        // Merge consecutive messages from the same role.
        lastMessage.content.push(...content);
      } else {
        // Don't pass attachments to the final message object
        const { attachments: _, ...restOfMessage } = message;
        processedMessages.push({ ...restOfMessage, content });
      }
    }

    // The first message must be from the user.
    if (processedMessages.length > 0 && processedMessages[0].role !== "user") {
      processedMessages.shift();
    }

    return [systemPrompt, processedMessages];
  }

  /**
   * Anthropic does not use the regular schema for functions, so each definition is
   * reshaped into Anthropic's tool format (`name`, `description`, `input_schema`).
   *
   * `required` is read from the function definition itself and falls back to
   * `parameters.required`, which is where JSON Schema normally places it.
   * A definition without `parameters` produces an empty object schema instead of throwing.
   *
   * @param {any[]} functions - The function definitions available to the agent.
   * @returns {{name: string, description: string, input_schema: object}[]}
   */
  #formatFunctions(functions = []) {
    return functions.map(({ name, description, parameters, required }) => {
      const schema = parameters ?? {};
      return {
        name,
        description,
        input_schema: {
          type: schema.type ?? "object",
          properties: schema.properties,
          required: required ?? schema.required,
        },
      };
    });
  }

  /**
   * Prepares the message history and issues the Messages API request shared by
   * `stream` and `complete`.
   *
   * @param {any[]} messages - The raw message history.
   * @param {any[]} functions - The functions to offer to the model as tools.
   * @param {boolean} stream - Whether to request a streamed response.
   * @returns {Promise<any>} The pending SDK request.
   */
  #createMessage(messages, functions, stream) {
    const [systemPrompt, chats] = this.#prepareMessages(messages);
    const hasTools = Array.isArray(functions) && functions.length > 0;

    return this.client.messages.create(
      {
        model: this.model,
        max_tokens: this.maxTokens,
        system: this.#buildSystemPrompt(systemPrompt),
        messages: chats,
        stream,
        ...(hasTools ? { tools: this.#formatFunctions(functions) } : {}),
      },
      { headers: { "anthropic-beta": "tools-2024-04-04" } } // Required so we can use tools.
    );
  }

  /**
   * Maps a failed request onto the error the caller should see. Always throws.
   * - Authentication errors are rethrown untouched: no amount of waiting will make auth better.
   * - Any other Anthropic API error is wrapped in a RetryError carrying the same message.
   * - Everything else is rethrown untouched.
   *
   * @param {unknown} error - The error caught while performing the request.
   * @returns {never}
   */
  #handleRequestError(error) {
    if (error instanceof Anthropic.AuthenticationError) throw error;

    if (
      error instanceof Anthropic.RateLimitError ||
      error instanceof Anthropic.InternalServerError ||
      error instanceof Anthropic.APIError // Would also match AuthenticationError, which is handled above.
    ) {
      throw new RetryError(error.message);
    }

    throw error;
  }

  /**
   * Stream a chat completion from the LLM with tool calling
   * Note: This using the Anthropic API SDK and its implementation is specific to Anthropic.
   *
   * When the model requests a tool, the assistant turn (text + `tool_use` block) is
   * appended to the `messages` array passed in so the follow-up `function` message
   * can be matched to it.
   *
   * @param {any[]} messages - The messages to send to the LLM. Mutated when a tool call is made.
   * @param {any[]} functions - The functions to use in the LLM.
   * @param {function} eventHandler - The event handler to use to report stream events.
   * @returns {Promise<{ functionCall: any, textResponse: string, cost: number, uuid: string }>} - The result of the chat completion.
   * @throws {RetryError} When the Anthropic API fails with a non-authentication API error.
   */
  async stream(messages, functions = [], eventHandler = null) {
    await this.assertModelMaxTokens();
    this.resetUsage();

    try {
      const msgUUID = v4();
      const response = await this.#createMessage(messages, functions, true);

      let textResponse = "";
      let functionCall = null;

      // Track usage from streaming events
      const usage = { input_tokens: 0, output_tokens: 0 };

      const reportText = (content) =>
        eventHandler?.("reportStreamEvent", {
          type: "textResponseChunk",
          uuid: msgUUID,
          content,
        });
      const reportToolCall = () =>
        eventHandler?.("reportStreamEvent", {
          type: "toolCallInvocation",
          uuid: `${msgUUID}:tool_call_invocation`,
          content: `Assembling Tool Call: ${functionCall.name}(${functionCall.arguments})`,
        });

      for await (const chunk of response) {
        switch (chunk.type) {
          case "message_start":
            // Capture input tokens from message_start event
            if (chunk.message?.usage)
              usage.input_tokens = chunk.message.usage.input_tokens || 0;
            break;

          case "message_delta":
            // Capture output tokens from message_delta event
            if (chunk.usage)
              usage.output_tokens = chunk.usage.output_tokens || 0;
            break;

          case "content_block_start":
            if (chunk.content_block.type === "text") {
              textResponse += chunk.content_block.text;
              reportText(chunk.content_block.text);
            }

            if (chunk.content_block.type === "tool_use") {
              functionCall = {
                id: chunk.content_block.id,
                name: chunk.content_block.name,
                // The initial arguments are empty {} (object) so we need to set it to an empty string.
                // It is unclear if this is ALWAYS empty on the tool_use block or if it can possible be populated.
                // This is a workaround to ensure the tool call is valid.
                arguments: "",
              };
              reportToolCall();
            }
            break;

          case "content_block_delta":
            if (chunk.delta.type === "text_delta") {
              textResponse += chunk.delta.text;
              reportText(chunk.delta.text);
            }

            // Ignore argument fragments that arrive without an open tool call
            // rather than dereferencing null.
            if (chunk.delta.type === "input_json_delta" && functionCall) {
              functionCall.arguments += chunk.delta.partial_json;
              reportToolCall();
            }
            break;

          default:
            break;
        }
      }

      // Record accumulated usage
      this.recordUsage(usage);

      if (!functionCall) {
        return {
          textResponse,
          functionCall: null,
          cost: 0,
          uuid: msgUUID,
        };
      }

      // The arguments were streamed as JSON fragments; fall back to {} if they do not parse.
      const parsedArguments = safeJsonParse(functionCall.arguments, {});
      messages.push({
        role: "assistant",
        content: [
          { type: "text", text: textResponse },
          {
            type: "tool_use",
            id: functionCall.id,
            name: functionCall.name,
            input: parsedArguments,
          },
        ],
      });
      return {
        textResponse,
        functionCall: {
          name: functionCall.name,
          arguments: parsedArguments,
        },
        cost: 0,
        uuid: msgUUID,
      };
    } catch (error) {
      this.#handleRequestError(error);
    }
  }

  /**
   * Create a completion based on the received messages.
   *
   * When the model requests a tool, the assistant turn (text + `tool_use` block) is
   * appended to the `messages` array passed in so the follow-up `function` message
   * can be matched to it.
   *
   * @param {any[]} messages A list of messages to send to the Anthropic API. Mutated when a tool call is made.
   * @param {any[]} functions The functions to offer to the model as tools.
   * @returns The completion: either `{ result: null, functionCall, cost, usage }` when a
   *   tool is requested, or `{ textResponse, cost, usage }` otherwise.
   * @throws {RetryError} When the Anthropic API fails with a non-authentication API error.
   */
  async complete(messages, functions = []) {
    await this.assertModelMaxTokens();
    this.resetUsage();

    try {
      const response = await this.#createMessage(messages, functions, false);

      // Record usage from response (Anthropic uses input_tokens/output_tokens)
      if (response.usage) this.recordUsage(response.usage);

      // We know that we need to call a tool. So we are about to recurse through completions/handleExecution
      // https://docs.anthropic.com/claude/docs/tool-use#how-tool-use-works
      const toolCall =
        response.stop_reason === "tool_use"
          ? response.content.find((block) => block.type === "tool_use")
          : undefined;

      if (toolCall) {
        // Keep the text the model generated alongside the call. Text blocks expose it
        // under `text`. Anthropic rejects empty text blocks yet may still return one,
        // so a placeholder is substituted whenever the text is missing or blank.
        const thought = response.content.find(
          (block) => block.type === "text"
        );
        const hasThought =
          typeof thought?.text === "string" && thought.text.trim().length > 0;

        // Modify messages forcefully by adding the assistant turn so that
        // tool_use/tool_result messaging works with Anthropic's tool calling API.
        messages.push({
          role: "assistant",
          content: [
            {
              type: "text",
              text: hasThought
                ? thought.text
                : `Okay, im going to use ${toolCall.name} to help me.`,
            },
            { ...toolCall },
          ],
        });

        return {
          result: null,
          functionCall: {
            name: toolCall.name,
            arguments: toolCall.input,
          },
          cost: 0,
          usage: this.getUsage(),
        };
      }

      const completion = response.content.find((msg) => msg.type === "text");
      return {
        textResponse:
          completion?.text ??
          "The model failed to complete the task and return back a valid response.",
        cost: 0,
        usage: this.getUsage(),
      };
    } catch (error) {
      this.#handleRequestError(error);
    }
  }
}

// Expose the provider class as this module's CommonJS export.
module.exports = AnthropicProvider;