diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index da51e887d..629197d51 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -385,6 +385,11 @@ class AnthropicConfig:
             if "user_id" in anthropic_message_request["metadata"]:
                 new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]

+        # Pass litellm proxy-specific metadata
+        if "litellm_metadata" in anthropic_message_request:
+            # metadata will be passed to litellm.acompletion(); it's a litellm param
+            new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
+
         ## CONVERT TOOL CHOICE
         if "tool_choice" in anthropic_message_request:
             new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 8909b1da3..7384dc30b 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -39,6 +39,9 @@ def _get_metadata_variable_name(request: Request) -> str:
     """
     if "thread" in request.url.path or "assistant" in request.url.path:
         return "litellm_metadata"
+    if "/v1/messages" in request.url.path:
+        # the Anthropic API has its own field called metadata
+        return "litellm_metadata"
     else:
         return "metadata"
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 0ac1d82e0..106b95453 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -657,7 +657,11 @@ async def _PROXY_track_cost_callback(
     global prisma_client, custom_db_client
     try:
         # check if it has collected an entire stream response
-        verbose_proxy_logger.debug("Proxy: In track_cost_callback for: %s", kwargs)
+        verbose_proxy_logger.debug(
+            "Proxy: In track_cost_callback for: kwargs=%s and completion_response: %s",
+            kwargs,
+            completion_response,
+        )
         verbose_proxy_logger.debug(
             f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
         )
diff --git a/litellm/tests/test_anthropic_completion.py b/litellm/tests/test_anthropic_completion.py
index cac0945d8..15d150a56 100644
--- a/litellm/tests/test_anthropic_completion.py
+++ b/litellm/tests/test_anthropic_completion.py
@@ -48,6 +48,42 @@ def test_anthropic_completion_input_translation():
     ]


+def test_anthropic_completion_input_translation_with_metadata():
+    """
+    Tests that cost tracking works as expected with LiteLLM Proxy.
+
+    LiteLLM Proxy inserts litellm_metadata for Anthropic endpoints to track user_api_key and user_api_key_team_id.
+
+    This test ensures that `litellm_metadata` is not present in the translated input,
+    and that `litellm.acompletion()` receives `metadata`, which is a litellm-specific param.
+    """
+    data = {
+        "model": "gpt-3.5-turbo",
+        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
+        "litellm_metadata": {
+            "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+            "user_api_key_alias": None,
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "default_user_id",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": None,
+            "user_api_key_team_alias": None,
+            "user_api_key_team_max_budget": None,
+            "user_api_key_team_spend": None,
+            "user_api_key_spend": 0.0,
+            "user_api_key_max_budget": None,
+            "user_api_key_metadata": {},
+        },
+    }
+    translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)
+
+    assert "litellm_metadata" not in translated_input
+    assert "metadata" in translated_input
+    assert translated_input["metadata"] == data["litellm_metadata"]
+
+
 def test_anthropic_completion_e2e():
     litellm.set_verbose = True
diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
index 33f413ece..b41980afd 100644
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@@ -1,4 +1,4 @@
-from typing import Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union

 from pydantic import BaseModel, validator
 from typing_extensions import Literal, Required, TypedDict
@@ -113,6 +113,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
     top_k: int
     top_p: float

+    # litellm param - used for tracking litellm proxy metadata in the request
+    litellm_metadata: dict
+

 class ContentTextBlockDelta(TypedDict):
     """
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 294e299db..35e442119 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -436,6 +436,7 @@ class ChatCompletionRequest(TypedDict, total=False):
     function_call: Union[str, dict]
     functions: List
     user: str
+    metadata: dict  # litellm-specific param


class ChatCompletionDeltaChunk(TypedDict, total=False):
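
For reviewers, here is a minimal, runnable sketch of the metadata hand-off this diff introduces. The two functions below are simplified, hypothetical stand-ins for `_get_metadata_variable_name()` and `AnthropicConfig.translate_anthropic_to_openai()`; they mirror only the behavior added in this PR, not the full litellm implementations.

# Minimal sketch (assumed names; not the actual litellm source) of the
# litellm_metadata -> metadata hand-off added in this PR.


def get_metadata_variable_name(path: str) -> str:
    # The proxy stores its tracking info under "litellm_metadata" for the
    # Anthropic /v1/messages route, since the Anthropic API already defines
    # its own "metadata" request field.
    if "thread" in path or "assistant" in path or "/v1/messages" in path:
        return "litellm_metadata"
    return "metadata"


def translate_anthropic_to_openai(anthropic_message_request: dict) -> dict:
    # Copy so the caller's dict is not mutated in this sketch.
    new_kwargs = dict(anthropic_message_request)
    # Move proxy-injected metadata to the plain "metadata" key, which
    # litellm.acompletion() treats as a litellm-specific param.
    if "litellm_metadata" in new_kwargs:
        new_kwargs["metadata"] = new_kwargs.pop("litellm_metadata")
    return new_kwargs


if __name__ == "__main__":
    assert get_metadata_variable_name("/v1/messages") == "litellm_metadata"

    request = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        "litellm_metadata": {"user_api_key": "hashed-key", "user_api_key_team_id": None},
    }
    translated = translate_anthropic_to_openai(request)
    # Same invariants the new unit test checks:
    assert "litellm_metadata" not in translated
    assert translated["metadata"] == request["litellm_metadata"]

The point of the separate key is that "metadata" is already a user-facing field on Anthropic's /v1/messages API, so the proxy's internal tracking info (user_api_key, team id, spend, etc.) travels under "litellm_metadata" until translation time, where it is popped back into the "metadata" litellm param that cost tracking reads.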