forked from phoenix/litellm-mirror
Merge pull request #4841 from BerriAI/litellm_anthropic_cost_tracking
[Feat] - /v1/messages support usage tracking on spendLogs
This commit is contained in:
commit
92708a2737
6 changed files with 54 additions and 2 deletions
|
@ -385,6 +385,11 @@ class AnthropicConfig:
|
||||||
if "user_id" in anthropic_message_request["metadata"]:
|
if "user_id" in anthropic_message_request["metadata"]:
|
||||||
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
||||||
|
|
||||||
|
# Pass litellm proxy specific metadata
|
||||||
|
if "litellm_metadata" in anthropic_message_request:
|
||||||
|
# metadata will be passed to litellm.acompletion(), it's a litellm_param
|
||||||
|
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
|
||||||
|
|
||||||
## CONVERT TOOL CHOICE
|
## CONVERT TOOL CHOICE
|
||||||
if "tool_choice" in anthropic_message_request:
|
if "tool_choice" in anthropic_message_request:
|
||||||
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
||||||
|
|
|
@ -39,6 +39,9 @@ def _get_metadata_variable_name(request: Request) -> str:
|
||||||
"""
|
"""
|
||||||
if "thread" in request.url.path or "assistant" in request.url.path:
|
if "thread" in request.url.path or "assistant" in request.url.path:
|
||||||
return "litellm_metadata"
|
return "litellm_metadata"
|
||||||
|
if "/v1/messages" in request.url.path:
|
||||||
|
# anthropic API has a field called metadata
|
||||||
|
return "litellm_metadata"
|
||||||
else:
|
else:
|
||||||
return "metadata"
|
return "metadata"
|
||||||
|
|
||||||
|
|
|
@ -657,7 +657,11 @@ async def _PROXY_track_cost_callback(
|
||||||
global prisma_client, custom_db_client
|
global prisma_client, custom_db_client
|
||||||
try:
|
try:
|
||||||
# check if it has collected an entire stream response
|
# check if it has collected an entire stream response
|
||||||
verbose_proxy_logger.debug("Proxy: In track_cost_callback for: %s", kwargs)
|
verbose_proxy_logger.debug(
|
||||||
|
"Proxy: In track_cost_callback for: kwargs=%s and completion_response: %s",
|
||||||
|
kwargs,
|
||||||
|
completion_response,
|
||||||
|
)
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
|
f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
|
||||||
)
|
)
|
||||||
|
|
|
@ -48,6 +48,42 @@ def test_anthropic_completion_input_translation():
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_anthropic_completion_input_translation_with_metadata():
|
||||||
|
"""
|
||||||
|
Tests that cost tracking works as expected with LiteLLM Proxy
|
||||||
|
|
||||||
|
LiteLLM Proxy will insert litellm_metadata for anthropic endpoints to track user_api_key and user_api_key_team_id
|
||||||
|
|
||||||
|
This test ensures that the `litellm_metadata` is not present in the translated input
|
||||||
|
It ensures that `litellm.acompletion()` will receive `metadata`, which is a litellm-specific param
|
||||||
|
"""
|
||||||
|
data = {
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"messages": [{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
"litellm_metadata": {
|
||||||
|
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||||
|
"user_api_key_alias": None,
|
||||||
|
"user_api_end_user_max_budget": None,
|
||||||
|
"litellm_api_version": "1.40.19",
|
||||||
|
"global_max_parallel_requests": None,
|
||||||
|
"user_api_key_user_id": "default_user_id",
|
||||||
|
"user_api_key_org_id": None,
|
||||||
|
"user_api_key_team_id": None,
|
||||||
|
"user_api_key_team_alias": None,
|
||||||
|
"user_api_key_team_max_budget": None,
|
||||||
|
"user_api_key_team_spend": None,
|
||||||
|
"user_api_key_spend": 0.0,
|
||||||
|
"user_api_key_max_budget": None,
|
||||||
|
"user_api_key_metadata": {},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)
|
||||||
|
|
||||||
|
assert "litellm_metadata" not in translated_input
|
||||||
|
assert "metadata" in translated_input
|
||||||
|
assert translated_input["metadata"] == data["litellm_metadata"]
|
||||||
|
|
||||||
|
|
||||||
def test_anthropic_completion_e2e():
|
def test_anthropic_completion_e2e():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Iterable, List, Optional, Union
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, validator
|
from pydantic import BaseModel, validator
|
||||||
from typing_extensions import Literal, Required, TypedDict
|
from typing_extensions import Literal, Required, TypedDict
|
||||||
|
@ -113,6 +113,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
|
||||||
top_k: int
|
top_k: int
|
||||||
top_p: float
|
top_p: float
|
||||||
|
|
||||||
|
# litellm param - used for tracking litellm proxy metadata in the request
|
||||||
|
litellm_metadata: dict
|
||||||
|
|
||||||
|
|
||||||
class ContentTextBlockDelta(TypedDict):
|
class ContentTextBlockDelta(TypedDict):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -436,6 +436,7 @@ class ChatCompletionRequest(TypedDict, total=False):
|
||||||
function_call: Union[str, dict]
|
function_call: Union[str, dict]
|
||||||
functions: List
|
functions: List
|
||||||
user: str
|
user: str
|
||||||
|
metadata: dict # litellm specific param
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionDeltaChunk(TypedDict, total=False):
|
class ChatCompletionDeltaChunk(TypedDict, total=False):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue