Litellm dev 12 24 2024 p4 (#7407)

* fix(invoke_handler.py): fix mock response iterator to handle tool calling

returns the tool call if one is returned in the model response
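
A minimal usage sketch of the behavior this fix targets, not part of the commit itself: when a Bedrock invoke model that litellm "fake-streams" returns a tool call in its non-streaming response, the streamed chunks should now carry that tool call instead of dropping it. The Bedrock model id and tool schema below are placeholder assumptions.

import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

# placeholder model id; any Bedrock invoke model that litellm fake-streams applies
response = litellm.completion(
    model="bedrock/ai21.jamba-1-5-large-v1:0",
    messages=[{"role": "user", "content": "What is the weather in San Francisco?"}],
    tools=tools,
    stream=True,
)

for chunk in response:
    delta = chunk.choices[0].delta
    if delta.tool_calls:  # previously stayed empty even when the model called a tool
        print(delta.tool_calls[0].function.name, delta.tool_calls[0].function.arguments)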

* fix(prometheus.py): add new 'tokens_by_tag' metric on prometheus

allows tracking token usage by task (see the request sketch below)

* feat(prometheus.py): add input + output token tracking by tag
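
A hedged sketch of how these per-tag counters get exercised through the LiteLLM proxy. The proxy URL, API key, model alias, and tag values are placeholders, and the metadata-tags request shape is an assumption about proxy usage rather than part of this diff; each tag on a request then shows up as a `tag` label on litellm_total_tokens_by_tag, litellm_input_tokens_by_tag, and litellm_output_tokens_by_tag at the proxy's /metrics endpoint.

import openai

client = openai.OpenAI(
    api_key="sk-1234",               # placeholder LiteLLM proxy key
    base_url="http://0.0.0.0:4000",  # placeholder LiteLLM proxy URL
)

client.chat.completions.create(
    model="gpt-4o",  # placeholder model alias configured on the proxy
    messages=[{"role": "user", "content": "summarize this support ticket"}],
    # tags ride along in request metadata and are read back as request_tags by the logger
    extra_body={"metadata": {"tags": ["task:summarization", "team:support"]}},
)

# Expected scrape shape (illustrative values; prometheus_client may expose
# these counters with a trailing "_total" suffix):
#   litellm_total_tokens_by_tag{tag="task:summarization"} 128.0
#   litellm_input_tokens_by_tag{tag="task:summarization"} 96.0
#   litellm_output_tokens_by_tag{tag="task:summarization"} 32.0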

* feat(prometheus.py): add tag based deployment failure tracking

allows admins to track failures by use-case
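
A rough sketch of how an admin might slice the new failure counter by use-case, assuming a Prometheus server is already scraping the proxy's /metrics endpoint. The Prometheus address is a placeholder, the query is illustrative rather than part of the commit, and the exact exception_class label string is assumed from the EXCEPTION_CLASS constant referenced in this diff.

import requests

PROMETHEUS_URL = "http://localhost:9090"  # placeholder Prometheus address

# failures in the last hour, broken down by request tag and exception class;
# adjust the metric name if your scrape exposes it with a "_total" suffix
query = (
    "sum by (tag, exception_class) "
    "(increase(litellm_deployment_failure_by_tag_responses[1h]))"
)

resp = requests.get(
    f"{PROMETHEUS_URL}/api/v1/query", params={"query": query}, timeout=10
)
for series in resp.json()["data"]["result"]:
    labels = series["metric"]
    _timestamp, value = series["value"]
    print(labels.get("tag"), labels.get("exception_class"), value)
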
Krish Dholakia, 2024-12-24 20:24:06 -08:00 (committed by GitHub)
parent 81be0b4090
commit 39dabb2e89
5 changed files with 209 additions and 12 deletions

Changed file: prometheus.py

@@ -76,7 +76,7 @@ class PrometheusLogger(CustomLogger):
                 UserAPIKeyLabelNames.TEAM.value,
                 UserAPIKeyLabelNames.TEAM_ALIAS.value,
                 UserAPIKeyLabelNames.USER.value,
-                UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
             ],
             buckets=LATENCY_BUCKETS,
         )
@@ -85,7 +85,7 @@ class PrometheusLogger(CustomLogger):
             "litellm_llm_api_latency_metric",
             "Total latency (seconds) for a models LLM API call",
             labelnames=[
-                UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
                 UserAPIKeyLabelNames.API_KEY_HASH.value,
                 UserAPIKeyLabelNames.API_KEY_ALIAS.value,
                 UserAPIKeyLabelNames.TEAM.value,
@@ -140,6 +140,14 @@ class PrometheusLogger(CustomLogger):
             ],
         )
 
+        # Counter for tokens by tag
+        self.litellm_tokens_by_tag_metric = Counter(
+            "litellm_total_tokens_by_tag",
+            "Total number of input + output tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
         self.litellm_input_tokens_metric = Counter(
             "litellm_input_tokens",
             "Total number of input tokens from LLM requests",
@@ -153,6 +161,16 @@ class PrometheusLogger(CustomLogger):
                 "user",
             ],
         )
+
+        # Counter for input tokens by tag
+        self.litellm_input_tokens_by_tag_metric = Counter(
+            "litellm_input_tokens_by_tag",
+            "Total number of input tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
+
         self.litellm_output_tokens_metric = Counter(
             "litellm_output_tokens",
             "Total number of output tokens from LLM requests",
@@ -167,6 +185,15 @@ class PrometheusLogger(CustomLogger):
             ],
         )
+
+        # Counter for output tokens by tag
+        self.litellm_output_tokens_by_tag_metric = Counter(
+            "litellm_output_tokens_by_tag",
+            "Total number of output tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
 
         # Remaining Budget for Team
         self.litellm_remaining_team_budget_metric = Gauge(
             "litellm_remaining_team_budget_metric",
@@ -237,10 +264,10 @@ class PrometheusLogger(CustomLogger):
         # Get all keys
         _logged_llm_labels = [
-            "litellm_model_name",
-            "model_id",
-            "api_base",
-            "api_provider",
+            UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value,
+            UserAPIKeyLabelNames.MODEL_ID.value,
+            UserAPIKeyLabelNames.API_BASE.value,
+            UserAPIKeyLabelNames.API_PROVIDER.value,
         ]
         team_and_key_labels = [
             "hashed_api_key",
@@ -275,6 +302,16 @@ class PrometheusLogger(CustomLogger):
             + EXCEPTION_LABELS
             + team_and_key_labels,
         )
+        self.litellm_deployment_failure_by_tag_responses = Counter(
+            "litellm_deployment_failure_by_tag_responses",
+            "Total number of failed LLM API calls for a specific LLM deployment by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+                UserAPIKeyLabelNames.TAG.value,
+            ]
+            + _logged_llm_labels
+            + EXCEPTION_LABELS,
+        )
         self.litellm_deployment_total_requests = Counter(
             name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
@@ -490,6 +527,14 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["total_tokens"])
 
+        _tags = standard_logging_payload["request_tags"]
+        for tag in _tags:
+            self.litellm_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["total_tokens"])
+
         self.litellm_input_tokens_metric.labels(
             end_user_id,
             user_api_key,
@@ -500,6 +545,13 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["prompt_tokens"])
 
+        for tag in _tags:
+            self.litellm_input_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["prompt_tokens"])
+
         self.litellm_output_tokens_metric.labels(
             end_user_id,
             user_api_key,
@@ -510,6 +562,13 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["completion_tokens"])
 
+        for tag in _tags:
+            self.litellm_output_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["completion_tokens"])
+
     def _increment_remaining_budget_metrics(
         self,
         user_api_team: Optional[str],
@@ -651,7 +710,7 @@ class PrometheusLogger(CustomLogger):
         api_call_total_time_seconds = api_call_total_time.total_seconds()
         self.litellm_llm_api_latency_metric.labels(
             **{
-                UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
                 UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
                 UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
                 UserAPIKeyLabelNames.TEAM.value: user_api_team,
@@ -686,7 +745,7 @@ class PrometheusLogger(CustomLogger):
                 UserAPIKeyLabelNames.USER.value: standard_logging_payload[
                     "metadata"
                 ]["user_api_key_user_id"],
-                UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
             }
         ).observe(total_time_seconds)
@@ -862,6 +921,24 @@ class PrometheusLogger(CustomLogger):
                 ],
             ).inc()
 
+            # tag based tracking
+            _tags = standard_logging_payload["request_tags"]
+            for tag in _tags:
+                self.litellm_deployment_failure_by_tag_responses.labels(
+                    **{
+                        UserAPIKeyLabelNames.REQUESTED_MODEL.value: model_group,
+                        UserAPIKeyLabelNames.TAG.value: tag,
+                        UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value: litellm_model_name,
+                        UserAPIKeyLabelNames.MODEL_ID.value: model_id,
+                        UserAPIKeyLabelNames.API_BASE.value: api_base,
+                        UserAPIKeyLabelNames.API_PROVIDER.value: llm_provider,
+                        UserAPIKeyLabelNames.EXCEPTION_CLASS.value: exception.__class__.__name__,
+                        UserAPIKeyLabelNames.EXCEPTION_STATUS.value: str(
+                            getattr(exception, "status_code", None)
+                        ),
+                    }
+                ).inc()
+
             self.litellm_deployment_total_requests.labels(
                 litellm_model_name=litellm_model_name,
                 model_id=model_id,
@@ -881,8 +958,12 @@ class PrometheusLogger(CustomLogger):
             ).inc()
 
             pass
-        except Exception:
-            pass
+        except Exception as e:
+            verbose_logger.debug(
+                "Prometheus Error: set_llm_deployment_failure_metrics. Exception occurred - {}".format(
+                    str(e)
+                )
+            )
 
     def set_llm_deployment_success_metrics(
         self,

Changed file: invoke_handler.py

@@ -9,7 +9,17 @@ import types
 import urllib.parse
 import uuid
 from functools import partial
-from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
+from typing import (
+    Any,
+    AsyncIterator,
+    Callable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
 
 import httpx  # type: ignore
@@ -36,8 +46,10 @@ from litellm.llms.custom_httpx.http_handler import (
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
     ChatCompletionToolCallChunk,
+    ChatCompletionToolCallFunctionChunk,
     ChatCompletionUsageBlock,
 )
+from litellm.types.utils import ChatCompletionMessageToolCall, Choices
 from litellm.types.utils import GenericStreamingChunk as GChunk
 from litellm.types.utils import ModelResponse, Usage
 from litellm.utils import CustomStreamWrapper, get_secret
@@ -1294,11 +1306,25 @@ class MockResponseIterator:  # for returning ai21 streaming responses
             chunk_usage: Usage = getattr(chunk_data, "usage")
             text = chunk_data.choices[0].message.content or ""  # type: ignore
             tool_use = None
+            _model_response_tool_call = cast(
+                Optional[List[ChatCompletionMessageToolCall]],
+                cast(Choices, chunk_data.choices[0]).message.tool_calls,
+            )
             if self.json_mode is True:
                 text, tool_use = self._handle_json_mode_chunk(
                     text=text,
                     tool_calls=chunk_data.choices[0].message.tool_calls,  # type: ignore
                 )
+            elif _model_response_tool_call is not None:
+                tool_use = ChatCompletionToolCallChunk(
+                    id=_model_response_tool_call[0].id,
+                    type="function",
+                    function=ChatCompletionToolCallFunctionChunk(
+                        name=_model_response_tool_call[0].function.name,
+                        arguments=_model_response_tool_call[0].function.arguments,
+                    ),
+                    index=0,
+                )
             processed_chunk = GChunk(
                 text=text,
                 tool_use=tool_use,

Changed file: Prometheus label name types (UserAPIKeyLabelNames enum)

@@ -53,4 +53,11 @@ class UserAPIKeyLabelNames(Enum):
     TEAM = "team"
     TEAM_ALIAS = "team_alias"
     REQUESTED_MODEL = REQUESTED_MODEL
-    LITELLM_MODEL = "model"
+    v1_LITELLM_MODEL_NAME = "model"
+    v2_LITELLM_MODEL_NAME = "litellm_model_name"
+    TAG = "tag"
+    MODEL_ID = "model_id"
+    API_BASE = "api_base"
+    API_PROVIDER = "api_provider"
+    EXCEPTION_STATUS = EXCEPTION_STATUS
+    EXCEPTION_CLASS = EXCEPTION_CLASS

Changed file: stream chunk builder tests

@@ -745,3 +745,20 @@ def test_stream_chunk_builder_empty_initial_chunk():
     id = ChunkProcessor._get_chunk_id(chunks)
     assert id == "1"
 
+
+import json
+
+
+def get_current_weather(location, unit="fahrenheit"):
+    """Get the current weather in a given location"""
+    if "tokyo" in location.lower():
+        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"})
+    elif "san francisco" in location.lower():
+        return json.dumps(
+            {"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
+        )
+    elif "paris" in location.lower():
+        return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"})
+    else:
+        return json.dumps({"location": location, "temperature": "unknown"})

Changed file: streaming tests

@@ -3990,3 +3990,69 @@ def test_streaming_api_base():
         stream=True,
     )
     assert "https://api.openai.com" in stream._hidden_params["api_base"]
 
+
+def test_mock_response_iterator_tool_use():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/7364
+    """
+    from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
+    from litellm.types.utils import (
+        ChatCompletionMessageToolCall,
+        Function,
+        Message,
+        Usage,
+        CompletionTokensDetailsWrapper,
+        PromptTokensDetailsWrapper,
+        Choices,
+    )
+
+    litellm.set_verbose = False
+    response = ModelResponse(
+        id="chatcmpl-Ai8KRI5vJPZXQ9SQvEJfTVuVqkyEZ",
+        created=1735081811,
+        model="o1-2024-12-17",
+        object="chat.completion",
+        system_fingerprint="fp_e6d02d4a78",
+        choices=[
+            Choices(
+                finish_reason="tool_calls",
+                index=0,
+                message=Message(
+                    content=None,
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            function=Function(
+                                arguments='{"location":"San Francisco, CA","unit":"fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="call_BfRX2S7YCKL0BtxbWMl89ZNk",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+            )
+        ],
+        usage=Usage(
+            completion_tokens=1955,
+            prompt_tokens=85,
+            total_tokens=2040,
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                accepted_prediction_tokens=0,
+                audio_tokens=0,
+                reasoning_tokens=1920,
+                rejected_prediction_tokens=0,
+                text_tokens=None,
+            ),
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None
+            ),
+        ),
+        service_tier=None,
+    )
+    completion_stream = MockResponseIterator(model_response=response)
+    response_chunk = completion_stream._chunk_parser(chunk_data=response)
+    assert response_chunk["tool_use"] is not None