Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
Litellm dev 12 24 2024 p4 (#7407)
* fix(invoke_handler.py): fix mock response iterator to handle tool calling; returns the tool call if one is returned by the model response
* fix(prometheus.py): add new 'tokens_by_tag' metric on prometheus; allows tracking 'token usage' by task
* feat(prometheus.py): add input + output token tracking by tag
* feat(prometheus.py): add tag-based deployment failure tracking; allows an admin to track failures by use-case
parent 81be0b4090
commit 39dabb2e89
5 changed files with 209 additions and 12 deletions
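The tag-based metrics introduced below are plain Prometheus counters keyed by a "tag" label. As a minimal sketch, assuming a direct prometheus_client setup rather than LiteLLM's actual wiring (the metric and label names come from the diff; the tag values and token counts are hypothetical):

from prometheus_client import Counter, generate_latest

# Same metric/label names as in the prometheus.py hunks below; everything else is illustrative.
litellm_total_tokens_by_tag = Counter(
    "litellm_total_tokens_by_tag",
    "Total number of input + output tokens from LLM requests by custom metadata tags",
    labelnames=["tag"],
)

# On each successful request, increment once per tag attached to the request.
request_tags = ["prod", "summarization"]  # hypothetical request tags
total_tokens = 2040                       # hypothetical token usage for the request
for tag in request_tags:
    litellm_total_tokens_by_tag.labels(tag=tag).inc(total_tokens)

# The exporter then serves one time series per tag, e.g.
#   litellm_total_tokens_by_tag_total{tag="prod"} 2040.0
print(generate_latest().decode())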
@@ -76,7 +76,7 @@ class PrometheusLogger(CustomLogger):
                 UserAPIKeyLabelNames.TEAM.value,
                 UserAPIKeyLabelNames.TEAM_ALIAS.value,
                 UserAPIKeyLabelNames.USER.value,
-                UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
             ],
             buckets=LATENCY_BUCKETS,
         )
@@ -85,7 +85,7 @@ class PrometheusLogger(CustomLogger):
             "litellm_llm_api_latency_metric",
             "Total latency (seconds) for a models LLM API call",
             labelnames=[
-                UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
                 UserAPIKeyLabelNames.API_KEY_HASH.value,
                 UserAPIKeyLabelNames.API_KEY_ALIAS.value,
                 UserAPIKeyLabelNames.TEAM.value,
@@ -140,6 +140,14 @@ class PrometheusLogger(CustomLogger):
             ],
         )

+        # Counter for tokens by tag
+        self.litellm_tokens_by_tag_metric = Counter(
+            "litellm_total_tokens_by_tag",
+            "Total number of input + output tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
         self.litellm_input_tokens_metric = Counter(
             "litellm_input_tokens",
             "Total number of input tokens from LLM requests",
@@ -153,6 +161,16 @@ class PrometheusLogger(CustomLogger):
                 "user",
             ],
         )
+
+        # Counter for input tokens by tag
+        self.litellm_input_tokens_by_tag_metric = Counter(
+            "litellm_input_tokens_by_tag",
+            "Total number of input tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
+
         self.litellm_output_tokens_metric = Counter(
             "litellm_output_tokens",
             "Total number of output tokens from LLM requests",
@@ -167,6 +185,15 @@ class PrometheusLogger(CustomLogger):
             ],
         )

+        # Counter for output tokens by tag
+        self.litellm_output_tokens_by_tag_metric = Counter(
+            "litellm_output_tokens_by_tag",
+            "Total number of output tokens from LLM requests by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.TAG.value,
+            ],
+        )
+
         # Remaining Budget for Team
         self.litellm_remaining_team_budget_metric = Gauge(
             "litellm_remaining_team_budget_metric",
@@ -237,10 +264,10 @@ class PrometheusLogger(CustomLogger):

         # Get all keys
         _logged_llm_labels = [
-            "litellm_model_name",
-            "model_id",
-            "api_base",
-            "api_provider",
+            UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value,
+            UserAPIKeyLabelNames.MODEL_ID.value,
+            UserAPIKeyLabelNames.API_BASE.value,
+            UserAPIKeyLabelNames.API_PROVIDER.value,
         ]
         team_and_key_labels = [
             "hashed_api_key",
@@ -275,6 +302,16 @@ class PrometheusLogger(CustomLogger):
             + EXCEPTION_LABELS
             + team_and_key_labels,
         )
+        self.litellm_deployment_failure_by_tag_responses = Counter(
+            "litellm_deployment_failure_by_tag_responses",
+            "Total number of failed LLM API calls for a specific LLM deploymeny by custom metadata tags",
+            labelnames=[
+                UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+                UserAPIKeyLabelNames.TAG.value,
+            ]
+            + _logged_llm_labels
+            + EXCEPTION_LABELS,
+        )
         self.litellm_deployment_total_requests = Counter(
             name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
@@ -490,6 +527,14 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["total_tokens"])

+        _tags = standard_logging_payload["request_tags"]
+        for tag in _tags:
+            self.litellm_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["total_tokens"])
+
         self.litellm_input_tokens_metric.labels(
             end_user_id,
             user_api_key,
@@ -500,6 +545,13 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["prompt_tokens"])

+        for tag in _tags:
+            self.litellm_input_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["prompt_tokens"])
+
         self.litellm_output_tokens_metric.labels(
             end_user_id,
             user_api_key,
@@ -510,6 +562,13 @@ class PrometheusLogger(CustomLogger):
             user_id,
         ).inc(standard_logging_payload["completion_tokens"])

+        for tag in _tags:
+            self.litellm_output_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["completion_tokens"])
+
     def _increment_remaining_budget_metrics(
         self,
         user_api_team: Optional[str],
@@ -651,7 +710,7 @@ class PrometheusLogger(CustomLogger):
         api_call_total_time_seconds = api_call_total_time.total_seconds()
         self.litellm_llm_api_latency_metric.labels(
             **{
-                UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
                 UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
                 UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
                 UserAPIKeyLabelNames.TEAM.value: user_api_team,
@@ -686,7 +745,7 @@ class PrometheusLogger(CustomLogger):
                 UserAPIKeyLabelNames.USER.value: standard_logging_payload[
                     "metadata"
                 ]["user_api_key_user_id"],
-                UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
             }
         ).observe(total_time_seconds)

@@ -862,6 +921,24 @@ class PrometheusLogger(CustomLogger):
                 ],
             ).inc()

+            # tag based tracking
+            _tags = standard_logging_payload["request_tags"]
+            for tag in _tags:
+                self.litellm_deployment_failure_by_tag_responses.labels(
+                    **{
+                        UserAPIKeyLabelNames.REQUESTED_MODEL.value: model_group,
+                        UserAPIKeyLabelNames.TAG.value: tag,
+                        UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value: litellm_model_name,
+                        UserAPIKeyLabelNames.MODEL_ID.value: model_id,
+                        UserAPIKeyLabelNames.API_BASE.value: api_base,
+                        UserAPIKeyLabelNames.API_PROVIDER.value: llm_provider,
+                        UserAPIKeyLabelNames.EXCEPTION_CLASS.value: exception.__class__.__name__,
+                        UserAPIKeyLabelNames.EXCEPTION_STATUS.value: str(
+                            getattr(exception, "status_code", None)
+                        ),
+                    }
+                ).inc()
+
             self.litellm_deployment_total_requests.labels(
                 litellm_model_name=litellm_model_name,
                 model_id=model_id,
@@ -881,8 +958,12 @@ class PrometheusLogger(CustomLogger):
             ).inc()

             pass
-        except Exception:
-            pass
+        except Exception as e:
+            verbose_logger.debug(
+                "Prometheus Error: set_llm_deployment_failure_metrics. Exception occured - {}".format(
+                    str(e)
+                )
+            )

     def set_llm_deployment_success_metrics(
         self,
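As a rough sketch of the tag-based failure tracking pattern used above, assuming plain prometheus_client and hypothetical label values (the literal label strings for requested_model and the exception labels are assumptions; tag, litellm_model_name, model_id, api_base and api_provider match the UserAPIKeyLabelNames values added later in this diff):

from prometheus_client import Counter

litellm_deployment_failure_by_tag_responses = Counter(
    "litellm_deployment_failure_by_tag_responses",
    "Total number of failed LLM API calls for a specific LLM deployment by custom metadata tags",
    labelnames=[
        "requested_model", "tag",
        "litellm_model_name", "model_id", "api_base", "api_provider",
        "exception_status", "exception_class",
    ],
)

def record_failure(exception: Exception, request_tags: list) -> None:
    # One increment per tag, carrying deployment and exception details as labels.
    for tag in request_tags:
        litellm_deployment_failure_by_tag_responses.labels(
            **{
                "requested_model": "gpt-4o",                # hypothetical model group
                "tag": tag,
                "litellm_model_name": "gpt-4o-2024-08-06",  # hypothetical deployment model
                "model_id": "deployment-1",                 # hypothetical deployment id
                "api_base": "https://api.openai.com",
                "api_provider": "openai",
                "exception_class": exception.__class__.__name__,
                "exception_status": str(getattr(exception, "status_code", None)),
            }
        ).inc()

record_failure(TimeoutError("upstream timed out"), ["prod"])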
@@ -9,7 +9,17 @@ import types
 import urllib.parse
 import uuid
 from functools import partial
-from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
+from typing import (
+    Any,
+    AsyncIterator,
+    Callable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)

 import httpx  # type: ignore

@@ -36,8 +46,10 @@ from litellm.llms.custom_httpx.http_handler import (
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
     ChatCompletionToolCallChunk,
+    ChatCompletionToolCallFunctionChunk,
     ChatCompletionUsageBlock,
 )
+from litellm.types.utils import ChatCompletionMessageToolCall, Choices
 from litellm.types.utils import GenericStreamingChunk as GChunk
 from litellm.types.utils import ModelResponse, Usage
 from litellm.utils import CustomStreamWrapper, get_secret
@@ -1294,11 +1306,25 @@ class MockResponseIterator:  # for returning ai21 streaming responses
         chunk_usage: Usage = getattr(chunk_data, "usage")
         text = chunk_data.choices[0].message.content or ""  # type: ignore
         tool_use = None
+        _model_response_tool_call = cast(
+            Optional[List[ChatCompletionMessageToolCall]],
+            cast(Choices, chunk_data.choices[0]).message.tool_calls,
+        )
         if self.json_mode is True:
             text, tool_use = self._handle_json_mode_chunk(
                 text=text,
                 tool_calls=chunk_data.choices[0].message.tool_calls,  # type: ignore
             )
+        elif _model_response_tool_call is not None:
+            tool_use = ChatCompletionToolCallChunk(
+                id=_model_response_tool_call[0].id,
+                type="function",
+                function=ChatCompletionToolCallFunctionChunk(
+                    name=_model_response_tool_call[0].function.name,
+                    arguments=_model_response_tool_call[0].function.arguments,
+                ),
+                index=0,
+            )
         processed_chunk = GChunk(
             text=text,
             tool_use=tool_use,
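The chunk the mock response iterator now yields carries the tool call in its tool_use field. A small self-contained sketch of that shape and how a caller might decode it (the id and arguments here are hypothetical; in the library the chunk comes out of MockResponseIterator._chunk_parser, as exercised by the test at the end of this diff):

import json

from litellm.types.llms.openai import (
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
)

# Built by hand for illustration; mirrors what the new elif branch above constructs.
tool_use = ChatCompletionToolCallChunk(
    id="call_123",  # hypothetical tool call id
    type="function",
    function=ChatCompletionToolCallFunctionChunk(
        name="get_current_weather",
        arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
    ),
    index=0,
)

# These types are TypedDicts in litellm, so downstream code reads them with dict access:
print(tool_use["function"]["name"])
print(json.loads(tool_use["function"]["arguments"]))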
@@ -53,4 +53,11 @@ class UserAPIKeyLabelNames(Enum):
     TEAM = "team"
     TEAM_ALIAS = "team_alias"
     REQUESTED_MODEL = REQUESTED_MODEL
-    LITELLM_MODEL = "model"
+    v1_LITELLM_MODEL_NAME = "model"
+    v2_LITELLM_MODEL_NAME = "litellm_model_name"
+    TAG = "tag"
+    MODEL_ID = "model_id"
+    API_BASE = "api_base"
+    API_PROVIDER = "api_provider"
+    EXCEPTION_STATUS = EXCEPTION_STATUS
+    EXCEPTION_CLASS = EXCEPTION_CLASS
@@ -745,3 +745,20 @@ def test_stream_chunk_builder_empty_initial_chunk():

     id = ChunkProcessor._get_chunk_id(chunks)
     assert id == "1"
+
+
+import json
+
+
+def get_current_weather(location, unit="fahrenheit"):
+    """Get the current weather in a given location"""
+    if "tokyo" in location.lower():
+        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"})
+    elif "san francisco" in location.lower():
+        return json.dumps(
+            {"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
+        )
+    elif "paris" in location.lower():
+        return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"})
+    else:
+        return json.dumps({"location": location, "temperature": "unknown"})
@@ -3990,3 +3990,69 @@ def test_streaming_api_base():
         stream=True,
     )
     assert "https://api.openai.com" in stream._hidden_params["api_base"]
+
+
+def test_mock_response_iterator_tool_use():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/7364
+    """
+    from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
+    from litellm.types.utils import (
+        ChatCompletionMessageToolCall,
+        Function,
+        Message,
+        Usage,
+        CompletionTokensDetailsWrapper,
+        PromptTokensDetailsWrapper,
+        Choices,
+    )
+
+    litellm.set_verbose = False
+    response = ModelResponse(
+        id="chatcmpl-Ai8KRI5vJPZXQ9SQvEJfTVuVqkyEZ",
+        created=1735081811,
+        model="o1-2024-12-17",
+        object="chat.completion",
+        system_fingerprint="fp_e6d02d4a78",
+        choices=[
+            Choices(
+                finish_reason="tool_calls",
+                index=0,
+                message=Message(
+                    content=None,
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            function=Function(
+                                arguments='{"location":"San Francisco, CA","unit":"fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="call_BfRX2S7YCKL0BtxbWMl89ZNk",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+            )
+        ],
+        usage=Usage(
+            completion_tokens=1955,
+            prompt_tokens=85,
+            total_tokens=2040,
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                accepted_prediction_tokens=0,
+                audio_tokens=0,
+                reasoning_tokens=1920,
+                rejected_prediction_tokens=0,
+                text_tokens=None,
+            ),
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None
+            ),
+        ),
+        service_tier=None,
+    )
+    completion_stream = MockResponseIterator(model_response=response)
+    response_chunk = completion_stream._chunk_parser(chunk_data=response)
+
+    assert response_chunk["tool_use"] is not None