Litellm dev 12 24 2024 p4 (#7407)

* fix(invoke_handler.py): fix mock response iterator to handle tool calling

returns tool call if returned by model response

* fix(prometheus.py): add new 'tokens_by_tag' metric on prometheus

allows tracking 'token usage' by tag

* feat(prometheus.py): add input + output token tracking by tag

* feat(prometheus.py): add tag based deployment failure tracking

allows admins to track failures by use-case
This commit is contained in:
Krish Dholakia 2024-12-24 20:24:06 -08:00 committed by GitHub
parent 81be0b4090
commit 39dabb2e89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 209 additions and 12 deletions

View file

@ -9,7 +9,17 @@ import types
import urllib.parse
import uuid
from functools import partial
from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
from typing import (
Any,
AsyncIterator,
Callable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
)
import httpx # type: ignore
@ -36,8 +46,10 @@ from litellm.llms.custom_httpx.http_handler import (
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionUsageBlock,
)
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
from litellm.types.utils import GenericStreamingChunk as GChunk
from litellm.types.utils import ModelResponse, Usage
from litellm.utils import CustomStreamWrapper, get_secret
@ -1294,11 +1306,25 @@ class MockResponseIterator: # for returning ai21 streaming responses
chunk_usage: Usage = getattr(chunk_data, "usage")
text = chunk_data.choices[0].message.content or "" # type: ignore
tool_use = None
_model_response_tool_call = cast(
Optional[List[ChatCompletionMessageToolCall]],
cast(Choices, chunk_data.choices[0]).message.tool_calls,
)
if self.json_mode is True:
text, tool_use = self._handle_json_mode_chunk(
text=text,
tool_calls=chunk_data.choices[0].message.tool_calls, # type: ignore
)
elif _model_response_tool_call is not None:
tool_use = ChatCompletionToolCallChunk(
id=_model_response_tool_call[0].id,
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=_model_response_tool_call[0].function.name,
arguments=_model_response_tool_call[0].function.arguments,
),
index=0,
)
processed_chunk = GChunk(
text=text,
tool_use=tool_use,