mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Litellm dev 12 24 2024 p4 (#7407)
* fix(invoke_handler.py): fix the mock response iterator to handle tool calling — return the tool call if one is returned by the model response
* fix(prometheus.py): add a new 'tokens_by_tag' metric on Prometheus, allowing 'token usage' to be tracked by tag
* feat(prometheus.py): add input + output token tracking by tag
* feat(prometheus.py): add tag-based deployment failure tracking, allowing admins to track failures by use-case
This commit is contained in:
parent
81be0b4090
commit
39dabb2e89
5 changed files with 209 additions and 12 deletions
|
@ -9,7 +9,17 @@ import types
|
|||
import urllib.parse
|
||||
import uuid
|
||||
from functools import partial
|
||||
from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
|
||||
from typing import (
|
||||
Any,
|
||||
AsyncIterator,
|
||||
Callable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
import httpx # type: ignore
|
||||
|
||||
|
@ -36,8 +46,10 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
from litellm.types.llms.bedrock import *
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
)
|
||||
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
|
||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||
from litellm.types.utils import ModelResponse, Usage
|
||||
from litellm.utils import CustomStreamWrapper, get_secret
|
||||
|
@ -1294,11 +1306,25 @@ class MockResponseIterator: # for returning ai21 streaming responses
|
|||
chunk_usage: Usage = getattr(chunk_data, "usage")
|
||||
text = chunk_data.choices[0].message.content or "" # type: ignore
|
||||
tool_use = None
|
||||
_model_response_tool_call = cast(
|
||||
Optional[List[ChatCompletionMessageToolCall]],
|
||||
cast(Choices, chunk_data.choices[0]).message.tool_calls,
|
||||
)
|
||||
if self.json_mode is True:
|
||||
text, tool_use = self._handle_json_mode_chunk(
|
||||
text=text,
|
||||
tool_calls=chunk_data.choices[0].message.tool_calls, # type: ignore
|
||||
)
|
||||
elif _model_response_tool_call is not None:
|
||||
tool_use = ChatCompletionToolCallChunk(
|
||||
id=_model_response_tool_call[0].id,
|
||||
type="function",
|
||||
function=ChatCompletionToolCallFunctionChunk(
|
||||
name=_model_response_tool_call[0].function.name,
|
||||
arguments=_model_response_tool_call[0].function.arguments,
|
||||
),
|
||||
index=0,
|
||||
)
|
||||
processed_chunk = GChunk(
|
||||
text=text,
|
||||
tool_use=tool_use,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue