Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-25 18:54:30 +00:00
LiteLLM Minor Fixes & Improvements (01/08/2025) - p2 (#7643)
* fix(streaming_chunk_builder_utils.py): add test for groq tool calling + streaming + combining chunks

  Addresses https://github.com/BerriAI/litellm/issues/7621

* fix(streaming_utils.py): fix ModelResponseIterator for the OpenAI-like chunk parser

  Ensures the chunk parser uses the correct tool call id when translating the chunk.

  Fixes https://github.com/BerriAI/litellm/issues/7621

* build(model_hub.tsx): display cost pricing on model hub

* build(model_hub.tsx): show cost-per-token pricing + complete model information

* fix(types/utils.py): fix usage object handling
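For context, a minimal sketch of the failure mode behind issue 7621, adapted from the test this PR adds below (the tool schema here is illustrative, and a groq API key must be set in the environment):

```python
# Sketch: groq streams one chunk per parallel tool call, each carrying its
# own tool-call `index`. Before this fix, reassembling the stream could
# collapse the three calls into one.
import litellm

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    },
}]

chunks = list(
    litellm.completion(
        model="groq/llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": "Weather in SF, Tokyo, and Paris?"}],
        tools=tools,
        stream=True,
    )
)
assembled = litellm.stream_chunk_builder(chunks)
# Expected after the fix: one combined tool call per city.
print(len(assembled.choices[0].message.tool_calls))  # 3
```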
This commit is contained in:
parent 39ee4c6bb4
commit 1e3370f3cb

9 changed files with 206 additions and 21 deletions
streaming_chunk_builder_utils.py

@@ -103,7 +103,8 @@ class ChunkProcessor:
     def get_combined_tool_content(
         self, tool_call_chunks: List[Dict[str, Any]]
     ) -> List[ChatCompletionMessageToolCall]:
-        argument_list: List = []
+        argument_list: List[str] = []
         delta = tool_call_chunks[0]["choices"][0]["delta"]
         id = None
         name = None

@@ -171,6 +172,7 @@ class ChunkProcessor:
                     ),
                 )
             )
+
         return tool_calls_list

     def get_combined_function_call_content(
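To make the `argument_list` change easier to review, here is a hand-rolled sketch (not the library implementation) of the combining problem `get_combined_tool_content` solves: argument fragments belonging to the same tool call are concatenated in order, while a new tool-call `index` starts a new entry.

```python
from typing import Any, Dict, List

def combine_tool_call_chunks(deltas: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Hand-rolled sketch: merge streamed tool-call deltas by tool-call index."""
    combined: Dict[int, Dict[str, Any]] = {}
    for delta in deltas:
        for tc in delta.get("tool_calls") or []:
            slot = combined.setdefault(
                tc["index"], {"id": None, "name": None, "arguments": ""}
            )
            # id and name usually appear only on a call's first fragment
            slot["id"] = slot["id"] or tc.get("id")
            slot["name"] = slot["name"] or tc["function"].get("name")
            slot["arguments"] += tc["function"].get("arguments") or ""
    return [combined[i] for i in sorted(combined)]

# Two fragments of one call (index 0) plus a second call (index 1):
deltas = [
    {"tool_calls": [{"index": 0, "id": "call_1",
                     "function": {"name": "get_current_weather",
                                  "arguments": '{"location": '}}]},
    {"tool_calls": [{"index": 0, "function": {"arguments": '"Tokyo"}'}}]},
    {"tool_calls": [{"index": 1, "id": "call_2",
                     "function": {"name": "get_current_weather",
                                  "arguments": '{"location": "Paris"}'}}]},
]
print(combine_tool_call_chunks(deltas))
# -> two combined calls, with call_1's arguments reassembled
```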
streaming_utils.py

@@ -17,7 +17,7 @@ class ModelResponseIterator:
     def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
         try:
-            processed_chunk = litellm.ModelResponse(**chunk, stream=True)  # type: ignore
+            processed_chunk = litellm.ModelResponseStream(**chunk)

             text = ""
             tool_use: Optional[ChatCompletionToolCallChunk] = None

@@ -46,7 +46,7 @@ class ModelResponseIterator:
                     .delta.tool_calls[0]  # type: ignore
                     .function.arguments,
                 ),
-                index=processed_chunk.choices[0].index,
+                index=processed_chunk.choices[0].delta.tool_calls[0].index,
             )

         if processed_chunk.choices[0].finish_reason is not None:
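The second hunk is the heart of the fix. `choices[0].index` is the index of the *choice* (always 0 when a single completion is requested), so every parallel tool call was reported with index 0; `choices[0].delta.tool_calls[0].index` is the tool call's own position within the message, which the chunk builder keys on. A standalone illustration (the chunk payload is invented, but shaped like the test data below):

```python
# Why the index fix matters: choice index vs. tool-call index.
chunk = {
    "choices": [{
        "index": 0,  # choice index: the same for every chunk when n=1
        "delta": {"tool_calls": [{
            "index": 2,  # third parallel tool call in this message
            "id": "call_hyj5",
            "function": {"name": "get_current_weather",
                         "arguments": '{"location": "Paris"}'},
        }]},
    }]
}
choice_index = chunk["choices"][0]["index"]  # what the old code used -> 0
tool_call_index = chunk["choices"][0]["delta"]["tool_calls"][0]["index"]  # -> 2
assert (choice_index, tool_call_index) == (0, 2)
```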
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -25,7 +25,3 @@ model_list:
       identifier: deepseek-ai/DeepSeek-V3-Base
       revision: main
       auth_token: os.environ/HUGGINGFACE_API_KEY
-
-service_callback: ["prometheus_system"]
-callbacks: ["prometheus"]
-cache: true
types/utils.py

@@ -847,6 +847,13 @@ class ModelResponseStream(ModelResponseBase):
         else:
             created = created

+        if (
+            "usage" in kwargs
+            and kwargs["usage"] is not None
+            and isinstance(kwargs["usage"], dict)
+        ):
+            kwargs["usage"] = Usage(**kwargs["usage"])
+
         kwargs["id"] = id
         kwargs["created"] = created
         kwargs["object"] = "chat.completion.chunk"
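The usage fix in plain terms: if a caller passes `usage` as a raw dict, it is promoted to a typed `Usage` object before the streaming chunk is built. A standalone re-statement of that behavior (assuming `litellm.types.utils.Usage` accepts the standard token-count fields):

```python
# Sketch of the coercion the diff above adds, outside the class.
from litellm.types.utils import Usage

kwargs = {"usage": {"prompt_tokens": 12, "completion_tokens": 7, "total_tokens": 19}}
if isinstance(kwargs.get("usage"), dict):
    kwargs["usage"] = Usage(**kwargs["usage"])
assert kwargs["usage"].total_tokens == 19
```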
@@ -747,6 +747,125 @@ def test_stream_chunk_builder_empty_initial_chunk():
     assert id == "1"


+def test_stream_chunk_builder_tool_calls_list():
+    from litellm.litellm_core_utils.streaming_chunk_builder_utils import (
+        ChunkProcessor,
+    )
+    from litellm.types.utils import (
+        ChatCompletionMessageToolCall,
+        Function,
+        ModelResponseStream,
+        Delta,
+        StreamingChoices,
+        ChatCompletionDeltaToolCall,
+    )
+
+    chunks = [
+        ModelResponseStream(
+            id="chatcmpl-f323f7a5-2da0-4f86-8ed7-c653c5a359d9",
+            created=1736388417,
+            model="llama-3.3-70b-versatile",
+            object="chat.completion.chunk",
+            system_fingerprint=None,
+            choices=[
+                StreamingChoices(
+                    finish_reason=None,
+                    index=0,
+                    delta=Delta(
+                        content="",
+                        role="assistant",
+                        function_call=None,
+                        tool_calls=[
+                            ChatCompletionDeltaToolCall(
+                                id="call_9y79",
+                                function=Function(
+                                    arguments='{"location": "San Francisco", "unit": "celsius"}',
+                                    name="get_current_weather",
+                                ),
+                                type="function",
+                                index=0,
+                            )
+                        ],
+                        audio=None,
+                    ),
+                    logprobs=None,
+                )
+            ],
+            stream_options=None,
+        ),
+        ModelResponseStream(
+            id="chatcmpl-f323f7a5-2da0-4f86-8ed7-c653c5a359d9",
+            created=1736388417,
+            model="llama-3.3-70b-versatile",
+            object="chat.completion.chunk",
+            system_fingerprint=None,
+            choices=[
+                StreamingChoices(
+                    finish_reason=None,
+                    index=0,
+                    delta=Delta(
+                        content="",
+                        role=None,
+                        function_call=None,
+                        tool_calls=[
+                            ChatCompletionDeltaToolCall(
+                                id="call_pfp7",
+                                function=Function(
+                                    arguments='{"location": "Tokyo", "unit": "celsius"}',
+                                    name="get_current_weather",
+                                ),
+                                type="function",
+                                index=1,
+                            )
+                        ],
+                        audio=None,
+                    ),
+                    logprobs=None,
+                )
+            ],
+            stream_options=None,
+        ),
+        ModelResponseStream(
+            id="chatcmpl-f323f7a5-2da0-4f86-8ed7-c653c5a359d9",
+            created=1736388417,
+            model="llama-3.3-70b-versatile",
+            object="chat.completion.chunk",
+            system_fingerprint=None,
+            choices=[
+                StreamingChoices(
+                    finish_reason=None,
+                    index=0,
+                    delta=Delta(
+                        content="",
+                        role=None,
+                        function_call=None,
+                        tool_calls=[
+                            ChatCompletionDeltaToolCall(
+                                id="call_hyj5",
+                                function=Function(
+                                    arguments='{"location": "Paris", "unit": "celsius"}',
+                                    name="get_current_weather",
+                                ),
+                                type="function",
+                                index=2,
+                            )
+                        ],
+                        audio=None,
+                    ),
+                    logprobs=None,
+                )
+            ],
+            stream_options=None,
+        ),
+    ]
+
+    processor = ChunkProcessor(chunks=chunks)
+
+    tool_calls = processor.get_combined_tool_content(tool_call_chunks=chunks)
+    print(f"tool_calls: {tool_calls}")
+    assert len(tool_calls) == 3
+
+
 import json
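For reference, a sketch of what the assertion expects: three combined calls, one per streamed tool-call `index` (written here as plain dicts; the real return type is `ChatCompletionMessageToolCall`):

```python
# Shape sketch of the expected combined output (values echo the test chunks).
expected = [
    {"id": "call_9y79", "type": "function",
     "function": {"name": "get_current_weather",
                  "arguments": '{"location": "San Francisco", "unit": "celsius"}'}},
    {"id": "call_pfp7", "type": "function",
     "function": {"name": "get_current_weather",
                  "arguments": '{"location": "Tokyo", "unit": "celsius"}'}},
    {"id": "call_hyj5", "type": "function",
     "function": {"name": "get_current_weather",
                  "arguments": '{"location": "Paris", "unit": "celsius"}'}},
]
assert len(expected) == 3  # one combined call per streamed index
```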
@@ -762,3 +881,55 @@ def get_current_weather(location, unit="fahrenheit"):
         return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"})
     else:
         return json.dumps({"location": location, "temperature": "unknown"})
+
+
+@pytest.fixture(scope="module", autouse=True)
+def load_env():
+    messages = [
+        {"role": "system", "content": "You are a helpful AI assistant"},
+        {
+            "role": "user",
+            "content": "What's the weather like in San Francisco, Tokyo, and Paris?",
+        },
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": litellm.utils.function_to_dict(get_current_weather),
+        }
+    ]
+    OPENAI_GPT4oMINI = {
+        "messages": messages,
+        "model": "gpt-4o-mini",
+        "temperature": 0.0,
+        "tools": tools,
+        "stream": True,
+    }
+    LLAMA3_3 = {
+        "messages": messages,
+        "model": "groq/llama-3.3-70b-versatile",
+        "api_base": "https://api.groq.com/openai/v1",
+        "temperature": 0.0,
+        "tools": tools,
+        "stream": True,
+    }
+    return OPENAI_GPT4oMINI, LLAMA3_3
+
+
+def execute_completion(opts: dict):
+    partial_streaming_chunks = []
+    response_gen = litellm.completion(**opts)
+    for i, part in enumerate(response_gen):
+        partial_streaming_chunks.append(part)
+    assembly = litellm.stream_chunk_builder(partial_streaming_chunks)
+    print(assembly.choices[0].message.tool_calls)
+    assert len(assembly.choices[0].message.tool_calls) == 3, (
+        assembly.choices[0].message.tool_calls[0].function.arguments[0]
+    )
+    print(assembly.choices[0].message.tool_calls)
+
+
+def test_grok_bug(load_env):
+    litellm.set_verbose = True
+    _, LLAMA3_3 = load_env
+    execute_completion(LLAMA3_3)
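Reviewer note: `test_grok_bug` exercises the live `groq/llama-3.3-70b-versatile` endpoint, so it presumably needs a groq API key in the environment, whereas `test_stream_chunk_builder_tool_calls_list` above runs entirely offline against pre-built chunks.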
model_hub.tsx

@@ -34,6 +34,8 @@ interface ModelInfo {
   supports_vision: boolean;
   max_input_tokens?: number;
   max_output_tokens?: number;
+  input_cost_per_token?: number;
+  output_cost_per_token?: number;
   supported_openai_params?: string[];
 }
@@ -161,26 +163,29 @@ const ModelHub: React.FC<ModelHubProps> = ({
                 </Tooltip>
               </pre>
               <div className="my-5">
                 <Text>Mode: {model.mode}</Text>
                 <Text>
                   Supports Function Calling:{" "}
                   {model?.supports_function_calling == true ? "Yes" : "No"}
                 </Text>
                 <Text>
                   Supports Vision:{" "}
                   {model?.supports_vision == true ? "Yes" : "No"}
                 </Text>
                 <Text>
                   Max Input Tokens:{" "}
                   {model?.max_input_tokens
                     ? model?.max_input_tokens
-                    : "N/A"}
+                    : "Unknown"}
                 </Text>
                 <Text>
                   Max Output Tokens:{" "}
                   {model?.max_output_tokens
                     ? model?.max_output_tokens
-                    : "N/A"}
+                    : "Unknown"}
                 </Text>
+                <Text>
+                  Input Cost Per Token:{" "}
+                  {model?.input_cost_per_token
+                    ? `$${(model.input_cost_per_token * 1_000_000).toFixed(2)}`
+                    : "Unknown"}
+                </Text>
+                <Text>
+                  Output Cost Per Token:{" "}
+                  {model?.output_cost_per_token
+                    ? `$${(model.output_cost_per_token * 1_000_000).toFixed(2)}`
+                    : "Unknown"}
+                </Text>
               </div>
               <div style={{ marginTop: "auto", textAlign: "right" }}>
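Although the labels read "Cost Per Token", the rendered value is the per-token rate multiplied by 1,000,000, i.e. dollars per 1M tokens. A quick sanity check of that arithmetic (the rates here are made up):

```python
# Made-up example rates: $0.15 / 1M input tokens, stored as a per-token rate.
input_cost_per_token = 0.15e-6
output_cost_per_token = 0.60e-6

def per_million(rate: float) -> str:
    """Format a per-token rate as dollars per 1M tokens, mirroring the UI."""
    return f"${rate * 1_000_000:.2f}"

assert per_million(input_cost_per_token) == "$0.15"
assert per_million(output_cost_per_token) == "$0.60"
```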
@@ -245,12 +250,19 @@ const ModelHub: React.FC<ModelHubProps> = ({

           <TabGroup>
             <TabList>
+              <Tab>Model Information</Tab>
               <Tab>OpenAI Python SDK</Tab>
               <Tab>Supported OpenAI Params</Tab>
               <Tab>LlamaIndex</Tab>
               <Tab>Langchain Py</Tab>
             </TabList>
             <TabPanels>
+              <TabPanel>
+                <Text>
+                  <strong>Model Group:</strong>
+                  <pre>{JSON.stringify(selectedModel, null, 2)}</pre>
+                </Text>
+              </TabPanel>
               <TabPanel>
                 <SyntaxHighlighter language="python">
                   {`