LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata

Fixes https://github.com/BerriAI/litellm/issues/5935

* build(model_prices_and_context_window.json): add mistral pixtral cost tracking

Closes https://github.com/BerriAI/litellm/issues/5837

* handle streaming for azure ai studio error

* [Perf Proxy] parallel request limiter - use one cache update call (#5932)

* fix parallel request limiter - use one cache update call

* ci/cd run again

* run ci/cd again

* use docker username password

* fix config.yml

* fix config

* fix config

* fix config.yml

* ci/cd run again

* use correct typing for batch set cache

* fix async_set_cache_pipeline

* fix: only check user id tpm / rpm limits when limits are set

* fix test_openai_azure_embedding_with_oidc_and_cf

* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839

* feat(anthropic/chat.py): return 'retry-after' headers from anthropic

Fixes https://github.com/BerriAI/litellm/issues/4387

* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock

Closes https://github.com/BerriAI/litellm/issues/5747

* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)

* handle streaming for azure ai studio error

* bump: version 1.48.2 → 1.48.3

* docs(data_security.md): add legal/compliance FAQs

Make it easier for companies to use litellm

* docs: resolve imports

* [Feature]#5940, add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value

* fix(utils.py): fix get_model_info to handle finetuned models

Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models

* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks

* fix: fix linting errors

* fix(anthropic/chat/handler.py): fix cache creation input tokens

* fix(exception_mapping_utils.py): fix missing imports

* fix(anthropic/chat/handler.py): fix usage block translation

* test: fix test

* test: fix tests

* style(types/utils.py): trigger new build

* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
This commit is contained in:
Krish Dholakia 2024-09-27 22:52:57 -07:00 committed by GitHub
parent 6883c49acf
commit 02565cd58d
35 changed files with 3657 additions and 2820 deletions

View file

@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
@pytest.mark.parametrize(
"model",
[
# "gpt-3.5-turbo-1106",
"gpt-3.5-turbo-1106",
# "mistral/mistral-large-latest",
# "claude-3-haiku-20240307",
# "gemini/gemini-1.5-pro",
"anthropic.claude-3-sonnet-20240229-v1:0",
"groq/llama3-8b-8192",
],
)
@pytest.mark.flaky(retries=3, delay=1)
@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):
# test_parallel_function_call()
from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
@pytest.mark.parametrize(
    "model, provider",
    [
        (
            "anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock",
        ),
        ("claude-3-haiku-20240307", "anthropic"),
    ],
)
@pytest.mark.parametrize(
    "messages, expected_error_msg",
    [
        # Conversation that already contains an assistant tool call and its
        # tool result -> the request is expected to be rejected.
        (
            [
                {
                    "role": "user",
                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
                },
                Message(
                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
                    role="assistant",
                    tool_calls=[
                        ChatCompletionMessageToolCall(
                            index=1,
                            function=Function(
                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
                                name="get_current_weather",
                            ),
                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
                            type="function",
                        )
                    ],
                    function_call=None,
                ),
                {
                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
                    "role": "tool",
                    "name": "get_current_weather",
                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
                },
            ],
            True,
        ),
        # Plain user message with no tool calls -> the request must go through.
        (
            [
                {
                    "role": "user",
                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
                }
            ],
            False,
        ),
    ],
)
def test_parallel_function_call_anthropic_error_msg(
    model, provider, messages, expected_error_msg
):
    """
    Anthropic doesn't support tool calling without `tools=` param specified.

    Ensure this error is thrown when `tools=` param is not specified, but tool
    call requests are made.

    Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388

    Args:
        model: Model name passed straight to `litellm.completion`.
        provider: Provider label paired with `model` in the parametrization.
            It is not read by the test body.
        messages: Conversation sent to the model; no `tools=` argument is
            ever passed alongside it.
        expected_error_msg: Boolean flag (despite the name). When truthy the
            call must raise `litellm.UnsupportedParamsError`; when falsy the
            call must complete without that error.
    """
    # Local import keeps this block self-contained regardless of the
    # module's top-level imports.
    import contextlib

    try:
        litellm.set_verbose = True

        # Pick the expectation once so the completion call is written a
        # single time for both the "must raise" and "must succeed" cases.
        if expected_error_msg:
            expectation = pytest.raises(litellm.UnsupportedParamsError)
        else:
            expectation = contextlib.nullcontext()

        with expectation:
            second_response = litellm.completion(
                model=model,
                messages=messages,
                temperature=0.2,
                seed=22,
                drop_params=True,
            )
            # Only reached when no exception was raised; in the "must raise"
            # case pytest.raises then fails the test on leaving the block.
            print("second response\n", second_response)
    except (litellm.InternalServerError, litellm.RateLimitError) as e:
        # These two error types are tolerated: print them and let the test pass.
        print(e)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_parallel_function_call_stream():
try: