LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata Fixes https://github.com/BerriAI/litellm/issues/5935 * build(model_prices_and_context_window.json): add mistral pixtral cost tracking Closes https://github.com/BerriAI/litellm/issues/5837 * handle streaming for azure ai studio error * [Perf Proxy] parallel request limiter - use one cache update call (#5932) * fix parallel request limiter - use one cache update call * ci/cd run again * run ci/cd again * use docker username password * fix config.yml * fix config * fix config * fix config.yml * ci/cd run again * use correct typing for batch set cache * fix async_set_cache_pipeline * fix only check user id tpm / rpm limits when limits set * fix test_openai_azure_embedding_with_oidc_and_cf * fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839 * feat(anthropic/chat.py): return 'retry-after' headers from anthropic Fixes https://github.com/BerriAI/litellm/issues/4387 * feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock Closes https://github.com/BerriAI/litellm/issues/5747 * [Feature]#5940, add max_workers parameter for the batch_completion (#5947) * handle streaming for azure ai studio error * bump: version 1.48.2 → 1.48.3 * docs(data_security.md): add legal/compliance faq's Make it easier for companies to use litellm * docs: resolve imports * [Feature]#5940, add max_workers parameter for the batch_completion method --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com> Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local> * fix(converse_transformation.py): fix default message value * fix(utils.py): fix get_model_info to handle finetuned models Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models * fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key 
callbacks * fix: fix linting errors * fix(anthropic/chat/handler.py): fix cache creation input tokens * fix(exception_mapping_utils.py): fix missing imports * fix(anthropic/chat/handler.py): fix usage block translation * test: fix test * test: fix tests * style(types/utils.py): trigger new build * test: fix test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co> Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
2025-04-27 11:43:54 +00:00 · 2024-09-27 22:52:57 -07:00 · 2024-09-27 22:52:57 -07:00 · 02565cd58d
commit 02565cd58d
parent 6883c49acf
35 changed files with 3657 additions and 2820 deletions
--- a/litellm/tests/test_function_calling.py
+++ b/litellm/tests/test_function_calling.py
@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
@pytest.mark.parametrize(
    "model",
    [
-        # "gpt-3.5-turbo-1106",
+        "gpt-3.5-turbo-1106",
        # "mistral/mistral-large-latest",
        # "claude-3-haiku-20240307",
        # "gemini/gemini-1.5-pro",
        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "groq/llama3-8b-8192",
    ],
 )
@pytest.mark.flaky(retries=3, delay=1)
@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):

 # test_parallel_function_call()

+from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
+
+
+@pytest.mark.parametrize(
+    "model, provider",
+    [
+        (
+            "anthropic.claude-3-sonnet-20240229-v1:0",
+            "bedrock",
+        ),
+        ("claude-3-haiku-20240307", "anthropic"),
+    ],
+)
+@pytest.mark.parametrize(
+    "messages, expected_error_msg",
+    [
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                },
+                Message(
+                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            index=1,
+                            function=Function(
+                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+                {
+                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                    "role": "tool",
+                    "name": "get_current_weather",
+                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
+                },
+            ],
+            True,  # history contains an assistant tool call + tool result, so an error is expected
+        ),
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                }
+            ],
+            False,  # plain user message with no tool calls, so no error is expected
+        ),
+    ],
+)
+def test_parallel_function_call_anthropic_error_msg(
+    model, provider, messages, expected_error_msg
+):
+    """
+    Anthropic/Bedrock don't support tool-call messages unless the `tools=` param is specified.
+
+    Ensure `litellm.UnsupportedParamsError` is raised when the messages contain tool calls but `tools=` is not passed. `expected_error_msg` is a bool flag (whether the error is expected), not a message; `provider` is unused in the body.
+
+    Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
+    """
+    try:
+        litellm.set_verbose = True
+
+        messages = messages  # no-op self-assignment
+
+        if expected_error_msg:
+            with pytest.raises(litellm.UnsupportedParamsError) as e:
+                second_response = litellm.completion(
+                    model=model,
+                    messages=messages,
+                    temperature=0.2,
+                    seed=22,
+                    drop_params=True,
+                )  # expected to raise: the messages contain tool calls but no `tools=` is passed
+                print("second response\n", second_response)
+        else:
+            second_response = litellm.completion(
+                model=model,
+                messages=messages,
+                temperature=0.2,
+                seed=22,
+                drop_params=True,
+            )  # no tool calls in the messages, so this call is expected to go through
+            print("second response\n", second_response)
+    except litellm.InternalServerError as e:  # tolerated: only printed, does not fail the test
+        print(e)
+    except litellm.RateLimitError as e:  # tolerated: only printed, does not fail the test
+        print(e)
+    except Exception as e:  # any other exception fails the test explicitly
+        pytest.fail(f"Error occurred: {e}")
+

 def test_parallel_function_call_stream():
    try: