LiteLLM Minor Fixes & Improvements (10/09/2024) (#6139)

* fix(utils.py): don't return 'none' response headers
  Fixes https://github.com/BerriAI/litellm/issues/6123
* fix(vertex_and_google_ai_studio_gemini.py): support parsing out additional properties and strict value for tool calls
  Fixes https://github.com/BerriAI/litellm/issues/6136
* fix(cost_calculator.py): set default character value to none
  Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403290196
* fix(google.py): fix cost per token / cost per char conversion
  Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403370287
* build(model_prices_and_context_window.json): update gemini pricing
  Fixes https://github.com/BerriAI/litellm/issues/6133
* build(model_prices_and_context_window.json): update gemini pricing
* fix(litellm_logging.py): fix streaming caching logging when 'turn_off_message_logging' enabled
  Stores unredacted response in cache
* build(model_prices_and_context_window.json): update gemini-1.5-flash pricing
* fix(cost_calculator.py): fix default prompt_character count logic
  Fixes error in gemini cost calculation
* fix(cost_calculator.py): fix cost calc for tts models
2024-10-10 00:42:11 -07:00 · 2024-10-10 00:42:11 -07:00 · 6005450c8f
commit 6005450c8f
parent 60baa65e0e
16 changed files with 788 additions and 534 deletions
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@ -664,9 +664,39 @@ def test_unmapped_gemini_model_params():
    assert optional_params["stop_sequences"] == ["stop_word"]


-def test_drop_nested_params_vllm():
+def _check_additional_properties(schema):
+    """
+    Recursively verify that no dict inside `schema` has an
+    'additionalProperties' or 'strict' key.
+
+    Dicts and lists are walked to any depth; any other value is ignored.
+    Raises ValueError at the first dict that contains either key, otherwise
+    returns `schema` exactly as it was passed in (nothing is removed).
+    """
+    if isinstance(schema, dict):
+        # Fail if either key is present, whatever its value - this helper
+        # only checks the schema, it does not strip anything from it
+        if "additionalProperties" in schema or "strict" in schema:
+            raise ValueError(
+                "additionalProperties and strict should not be in the schema"
+            )
+
+        # Recursively process all dictionary values
+        for key, value in schema.items():
+            _check_additional_properties(value)
+
+    elif isinstance(schema, list):
+        # Recursively process all items in the list
+        for item in schema:
+            _check_additional_properties(item)
+
+    return schema
+
+
+@pytest.mark.parametrize(
+    "provider, model",
+    [
+        ("hosted_vllm", "my-vllm-model"),
+        ("gemini", "gemini-1.5-pro"),
+        ("vertex_ai", "gemini-1.5-pro"),
+    ],
+)
+def test_drop_nested_params_add_prop_and_strict(provider, model):
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/5288
+
+    Relevant issue - https://github.com/BerriAI/litellm/issues/6136
    """
    tools = [
        {
@ -690,8 +720,8 @@ def test_drop_nested_params_vllm():
    ]
    tool_choice = {"type": "function", "function": {"name": "structure_output"}}
    optional_params = get_optional_params(
-        model="my-vllm-model",
-        custom_llm_provider="hosted_vllm",
+        model=model,
+        custom_llm_provider=provider,
        temperature=0.2,
        tools=tools,
        tool_choice=tool_choice,
@ -700,7 +730,5 @@ def test_drop_nested_params_vllm():
            ["tools", "function", "additionalProperties"],
        ],
    )
-    print(optional_params["tools"][0]["function"])

-    assert "additionalProperties" not in optional_params["tools"][0]["function"]
-    assert "strict" not in optional_params["tools"][0]["function"]
+    _check_additional_properties(optional_params["tools"])
--- a/tests/llm_translation/test_vertex.py
+++ b/tests/llm_translation/test_vertex.py
@ -0,0 +1,83 @@
+import json
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+from unittest.mock import AsyncMock, MagicMock, patch
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+
+
+def test_completion_pydantic_obj_2():
+    """
+    Mocked check of the request body sent for a gemini call when
+    `response_format` is a nested pydantic model.
+
+    `client.post` is replaced by a MagicMock, and the test asserts that it
+    was called exactly once with a `json` kwarg equal to
+    `expected_request_body`.
+    """
+    from pydantic import BaseModel
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    litellm.set_verbose = True
+
+    class CalendarEvent(BaseModel):
+        name: str
+        date: str
+        participants: list[str]
+
+    class EventsList(BaseModel):
+        events: list[CalendarEvent]
+
+    messages = [
+        {"role": "user", "content": "List important events from the 20th century."}
+    ]
+    # Expected outgoing payload: the response schema below is fully inlined
+    # and contains only 'properties', 'items' and 'type' keys
+    expected_request_body = {
+        "contents": [
+            {
+                "role": "user",
+                "parts": [{"text": "List important events from the 20th century."}],
+            }
+        ],
+        "generationConfig": {
+            "response_mime_type": "application/json",
+            "response_schema": {
+                "properties": {
+                    "events": {
+                        "items": {
+                            "properties": {
+                                "name": {"type": "string"},
+                                "date": {"type": "string"},
+                                "participants": {
+                                    "items": {"type": "string"},
+                                    "type": "array",
+                                },
+                            },
+                            "type": "object",
+                        },
+                        "type": "array",
+                    }
+                },
+                "type": "object",
+            },
+        },
+    }
+    client = HTTPHandler()
+    # Swap the handler's `post` for a mock so the call is captured here
+    with patch.object(client, "post", new=MagicMock()) as mock_post:
+        # NOTE(review): the mock returns the request dict, not a provider
+        # response - presumably response handling then fails, which is why
+        # the exception below is only printed. Confirm this is intended.
+        mock_post.return_value = expected_request_body
+        try:
+            litellm.completion(
+                model="gemini/gemini-1.5-pro",
+                messages=messages,
+                response_format=EventsList,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        mock_post.assert_called_once()
+
+        print(mock_post.call_args.kwargs)
+
+        # Only the outgoing request body is verified
+        assert mock_post.call_args.kwargs["json"] == expected_request_body
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@ -2209,3 +2209,28 @@ async def test_redis_proxy_batch_redis_get_cache():

    print(response._hidden_params)
    assert "cache_key" in response._hidden_params
+
+
+def test_logging_turn_off_message_logging_streaming():
+    """
+    With `litellm.turn_off_message_logging` enabled, a streamed completion
+    must still hand the unredacted response to the cache.
+
+    The local cache's `add_cache` is mocked; the test asserts it is called
+    once and that its first positional argument carries the original
+    message content ("hello").
+    """
+    # NOTE(review): these module-level settings are not restored in this
+    # function - confirm that later tests do not depend on their defaults
+    litellm.turn_off_message_logging = True
+    mock_obj = Cache(type="local")
+    litellm.cache = mock_obj
+
+    with patch.object(mock_obj, "add_cache", new=MagicMock()) as mock_client:
+        print(f"mock_obj.add_cache: {mock_obj.add_cache}")
+
+        resp = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "hi"}],
+            mock_response="hello",
+            stream=True,
+        )
+
+        # Drain the stream; the chunks themselves are not inspected
+        for chunk in resp:
+            continue
+
+        # presumably gives the post-stream cache write time to run - TODO confirm
+        time.sleep(1)
+
+        mock_client.assert_called_once()
+
+        # The cached object must hold the real content, not a redacted one
+        assert mock_client.call_args.args[0].choices[0].message.content == "hello"
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@ -1711,31 +1711,6 @@ def test_completion_perplexity_api():
 # test_completion_perplexity_api()


-@pytest.mark.skip(
-    reason="too many requests. Hitting gemini rate limits. Convert to mock test."
-)
-def test_completion_pydantic_obj_2():
-    from pydantic import BaseModel
-
-    litellm.set_verbose = True
-
-    class CalendarEvent(BaseModel):
-        name: str
-        date: str
-        participants: list[str]
-
-    class EventsList(BaseModel):
-        events: list[CalendarEvent]
-
-    messages = [
-        {"role": "user", "content": "List important events from the 20th century."}
-    ]
-
-    response = litellm.completion(
-        model="gemini/gemini-1.5-pro", messages=messages, response_format=EventsList
-    )
-
-
@pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api_2():
    try:
@ -4573,12 +4548,7 @@ async def test_completion_ai21_chat():

@pytest.mark.parametrize(
    "model",
-    [
-        "gpt-4o",
-        "azure/chatgpt-v-2",
-        "claude-3-sonnet-20240229",
-        "fireworks_ai/mixtral-8x7b-instruct",
-    ],
+    ["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
 )
@pytest.mark.parametrize(
    "stream",
@ -4594,5 +4564,7 @@ def test_completion_response_ratelimit_headers(model, stream):
    additional_headers = hidden_params.get("additional_headers", {})

    print(additional_headers)
+    for k, v in additional_headers.items():
+        assert v != "None" and v is not None
    assert "x-ratelimit-remaining-requests" in additional_headers
    assert "x-ratelimit-remaining-tokens" in additional_headers
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@ -2359,3 +2359,131 @@ def test_together_ai_embedding_completion_cost():
        custom_llm_provider="together_ai",
        call_type="embedding",
    )
+
+
+def test_completion_cost_params():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+
+    `cost_per_token` for "gemini-1.5-pro-002" must give the same positive
+    prompt/completion costs for three ways of naming the model:
+    with custom_llm_provider="vertex_ai_beta", with no provider at all,
+    and with the "vertex_ai/" prefix in the model name.
+    """
+    litellm.set_verbose = True
+    # 1) provider passed explicitly
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_tokens=1000,
+        completion_tokens=1000,
+        custom_llm_provider="vertex_ai_beta",
+    )
+
+    # 2) no provider given
+    resp2_prompt_cost, resp2_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp2_prompt_cost > 0
+
+    assert resp1_prompt_cost == resp2_prompt_cost
+    assert resp1_completion_cost == resp2_completion_cost
+
+    # 3) provider given as a prefix of the model name
+    resp3_prompt_cost, resp3_completion_cost = cost_per_token(
+        model="vertex_ai/gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp3_prompt_cost > 0
+
+    assert resp3_prompt_cost == resp1_prompt_cost
+    assert resp3_completion_cost == resp1_completion_cost
+
+
+def test_completion_cost_params_2():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+
+    When both character counts are passed to `cost_per_token` for
+    "gemini-1.5-pro-002", the returned costs must equal the model's
+    per-character prices multiplied by those character counts.
+    """
+    litellm.set_verbose = True
+
+    prompt_characters = 1000
+    completion_characters = 1000
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_characters=prompt_characters,
+        completion_characters=completion_characters,
+        prompt_tokens=1000,
+        completion_tokens=1000,
+    )
+
+    print(resp1_prompt_cost, resp1_completion_cost)
+
+    # Reference prices come from the model info entry for the same model
+    model_info = litellm.get_model_info("gemini-1.5-pro-002")
+    input_cost_per_character = model_info["input_cost_per_character"]
+    output_cost_per_character = model_info["output_cost_per_character"]
+
+    assert resp1_prompt_cost == input_cost_per_character * prompt_characters
+    assert resp1_completion_cost == output_cost_per_character * completion_characters
+
+
+def test_completion_cost_params_gemini_3():
+    """
+    `cost_per_character` for "gemini-1.5-flash" on vertex_ai when
+    `prompt_characters` is None.
+
+    Expected (compared after rounding to 10 decimal places):
+    - prompt cost = prompt_tokens * input_cost_per_token
+    - completion cost = completion_characters * output_cost_per_character
+    """
+    from litellm.utils import Choices, Message, ModelResponse, Usage
+
+    from litellm.litellm_core_utils.llm_cost_calc.google import cost_per_character
+
+    # Set the local-cost-map flag and reload the cost map with an empty url
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    # NOTE(review): `response` is built here but never referenced again in
+    # this function - confirm whether it is still needed
+    response = ModelResponse(
+        id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    content="Sí. \n",
+                    role="assistant",
+                    tool_calls=None,
+                    function_call=None,
+                ),
+            )
+        ],
+        created=1728529259,
+        model="gemini-1.5-flash",
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=Usage(
+            completion_tokens=2,
+            prompt_tokens=3771,
+            total_tokens=3773,
+            completion_tokens_details=None,
+            prompt_tokens_details=None,
+        ),
+        vertex_ai_grounding_metadata=[],
+        vertex_ai_safety_results=[
+            [
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "probability": "NEGLIGIBLE",
+                },
+                {"category": "HARM_CATEGORY_HATE_SPEECH", "probability": "NEGLIGIBLE"},
+                {"category": "HARM_CATEGORY_HARASSMENT", "probability": "NEGLIGIBLE"},
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "probability": "NEGLIGIBLE",
+                },
+            ]
+        ],
+        vertex_ai_citation_metadata=[],
+    )
+
+    # Prompt character count is deliberately None; only the completion side
+    # has a character count
+    pc, cc = cost_per_character(
+        **{
+            "model": "gemini-1.5-flash",
+            "custom_llm_provider": "vertex_ai",
+            "prompt_tokens": 3771,
+            "completion_tokens": 2,
+            "prompt_characters": None,
+            "completion_characters": 3,
+        }
+    )
+
+    model_info = litellm.get_model_info("gemini-1.5-flash")
+
+    # Prompt side is expected to be priced per token, completion side per
+    # character
+    assert round(pc, 10) == round(3771 * model_info["input_cost_per_token"], 10)
+    assert round(cc, 10) == round(
+        3 * model_info["output_cost_per_character"],
+        10,
+    )
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@ -1414,6 +1414,7 @@ def test_logging_standard_payload_llm_headers(stream):
    with patch.object(
        customHandler, "log_success_event", new=MagicMock()
    ) as mock_client:
+
        resp = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@ -68,3 +68,9 @@ def test_get_model_info_finetuned_models():
    info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
    print("info", info)
    assert info["input_cost_per_token"] == 0.000003
+
+
+def test_get_model_info_gemini_pro():
+    """
+    `get_model_info("gemini-1.5-pro-002")` must return an entry whose
+    "key" field is exactly "gemini-1.5-pro-002".
+    """
+    info = litellm.get_model_info("gemini-1.5-pro-002")
+    print("info", info)
+    assert info["key"] == "gemini-1.5-pro-002"