LiteLLM Minor Fixes & Improvements (11/29/2024) (#6965)

* fix(factory.py): ensure tool call converts image url
  Fixes https://github.com/BerriAI/litellm/issues/6953
* fix(transformation.py): support mp4 + pdf URLs for vertex ai
  Fixes https://github.com/BerriAI/litellm/issues/6936
* fix(http_handler.py): mask gemini api key in error logs
  Fixes https://github.com/BerriAI/litellm/issues/6963
* docs(prometheus.md): update prometheus FAQs
* feat(auth_checks.py): ensure specific model access > wildcard model access
  if wildcard model is in access group, but specific model is not - deny access
* fix(auth_checks.py): handle auth checks for team based model access groups
  handles scenario where model access group used for wildcard models
* fix(internal_user_endpoints.py): support adding guardrails on `/user/update`
  Fixes https://github.com/BerriAI/litellm/issues/6942
* fix(key_management_endpoints.py): fix prepare_metadata_fields helper
* fix: fix tests
* build(requirements.txt): bump openai dep version
  fixes proxies argument
* test: fix tests
* fix(http_handler.py): fix error message masking
* fix(bedrock_guardrails.py): pass in prepped data
* test: fix test
* test: fix nvidia nim test
* fix(http_handler.py): return original response headers
* fix: revert maskedhttpstatuserror
* test: update tests
* test: cleanup test
* fix(key_management_endpoints.py): fix metadata field update logic
* fix(key_management_endpoints.py): maintain initial order of guardrails in key update
* fix(key_management_endpoints.py): handle prepare metadata
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting errors
* fix: fix key management errors
* fix(key_management_endpoints.py): update metadata
* test: update test
* refactor: add more debug statements
* test: skip flaky test
* test: fix test
* fix: fix test
* fix: fix update metadata logic
* fix: fix test
* ci(config.yml): change db url for e2e ui testing
2024-12-01 05:24:11 -08:00 · 2024-12-01 05:24:11 -08:00 · 859b47f08b
commit 859b47f08b
parent bd59f18809
37 changed files with 1040 additions and 714 deletions
--- a/tests/llm_translation/test_nvidia_nim.py
+++ b/tests/llm_translation/test_nvidia_nim.py
@ -12,95 +12,78 @@ sys.path.insert(
 import httpx
 import pytest
 from respx import MockRouter
+from unittest.mock import patch, MagicMock, AsyncMock

 import litellm
 from litellm import Choices, Message, ModelResponse, EmbeddingResponse, Usage
 from litellm import completion


-@pytest.mark.respx
-def test_completion_nvidia_nim(respx_mock: MockRouter):
+def test_completion_nvidia_nim():
+    from openai import OpenAI
+
    litellm.set_verbose = True
-    mock_response = ModelResponse(
-        id="cmpl-mock",
-        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
-        created=int(datetime.now().timestamp()),
-        model="databricks/dbrx-instruct",
-    )
    model_name = "nvidia_nim/databricks/dbrx-instruct"
+    client = OpenAI(
+        api_key="fake-api-key",
+    )

-    mock_request = respx_mock.post(
-        "https://integrate.api.nvidia.com/v1/chat/completions"
-    ).mock(return_value=httpx.Response(200, json=mock_response.dict()))
-    try:
-        response = completion(
-            model=model_name,
-            messages=[
-                {
-                    "role": "user",
-                    "content": "What's the weather like in Boston today in Fahrenheit?",
-                }
-            ],
-            presence_penalty=0.5,
-            frequency_penalty=0.1,
-        )
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_client:
+        try:
+            completion(
+                model=model_name,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "What's the weather like in Boston today in Fahrenheit?",
+                    }
+                ],
+                presence_penalty=0.5,
+                frequency_penalty=0.1,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
        # Add any assertions here to check the response
-        print(response)
-        assert response.choices[0].message.content is not None
-        assert len(response.choices[0].message.content) > 0

-        assert mock_request.called
-        request_body = json.loads(mock_request.calls[0].request.content)
+        mock_client.assert_called_once()
+        request_body = mock_client.call_args.kwargs

        print("request_body: ", request_body)

-        assert request_body == {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": "What's the weather like in Boston today in Fahrenheit?",
-                }
-            ],
-            "model": "databricks/dbrx-instruct",
-            "frequency_penalty": 0.1,
-            "presence_penalty": 0.5,
-        }
-    except litellm.exceptions.Timeout as e:
-        pass
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-def test_embedding_nvidia_nim(respx_mock: MockRouter):
-    litellm.set_verbose = True
-    mock_response = EmbeddingResponse(
-        model="nvidia_nim/databricks/dbrx-instruct",
-        data=[
+        assert request_body["messages"] == [
            {
-                "embedding": [0.1, 0.2, 0.3],
-                "index": 0,
-            }
-        ],
-        usage=Usage(
-            prompt_tokens=10,
-            completion_tokens=0,
-            total_tokens=10,
-        ),
+                "role": "user",
+                "content": "What's the weather like in Boston today in Fahrenheit?",
+            },
+        ]
+        assert request_body["model"] == "databricks/dbrx-instruct"
+        assert request_body["frequency_penalty"] == 0.1
+        assert request_body["presence_penalty"] == 0.5
+
+
+def test_embedding_nvidia_nim():
+    litellm.set_verbose = True
+    from openai import OpenAI
+
+    client = OpenAI(
+        api_key="fake-api-key",
    )
-    mock_request = respx_mock.post(
-        "https://integrate.api.nvidia.com/v1/embeddings"
-    ).mock(return_value=httpx.Response(200, json=mock_response.dict()))
-    response = litellm.embedding(
-        model="nvidia_nim/nvidia/nv-embedqa-e5-v5",
-        input="What is the meaning of life?",
-        input_type="passage",
-    )
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-    print("request_body: ", request_body)
-    assert request_body == {
-        "input": "What is the meaning of life?",
-        "model": "nvidia/nv-embedqa-e5-v5",
-        "input_type": "passage",
-        "encoding_format": "base64",
-    }
+    with patch.object(client.embeddings.with_raw_response, "create") as mock_client:
+        try:
+            litellm.embedding(
+                model="nvidia_nim/nvidia/nv-embedqa-e5-v5",
+                input="What is the meaning of life?",
+                input_type="passage",
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+        mock_client.assert_called_once()
+        request_body = mock_client.call_args.kwargs
+        print("request_body: ", request_body)
+        assert request_body["input"] == "What is the meaning of life?"
+        assert request_body["model"] == "nvidia/nv-embedqa-e5-v5"
+        assert request_body["extra_body"]["input_type"] == "passage"