LiteLLM Minor Fixes & Improvements (11/29/2024) (#6965)

* fix(factory.py): ensure tool call converts image url Fixes https://github.com/BerriAI/litellm/issues/6953
* fix(transformation.py): support mp4 + pdf url's for vertex ai Fixes https://github.com/BerriAI/litellm/issues/6936
* fix(http_handler.py): mask gemini api key in error logs Fixes https://github.com/BerriAI/litellm/issues/6963
* docs(prometheus.md): update prometheus FAQs
* feat(auth_checks.py): ensure specific model access > wildcard model access if wildcard model is in access group, but specific model is not - deny access
* fix(auth_checks.py): handle auth checks for team based model access groups handles scenario where model access group used for wildcard models
* fix(internal_user_endpoints.py): support adding guardrails on `/user/update` Fixes https://github.com/BerriAI/litellm/issues/6942
* fix(key_management_endpoints.py): fix prepare_metadata_fields helper
* fix: fix tests
* build(requirements.txt): bump openai dep version fixes proxies argument
* test: fix tests
* fix(http_handler.py): fix error message masking
* fix(bedrock_guardrails.py): pass in prepped data
* test: fix test
* test: fix nvidia nim test
* fix(http_handler.py): return original response headers
* fix: revert maskedhttpstatuserror
* test: update tests
* test: cleanup test
* fix(key_management_endpoints.py): fix metadata field update logic
* fix(key_management_endpoints.py): maintain initial order of guardrails in key update
* fix(key_management_endpoints.py): handle prepare metadata
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting errors
* fix: fix key management errors
* fix(key_management_endpoints.py): update metadata
* test: update test
* refactor: add more debug statements
* test: skip flaky test
* test: fix test
* fix: fix test
* fix: fix update metadata logic
* fix: fix test
* ci(config.yml): change db url for e2e ui testing
2024-12-01 05:24:11 -08:00 · 2024-12-01 05:24:11 -08:00 · 859b47f08b
commit 859b47f08b
parent bd59f18809
37 changed files with 1040 additions and 714 deletions
--- a/tests/llm_translation/test_openai_prediction_param.py
+++ b/tests/llm_translation/test_openai_prediction_param.py
@@ -1,225 +0,0 @@
-import json
-import os
-import sys
-from datetime import datetime
-from unittest.mock import AsyncMock
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-
-
-import httpx
-import pytest
-from respx import MockRouter
-
-import litellm
-from litellm import Choices, Message, ModelResponse
-
-
-def test_openai_prediction_param():
-    litellm.set_verbose = True
-    code = """
-    /// <summary>
-    /// Represents a user with a first name, last name, and username.
-    /// </summary>
-    public class User
-    {
-        /// <summary>
-        /// Gets or sets the user's first name.
-        /// </summary>
-        public string FirstName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's last name.
-        /// </summary>
-        public string LastName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's username.
-        /// </summary>
-        public string Username { get; set; }
-    }
-    """
-
-    completion = litellm.completion(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
-            },
-            {"role": "user", "content": code},
-        ],
-        prediction={"type": "content", "content": code},
-    )
-
-    print(completion)
-
-    assert (
-        completion.usage.completion_tokens_details.accepted_prediction_tokens > 0
-        or completion.usage.completion_tokens_details.rejected_prediction_tokens > 0
-    )
-
-
-@pytest.mark.asyncio
-@pytest.mark.respx
-async def test_openai_prediction_param_mock(respx_mock: MockRouter):
-    """
-    Tests that prediction parameter is correctly passed to the API
-    """
-    litellm.set_verbose = True
-
-    code = """
-    /// <summary>
-    /// Represents a user with a first name, last name, and username.
-    /// </summary>
-    public class User
-    {
-        /// <summary>
-        /// Gets or sets the user's first name.
-        /// </summary>
-        public string FirstName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's last name.
-        /// </summary>
-        public string LastName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's username.
-        /// </summary>
-        public string Username { get; set; }
-    }
-    """
-
-    mock_response = ModelResponse(
-        id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
-        choices=[
-            Choices(
-                message=Message(
-                    content=code.replace("Username", "Email").replace(
-                        "username", "email"
-                    ),
-                    role="assistant",
-                )
-            )
-        ],
-        created=int(datetime.now().timestamp()),
-        model="gpt-4o-mini-2024-07-18",
-        usage={
-            "completion_tokens": 207,
-            "prompt_tokens": 175,
-            "total_tokens": 382,
-            "completion_tokens_details": {
-                "accepted_prediction_tokens": 0,
-                "reasoning_tokens": 0,
-                "rejected_prediction_tokens": 80,
-            },
-        },
-    )
-
-    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
-        return_value=httpx.Response(200, json=mock_response.dict())
-    )
-
-    completion = await litellm.acompletion(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
-            },
-            {"role": "user", "content": code},
-        ],
-        prediction={"type": "content", "content": code},
-    )
-
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-
-    # Verify the request contains the prediction parameter
-    assert "prediction" in request_body
-    # verify prediction is correctly sent to the API
-    assert request_body["prediction"] == {"type": "content", "content": code}
-
-    # Verify the completion tokens details
-    assert completion.usage.completion_tokens_details.accepted_prediction_tokens == 0
-    assert completion.usage.completion_tokens_details.rejected_prediction_tokens == 80
-
-
-@pytest.mark.asyncio
-async def test_openai_prediction_param_with_caching():
-    """
-    Tests using `prediction` parameter with caching
-    """
-    from litellm.caching.caching import LiteLLMCacheType
-    import logging
-    from litellm._logging import verbose_logger
-
-    verbose_logger.setLevel(logging.DEBUG)
-    import time
-
-    litellm.set_verbose = True
-    litellm.cache = litellm.Cache(type=LiteLLMCacheType.LOCAL)
-    code = """
-    /// <summary>
-    /// Represents a user with a first name, last name, and username.
-    /// </summary>
-    public class User
-    {
-        /// <summary>
-        /// Gets or sets the user's first name.
-        /// </summary>
-        public string FirstName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's last name.
-        /// </summary>
-        public string LastName { get; set; }
-
-        /// <summary>
-        /// Gets or sets the user's username.
-        /// </summary>
-        public string Username { get; set; }
-    }
-    """
-
-    completion_response_1 = litellm.completion(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
-            },
-            {"role": "user", "content": code},
-        ],
-        prediction={"type": "content", "content": code},
-    )
-
-    time.sleep(0.5)
-
-    # cache hit
-    completion_response_2 = litellm.completion(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
-            },
-            {"role": "user", "content": code},
-        ],
-        prediction={"type": "content", "content": code},
-    )
-
-    assert completion_response_1.id == completion_response_2.id
-
-    completion_response_3 = litellm.completion(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "user", "content": "What is the first name of the user?"},
-        ],
-        prediction={"type": "content", "content": code + "FirstName"},
-    )
-
-    assert completion_response_3.id != completion_response_1.id