(feat) add Predicted Outputs for OpenAI (#6594)

* bump openai to openai==1.54.0 * add 'prediction' param * testing fix bedrock deprecated cohere.command-text-v14 * test test_openai_prediction_param.py * test_openai_prediction_param_with_caching * doc Predicted Outputs * doc Predicted Output
2024-11-05 10:46:57 +05:30 · 2024-11-05 10:46:57 +05:30 · c047d51cc8
commit c047d51cc8
parent 57b1bb5e06
12 changed files with 362 additions and 13 deletions
--- a/tests/llm_translation/test_openai_prediction_param.py
+++ b/tests/llm_translation/test_openai_prediction_param.py
@ -0,0 +1,225 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import httpx
+import pytest
+from respx import MockRouter
+
+import litellm
+from litellm import Choices, Message, ModelResponse
+
+
+def test_openai_prediction_param():
+    """
+    Calls `litellm.completion` on gpt-4o-mini with a `prediction` payload
+    (no mock is installed in this test) and checks that the returned usage
+    reports a non-zero accepted or rejected prediction token count.
+    """
+    litellm.set_verbose = True
+    code = """
+    /// <summary>
+    /// Represents a user with a first name, last name, and username.
+    /// </summary>
+    public class User
+    {
+        /// <summary>
+        /// Gets or sets the user's first name.
+        /// </summary>
+        public string FirstName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's last name.
+        /// </summary>
+        public string LastName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's username.
+        /// </summary>
+        public string Username { get; set; }
+    }
+    """
+
+    # The same source text is sent twice: once as the message to edit and once
+    # as the predicted output.
+    edit_instruction = {
+        "role": "user",
+        "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
+    }
+    source_message = {"role": "user", "content": code}
+    predicted_output = {"type": "content", "content": code}
+
+    completion = litellm.completion(
+        model="gpt-4o-mini",
+        messages=[edit_instruction, source_message],
+        prediction=predicted_output,
+    )
+
+    print(completion)
+
+    token_details = completion.usage.completion_tokens_details
+    assert (
+        token_details.accepted_prediction_tokens > 0
+        or token_details.rejected_prediction_tokens > 0
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_openai_prediction_param_mock(respx_mock: MockRouter):
+    """
+    Checks, against a mocked chat completions endpoint, that the `prediction`
+    parameter is sent in the request body and that the prediction token counts
+    from the mocked response are exposed on the returned usage.
+    """
+    litellm.set_verbose = True
+
+    code = """
+    /// <summary>
+    /// Represents a user with a first name, last name, and username.
+    /// </summary>
+    public class User
+    {
+        /// <summary>
+        /// Gets or sets the user's first name.
+        /// </summary>
+        public string FirstName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's last name.
+        /// </summary>
+        public string LastName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's username.
+        /// </summary>
+        public string Username { get; set; }
+    }
+    """
+
+    # Canned assistant reply: the input code with Username/username swapped
+    # for Email/email.
+    rewritten_code = code.replace("Username", "Email").replace("username", "email")
+    assistant_choice = Choices(
+        message=Message(content=rewritten_code, role="assistant")
+    )
+    canned_usage = {
+        "completion_tokens": 207,
+        "prompt_tokens": 175,
+        "total_tokens": 382,
+        "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "reasoning_tokens": 0,
+            "rejected_prediction_tokens": 80,
+        },
+    }
+    mock_response = ModelResponse(
+        id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
+        choices=[assistant_choice],
+        created=int(datetime.now().timestamp()),
+        model="gpt-4o-mini-2024-07-18",
+        usage=canned_usage,
+    )
+
+    completions_route = respx_mock.post("https://api.openai.com/v1/chat/completions")
+    mock_request = completions_route.mock(
+        return_value=httpx.Response(200, json=mock_response.dict())
+    )
+
+    edit_instruction = {
+        "role": "user",
+        "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
+    }
+    completion = await litellm.acompletion(
+        model="gpt-4o-mini",
+        messages=[edit_instruction, {"role": "user", "content": code}],
+        prediction={"type": "content", "content": code},
+    )
+
+    assert mock_request.called
+    first_call = mock_request.calls[0]
+    request_body = json.loads(first_call.request.content)
+
+    # The outgoing request must carry the prediction parameter...
+    assert "prediction" in request_body
+    # ...and it must match what the caller passed (compared to a fresh dict).
+    assert request_body["prediction"] == {"type": "content", "content": code}
+
+    # Token details must mirror the values in the canned usage payload.
+    token_details = completion.usage.completion_tokens_details
+    assert token_details.accepted_prediction_tokens == 0
+    assert token_details.rejected_prediction_tokens == 80
+
+
+@pytest.mark.asyncio
+async def test_openai_prediction_param_with_caching():
+    """
+    Tests using `prediction` parameter with caching
+
+    Two identical requests are expected to return the same response id
+    (the second one being a cache hit), while a request with different
+    messages and a different prediction must return a different id.
+
+    The previous value of the global `litellm.cache` is restored when the
+    test finishes, pass or fail, so the local cache enabled here does not
+    stay active for tests that run afterwards.
+    """
+    from litellm.caching.caching import LiteLLMCacheType
+    import logging
+    from litellm._logging import verbose_logger
+
+    verbose_logger.setLevel(logging.DEBUG)
+    import time
+
+    litellm.set_verbose = True
+    code = """
+    /// <summary>
+    /// Represents a user with a first name, last name, and username.
+    /// </summary>
+    public class User
+    {
+        /// <summary>
+        /// Gets or sets the user's first name.
+        /// </summary>
+        public string FirstName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's last name.
+        /// </summary>
+        public string LastName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's username.
+        /// </summary>
+        public string Username { get; set; }
+    }
+    """
+
+    def edit_request_kwargs():
+        # Build fresh objects on every call so both "identical" requests are
+        # constructed the same way and never share state between calls.
+        return {
+            "model": "gpt-4o-mini",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
+                },
+                {"role": "user", "content": code},
+            ],
+            "prediction": {"type": "content", "content": code},
+        }
+
+    # `litellm.cache` is module-level state: remember what was there and put
+    # it back in `finally`.
+    previous_cache = litellm.cache
+    litellm.cache = litellm.Cache(type=LiteLLMCacheType.LOCAL)
+    try:
+        completion_response_1 = litellm.completion(**edit_request_kwargs())
+
+        time.sleep(0.5)
+
+        # cache hit
+        completion_response_2 = litellm.completion(**edit_request_kwargs())
+
+        assert completion_response_1.id == completion_response_2.id
+
+        # Different messages and prediction -> must not reuse the first response.
+        completion_response_3 = litellm.completion(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "user", "content": "What is the first name of the user?"},
+            ],
+            prediction={"type": "content", "content": code + "FirstName"},
+        )
+
+        assert completion_response_3.id != completion_response_1.id
+    finally:
+        litellm.cache = previous_cache