litellm/tests/llm_translation/test_openai_prediction_param.py
Ishaan Jaff 4f7a195560 (feat) add Predicted Outputs for OpenAI (#6594)
* bump openai to openai==1.54.0

* add 'prediction' param

* testing fix bedrock deprecated cohere.command-text-v14

* test test_openai_prediction_param.py

* test_openai_prediction_param_with_caching

* doc Predicted Outputs

* doc Predicted Output
2024-11-05 23:45:52 +05:30

225 lines
6.4 KiB
Python

import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
def test_openai_prediction_param():
litellm.set_verbose = True
code = """
/// <summary>
/// Represents a user with a first name, last name, and username.
/// </summary>
public class User
{
/// <summary>
/// Gets or sets the user's first name.
/// </summary>
public string FirstName { get; set; }
/// <summary>
/// Gets or sets the user's last name.
/// </summary>
public string LastName { get; set; }
/// <summary>
/// Gets or sets the user's username.
/// </summary>
public string Username { get; set; }
}
"""
completion = litellm.completion(
model="gpt-4o-mini",
messages=[
{
"role": "user",
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
},
{"role": "user", "content": code},
],
prediction={"type": "content", "content": code},
)
print(completion)
assert (
completion.usage.completion_tokens_details.accepted_prediction_tokens > 0
or completion.usage.completion_tokens_details.rejected_prediction_tokens > 0
)
@pytest.mark.asyncio
@pytest.mark.respx
async def test_openai_prediction_param_mock(respx_mock: MockRouter):
"""
Tests that prediction parameter is correctly passed to the API
"""
litellm.set_verbose = True
code = """
/// <summary>
/// Represents a user with a first name, last name, and username.
/// </summary>
public class User
{
/// <summary>
/// Gets or sets the user's first name.
/// </summary>
public string FirstName { get; set; }
/// <summary>
/// Gets or sets the user's last name.
/// </summary>
public string LastName { get; set; }
/// <summary>
/// Gets or sets the user's username.
/// </summary>
public string Username { get; set; }
}
"""
mock_response = ModelResponse(
id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
choices=[
Choices(
message=Message(
content=code.replace("Username", "Email").replace(
"username", "email"
),
role="assistant",
)
)
],
created=int(datetime.now().timestamp()),
model="gpt-4o-mini-2024-07-18",
usage={
"completion_tokens": 207,
"prompt_tokens": 175,
"total_tokens": 382,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 80,
},
},
)
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
return_value=httpx.Response(200, json=mock_response.dict())
)
completion = await litellm.acompletion(
model="gpt-4o-mini",
messages=[
{
"role": "user",
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
},
{"role": "user", "content": code},
],
prediction={"type": "content", "content": code},
)
assert mock_request.called
request_body = json.loads(mock_request.calls[0].request.content)
# Verify the request contains the prediction parameter
assert "prediction" in request_body
# verify prediction is correctly sent to the API
assert request_body["prediction"] == {"type": "content", "content": code}
# Verify the completion tokens details
assert completion.usage.completion_tokens_details.accepted_prediction_tokens == 0
assert completion.usage.completion_tokens_details.rejected_prediction_tokens == 80
@pytest.mark.asyncio
async def test_openai_prediction_param_with_caching():
"""
Tests using `prediction` parameter with caching
"""
from litellm.caching.caching import LiteLLMCacheType
import logging
from litellm._logging import verbose_logger
verbose_logger.setLevel(logging.DEBUG)
import time
litellm.set_verbose = True
litellm.cache = litellm.Cache(type=LiteLLMCacheType.LOCAL)
code = """
/// <summary>
/// Represents a user with a first name, last name, and username.
/// </summary>
public class User
{
/// <summary>
/// Gets or sets the user's first name.
/// </summary>
public string FirstName { get; set; }
/// <summary>
/// Gets or sets the user's last name.
/// </summary>
public string LastName { get; set; }
/// <summary>
/// Gets or sets the user's username.
/// </summary>
public string Username { get; set; }
}
"""
completion_response_1 = litellm.completion(
model="gpt-4o-mini",
messages=[
{
"role": "user",
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
},
{"role": "user", "content": code},
],
prediction={"type": "content", "content": code},
)
time.sleep(0.5)
# cache hit
completion_response_2 = litellm.completion(
model="gpt-4o-mini",
messages=[
{
"role": "user",
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
},
{"role": "user", "content": code},
],
prediction={"type": "content", "content": code},
)
assert completion_response_1.id == completion_response_2.id
completion_response_3 = litellm.completion(
model="gpt-4o-mini",
messages=[
{"role": "user", "content": "What is the first name of the user?"},
],
prediction={"type": "content", "content": code + "FirstName"},
)
assert completion_response_3.id != completion_response_1.id