import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse

# C# class used both as the text to edit and as the predicted output in every
# test below. Kept in one place so the tests cannot drift apart.
_USER_CLASS_SOURCE = """
///
/// Represents a user with a first name, last name, and username.
///
public class User
{
    ///
    /// Gets or sets the user's first name.
    ///
    public string FirstName { get; set; }
    ///
    /// Gets or sets the user's last name.
    ///
    public string LastName { get; set; }
    ///
    /// Gets or sets the user's username.
    ///
    public string Username { get; set; }
}
"""

# Instruction sent as the first user message of the "edit" request.
_EDIT_INSTRUCTION = "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting."


def _edit_request_kwargs(code):
    """Build the keyword arguments for the shared "rename property" request.

    Args:
        code: Source text sent as the second user message and as the
            ``prediction`` content.

    Returns:
        A new dict (with a new ``messages`` list) on every call, suitable for
        ``litellm.completion(**kwargs)`` / ``litellm.acompletion(**kwargs)``.
    """
    return {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": _EDIT_INSTRUCTION},
            {"role": "user", "content": code},
        ],
        "prediction": {"type": "content", "content": code},
    }


def test_openai_prediction_param():
    """Check that a completion made with ``prediction`` reports prediction tokens.

    NOTE(review): no HTTP mock is installed in this test, so it presumably
    reaches the real provider and needs credentials - confirm.
    """
    litellm.set_verbose = True
    code = _USER_CLASS_SOURCE

    completion = litellm.completion(**_edit_request_kwargs(code))
    print(completion)

    # Either counter being non-zero shows the prediction was taken into account.
    token_details = completion.usage.completion_tokens_details
    assert (
        token_details.accepted_prediction_tokens > 0
        or token_details.rejected_prediction_tokens > 0
    )


@pytest.mark.asyncio
@pytest.mark.respx
async def test_openai_prediction_param_mock(respx_mock: MockRouter):
    """
    Tests that prediction parameter is correctly passed to the API
    """
    litellm.set_verbose = True
    code = _USER_CLASS_SOURCE

    # Canned response whose usage block carries known prediction-token counts.
    mock_response = ModelResponse(
        id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
        choices=[
            Choices(
                message=Message(
                    content=code.replace("Username", "Email").replace(
                        "username", "email"
                    ),
                    role="assistant",
                )
            )
        ],
        created=int(datetime.now().timestamp()),
        model="gpt-4o-mini-2024-07-18",
        usage={
            "completion_tokens": 207,
            "prompt_tokens": 175,
            "total_tokens": 382,
            "completion_tokens_details": {
                "accepted_prediction_tokens": 0,
                "reasoning_tokens": 0,
                "rejected_prediction_tokens": 80,
            },
        },
    )

    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
        return_value=httpx.Response(200, json=mock_response.dict())
    )

    completion = await litellm.acompletion(**_edit_request_kwargs(code))

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    # Verify the request contains the prediction parameter
    assert "prediction" in request_body
    # verify prediction is correctly sent to the API
    assert request_body["prediction"] == {"type": "content", "content": code}

    # Verify the completion tokens details
    assert completion.usage.completion_tokens_details.accepted_prediction_tokens == 0
    assert completion.usage.completion_tokens_details.rejected_prediction_tokens == 80


@pytest.mark.asyncio
async def test_openai_prediction_param_with_caching():
    """
    Tests using `prediction` parameter with caching

    Two identical requests must return the same response id, while a request
    with different messages and prediction must return a different id.
    The global cache and logger level changed here are restored on exit so
    later tests in the same process are not affected.
    """
    from litellm.caching.caching import LiteLLMCacheType
    import logging
    from litellm._logging import verbose_logger
    import time

    # Remember the process-wide settings this test overrides.
    previous_cache = litellm.cache
    previous_log_level = verbose_logger.level

    verbose_logger.setLevel(logging.DEBUG)
    litellm.set_verbose = True
    litellm.cache = litellm.Cache(type=LiteLLMCacheType.LOCAL)
    code = _USER_CLASS_SOURCE

    try:
        completion_response_1 = litellm.completion(**_edit_request_kwargs(code))

        # Presumably gives the cache write time to finish - TODO confirm.
        time.sleep(0.5)

        # cache hit
        completion_response_2 = litellm.completion(**_edit_request_kwargs(code))

        assert completion_response_1.id == completion_response_2.id

        # Different messages and prediction: must not reuse the first response.
        completion_response_3 = litellm.completion(
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": "What is the first name of the user?"},
            ],
            prediction={"type": "content", "content": code + "FirstName"},
        )

        assert completion_response_3.id != completion_response_1.id
    finally:
        # Undo the global mutations even when an assertion above fails.
        litellm.cache = previous_cache
        verbose_logger.setLevel(previous_log_level)