forked from phoenix/litellm-mirror
[Feat] Add max_completion_tokens param (#5691)
* add max_completion_tokens
* add max_completion_tokens
* add max_completion_tokens support for OpenAI models
* add max_completion_tokens param
* add max_completion_tokens for bedrock converse models
* add test for converse maxTokens
* fix openai o1 param mapping test
* move test optional params
* add max_completion_tokens for anthropic api
* fix conftest
* add max_completion tokens for vertex ai partner models
* add max_completion_tokens for fireworks ai
* add max_completion_tokens for hf rest api
* add test for param mapping
* add param mapping for vertex, gemini + testing
* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd
* add max_completion_tokens to openai supported params
* fix fireworks ai param mapping
This commit is contained in:
parent
415a3ede9e
commit
85acdb9193
31 changed files with 591 additions and 35 deletions
101
tests/llm_translation/test_openai_o1.py
Normal file
101
tests/llm_translation/test_openai_o1.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from respx import MockRouter
|
||||
|
||||
import litellm
|
||||
from litellm import Choices, Message, ModelResponse
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.respx
async def test_o1_handle_system_role(respx_mock: MockRouter):
    """
    Check the outgoing request body for an `o1-preview` completion call.

    The call is made with `max_tokens=10` and a single `system` message.
    The intercepted request body is expected to contain:
    - 'max_completion_tokens' set to 10 (no 'max_tokens' key)
    - the same message content under role 'user'
    """
    litellm.set_verbose = True

    # Canned completion returned by the mocked chat-completions endpoint.
    canned_choice = Choices(
        message=Message(content="Mocked response", role="assistant")
    )
    canned_completion = ModelResponse(
        id="cmpl-mock",
        choices=[canned_choice],
        created=int(datetime.now().timestamp()),
        model="o1-preview",
    )

    # Intercept the OpenAI endpoint so no real network call is made.
    chat_route = respx_mock.post("https://api.openai.com/v1/chat/completions")
    canned_reply = httpx.Response(200, json=canned_completion.dict())
    mocked_call = chat_route.mock(return_value=canned_reply)

    response = await litellm.acompletion(
        model="o1-preview",
        max_tokens=10,
        messages=[{"role": "system", "content": "Hello!"}],
    )

    assert mocked_call.called

    # Decode the JSON body of the first request captured by the mock route.
    first_call = mocked_call.calls[0]
    sent_payload = json.loads(first_call.request.content)

    print("request_body: ", sent_payload)

    expected_payload = {
        "model": "o1-preview",
        "max_completion_tokens": 10,
        "messages": [{"role": "user", "content": "Hello!"}],
    }
    assert sent_payload == expected_payload

    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.respx
@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
    """
    Check that `max_completion_tokens` reaches the request body unchanged.

    For each parametrized model, the call is made with
    `max_completion_tokens=10` and one `user` message; the intercepted
    request body is expected to contain exactly the model, that same
    'max_completion_tokens' value, and the original message.
    """
    litellm.set_verbose = True

    # Canned completion returned by the mocked chat-completions endpoint.
    canned_choice = Choices(
        message=Message(content="Mocked response", role="assistant")
    )
    canned_completion = ModelResponse(
        id="cmpl-mock",
        choices=[canned_choice],
        created=int(datetime.now().timestamp()),
        model=model,
    )

    # Intercept the OpenAI endpoint so no real network call is made.
    chat_route = respx_mock.post("https://api.openai.com/v1/chat/completions")
    canned_reply = httpx.Response(200, json=canned_completion.dict())
    mocked_call = chat_route.mock(return_value=canned_reply)

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    assert mocked_call.called

    # Decode the JSON body of the first request captured by the mock route.
    first_call = mocked_call.calls[0]
    sent_payload = json.loads(first_call.request.content)

    print("request_body: ", sent_payload)

    expected_payload = {
        "model": model,
        "max_completion_tokens": 10,
        "messages": [{"role": "user", "content": "Hello!"}],
    }
    assert sent_payload == expected_payload

    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
|
Loading…
Add table
Add a link
Reference in a new issue