[Feat] Add max_completion_tokens param (#5691)

* add max_completion_tokens
* add max_completion_tokens
* add max_completion_tokens support for OpenAI models
* add max_completion_tokens param
* add max_completion_tokens for bedrock converse models
* add test for converse maxTokens
* fix openai o1 param mapping test
* move test optional params
* add max_completion_tokens for anthropic api
* fix conftest
* add max_completion_tokens for vertex ai partner models
* add max_completion_tokens for fireworks ai
* add max_completion_tokens for hf rest api
* add test for param mapping
* add param mapping for vertex, gemini + testing
* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd
* add max_completion_tokens to openai supported params
* fix fireworks ai param mapping
2024-09-14 14:57:01 -07:00 · 2024-09-14 14:57:01 -07:00 · 85acdb9193
commit 85acdb9193
parent 415a3ede9e
31 changed files with 591 additions and 35 deletions
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -0,0 +1,101 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import httpx
+import pytest
+from respx import MockRouter
+
+import litellm
+from litellm import Choices, Message, ModelResponse
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_o1_handle_system_role(respx_mock: MockRouter):
+    """
+    Check the request body sent for o1-preview via a respx-mocked OpenAI endpoint:
+    - max_tokens=10 is sent as 'max_completion_tokens': 10
+    - the message role 'system' is sent as 'user'
+    """
+    litellm.set_verbose = True  # set on the litellm module; not reset in this test
+
+    mock_response = ModelResponse(
+        id="cmpl-mock",
+        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
+        created=int(datetime.now().timestamp()),
+        model="o1-preview",
+    )
+
+    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
+        return_value=httpx.Response(200, json=mock_response.dict())  # canned 200 reply
+    )
+
+    response = await litellm.acompletion(
+        model="o1-preview",
+        max_tokens=10,
+        messages=[{"role": "system", "content": "Hello!"}],
+    )
+
+    assert mock_request.called
+    request_body = json.loads(mock_request.calls[0].request.content)  # first captured call
+
+    print("request_body: ", request_body)
+
+    assert request_body == {  # exact equality: any extra key in the body fails the test
+        "model": "o1-preview",
+        "max_completion_tokens": 10,
+        "messages": [{"role": "user", "content": "Hello!"}],
+    }
+
+    print(f"response: {response}")
+    assert isinstance(response, ModelResponse)
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
+async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
+    """
+    For each parametrized model (gpt-4 variants and o1-preview), check that
+    max_completion_tokens=10 reaches the mocked OpenAI request body unchanged.
+    """
+    litellm.set_verbose = True  # set on the litellm module; not reset in this test
+
+    mock_response = ModelResponse(
+        id="cmpl-mock",
+        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
+        created=int(datetime.now().timestamp()),
+        model=model,
+    )
+
+    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
+        return_value=httpx.Response(200, json=mock_response.dict())  # canned 200 reply
+    )
+
+    response = await litellm.acompletion(
+        model=model,
+        max_completion_tokens=10,
+        messages=[{"role": "user", "content": "Hello!"}],
+    )
+
+    assert mock_request.called
+    request_body = json.loads(mock_request.calls[0].request.content)  # first captured call
+
+    print("request_body: ", request_body)
+
+    assert request_body == {  # exact equality: any extra key in the body fails the test
+        "model": model,
+        "max_completion_tokens": 10,
+        "messages": [{"role": "user", "content": "Hello!"}],
+    }
+
+    print(f"response: {response}")
+    assert isinstance(response, ModelResponse)