forked from phoenix/litellm-mirror
* fix(factory.py): ensure tool call converts image url Fixes https://github.com/BerriAI/litellm/issues/6953 * fix(transformation.py): support mp4 + pdf url's for vertex ai Fixes https://github.com/BerriAI/litellm/issues/6936 * fix(http_handler.py): mask gemini api key in error logs Fixes https://github.com/BerriAI/litellm/issues/6963 * docs(prometheus.md): update prometheus FAQs * feat(auth_checks.py): ensure specific model access > wildcard model access if wildcard model is in access group, but specific model is not - deny access * fix(auth_checks.py): handle auth checks for team based model access groups handles scenario where model access group used for wildcard models * fix(internal_user_endpoints.py): support adding guardrails on `/user/update` Fixes https://github.com/BerriAI/litellm/issues/6942 * fix(key_management_endpoints.py): fix prepare_metadata_fields helper * fix: fix tests * build(requirements.txt): bump openai dep version fixes proxies argument * test: fix tests * fix(http_handler.py): fix error message masking * fix(bedrock_guardrails.py): pass in prepped data * test: fix test * test: fix nvidia nim test * fix(http_handler.py): return original response headers * fix: revert maskedhttpstatuserror * test: update tests * test: cleanup test * fix(key_management_endpoints.py): fix metadata field update logic * fix(key_management_endpoints.py): maintain initial order of guardrails in key update * fix(key_management_endpoints.py): handle prepare metadata * fix: fix linting errors * fix: fix linting errors * fix: fix linting errors * fix: fix key management errors * fix(key_management_endpoints.py): update metadata * test: update test * refactor: add more debug statements * test: skip flaky test * test: fix test * fix: fix test * fix: fix update metadata logic * fix: fix test * ci(config.yml): change db url for e2e ui testing
110 lines
3 KiB
Python
110 lines
3 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime
|
|
from unittest.mock import AsyncMock, patch, MagicMock
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
|
|
|
|
import httpx
|
|
import pytest
|
|
from respx import MockRouter
|
|
|
|
import litellm
|
|
from litellm import Choices, Message, ModelResponse
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_o1_handle_system_role():
    """
    Check the request litellm sends for an o1 model.

    The kwargs recorded on the patched OpenAI ``create`` must show:
    - ``max_tokens`` forwarded as ``max_completion_tokens``
    - the ``system`` role message forwarded with role ``user``
    """
    from openai import AsyncOpenAI

    litellm.set_verbose = True

    openai_client = AsyncOpenAI(api_key="fake-api-key")
    raw_completions = openai_client.chat.completions.with_raw_response

    with patch.object(raw_completions, "create") as mocked_create:
        call_kwargs = dict(
            model="o1-preview",
            max_tokens=10,
            messages=[{"role": "system", "content": "Hello!"}],
            client=openai_client,
        )
        try:
            await litellm.acompletion(**call_kwargs)
        except Exception as e:
            # Errors are printed rather than re-raised; the checks below only
            # inspect the call recorded on the patched ``create``.
            print(f"Error: {e}")

        mocked_create.assert_called_once()
        sent_kwargs = mocked_create.call_args.kwargs

        print("request_body: ", sent_kwargs)

        expected_messages = [{"role": "user", "content": "Hello!"}]
        assert sent_kwargs["model"] == "o1-preview"
        assert sent_kwargs["max_completion_tokens"] == 10
        assert sent_kwargs["messages"] == expected_messages
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
async def test_o1_max_completion_tokens(model: str):
    """
    Check that ``max_completion_tokens`` reaches the OpenAI client unchanged.

    For each parametrized model, the kwargs recorded on the patched ``create``
    must carry the same model name, ``max_completion_tokens == 10`` and the
    original user message.
    """
    from openai import AsyncOpenAI

    litellm.set_verbose = True

    openai_client = AsyncOpenAI(api_key="fake-api-key")
    raw_completions = openai_client.chat.completions.with_raw_response

    with patch.object(raw_completions, "create") as mocked_create:
        call_kwargs = dict(
            model=model,
            max_completion_tokens=10,
            messages=[{"role": "user", "content": "Hello!"}],
            client=openai_client,
        )
        try:
            await litellm.acompletion(**call_kwargs)
        except Exception as e:
            # Errors are printed rather than re-raised; the checks below only
            # inspect the call recorded on the patched ``create``.
            print(f"Error: {e}")

        mocked_create.assert_called_once()
        sent_kwargs = mocked_create.call_args.kwargs

        print("request_body: ", sent_kwargs)

        # Compare against a fresh literal rather than the list passed in above.
        expected_messages = [{"role": "user", "content": "Hello!"}]
        assert sent_kwargs["model"] == model
        assert sent_kwargs["max_completion_tokens"] == 10
        assert sent_kwargs["messages"] == expected_messages
|
|
|
|
|
|
def test_litellm_responses():
    """
    Check the type of ``usage.completion_tokens_details`` on a ModelResponse.

    A plain dict is supplied for ``completion_tokens_details``; the attribute
    read back from the response must be a ``CompletionTokensDetails`` instance.
    """
    from litellm.types.utils import CompletionTokensDetails
    from litellm import ModelResponse

    usage_payload = {
        "completion_tokens": 436,
        "prompt_tokens": 14,
        "total_tokens": 450,
        "completion_tokens_details": {"reasoning_tokens": 0},
    }
    response = ModelResponse(usage=usage_payload)

    print("response: ", response)

    details = response.usage.completion_tokens_details
    assert isinstance(details, CompletionTokensDetails)
|