litellm/tests/llm_translation/test_max_completion_tokens.py
Krish Dholakia d46660ea0f
LiteLLM Minor Fixes & Improvements (09/18/2024) (#5772)
* fix(proxy_server.py): fix azure key vault logic to not require client id/secret

* feat(cost_calculator.py): support fireworks ai cost tracking

* build(docker-compose.yml): add lines for mounting config.yaml to docker compose

Closes https://github.com/BerriAI/litellm/issues/5739

* fix(input.md): update docs to clarify litellm supports content as a list of dictionaries

Fixes https://github.com/BerriAI/litellm/issues/5755

* fix(input.md): update input.md to include all message values

* fix(image_handling.py): follow image url redirects

Fixes https://github.com/BerriAI/litellm/issues/5763

* fix(router.py): Fix model key/base leak in error message

Fixes https://github.com/BerriAI/litellm/issues/5762

* fix(http_handler.py): fix linting error

* fix(azure.py): fix logging to show azure_ad_token being used

Fixes https://github.com/BerriAI/litellm/issues/5767

* fix(_redis.py): add redis sentinel support

Closes https://github.com/BerriAI/litellm/issues/4381

* feat(_redis.py): add redis sentinel support

Closes https://github.com/BerriAI/litellm/issues/4381

* test(test_completion_cost.py): fix test

* Databricks Integration: Integrate Databricks SDK as optional mechanism for fetching API base and token, if unspecified (#5746)

* LiteLLM Minor Fixes & Improvements (09/16/2024)  (#5723)

* coverage (#5713)

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Move (#5714)

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix(litellm_logging.py): fix logging client re-init (#5710)

Fixes https://github.com/BerriAI/litellm/issues/5695

* fix(presidio.py): Fix logging_hook response and add support for additional presidio variables in guardrails config

Fixes https://github.com/BerriAI/litellm/issues/5682

* feat(o1_handler.py): fake streaming for openai o1 models

Fixes https://github.com/BerriAI/litellm/issues/5694

* docs: deprecated traceloop integration in favor of native otel (#5249)

* fix: fix linting errors

* fix: fix linting errors

* fix(main.py): fix o1 import

---------

Signed-off-by: dbczumar <corey.zumar@databricks.com>
Co-authored-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com>
Co-authored-by: Nir Gazit <nirga@users.noreply.github.com>

* feat(spend_management_endpoints.py): expose `/global/spend/refresh` endpoint for updating material view (#5730)

* feat(spend_management_endpoints.py): expose `/global/spend/refresh` endpoint for updating material view

Supports making the `MonthlyGlobalSpend` view a materialized view, and exposes an endpoint to refresh it.

* fix(custom_logger.py): reset calltype

* fix: fix linting errors

* fix: fix linting error

* fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix: fix import

* Fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* DB test

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Coverage

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* progress

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* fix test name

Signed-off-by: dbczumar <corey.zumar@databricks.com>

---------

Signed-off-by: dbczumar <corey.zumar@databricks.com>
Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: Nir Gazit <nirga@users.noreply.github.com>

* test: fix test

* test(test_databricks.py): fix test

* fix(databricks/chat.py): handle custom endpoint (e.g. sagemaker)

* Apply code scanning fix for clear-text logging of sensitive information

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* fix(__init__.py): fix known fireworks ai models

---------

Signed-off-by: dbczumar <corey.zumar@databricks.com>
Co-authored-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com>
Co-authored-by: Nir Gazit <nirga@users.noreply.github.com>
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2024-09-19 13:25:29 -07:00

344 lines
11 KiB
Python

import json
import os
import sys
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from datetime import datetime
from unittest.mock import AsyncMock
from dotenv import load_dotenv
load_dotenv()
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
# Adds the parent directory to the system path
def return_mocked_response(model: str):
    """Return a canned Bedrock Converse-style response body for ``model``.

    Args:
        model: Fully-qualified model name, including the provider prefix.

    Returns:
        A dict holding the mocked response payload when ``model`` is
        ``"bedrock/mistral.mistral-large-2407-v1:0"``; ``None`` for any
        other model name.
    """
    if model != "bedrock/mistral.mistral-large-2407-v1:0":
        # No fixture is defined for other models; make the fallback explicit
        # instead of silently falling off the end of the function.
        return None

    return {
        "metrics": {"latencyMs": 316},
        "output": {
            "message": {
                "content": [{"text": "Hello! How are you doing today? How can"}],
                "role": "assistant",
            }
        },
        "stopReason": "max_tokens",
        "usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15},
    }
@pytest.mark.parametrize(
    "model",
    ["bedrock/mistral.mistral-large-2407-v1:0"],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Check that `max_completion_tokens` reaches Bedrock as `maxTokens`.

    The Converse URL for the model is mocked with respx, `litellm.acompletion`
    is called with `max_completion_tokens=10`, and the JSON body of the
    captured request is compared against the expected payload.
    """
    litellm.set_verbose = True

    canned_payload = return_mocked_response(model)
    # Strip the provider prefix; the URL path uses the bare model id.
    bedrock_model_id = model.split("/")[1]
    print("\n\nmock_response: ", canned_payload)

    converse_url = f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{bedrock_model_id}/converse"
    converse_route = respx_mock.post(converse_url).mock(
        return_value=httpx.Response(200, json=canned_payload)
    )

    result = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    # The mocked route must have been hit, and the first captured request
    # carries the translated parameters.
    assert converse_route.called
    sent_body = json.loads(converse_route.calls[0].request.content)
    print("request_body: ", sent_body)

    expected_body = {
        "messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
        "additionalModelRequestFields": {},
        "system": [],
        "inferenceConfig": {"maxTokens": 10},
    }
    assert sent_body == expected_body

    print(f"response: {result}")
    assert isinstance(result, ModelResponse)
@pytest.mark.parametrize(
    "model",
    # Model ids must not carry stray punctuation: the request body assertion
    # below derives the expected "model" field from this string.
    ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Tests that:
    - max_completion_tokens is passed as max_tokens to anthropic models

    The Anthropic messages URL is mocked with respx, `litellm.acompletion`
    is called with `max_completion_tokens=10`, and the JSON body of the
    captured request is compared against the expected payload.
    """
    litellm.set_verbose = True

    mock_response = {
        "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
        "model": "claude-3-5-sonnet-20240620",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 2095, "output_tokens": 503},
    }
    print("\n\nmock_response: ", mock_response)

    url = "https://api.anthropic.com/v1/messages"
    mock_request = respx_mock.post(url).mock(
        return_value=httpx.Response(200, json=mock_response)
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)
    print("request_body: ", request_body)

    assert request_body == {
        "messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
        "max_tokens": 10,
        # The provider prefix is dropped before the model id is sent.
        "model": model.split("/")[-1],
    }

    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
def test_all_model_configs():
    """
    Check `max_completion_tokens` handling across provider config classes.

    For each config class this asserts that:
    - `max_completion_tokens` is listed by `get_supported_openai_params`
    - `map_openai_params` maps `{"max_completion_tokens": 10}` to the
      provider-specific key shown in the corresponding assertion
    """
    param = "max_completion_tokens"

    # --- Vertex AI partner models (Llama3, AI21) ---
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    supported = VertexAILlama3Config().get_supported_openai_params()
    assert param in supported
    mapped = VertexAILlama3Config().map_openai_params({param: 10}, {}, "llama3")
    assert mapped == {"max_tokens": 10}

    supported = VertexAIAi21Config().get_supported_openai_params()
    assert param in supported
    mapped = VertexAIAi21Config().map_openai_params({param: 10}, {}, "llama3")
    assert mapped == {"max_tokens": 10}

    # --- Fireworks AI ---
    from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
        FireworksAIConfig,
    )

    supported = FireworksAIConfig().get_supported_openai_params()
    assert param in supported
    mapped = FireworksAIConfig().map_openai_params({param: 10}, {}, "llama3")
    assert mapped == {"max_tokens": 10}

    # --- Hugging Face ---
    from litellm.llms.huggingface_restapi import HuggingfaceConfig

    supported = HuggingfaceConfig().get_supported_openai_params()
    assert param in supported
    mapped = HuggingfaceConfig().map_openai_params({param: 10}, {})
    assert mapped == {"max_new_tokens": 10}

    # --- Nvidia NIM ---
    from litellm.llms.nvidia_nim import NvidiaNimConfig

    supported = NvidiaNimConfig().get_supported_openai_params(model="llama3")
    assert param in supported
    mapped = NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- Ollama chat ---
    from litellm.llms.ollama_chat import OllamaChatConfig

    supported = OllamaChatConfig().get_supported_openai_params()
    assert param in supported
    mapped = OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"num_predict": 10}

    # --- Predibase ---
    from litellm.llms.predibase import PredibaseConfig

    supported = PredibaseConfig().get_supported_openai_params()
    assert param in supported
    mapped = PredibaseConfig().map_openai_params({param: 10}, {})
    assert mapped == {"max_new_tokens": 10}

    # --- Codestral text completion (positional call form) ---
    from litellm.llms.text_completion_codestral import MistralTextCompletionConfig

    supported = MistralTextCompletionConfig().get_supported_openai_params()
    assert param in supported
    mapped = MistralTextCompletionConfig().map_openai_params({param: 10}, {})
    assert mapped == {"max_tokens": 10}

    # --- VolcEngine ---
    from litellm.llms.volcengine import VolcEngineConfig

    supported = VolcEngineConfig().get_supported_openai_params(model="llama3")
    assert param in supported
    mapped = VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- AI21 chat ---
    from litellm.llms.AI21.chat import AI21ChatConfig

    supported = AI21ChatConfig().get_supported_openai_params("jamba-1.5-mini@001")
    assert param in supported
    mapped = AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- Azure OpenAI ---
    from litellm.llms.AzureOpenAI.azure import AzureOpenAIConfig

    supported = AzureOpenAIConfig().get_supported_openai_params()
    assert param in supported
    mapped = AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={param: 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    )
    assert mapped == {"max_tokens": 10}

    # --- Bedrock Converse ---
    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    supported = AmazonConverseConfig().get_supported_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0"
    )
    assert param in supported
    mapped = AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={param: 10},
        optional_params={},
        drop_params=False,
    )
    assert mapped == {"maxTokens": 10}

    # --- Codestral text completion again (keyword call form) ---
    from litellm.llms.text_completion_codestral import MistralTextCompletionConfig

    supported = MistralTextCompletionConfig().get_supported_openai_params()
    assert param in supported
    mapped = MistralTextCompletionConfig().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- Bedrock Anthropic (Claude 3 and legacy) ---
    from litellm.llms.bedrock.common_utils import (
        AmazonAnthropicClaude3Config,
        AmazonAnthropicConfig,
    )

    supported = AmazonAnthropicClaude3Config().get_supported_openai_params()
    assert param in supported
    mapped = AmazonAnthropicClaude3Config().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    supported = AmazonAnthropicConfig().get_supported_openai_params()
    assert param in supported
    mapped = AmazonAnthropicConfig().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens_to_sample": 10}

    # --- Databricks ---
    from litellm.llms.databricks.chat import DatabricksConfig

    supported = DatabricksConfig().get_supported_openai_params()
    assert param in supported
    mapped = DatabricksConfig().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- Vertex AI Anthropic ---
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
        VertexAIAnthropicConfig,
    )

    supported = VertexAIAnthropicConfig().get_supported_openai_params()
    assert param in supported
    mapped = VertexAIAnthropicConfig().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_tokens": 10}

    # --- Vertex AI / Google AI Studio Gemini ---
    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
        VertexAIConfig,
        GoogleAIStudioGeminiConfig,
        VertexGeminiConfig,
    )

    supported = VertexAIConfig().get_supported_openai_params()
    assert param in supported
    mapped = VertexAIConfig().map_openai_params(
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_output_tokens": 10}

    supported = GoogleAIStudioGeminiConfig().get_supported_openai_params()
    assert param in supported
    mapped = GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={param: 10},
        optional_params={},
    )
    assert mapped == {"max_output_tokens": 10}

    supported = VertexGeminiConfig().get_supported_openai_params()
    assert param in supported
    mapped = VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={param: 10},
        optional_params={},
        drop_params=False,
    )
    assert mapped == {"max_output_tokens": 10}