forked from phoenix/litellm-mirror
* fix(proxy_server.py): fix azure key vault logic to not require client id/secret * feat(cost_calculator.py): support fireworks ai cost tracking * build(docker-compose.yml): add lines for mounting config.yaml to docker compose Closes https://github.com/BerriAI/litellm/issues/5739 * fix(input.md): update docs to clarify litellm supports content as a list of dictionaries Fixes https://github.com/BerriAI/litellm/issues/5755 * fix(input.md): update input.md to include all message values * fix(image_handling.py): follow image url redirects Fixes https://github.com/BerriAI/litellm/issues/5763 * fix(router.py): Fix model key/base leak in error message Fixes https://github.com/BerriAI/litellm/issues/5762 * fix(http_handler.py): fix linting error * fix(azure.py): fix logging to show azure_ad_token being used Fixes https://github.com/BerriAI/litellm/issues/5767 * fix(_redis.py): add redis sentinel support Closes https://github.com/BerriAI/litellm/issues/4381 * feat(_redis.py): add redis sentinel support Closes https://github.com/BerriAI/litellm/issues/4381 * test(test_completion_cost.py): fix test * Databricks Integration: Integrate Databricks SDK as optional mechanism for fetching API base and token, if unspecified (#5746) * LiteLLM Minor Fixes & Improvements (09/16/2024) (#5723) * coverage (#5713) Signed-off-by: dbczumar <corey.zumar@databricks.com> * Move (#5714) Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix(litellm_logging.py): fix logging client re-init (#5710) Fixes https://github.com/BerriAI/litellm/issues/5695 * fix(presidio.py): Fix logging_hook response and add support for additional presidio variables in guardrails config Fixes https://github.com/BerriAI/litellm/issues/5682 * feat(o1_handler.py): fake streaming for openai o1 models Fixes https://github.com/BerriAI/litellm/issues/5694 * docs: deprecated traceloop integration in favor of native otel (#5249) * fix: fix linting errors * fix: fix linting errors * fix(main.py): fix o1 import --------- 
Signed-off-by: dbczumar <corey.zumar@databricks.com> Co-authored-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com> Co-authored-by: Nir Gazit <nirga@users.noreply.github.com> * feat(spend_management_endpoints.py): expose `/global/spend/refresh` endpoint for updating material view (#5730) * feat(spend_management_endpoints.py): expose `/global/spend/refresh` endpoint for updating material view Supports having `MonthlyGlobalSpend` view be a material view, and exposes an endpoint to refresh it * fix(custom_logger.py): reset calltype * fix: fix linting errors * fix: fix linting error * fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix: fix import * Fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * DB test Signed-off-by: dbczumar <corey.zumar@databricks.com> * Coverage Signed-off-by: dbczumar <corey.zumar@databricks.com> * progress Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix Signed-off-by: dbczumar <corey.zumar@databricks.com> * fix test name Signed-off-by: dbczumar <corey.zumar@databricks.com> --------- Signed-off-by: dbczumar <corey.zumar@databricks.com> Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> Co-authored-by: Nir Gazit <nirga@users.noreply.github.com> * test: fix test * test(test_databricks.py): fix test * fix(databricks/chat.py): handle custom endpoint (e.g. 
sagemaker) * Apply code scanning fix for clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * fix(__init__.py): fix known fireworks ai models --------- Signed-off-by: dbczumar <corey.zumar@databricks.com> Co-authored-by: Corey Zumar <39497902+dbczumar@users.noreply.github.com> Co-authored-by: Nir Gazit <nirga@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
344 lines
11 KiB
Python
344 lines
11 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
from datetime import datetime
|
|
from unittest.mock import AsyncMock
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
import httpx
|
|
import pytest
|
|
from respx import MockRouter
|
|
|
|
import litellm
|
|
from litellm import Choices, Message, ModelResponse
|
|
|
|
# Adds the parent directory to the system path
|
|
|
|
|
|
def return_mocked_response(model: str):
    """Return the canned response body used to mock a provider reply.

    Only ``"bedrock/mistral.mistral-large-2407-v1:0"`` is recognised; for any
    other model name the function returns ``None``.
    """
    if model != "bedrock/mistral.mistral-large-2407-v1:0":
        return None

    assistant_message = {
        "content": [{"text": "Hello! How are you doing today? How can"}],
        "role": "assistant",
    }
    token_usage = {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15}

    return {
        "metrics": {"latencyMs": 316},
        "output": {"message": assistant_message},
        "stopReason": "max_tokens",
        "usage": token_usage,
    }
|
|
|
|
|
|
@pytest.mark.parametrize("model", ["bedrock/mistral.mistral-large-2407-v1:0"])
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Checks that `max_completion_tokens` given to `litellm.acompletion` shows up
    as `inferenceConfig.maxTokens` in the request posted to the mocked
    converse URL.
    """
    litellm.set_verbose = True

    canned_payload = return_mocked_response(model)
    # Drop the "bedrock/" provider prefix; the rest is used in the URL path.
    bedrock_model_id = model.split("/")[1]
    print("\n\nmock_response: ", canned_payload)

    converse_url = (
        f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{bedrock_model_id}/converse"
    )
    route = respx_mock.post(converse_url).mock(
        return_value=httpx.Response(200, json=canned_payload)
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    # The mocked route must have been hit; inspect the first request sent to it.
    assert route.called
    sent_payload = json.loads(route.calls[0].request.content)

    print("request_body: ", sent_payload)

    expected_payload = {
        "messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
        "additionalModelRequestFields": {},
        "system": [],
        "inferenceConfig": {"maxTokens": 10},
    }
    assert sent_payload == expected_payload
    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model",
    # NOTE: the second entry previously carried a stray trailing comma inside
    # the string literal ("...-20240229,"), i.e. an invalid model id.
    ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Tests that:
    - max_completion_tokens is passed as max_tokens to anthropic models
    - the request body's "model" is the model name without the provider prefix
    """
    litellm.set_verbose = True

    # Canned reply returned by the mocked messages endpoint.
    mock_response = {
        "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
        "model": "claude-3-5-sonnet-20240620",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 2095, "output_tokens": 503},
    }

    print("\n\nmock_response: ", mock_response)
    url = "https://api.anthropic.com/v1/messages"
    mock_request = respx_mock.post(url).mock(
        return_value=httpx.Response(200, json=mock_response)
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    # The mocked route must have been hit; inspect the first request sent to it.
    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    print("request_body: ", request_body)

    assert request_body == {
        "messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
        "max_tokens": 10,
        "model": model.split("/")[-1],
    }
    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
|
|
|
|
|
|
def test_all_model_configs():
    """
    For each provider config class below, checks that:
    - "max_completion_tokens" is listed by `get_supported_openai_params`
    - `map_openai_params` maps `max_completion_tokens=10` onto the key that
      the assertion expects for that config (e.g. "max_tokens")

    Each call gets its own fresh dict literals rather than a shared dict, so an
    implementation that mutates its inputs cannot affect a later assertion.
    """
    # --- Vertex AI partner models (Llama3, AI21) ---
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    assert (
        "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
    )
    assert VertexAILlama3Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
    assert VertexAIAi21Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    # --- Fireworks AI ---
    from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
        FireworksAIConfig,
    )

    assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params()
    assert FireworksAIConfig().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    # --- Hugging Face ---
    from litellm.llms.huggingface_restapi import HuggingfaceConfig

    assert "max_completion_tokens" in HuggingfaceConfig().get_supported_openai_params()
    assert HuggingfaceConfig().map_openai_params({"max_completion_tokens": 10}, {}) == {
        "max_new_tokens": 10
    }

    # --- Nvidia NIM ---
    from litellm.llms.nvidia_nim import NvidiaNimConfig

    assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
        model="llama3"
    )
    assert NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- Ollama chat ---
    from litellm.llms.ollama_chat import OllamaChatConfig

    assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params()
    assert OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"num_predict": 10}

    # --- Predibase ---
    from litellm.llms.predibase import PredibaseConfig

    assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params()
    assert PredibaseConfig().map_openai_params(
        {"max_completion_tokens": 10},
        {},
    ) == {"max_new_tokens": 10}

    # --- Codestral text completion ---
    # This check used to appear twice in the function; it is asserted once here.
    from litellm.llms.text_completion_codestral import MistralTextCompletionConfig

    assert (
        "max_completion_tokens"
        in MistralTextCompletionConfig().get_supported_openai_params()
    )
    assert MistralTextCompletionConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- VolcEngine ---
    from litellm.llms.volcengine import VolcEngineConfig

    assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
        model="llama3"
    )
    assert VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- AI21 chat ---
    from litellm.llms.AI21.chat import AI21ChatConfig

    assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
        "jamba-1.5-mini@001"
    )
    assert AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- Azure OpenAI ---
    from litellm.llms.AzureOpenAI.azure import AzureOpenAIConfig

    assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params()
    assert AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    ) == {"max_tokens": 10}

    # --- Bedrock converse ---
    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    assert (
        "max_completion_tokens"
        in AmazonConverseConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"maxTokens": 10}

    # --- Bedrock Anthropic configs ---
    from litellm.llms.bedrock.common_utils import (
        AmazonAnthropicClaude3Config,
        AmazonAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in AmazonAnthropicClaude3Config().get_supported_openai_params()
    )

    assert AmazonAnthropicClaude3Config().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    assert (
        "max_completion_tokens" in AmazonAnthropicConfig().get_supported_openai_params()
    )

    assert AmazonAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens_to_sample": 10}

    # --- Databricks ---
    from litellm.llms.databricks.chat import DatabricksConfig

    assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()

    assert DatabricksConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- Vertex AI Anthropic ---
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
        VertexAIAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in VertexAIAnthropicConfig().get_supported_openai_params()
    )

    assert VertexAIAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    # --- Vertex AI / Google AI Studio Gemini ---
    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
        VertexAIConfig,
        GoogleAIStudioGeminiConfig,
        VertexGeminiConfig,
    )

    assert "max_completion_tokens" in VertexAIConfig().get_supported_openai_params()

    assert VertexAIConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_output_tokens": 10}

    assert (
        "max_completion_tokens"
        in GoogleAIStudioGeminiConfig().get_supported_openai_params()
    )

    assert GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_output_tokens": 10}

    assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()

    assert VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}
|