import json
import os
import sys

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from datetime import datetime

from dotenv import load_dotenv

load_dotenv()
import httpx
import pytest
from respx import MockRouter
from unittest.mock import patch, MagicMock, AsyncMock

import litellm
from litellm import Choices, Message, ModelResponse


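# Canned Bedrock Converse API response body used by the Bedrock test below.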
def return_mocked_response(model: str):
    if model == "bedrock/mistral.mistral-large-2407-v1:0":
        return {
            "metrics": {"latencyMs": 316},
            "output": {
                "message": {
                    "content": [{"text": "Hello! How are you doing today? How can"}],
                    "role": "assistant",
                }
            },
            "stopReason": "max_tokens",
            "usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15},
        }


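# `client.post` is patched so no real network call is made; the test only
# inspects the request body that litellm builds for the Converse API
# (the token limit must land in `inferenceConfig.maxTokens`).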
@pytest.mark.parametrize(
    "model",
    [
        "bedrock/mistral.mistral-large-2407-v1:0",
    ],
)
@pytest.mark.asyncio()
async def test_bedrock_max_completion_tokens(model: str):
    """
    Tests that:
    - max_completion_tokens is passed as maxTokens (inferenceConfig) to bedrock models
    """
    from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

    litellm.set_verbose = True

    client = AsyncHTTPHandler()

    mock_response = return_mocked_response(model)
    _model = model.split("/")[1]
    print("\n\nmock_response: ", mock_response)

    with patch.object(client, "post") as mock_client:
        try:
            response = await litellm.acompletion(
                model=model,
                max_completion_tokens=10,
                messages=[{"role": "user", "content": "Hello!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")

        mock_client.assert_called_once()
        request_body = json.loads(mock_client.call_args.kwargs["data"])

        print("request_body: ", request_body)

        assert request_body == {
            "messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
            "additionalModelRequestFields": {},
            "system": [],
            "inferenceConfig": {"maxTokens": 10},
        }


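# The Anthropic Messages API takes a top-level `max_tokens`; the request body
# should carry `max_tokens: 10` and the model name without the provider prefix.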
@pytest.mark.parametrize(
    "model",
    ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
)
@pytest.mark.asyncio()
async def test_anthropic_api_max_completion_tokens(model: str):
    """
    Tests that:
    - max_completion_tokens is passed as max_tokens to anthropic models
    """
    litellm.set_verbose = True
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    mock_response = {
        "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
        "model": "claude-3-5-sonnet-20240620",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 2095, "output_tokens": 503},
    }

    client = HTTPHandler()

    print("\n\nmock_response: ", mock_response)

    with patch.object(client, "post") as mock_client:
        try:
            response = await litellm.acompletion(
                model=model,
                max_completion_tokens=10,
                messages=[{"role": "user", "content": "Hello!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")
        mock_client.assert_called_once()
        request_body = mock_client.call_args.kwargs["json"]

        print("request_body: ", request_body)

        assert request_body == {
            "messages": [
                {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
            ],
            "max_tokens": 10,
            "model": model.split("/")[-1],
        }


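# Walks the provider config classes and checks, for each one, that
# `max_completion_tokens` is advertised via get_supported_openai_params() and
# that map_openai_params() translates it to the provider-specific key
# (e.g. max_tokens, max_new_tokens, num_predict, maxTokens, max_output_tokens).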
def test_all_model_configs():
    from litellm.llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    assert (
        "max_completion_tokens"
        in VertexAILlama3Config().get_supported_openai_params(model="llama3")
    )
    assert VertexAILlama3Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
    ) == {"max_tokens": 10}

    assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
    assert VertexAIAi21Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
    ) == {"max_tokens": 10}

    from litellm.llms.fireworks_ai.chat.transformation import (
        FireworksAIConfig,
    )

    assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params(
        model="llama3"
    )
    assert FireworksAIConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    from litellm.llms.huggingface.chat.handler import HuggingfaceConfig

    assert "max_completion_tokens" in HuggingfaceConfig().get_supported_openai_params(
        model="llama3"
    )
    assert HuggingfaceConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="llama3",
        drop_params=False,
    ) == {"max_new_tokens": 10}

    from litellm.llms.nvidia_nim.chat import NvidiaNimConfig

    assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
        model="llama3"
    )
    assert NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    from litellm.llms.ollama_chat import OllamaChatConfig

    assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params(
        model="llama3"
    )
    assert OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"num_predict": 10}

    from litellm.llms.predibase.chat.transformation import PredibaseConfig

    assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params(
        model="llama3"
    )
    assert PredibaseConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_new_tokens": 10}

    from litellm.llms.codestral.completion.transformation import (
        CodestralTextCompletionConfig,
    )

    assert (
        "max_completion_tokens"
        in CodestralTextCompletionConfig().get_supported_openai_params(model="llama3")
    )
    assert CodestralTextCompletionConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    from litellm.llms.volcengine import VolcEngineConfig

    assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
        model="llama3"
    )
    assert VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

    from litellm.llms.ai21.chat.transformation import AI21ChatConfig

    assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
        "jamba-1.5-mini@001"
    )
    assert AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_tokens": 10}

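    # Azure OpenAI keeps the OpenAI-style name: the mapping is expected to pass
    # `max_completion_tokens` through unchanged rather than rename it.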
    from litellm.llms.azure.chat.gpt_transformation import AzureOpenAIConfig

    assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params(
        model="gpt-3.5-turbo"
    )
    assert AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    ) == {"max_completion_tokens": 10}

    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    assert (
        "max_completion_tokens"
        in AmazonConverseConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"maxTokens": 10}

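    # AmazonAnthropicClaude3Config maps the limit to `max_tokens`; the legacy
    # AmazonAnthropicConfig maps it to `max_tokens_to_sample`.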
    from litellm import (
        AmazonAnthropicClaude3Config,
        AmazonAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in AmazonAnthropicClaude3Config().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )

    assert AmazonAnthropicClaude3Config().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        drop_params=False,
    ) == {"max_tokens": 10}

    assert (
        "max_completion_tokens" in AmazonAnthropicConfig().get_supported_openai_params()
    )

    assert AmazonAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens_to_sample": 10}

    from litellm.llms.databricks.chat.handler import DatabricksConfig

    assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()

    assert DatabricksConfig().map_openai_params(
        model="databricks/llama-3-70b-instruct",
        drop_params=False,
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
        VertexAIAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in VertexAIAnthropicConfig().get_supported_openai_params(
            model="claude-3-5-sonnet-20240620"
        )
    )

    assert VertexAIAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        model="claude-3-5-sonnet-20240620",
        drop_params=False,
    ) == {"max_tokens": 10}

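    # Both Gemini configs (Vertex AI and Google AI Studio) map the limit to
    # `max_output_tokens`.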
    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )
    from litellm.llms.gemini.chat.transformation import GoogleAIStudioGeminiConfig

    assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params(
        model="gemini-1.0-pro"
    )

    assert VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}

    assert (
        "max_completion_tokens"
        in GoogleAIStudioGeminiConfig().get_supported_openai_params(
            model="gemini-1.0-pro"
        )
    )

    assert GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}
assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params(
|
|
model="gemini-1.0-pro"
|
|
)
|
|
|
|
assert VertexGeminiConfig().map_openai_params(
|
|
model="gemini-1.0-pro",
|
|
non_default_params={"max_completion_tokens": 10},
|
|
optional_params={},
|
|
drop_params=False,
|
|
) == {"max_output_tokens": 10}
|