build: merge squashed commit

Squashed commit of the following:

commit 6678e15381
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 09:29:15 2025 -0800

    test_prompt_caching

commit bd86e0ac47
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:57:16 2025 -0800

    test_prompt_caching

commit 2fc21ad51e
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:13:45 2025 -0800

    test_aprompt_caching

commit d94cff55ff
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:13:12 2025 -0800

    test_prompt_caching

commit 49c5e7811e
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 07:43:53 2025 -0800

    ui new build

commit cb8d5e5917
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 07:38:56 2025 -0800

    (UI) - Create Key flow for existing users (#8844)

    * working create user button

    * working create user for a key flow

    * allow searching users

    * working create user + key

    * use clear sections on create key

    * better search for users

    * fix create key

    * ui fix create key button - make it neater / cleaner

    * ui fix all keys table

commit 335ba30467
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Wed Feb 26 08:53:17 2025 -0800

    fix: fix file name

commit b8c5b31a4e
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Tue Feb 25 22:54:46 2025 -0800

    fix: fix utils

commit ac6e503461
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Mon Feb 24 10:43:31 2025 -0800

    fix(main.py): fix openai message for assistant msg if role is missing - openai allows this

    Fixes https://github.com/BerriAI/litellm/issues/8661

commit de3989dbc5
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Mon Feb 24 21:19:25 2025 -0800

    fix(get_litellm_params.py): handle no-log being passed in via kwargs

    Fixes https://github.com/BerriAI/litellm/issues/8380
commit fcf4ea3608 (parent da1fd9b25f)
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Wed Feb 26 09:39:27 2025 -0800

8 changed files with 172 additions and 12 deletions

litellm/litellm_core_utils/get_litellm_params.py

@@ -75,7 +75,7 @@ def get_litellm_params(
         "model_info": model_info,
         "proxy_server_request": proxy_server_request,
         "preset_cache_key": preset_cache_key,
-        "no-log": no_log,
+        "no-log": no_log or kwargs.get("no-log"),
         "stream_response": {},  # litellm_call_id: ModelResponse Dict
         "input_cost_per_token": input_cost_per_token,
         "input_cost_per_second": input_cost_per_second,

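For context, a minimal sketch of why the kwargs fallback matters (the flag name comes from the hunk above; the call shape is an assumption): "no-log" contains a hyphen, so it cannot be written as a normal keyword argument and reaches litellm only through **kwargs, leaving the explicit no_log parameter unset.

    import litellm

    # "no-log" is not a valid Python identifier, so callers splat it in via **kwargs;
    # the kwargs.get("no-log") fallback above is what picks it up (issue #8380).
    response = litellm.completion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Hey"}],
        **{"no-log": True},  # disable logging for this request only
    )
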
litellm/litellm_core_utils/litellm_logging.py

@@ -3,7 +3,6 @@
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
 import copy
 import datetime
-from functools import lru_cache
 import json
 import os
 import re
@@ -13,6 +12,7 @@ import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast

 from pydantic import BaseModel
@@ -2513,7 +2513,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
             # auth can be disabled on local deployments of arize phoenix
             if arize_phoenix_config.otlp_auth_headers is not None:
-                os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = arize_phoenix_config.otlp_auth_headers
+                os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
+                    arize_phoenix_config.otlp_auth_headers
+                )

             for callback in _in_memory_loggers:
                 if (
@@ -2521,7 +2523,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
                     and callback.callback_name == "arize_phoenix"
                 ):
                     return callback  # type: ignore
-            _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize_phoenix")
+            _otel_logger = OpenTelemetry(
+                config=otel_config, callback_name="arize_phoenix"
+            )
             _in_memory_loggers.append(_otel_logger)
             return _otel_logger  # type: ignore
         elif logging_integration == "otel":

litellm/main.py

@@ -94,7 +94,7 @@ from litellm.utils import (
     read_config_args,
     supports_httpx_timeout,
     token_counter,
-    validate_chat_completion_messages,
+    validate_and_fix_openai_messages,
     validate_chat_completion_tool_choice,
 )
@@ -851,7 +851,7 @@ def completion(  # type: ignore # noqa: PLR0915
     if model is None:
         raise ValueError("model param not passed in.")
     # validate messages
-    messages = validate_chat_completion_messages(messages=messages)
+    messages = validate_and_fix_openai_messages(messages=messages)
     # validate tool_choice
     tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice)
     ######### unpacking kwargs #####################
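
The practical effect of this swap, assuming validate_and_fix_openai_messages behaves as defined in the litellm/utils.py hunk below: an assistant tool-call message that omits "role" (which the OpenAI API accepts) now passes validation instead of erroring.

    # Illustrative message list for issue #8661; the second entry carries no "role" key
    messages = [
        {"role": "user", "content": "Hey"},
        {"content": "", "tool_calls": []},  # role missing; now defaulted to "assistant"
    ]
    # before: validate_chat_completion_messages(messages=messages)  -> rejected the second message
    # after:  validate_and_fix_openai_messages(messages=messages)   -> fills the role in, then validates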

File diff suppressed because one or more lines are too long


@@ -1,5 +1,24 @@
 model_list:
   - model_name: claude-3.5
     litellm_params:
+<<<<<<< HEAD
       model: claude-3-5-sonnet-latest
       api_key: os.environ/ANTHROPIC_API_KEY
+=======
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: http://0.0.0.0:8090
+  - model_name: deepseek-r1
+    litellm_params:
+      model: bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:888602223428:imported-model/bnnr6463ejgf
+  - model_name: deepseek-r1-api
+    litellm_params:
+      model: deepseek/deepseek-reasoner
+  - model_name: cohere.embed-english-v3
+    litellm_params:
+      model: bedrock/cohere.embed-english-v3
+      api_key: os.environ/COHERE_API_KEY
+
+litellm_settings:
+  callbacks: ["langfuse"]
+>>>>>>> f86a609ea (fix(get_litellm_params.py): handle no-log being passed in via kwargs)

litellm/utils.py

@@ -5932,6 +5932,18 @@ def convert_to_dict(message: Union[BaseModel, dict]) -> dict:
     )


+def validate_and_fix_openai_messages(messages: List):
+    """
+    Ensures all messages are valid OpenAI chat completion messages.
+
+    Handles missing role for assistant messages.
+    """
+    for message in messages:
+        if not message.get("role"):
+            message["role"] = "assistant"
+    return validate_chat_completion_messages(messages=messages)
+
+
 def validate_chat_completion_messages(messages: List[AllMessageValues]):
     """
     Ensures all messages are valid OpenAI chat completion messages.
@@ -6282,11 +6294,18 @@ def get_end_user_id_for_cost_tracking(
         return None
     return end_user_id


-def should_use_cohere_v1_client(api_base: Optional[str], present_version_params: List[str]):
+def should_use_cohere_v1_client(
+    api_base: Optional[str], present_version_params: List[str]
+):
     if not api_base:
         return False
-    uses_v1_params = ("max_chunks_per_doc" in present_version_params) and ('max_tokens_per_doc' not in present_version_params)
-    return api_base.endswith("/v1/rerank") or (uses_v1_params and not api_base.endswith("/v2/rerank"))
+    uses_v1_params = ("max_chunks_per_doc" in present_version_params) and (
+        "max_tokens_per_doc" not in present_version_params
+    )
+    return api_base.endswith("/v1/rerank") or (
+        uses_v1_params and not api_base.endswith("/v2/rerank")
+    )


 def is_prompt_caching_valid_prompt(
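
To make the rerank routing concrete, here is an illustrative truth table using a local copy of the helper above (the example URLs are assumptions, not from the diff):

    from typing import List, Optional

    def should_use_cohere_v1_client(
        api_base: Optional[str], present_version_params: List[str]
    ):
        if not api_base:
            return False
        uses_v1_params = ("max_chunks_per_doc" in present_version_params) and (
            "max_tokens_per_doc" not in present_version_params
        )
        return api_base.endswith("/v1/rerank") or (
            uses_v1_params and not api_base.endswith("/v2/rerank")
        )

    # explicit v1 base -> v1 client
    assert should_use_cohere_v1_client("https://api.cohere.com/v1/rerank", [])
    # explicit v2 base wins over v1-only params -> v2 client
    assert not should_use_cohere_v1_client("https://api.cohere.com/v2/rerank", ["max_chunks_per_doc"])
    # ambiguous base but v1-only param present -> v1 client
    assert should_use_cohere_v1_client("https://example.com/rerank", ["max_chunks_per_doc"])
    # no api_base at all -> v2 client
    assert not should_use_cohere_v1_client(None, ["max_chunks_per_doc"])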

tests/litellm/test_main.py (new file, 119 lines)

@@ -0,0 +1,119 @@
import json
import os
import sys

import pytest
from fastapi.testclient import TestClient

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from unittest.mock import MagicMock, patch

import litellm


@pytest.fixture
def openai_api_response():
    mock_response_data = {
        "id": "chatcmpl-B0W3vmiM78Xkgx7kI7dr7PC949DMS",
        "choices": [
            {
                "finish_reason": "stop",
                "index": 0,
                "logprobs": None,
                "message": {
                    "content": "",
                    "refusal": None,
                    "role": "assistant",
                    "audio": None,
                    "function_call": None,
                    "tool_calls": None,
                },
            }
        ],
        "created": 1739462947,
        "model": "gpt-4o-mini-2024-07-18",
        "object": "chat.completion",
        "service_tier": "default",
        "system_fingerprint": "fp_bd83329f63",
        "usage": {
            "completion_tokens": 1,
            "prompt_tokens": 121,
            "total_tokens": 122,
            "completion_tokens_details": {
                "accepted_prediction_tokens": 0,
                "audio_tokens": 0,
                "reasoning_tokens": 0,
                "rejected_prediction_tokens": 0,
            },
            "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
        },
    }

    return mock_response_data


def test_completion_missing_role(openai_api_response):
    from openai import OpenAI

    from litellm.types.utils import ModelResponse

    client = OpenAI(api_key="test_api_key")

    mock_raw_response = MagicMock()
    mock_raw_response.headers = {
        "x-request-id": "123",
        "openai-organization": "org-123",
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "99",
    }
    mock_raw_response.parse.return_value = ModelResponse(**openai_api_response)

    print(f"openai_api_response: {openai_api_response}")

    with patch.object(
        client.chat.completions.with_raw_response, "create", mock_raw_response
    ) as mock_create:
        litellm.completion(
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": "Hey"},
                {
                    "content": "",
                    "tool_calls": [
                        {
                            "id": "call_m0vFJjQmTH1McvaHBPR2YFwY",
                            "function": {
                                "arguments": '{"input": "dksjsdkjdhskdjshdskhjkhlk"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 0,
                        },
                        {
                            "id": "call_Vw6RaqV2n5aaANXEdp5pYxo2",
                            "function": {
                                "arguments": '{"input": "jkljlkjlkjlkjlk"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 1,
                        },
                        {
                            "id": "call_hBIKwldUEGlNh6NlSXil62K4",
                            "function": {
                                "arguments": '{"input": "jkjlkjlkjlkj;lj"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 2,
                        },
                    ],
                },
            ],
            client=client,
        )

        mock_create.assert_called_once()
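
For reference, the new test can be exercised on its own (assuming a litellm dev checkout with pytest installed), e.g.: pytest tests/litellm/test_main.py -k test_completion_missing_role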