build: merge squashed commit

Squashed commit of the following:

commit 6678e15381
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 09:29:15 2025 -0800

    test_prompt_caching

commit bd86e0ac47
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:57:16 2025 -0800

    test_prompt_caching

commit 2fc21ad51e
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:13:45 2025 -0800

    test_aprompt_caching

commit d94cff55ff
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 08:13:12 2025 -0800

    test_prompt_caching

commit 49c5e7811e
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 07:43:53 2025 -0800

    ui new build

commit cb8d5e5917
Author: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date:   Wed Feb 26 07:38:56 2025 -0800

    (UI) - Create Key flow for existing users (#8844)

    * working create user button

    * working create user for a key flow

    * allow searching users

    * working create user + key

    * use clear sections on create key

    * better search for users

    * fix create key

    * ui fix create key button - make it neater / cleaner

    * ui fix all keys table

commit 335ba30467
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Wed Feb 26 08:53:17 2025 -0800

    fix: fix file name

commit b8c5b31a4e
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Tue Feb 25 22:54:46 2025 -0800

    fix: fix utils

commit ac6e503461
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Mon Feb 24 10:43:31 2025 -0800

    fix(main.py): fix openai message for assistant msg if role is missing - openai allows this

    Fixes https://github.com/BerriAI/litellm/issues/8661

commit de3989dbc5
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date:   Mon Feb 24 21:19:25 2025 -0800

    fix(get_litellm_params.py): handle no-log being passed in via kwargs

    Fixes https://github.com/BerriAI/litellm/issues/8380
Commit: fcf4ea3608 (parent: da1fd9b25f)
Author: Krrish Dholakia
Date:   2025-02-26 09:39:27 -08:00

8 changed files with 172 additions and 12 deletions


@@ -75,7 +75,7 @@ def get_litellm_params(
         "model_info": model_info,
         "proxy_server_request": proxy_server_request,
         "preset_cache_key": preset_cache_key,
-        "no-log": no_log,
+        "no-log": no_log or kwargs.get("no-log"),
         "stream_response": {},  # litellm_call_id: ModelResponse Dict
         "input_cost_per_token": input_cost_per_token,
         "input_cost_per_second": input_cost_per_second,


@@ -3,7 +3,6 @@
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
 import copy
 import datetime
-from functools import lru_cache
 import json
 import os
 import re
@@ -13,6 +12,7 @@ import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast

 from pydantic import BaseModel
@@ -2513,7 +2513,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
         # auth can be disabled on local deployments of arize phoenix
         if arize_phoenix_config.otlp_auth_headers is not None:
-            os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = arize_phoenix_config.otlp_auth_headers
+            os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
+                arize_phoenix_config.otlp_auth_headers
+            )

         for callback in _in_memory_loggers:
             if (
@@ -2521,7 +2523,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
                 and callback.callback_name == "arize_phoenix"
             ):
                 return callback  # type: ignore
-        _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize_phoenix")
+        _otel_logger = OpenTelemetry(
+            config=otel_config, callback_name="arize_phoenix"
+        )
         _in_memory_loggers.append(_otel_logger)
         return _otel_logger  # type: ignore
     elif logging_integration == "otel":


@@ -94,7 +94,7 @@ from litellm.utils import (
     read_config_args,
     supports_httpx_timeout,
     token_counter,
-    validate_chat_completion_messages,
+    validate_and_fix_openai_messages,
     validate_chat_completion_tool_choice,
 )
@@ -851,7 +851,7 @@ def completion(  # type: ignore # noqa: PLR0915
     if model is None:
        raise ValueError("model param not passed in.")
     # validate messages
-    messages = validate_chat_completion_messages(messages=messages)
+    messages = validate_and_fix_openai_messages(messages=messages)
     # validate tool_choice
     tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice)
     ######### unpacking kwargs #####################
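
Illustrative effect of the swap above (a sketch, assuming a configured OpenAI key; model and content are placeholders): an assistant-turn message that omits "role" is repaired instead of being rejected by validation.

    import litellm

    response = litellm.completion(
        model="gpt-4o-mini",
        messages=[
            {"role": "user", "content": "Hey"},
            {"content": "Hi, how can I help?"},  # "role" missing; defaulted to "assistant"
            {"role": "user", "content": "Summarize our chat."},
        ],
    )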

File diff suppressed because one or more lines are too long


@@ -1,5 +1,24 @@
 model_list:
   - model_name: claude-3.5
     litellm_params:
+<<<<<<< HEAD
       model: claude-3-5-sonnet-latest
       api_key: os.environ/ANTHROPIC_API_KEY
+=======
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: http://0.0.0.0:8090
+  - model_name: deepseek-r1
+    litellm_params:
+      model: bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:888602223428:imported-model/bnnr6463ejgf
+  - model_name: deepseek-r1-api
+    litellm_params:
+      model: deepseek/deepseek-reasoner
+  - model_name: cohere.embed-english-v3
+    litellm_params:
+      model: bedrock/cohere.embed-english-v3
+      api_key: os.environ/COHERE_API_KEY
+
+litellm_settings:
+  callbacks: ["langfuse"]
+>>>>>>> f86a609ea (fix(get_litellm_params.py): handle no-log being passed in via kwargs)


@@ -5932,6 +5932,18 @@ def convert_to_dict(message: Union[BaseModel, dict]) -> dict:
     )


+def validate_and_fix_openai_messages(messages: List):
+    """
+    Ensures all messages are valid OpenAI chat completion messages.
+
+    Handles missing role for assistant messages.
+    """
+    for message in messages:
+        if not message.get("role"):
+            message["role"] = "assistant"
+    return validate_chat_completion_messages(messages=messages)
+
+
 def validate_chat_completion_messages(messages: List[AllMessageValues]):
     """
     Ensures all messages are valid OpenAI chat completion messages.
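
The new helper in isolation (a sketch; the import path follows the "from litellm.utils import ..." block shown in the main.py hunk above):

    from litellm.utils import validate_and_fix_openai_messages

    msgs = [
        {"role": "user", "content": "Hey"},
        {"content": "Hi!"},  # no "role"
    ]
    validate_and_fix_openai_messages(messages=msgs)
    # The fix mutates the message in place before the usual validation runs.
    assert msgs[1]["role"] == "assistant"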
@@ -6282,11 +6294,18 @@ def get_end_user_id_for_cost_tracking(
         return None
     return end_user_id


-def should_use_cohere_v1_client(api_base: Optional[str], present_version_params: List[str]):
+def should_use_cohere_v1_client(
+    api_base: Optional[str], present_version_params: List[str]
+):
     if not api_base:
         return False
-    uses_v1_params = ("max_chunks_per_doc" in present_version_params) and ('max_tokens_per_doc' not in present_version_params)
-    return api_base.endswith("/v1/rerank") or (uses_v1_params and not api_base.endswith("/v2/rerank"))
+    uses_v1_params = ("max_chunks_per_doc" in present_version_params) and (
+        "max_tokens_per_doc" not in present_version_params
+    )
+    return api_base.endswith("/v1/rerank") or (
+        uses_v1_params and not api_base.endswith("/v2/rerank")
+    )


 def is_prompt_caching_valid_prompt(
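
How the reformatted helper resolves a few inputs (a sketch against the logic above; the api_base values are illustrative and the import assumes the function stays in litellm.utils):

    from litellm.utils import should_use_cohere_v1_client

    # Explicit v1 endpoint -> v1 client.
    assert should_use_cohere_v1_client("https://api.cohere.ai/v1/rerank", [])

    # v1-only param present and base not pinned to /v2/rerank -> v1 client.
    assert should_use_cohere_v1_client("https://api.cohere.ai", ["max_chunks_per_doc"])

    # No api_base at all -> keep the default (v2) client.
    assert not should_use_cohere_v1_client(None, ["max_chunks_per_doc"])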

tests/litellm/test_main.py (new file, 119 lines added)

@@ -0,0 +1,119 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch
import litellm
@pytest.fixture
def openai_api_response():
mock_response_data = {
"id": "chatcmpl-B0W3vmiM78Xkgx7kI7dr7PC949DMS",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": None,
"message": {
"content": "",
"refusal": None,
"role": "assistant",
"audio": None,
"function_call": None,
"tool_calls": None,
},
}
],
"created": 1739462947,
"model": "gpt-4o-mini-2024-07-18",
"object": "chat.completion",
"service_tier": "default",
"system_fingerprint": "fp_bd83329f63",
"usage": {
"completion_tokens": 1,
"prompt_tokens": 121,
"total_tokens": 122,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0,
},
"prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
},
}
return mock_response_data
def test_completion_missing_role(openai_api_response):
from openai import OpenAI
from litellm.types.utils import ModelResponse
client = OpenAI(api_key="test_api_key")
mock_raw_response = MagicMock()
mock_raw_response.headers = {
"x-request-id": "123",
"openai-organization": "org-123",
"x-ratelimit-limit-requests": "100",
"x-ratelimit-remaining-requests": "99",
}
mock_raw_response.parse.return_value = ModelResponse(**openai_api_response)
print(f"openai_api_response: {openai_api_response}")
with patch.object(
client.chat.completions.with_raw_response, "create", mock_raw_response
) as mock_create:
litellm.completion(
model="gpt-4o-mini",
messages=[
{"role": "user", "content": "Hey"},
{
"content": "",
"tool_calls": [
{
"id": "call_m0vFJjQmTH1McvaHBPR2YFwY",
"function": {
"arguments": '{"input": "dksjsdkjdhskdjshdskhjkhlk"}',
"name": "tool_name",
},
"type": "function",
"index": 0,
},
{
"id": "call_Vw6RaqV2n5aaANXEdp5pYxo2",
"function": {
"arguments": '{"input": "jkljlkjlkjlkjlk"}',
"name": "tool_name",
},
"type": "function",
"index": 1,
},
{
"id": "call_hBIKwldUEGlNh6NlSXil62K4",
"function": {
"arguments": '{"input": "jkjlkjlkjlkj;lj"}',
"name": "tool_name",
},
"type": "function",
"index": 2,
},
],
},
],
client=client,
)
mock_create.assert_called_once()
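
The new test can be run locally from the repo root with: pytest tests/litellm/test_main.py::test_completion_missing_role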