LiteLLM Minor Fixes & Improvements (10/16/2024) (#6265)

* fix(caching_handler.py): handle positional arguments in add cache logic

Fixes https://github.com/BerriAI/litellm/issues/6264

* feat(litellm_pre_call_utils.py): allow forwarding openai org id to backend client

Addresses https://github.com/BerriAI/litellm/issues/6237

* docs(configs.md): add 'forward_openai_org_id' to docs

* fix(proxy_server.py): return model info if user_model is set

Fixes https://github.com/BerriAI/litellm/issues/6233

* fix(hosted_vllm/chat/transformation.py): don't set tools unless non-none

* fix(openai.py): improve debug log for openai 'str' error

Addresses https://github.com/BerriAI/litellm/issues/6272

* fix(proxy_server.py): fix linting error

* fix(proxy_server.py): fix linting errors

* test: skip WIP test

* docs(openai.md): add docs on passing openai org id from client to openai
This commit is contained in:
Krish Dholakia 2024-10-16 22:16:23 -07:00 committed by GitHub
parent 43878bd2a0
commit 38a9a106d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 371 additions and 47 deletions

View file

@@ -2298,3 +2298,70 @@ def test_basic_caching_import():
assert Cache is not None
print("Cache imported successfully")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio()
async def test_caching_kwargs_input(sync_mode):
    """
    Smoke-test the caching handler's set-cache entry points.

    The model name is supplied positionally (through ``args``) rather than
    inside ``kwargs``. The test makes no assertions: it passes as long as
    ``sync_set_cache`` (``sync_mode`` is True) or ``async_set_cache``
    (otherwise) returns without raising.
    """
    from datetime import datetime

    from litellm import acompletion
    from litellm.caching.caching_handler import LLMCachingHandler
    from litellm.types.utils import (
        Choices,
        CompletionTokensDetails,
        EmbeddingResponse,
        Message,
        ModelResponse,
        PromptTokensDetails,
        Usage,
    )

    handler = LLMCachingHandler(
        original_function=acompletion,
        request_kwargs={},
        start_time=datetime.now(),
    )

    # Build the canned chat-completion response piece by piece, in the same
    # order the nested literal would construct it.
    assistant_message = Message(
        content="Hello! I'm just a computer program, so I don't have feelings, but I'm here to assist you. How can I help you today?",
        role="assistant",
        tool_calls=None,
        function_call=None,
    )
    only_choice = Choices(
        finish_reason="stop",
        index=0,
        message=assistant_message,
    )
    completion_details = CompletionTokensDetails(
        audio_tokens=None, reasoning_tokens=0
    )
    prompt_details = PromptTokensDetails(audio_tokens=None, cached_tokens=0)
    token_usage = Usage(
        completion_tokens=31,
        prompt_tokens=16,
        total_tokens=47,
        completion_tokens_details=completion_details,
        prompt_tokens_details=prompt_details,
    )
    canned_response = ModelResponse(
        id="chatcmpl-AJ119H5XsDnYiZPp5axJ5d7niwqeR",
        choices=[only_choice],
        created=1729095507,
        model="gpt-3.5-turbo-0125",
        object="chat.completion",
        system_fingerprint=None,
        usage=token_usage,
        service_tier=None,
    )

    request_kwargs = {
        "messages": [{"role": "user", "content": "42HHey, how's it going?"}],
        "caching": True,
        "litellm_call_id": "fae2aa4f-9f75-4f11-8c9c-63ab8d9fae26",
        "preset_cache_key": "2f69f5640d5e0f25315d0e132f1278bb643554d14565d2c61d61564b10ade90f",
    }

    # The model is deliberately passed as a positional argument, not a kwarg.
    cache_call = {
        "result": canned_response,
        "kwargs": request_kwargs,
        "args": ("gpt-3.5-turbo",),
    }

    if sync_mode is not True:
        # The async path additionally receives the wrapped function.
        await handler.async_set_cache(**cache_call, original_function=acompletion)
    else:
        handler.sync_set_cache(**cache_call)