Litellm dev 12 30 2024 p1 (#7480)

* test(azure_openai_o1.py): initial commit with testing for azure openai o1 preview model

* fix(base_llm_unit_tests.py): handle azure o1 preview response format tests

skip as o1 on azure doesn't support tool calling yet

* fix: initial commit of azure o1 handler using openai caller

simplifies calling + allows the fake streaming logic already implemented for openai to just work
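
The rough shape of that delegation, as a minimal sketch (the 'AzureO1Handler' class and 'openai_complete' callable names here are hypothetical, not litellm's actual internals):

    # Illustrative sketch of the delegation idea, not litellm's actual implementation.
    # 'AzureO1Handler' and 'openai_complete' are hypothetical names.
    from typing import Any, Callable, Dict, List


    class AzureO1Handler:
        """Routes azure o1 requests through an existing OpenAI-style completion callable."""

        def __init__(self, openai_complete: Callable[..., Any]):
            # reuse the OpenAI caller instead of maintaining a separate Azure code path
            self._complete = openai_complete

        def completion(self, model: str, messages: List[Dict[str, Any]], **kwargs: Any) -> Any:
            # azure o1 requests are OpenAI-shaped, so simply delegate
            return self._complete(model=model, messages=messages, **kwargs)

Keeping azure o1 on the OpenAI code path means behavior already implemented there, such as fake streaming, applies without duplication.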

* feat(azure/o1_handler.py): fake o1 streaming for azure o1 models

azure does not currently support streaming for o1
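
Conceptually, fake streaming fetches the complete (non-streamed) response and replays it as delta chunks so 'stream=True' callers still get an iterator. The sketch below is a simplified illustration of that idea, not litellm's actual CustomStreamWrapper logic; the 'fake_stream' helper and chunk shape are hypothetical.

    # Simplified illustration of fake streaming: split a finished completion into
    # OpenAI-style delta chunks. Chunk size and dict shape are illustrative only.
    from typing import Any, Dict, Iterator


    def fake_stream(full_text: str, chunk_size: int = 20) -> Iterator[Dict[str, Any]]:
        """Yield delta chunks built from an already-complete response."""
        for i in range(0, len(full_text), chunk_size):
            yield {"choices": [{"delta": {"content": full_text[i : i + chunk_size]}}]}
        # a final empty delta carries the finish reason
        yield {"choices": [{"delta": {}, "finish_reason": "stop"}]}


    # usage: replay a completed o1 response as if the provider had streamed it
    for chunk in fake_stream("The o1 response arrives all at once and is replayed as chunks."):
        print(chunk)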

* feat(o1_transformation.py): support overriding 'should_fake_stream' on azure/o1 via 'supports_native_streaming' param on model info

enables users to toggle this on once azure enables o1 streaming, without needing to bump versions
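
For example, a router deployment could expose the flag through 'model_info'; this is a sketch only, and the api_base / api_key / api_version values below are placeholders.

    # Sketch: toggling the behavior per deployment via model_info.
    # api_base / api_key / api_version values are placeholders.
    import os

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "o1-preview",
                "litellm_params": {
                    "model": "azure/o1-preview",
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": "2024-12-01-preview",
                },
                # flip to True once Azure supports native o1 streaming
                "model_info": {"supports_native_streaming": False},
            }
        ]
    )

    response = router.completion(
        model="o1-preview",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,  # faked while supports_native_streaming is False
    )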

* style(router.py): remove 'give feedback/get help' messaging when router is used

Prevents noisy messaging

Closes https://github.com/BerriAI/litellm/issues/5942

* test: fix azure o1 test

* test: fix tests

* fix: fix test

Krish Dholakia, 2024-12-30 21:52:52 -08:00 (committed by GitHub)
parent f0ed02d3ee
commit 0178e75cd9
17 changed files with 273 additions and 141 deletions

@@ -91,6 +91,40 @@ class BaseLLMChatTest(ABC):
        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        assert response.choices[0].message.content is not None

    def test_streaming(self):
        """Check if litellm handles streaming correctly"""
        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Hello, how are you?"}],
            }
        ]
        try:
            response = self.completion_function(
                **base_completion_call_args,
                messages=messages,
                stream=True,
            )
            assert response is not None
            assert isinstance(response, CustomStreamWrapper)
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        chunks = []
        for chunk in response:
            print(chunk)
            chunks.append(chunk)

        resp = litellm.stream_chunk_builder(chunks=chunks)
        print(resp)

        # assert resp.usage.prompt_tokens > 0
        # assert resp.usage.completion_tokens > 0
        # assert resp.usage.total_tokens > 0

    def test_pydantic_model_input(self):
        litellm.set_verbose = True
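
As a standalone illustration of the collect-and-rebuild pattern used in 'test_streaming' above (the model name is only an example, and Azure credentials are assumed to be configured in the environment):

    # Standalone sketch of the pattern used in test_streaming above:
    # iterate the stream, then rebuild one response from the chunks.
    import litellm

    response = litellm.completion(
        model="azure/o1-preview",  # example model name
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,
    )

    chunks = list(response)
    rebuilt = litellm.stream_chunk_builder(chunks=chunks)
    print(rebuilt.choices[0].message.content)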
@@ -154,9 +188,14 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",
@@ -225,9 +264,15 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format with streaming is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",