Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Litellm dev 12 30 2024 p1 (#7480)
* test(azure_openai_o1.py): initial commit with testing for the azure openai o1 preview model
* fix(base_llm_unit_tests.py): handle azure o1 preview response format tests; skip, as o1 on azure doesn't support tool calling yet
* fix: initial commit of azure o1 handler using the openai caller. Simplifies calling and lets the fake streaming logic already implemented for openai just work.
* feat(azure/o1_handler.py): fake o1 streaming for azure o1 models. Azure does not currently support streaming for o1.
* feat(o1_transformation.py): support overriding 'should_fake_stream' on azure/o1 via the 'supports_native_streaming' param on model info. Lets users toggle this on once azure allows o1 streaming, without needing to bump versions.
* style(router.py): remove 'give feedback/get help' messaging when the router is used. Prevents noisy messaging. Closes https://github.com/BerriAI/litellm/issues/5942
* test: fix azure o1 test
* test: fix tests
* fix: fix test
This commit is contained in:
parent f0ed02d3ee
commit 0178e75cd9
17 changed files with 273 additions and 141 deletions
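
The commit message above describes toggling fake streaming for azure/o1 via a 'supports_native_streaming' flag on model info. The sketch below is a hypothetical illustration of how such a check could look, not the code from this commit; it assumes litellm.get_model_info returns a dict-like model info object for the given model.

# Hypothetical sketch of the 'supports_native_streaming' toggle described in the
# commit message; not the actual o1_transformation.py implementation.
import litellm

def should_fake_stream(model: str) -> bool:
    """Decide whether to fake streaming because the provider can't stream natively."""
    try:
        model_info = litellm.get_model_info(model)
    except Exception:
        return False  # unknown model: assume native streaming works
    # Fake the stream unless model info explicitly says native streaming is supported.
    return not model_info.get("supports_native_streaming", False)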
@@ -91,6 +91,40 @@ class BaseLLMChatTest(ABC):
        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        assert response.choices[0].message.content is not None

    def test_streaming(self):
        """Check if litellm handles streaming correctly"""
        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Hello, how are you?"}],
            }
        ]
        try:
            response = self.completion_function(
                **base_completion_call_args,
                messages=messages,
                stream=True,
            )
            assert response is not None
            assert isinstance(response, CustomStreamWrapper)
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        chunks = []
        for chunk in response:
            print(chunk)
            chunks.append(chunk)

        resp = litellm.stream_chunk_builder(chunks=chunks)
        print(resp)

        # assert resp.usage.prompt_tokens > 0
        # assert resp.usage.completion_tokens > 0
        # assert resp.usage.total_tokens > 0

    def test_pydantic_model_input(self):
        litellm.set_verbose = True
@@ -154,9 +188,14 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",
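
Both response-format tests gate on supports_response_schema before requesting structured output. As a rough usage illustration (the model name and schema are placeholders, and it assumes litellm.completion accepts a Pydantic model as response_format, as documented for structured outputs):

# Illustrative only: what a response-schema request through litellm can look like.
from pydantic import BaseModel

import litellm
from litellm.utils import supports_response_schema

class CalendarEvent(BaseModel):
    name: str
    date: str

model = "gpt-4o-2024-08-06"  # placeholder model that supports structured outputs
if supports_response_schema(model, None):
    response = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Alice and Bob meet on Friday."}],
        response_format=CalendarEvent,
    )
    print(response.choices[0].message.content)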
@@ -225,9 +264,15 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format with streaming is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",
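
These hunks live in the shared BaseLLMChatTest suite, so a provider-specific test file mainly supplies its base call args. A hedged sketch of how the new azure o1 test file might plug in (class name, import path, and model string are guesses, not taken from this diff):

# Hypothetical wiring for the shared suite; names below are illustrative.
from base_llm_unit_tests import BaseLLMChatTest  # assumed import path

class TestAzureOpenAIO1(BaseLLMChatTest):
    def get_base_completion_call_args(self) -> dict:
        # Placeholder call args; a real test would point at a deployed Azure o1 model.
        # completion_function is assumed to come from the base class.
        return {"model": "azure/o1-preview"}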