Litellm dev 12 30 2024 p1 (#7480)

* test(azure_openai_o1.py): initial commit with testing for azure openai o1 preview model

* fix(base_llm_unit_tests.py): handle azure o1 preview response format tests

skip as o1 on azure doesn't support tool calling yet

* fix: initial commit of azure o1 handler using openai caller

simplifies calling + allows the fake streaming logic already implemented for openai to just work
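
The rough shape of that delegation, as a minimal sketch (the 'AzureO1Handler' class and 'openai_complete' callable names here are hypothetical, not litellm's actual internals):

    # Illustrative sketch of the delegation idea, not litellm's actual implementation.
    # 'AzureO1Handler' and 'openai_complete' are hypothetical names.
    from typing import Any, Callable, Dict, List


    class AzureO1Handler:
        """Routes azure o1 requests through an existing OpenAI-style completion callable."""

        def __init__(self, openai_complete: Callable[..., Any]):
            # reuse the OpenAI caller instead of maintaining a separate Azure code path
            self._complete = openai_complete

        def completion(self, model: str, messages: List[Dict[str, Any]], **kwargs: Any) -> Any:
            # azure o1 requests are OpenAI-shaped, so simply delegate
            return self._complete(model=model, messages=messages, **kwargs)

Keeping azure o1 on the OpenAI code path means behavior already implemented there, such as fake streaming, applies without duplication.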

* feat(azure/o1_handler.py): fake o1 streaming for azure o1 models

azure does not currently support streaming for o1
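
Conceptually, fake streaming fetches the complete (non-streamed) response and replays it as delta chunks so 'stream=True' callers still get an iterator. The sketch below is a simplified illustration of that idea, not litellm's actual CustomStreamWrapper logic; the 'fake_stream' helper and chunk shape are hypothetical.

    # Simplified illustration of fake streaming: split a finished completion into
    # OpenAI-style delta chunks. Chunk size and dict shape are illustrative only.
    from typing import Any, Dict, Iterator


    def fake_stream(full_text: str, chunk_size: int = 20) -> Iterator[Dict[str, Any]]:
        """Yield delta chunks built from an already-complete response."""
        for i in range(0, len(full_text), chunk_size):
            yield {"choices": [{"delta": {"content": full_text[i : i + chunk_size]}}]}
        # a final empty delta carries the finish reason
        yield {"choices": [{"delta": {}, "finish_reason": "stop"}]}


    # usage: replay a completed o1 response as if the provider had streamed it
    for chunk in fake_stream("The o1 response arrives all at once and is replayed as chunks."):
        print(chunk)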

* feat(o1_transformation.py): support overriding 'should_fake_stream' on azure/o1 via 'supports_native_streaming' param on model info

enables users to toggle this on once azure enables o1 streaming, without needing to bump versions
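
For example, a router deployment could expose the flag through 'model_info'; this is a sketch only, and the api_base / api_key / api_version values below are placeholders.

    # Sketch: toggling the behavior per deployment via model_info.
    # api_base / api_key / api_version values are placeholders.
    import os

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "o1-preview",
                "litellm_params": {
                    "model": "azure/o1-preview",
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": "2024-12-01-preview",
                },
                # flip to True once Azure supports native o1 streaming
                "model_info": {"supports_native_streaming": False},
            }
        ]
    )

    response = router.completion(
        model="o1-preview",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,  # faked while supports_native_streaming is False
    )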

* style(router.py): remove 'give feedback/get help' messaging when router is used

Prevents noisy messaging

Closes https://github.com/BerriAI/litellm/issues/5942

* test: fix azure o1 test

* test: fix tests

* fix: fix test

Krish Dholakia, 2024-12-30 21:52:52 -08:00 (committed by GitHub)
parent f0ed02d3ee
commit 0178e75cd9
17 changed files with 273 additions and 141 deletions

@@ -91,6 +91,40 @@ class BaseLLMChatTest(ABC):
        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        assert response.choices[0].message.content is not None

    def test_streaming(self):
        """Check if litellm handles streaming correctly"""
        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Hello, how are you?"}],
            }
        ]
        try:
            response = self.completion_function(
                **base_completion_call_args,
                messages=messages,
                stream=True,
            )
            assert response is not None
            assert isinstance(response, CustomStreamWrapper)
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

        # for OpenAI the content contains the JSON schema, so we need to assert that the content is not None
        chunks = []
        for chunk in response:
            print(chunk)
            chunks.append(chunk)

        resp = litellm.stream_chunk_builder(chunks=chunks)
        print(resp)

        # assert resp.usage.prompt_tokens > 0
        # assert resp.usage.completion_tokens > 0
        # assert resp.usage.total_tokens > 0

    def test_pydantic_model_input(self):
        litellm.set_verbose = True
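
As a standalone illustration of the collect-and-rebuild pattern used in 'test_streaming' above (the model name is only an example, and Azure credentials are assumed to be configured in the environment):

    # Standalone sketch of the pattern used in test_streaming above:
    # iterate the stream, then rebuild one response from the chunks.
    import litellm

    response = litellm.completion(
        model="azure/o1-preview",  # example model name
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,
    )

    chunks = list(response)
    rebuilt = litellm.stream_chunk_builder(chunks=chunks)
    print(rebuilt.choices[0].message.content)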
@@ -154,9 +188,14 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",
@@ -225,9 +264,15 @@ class BaseLLMChatTest(ABC):
        """
        Test that the JSON response format with streaming is supported by the LLM API
        """
        from litellm.utils import supports_response_schema

        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        messages = [
            {
                "role": "system",