o1 - add image param handling (#7312)

* fix(openai.py): fix returning non-streaming o1 requests

fixes an issue where `fake_stream` was always True for o1, so non-streaming requests were incorrectly faked as streams
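
In effect, a minimal sketch of the corrected ordering (names mirror the diff below; surrounding plumbing elided): `stream` is popped from the params first, so the fake-stream decision can see it.

    inference_params = optional_params.copy()
    # pop `stream` before deciding whether to fake a stream
    stream: Optional[bool] = inference_params.pop("stream", False)
    if custom_llm_provider is not None and model is not None:
        provider_config = ProviderConfigManager.get_provider_chat_config(
            model=model, provider=LlmProviders(custom_llm_provider)
        )
        # o1 is only faked when the caller actually asked for a stream
        fake_stream = provider_config.should_fake_stream(
            model=model, custom_llm_provider=custom_llm_provider, stream=stream
        )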

* build(model_prices_and_context_window.json): add 'supports_vision' for o1 models
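
With the flag set, callers can gate image inputs on it. A minimal sketch, assuming litellm's `supports_vision` helper, which reads this field from model_prices_and_context_window.json:

    import litellm

    # returns True once "supports_vision": true is set for the o1 entry
    if litellm.supports_vision(model="o1"):
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }]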

* fix: add internal server error exception mapping
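
A minimal sketch of what the mapping means for callers, assuming provider-side 500s now surface as litellm's `InternalServerError` exception type:

    import litellm

    try:
        resp = litellm.completion(
            model="o1", messages=[{"role": "user", "content": "hi"}]
        )
    except litellm.InternalServerError:
        ...  # typed 500s can be retried or routed to a fallback here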

* fix(base_llm_unit_tests.py): drop temperature from test
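
o1 models reject non-default sampling params such as `temperature`, so the shared unit test no longer sends it. A hedged sketch for callers who still pass it, assuming litellm's `drop_params` option:

    import litellm

    # drop_params=True asks litellm to drop params the target model rejects
    litellm.completion(
        model="o1",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.2,
        drop_params=True,
    )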

* test: mark prompt caching as a flaky test
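
A minimal sketch of the marker, assuming the pytest-rerunfailures plugin supplies `flaky`:

    import pytest

    @pytest.mark.flaky(reruns=3)  # rerun on transient failures
    def test_prompt_caching():
        ...
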
Author: Krish Dholakia, 2024-12-19 11:22:25 -08:00 (committed by GitHub)
parent a101c1fff4
commit 62b00cf28d
9 changed files with 68 additions and 79 deletions

@@ -453,18 +453,18 @@ class OpenAIChatCompletion(BaseLLM):
         super().completion()
         try:
             fake_stream: bool = False
-            if custom_llm_provider is not None and model is not None:
-                provider_config = ProviderConfigManager.get_provider_chat_config(
-                    model=model, provider=LlmProviders(custom_llm_provider)
-                )
-                fake_stream = provider_config.should_fake_stream(
-                    model=model, custom_llm_provider=custom_llm_provider
-                )
             inference_params = optional_params.copy()
             stream_options: Optional[dict] = inference_params.pop(
                 "stream_options", None
             )
             stream: Optional[bool] = inference_params.pop("stream", False)
+            if custom_llm_provider is not None and model is not None:
+                provider_config = ProviderConfigManager.get_provider_chat_config(
+                    model=model, provider=LlmProviders(custom_llm_provider)
+                )
+                fake_stream = provider_config.should_fake_stream(
+                    model=model, custom_llm_provider=custom_llm_provider, stream=stream
+                )
             if headers:
                 inference_params["extra_headers"] = headers
             if model is None or messages is None:
@@ -502,7 +502,6 @@ class OpenAIChatCompletion(BaseLLM):
                 litellm_params=litellm_params,
                 headers=headers or {},
             )
-
             try:
                 max_retries = data.pop("max_retries", 2)
                 if acompletion is True: