Litellm dev 12 25 2025 p2 (#7420)

* test: add new test image embedding to base llm unit tests
  Addresses https://github.com/BerriAI/litellm/issues/6515
* fix(bedrock/embed/multimodal-embeddings): strip data prefix from image urls for bedrock multimodal embeddings
  Fix https://github.com/BerriAI/litellm/issues/6515
* feat: initial commit for fireworks ai audio transcription support
  Relevant issue: https://github.com/BerriAI/litellm/issues/7134
* test: initial fireworks ai test
* feat(fireworks_ai/): implemented fireworks ai audio transcription config
* fix(utils.py): register fireworks ai audio transcription config, in config manager
* fix(utils.py): add fireworks ai param translation to 'get_optional_params_transcription'
* refactor(fireworks_ai/): define text completion route with model name handling
  moves model name handling to specific fireworks routes, as required by their api
* refactor(fireworks_ai/chat): define transform_request - allows fixing model if accounts/ is missing
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting errors
* fix(handler.py): fix linting errors
* fix(main.py): fix tgai text completion route
* refactor(together_ai/completion): refactors together ai text completion route to just use provider transform request
* refactor: move test_fine_tuning_api out of local_testing
  reduces local testing ci/cd time
2025-04-25 18:54:30 +00:00 · 2024-12-25 18:35:34 -08:00 · 2024-12-25 18:35:34 -08:00 · 760328b6ad
commit 760328b6ad
parent 9237357bcc
33 changed files with 709 additions and 177 deletions
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -441,10 +441,13 @@ class OpenAIChatCompletion(BaseLLM):
                "stream_options", None
            )
            stream: Optional[bool] = inference_params.pop("stream", False)
+            provider_config: Optional[BaseConfig] = None
            if custom_llm_provider is not None and model is not None:
                provider_config = ProviderConfigManager.get_provider_chat_config(
                    model=model, provider=LlmProviders(custom_llm_provider)
                )
+
+            if provider_config:
                fake_stream = provider_config.should_fake_stream(
                    model=model, custom_llm_provider=custom_llm_provider, stream=stream
                )
@@ -464,10 +467,7 @@ class OpenAIChatCompletion(BaseLLM):
            if custom_llm_provider is not None and custom_llm_provider != "openai":
                model_response.model = f"{custom_llm_provider}/{model}"

-            if messages is not None and custom_llm_provider is not None:
-                provider_config = ProviderConfigManager.get_provider_chat_config(
-                    model=model, provider=LlmProviders(custom_llm_provider)
-                )
+            if messages is not None and provider_config is not None:
                if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                    provider_config, OpenAIConfig
                ):
@@ -478,13 +478,22 @@ class OpenAIChatCompletion(BaseLLM):
            for _ in range(
                2
            ):  # if call fails due to alternating messages, retry with reformatted message
-                data = OpenAIConfig().transform_request(
-                    model=model,
-                    messages=messages,
-                    optional_params=inference_params,
-                    litellm_params=litellm_params,
-                    headers=headers or {},
-                )
+                if provider_config is not None:
+                    data = provider_config.transform_request(
+                        model=model,
+                        messages=messages,
+                        optional_params=inference_params,
+                        litellm_params=litellm_params,
+                        headers=headers or {},
+                    )
+                else:
+                    data = OpenAIConfig().transform_request(
+                        model=model,
+                        messages=messages,
+                        optional_params=inference_params,
+                        litellm_params=litellm_params,
+                        headers=headers or {},
+                    )
                try:
                    max_retries = data.pop("max_retries", 2)
                    if acompletion is True: