Litellm dev 12 25 2024 p2 (#7420)

* test: add new test image embedding to base llm unit tests

Addresses https://github.com/BerriAI/litellm/issues/6515

* fix(bedrock/embed/multimodal-embeddings): strip data prefix from image urls for bedrock multimodal embeddings

Fix https://github.com/BerriAI/litellm/issues/6515

* feat: initial commit for fireworks ai audio transcription support

Relevant issue: https://github.com/BerriAI/litellm/issues/7134

* test: initial fireworks ai test

* feat(fireworks_ai/): implemented fireworks ai audio transcription config

* fix(utils.py): register fireworks ai audio transcription config in the config manager

* fix(utils.py): add fireworks ai param translation to 'get_optional_params_transcription'

* refactor(fireworks_ai/): define text completion route with model name handling

moves model name handling to specific fireworks routes, as required by their api

* refactor(fireworks_ai/chat): define transform_request - allows fixing the model name if the accounts/ prefix is missing

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting errors

* fix(handler.py): fix linting errors

* fix(main.py): fix together ai (tgai) text completion route

* refactor(together_ai/completion): refactors together ai text completion route to just use provider transform request

* refactor: move test_fine_tuning_api out of local_testing

reduces local testing ci/cd time
This commit is contained in:
Krish Dholakia 2024-12-25 18:35:34 -08:00 committed by GitHub
parent 9237357bcc
commit 760328b6ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 709 additions and 177 deletions

View file

@@ -441,10 +441,13 @@ class OpenAIChatCompletion(BaseLLM):
"stream_options", None
)
stream: Optional[bool] = inference_params.pop("stream", False)
provider_config: Optional[BaseConfig] = None
if custom_llm_provider is not None and model is not None:
provider_config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=LlmProviders(custom_llm_provider)
)
if provider_config:
fake_stream = provider_config.should_fake_stream(
model=model, custom_llm_provider=custom_llm_provider, stream=stream
)
@@ -464,10 +467,7 @@ class OpenAIChatCompletion(BaseLLM):
if custom_llm_provider is not None and custom_llm_provider != "openai":
model_response.model = f"{custom_llm_provider}/{model}"
if messages is not None and custom_llm_provider is not None:
provider_config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=LlmProviders(custom_llm_provider)
)
if messages is not None and provider_config is not None:
if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
provider_config, OpenAIConfig
):
@@ -478,13 +478,22 @@ class OpenAIChatCompletion(BaseLLM):
for _ in range(
2
): # if call fails due to alternating messages, retry with reformatted message
data = OpenAIConfig().transform_request(
model=model,
messages=messages,
optional_params=inference_params,
litellm_params=litellm_params,
headers=headers or {},
)
if provider_config is not None:
data = provider_config.transform_request(
model=model,
messages=messages,
optional_params=inference_params,
litellm_params=litellm_params,
headers=headers or {},
)
else:
data = OpenAIConfig().transform_request(
model=model,
messages=messages,
optional_params=inference_params,
litellm_params=litellm_params,
headers=headers or {},
)
try:
max_retries = data.pop("max_retries", 2)
if acompletion is True: