(feat) add Vertex Batches API support in OpenAI format (#7032)

* working request

* working transform

* working request

* transform vertex batch response

* add _async_create_batch

* move gcs functions to base

* fix _get_content_from_openai_file

* transform_openai_file_content_to_vertex_ai_file_content (see the sketch after this list)

* fix transform vertex gcs bucket upload to OAI files format

* working e2e test

* _get_gcs_object_name

* fix linting

* add doc string

* fix transform_gcs_bucket_response_to_openai_file_object

* use vertex for batch endpoints

* add batches support for vertex

* test_vertex_batches_endpoint

* test_vertex_batch_prediction

* fix gcs bucket base auth

* docs clean up batches

* docs Batch API

* docs vertex batches api

* test_get_gcs_logging_config_without_service_account

* undo change

* fix vertex md

* test_get_gcs_logging_config_without_service_account

* ci/cd run again
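
Several of the commits above add transforms between OpenAI's batch file format and Vertex AI's (e.g. `transform_openai_file_content_to_vertex_ai_file_content` and the GCS upload/response transforms). As a rough illustration of the shape of that work only — the helper below is hypothetical, not the repo's actual code — each OpenAI batch JSONL line maps to one Vertex AI batch-prediction JSONL line roughly like this:

```python
# Hypothetical sketch of OpenAI batch JSONL -> Vertex AI batch-prediction JSONL.
# Field mapping is illustrative; the real transform in this PR lives in the
# Vertex AI batches handler and handles more cases (system prompts, params).
import json


def openai_jsonl_to_vertex_jsonl(openai_jsonl: str) -> str:
    vertex_lines = []
    for line in openai_jsonl.strip().splitlines():
        req = json.loads(line)
        # OpenAI batch line: {"custom_id", "method", "url", "body": {...}}
        body = req["body"]
        # Map OpenAI chat messages to Gemini-style "contents"
        # (system messages are skipped here for simplicity).
        contents = [
            {
                "role": "user" if m["role"] == "user" else "model",
                "parts": [{"text": m["content"]}],
            }
            for m in body["messages"]
            if m["role"] != "system"
        ]
        vertex_lines.append(json.dumps({"request": {"contents": contents}}))
    return "\n".join(vertex_lines)
```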
Authored by Ishaan Jaff on 2024-12-04 19:40:28 -08:00, committed by GitHub
parent dd5ccdd889
commit 0eef9df396
20 changed files with 1347 additions and 424 deletions


```diff
@@ -5225,6 +5225,7 @@ async def create_batch(
         is_router_model = is_known_model(model=router_model, llm_router=llm_router)
         _create_batch_data = CreateBatchRequest(**data)
+        custom_llm_provider = provider or _create_batch_data.pop("custom_llm_provider", None)  # type: ignore
         if (
             litellm.enable_loadbalancing_on_batch_endpoints is True
@@ -5241,10 +5242,10 @@ async def create_batch(
             response = await llm_router.acreate_batch(**_create_batch_data)  # type: ignore
         else:
-            if provider is None:
-                provider = "openai"
+            if custom_llm_provider is None:
+                custom_llm_provider = "openai"
             response = await litellm.acreate_batch(
-                custom_llm_provider=provider, **_create_batch_data  # type: ignore
+                custom_llm_provider=custom_llm_provider, **_create_batch_data  # type: ignore
             )
         ### ALERTING ###
```
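
The hunk above lets `create_batch` read `custom_llm_provider` from the request body (falling back to "openai") instead of relying only on the path-based `provider`. A minimal client-side sketch of what that enables, assuming a LiteLLM proxy at http://localhost:4000 with placeholder key "sk-1234"; the OpenAI Python SDK's `extra_body` parameter carries the extra field:

```python
# Sketch: create a Vertex AI batch through a LiteLLM proxy with the OpenAI SDK.
# Base URL, API key, and file name are placeholders.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Upload the OpenAI-format JSONL batch file; extra_body routes it to Vertex AI,
# which (per this commit) stores the transformed file in a GCS bucket.
batch_file = client.files.create(
    file=open("batch_requests.jsonl", "rb"),
    purpose="batch",
    extra_body={"custom_llm_provider": "vertex_ai"},
)

# Create the batch; "custom_llm_provider" in the body is what the new
# _create_batch_data.pop("custom_llm_provider", None) line picks up.
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    extra_body={"custom_llm_provider": "vertex_ai"},
)
print(batch.id, batch.status)
```

Note that when `litellm.enable_loadbalancing_on_batch_endpoints` is enabled, the router branch above handles the request instead, so the body-supplied provider only applies on the direct path.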