(performance improvement - vertex embeddings) ~111.11% faster (#6000)

* use vertex llm as base class for embeddings

* use correct vertex class in main.py

* set_headers in vertex llm base

* add types for vertex embedding requests

* add embedding handler for vertex

* use async mode for vertex embedding tests

* use vertexAI textEmbeddingConfig

* fix linting

* add sync and async mode testing for vertex ai embeddings
This commit is contained in:
Ishaan Jaff 2024-10-01 14:16:21 -07:00 committed by GitHub
parent 18a28ef977
commit eef9bad9a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 497 additions and 300 deletions

View file

@@ -129,9 +129,6 @@ class PassThroughEndpointLogging:
from litellm.llms.vertex_ai_and_google_ai_studio.image_generation.image_generation_handler import (
VertexImageGeneration,
)
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_embeddings.embedding_handler import (
transform_vertex_response_to_openai,
)
from litellm.types.utils import PassthroughCallTypes
vertex_image_generation_class = VertexImageGeneration()
@@ -157,7 +154,7 @@ class PassThroughEndpointLogging:
PassthroughCallTypes.passthrough_image_generation.value
)
else:
litellm_prediction_response = await transform_vertex_response_to_openai(
litellm_prediction_response = litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
response=_json_response,
model=model,
model_response=litellm.EmbeddingResponse(),