(performance improvement - vertex embeddings) ~111.11% faster (#6000)

* use vertex llm as base class for embeddings

* use correct vertex class in main.py

* set_headers in vertex llm base

* add types for vertex embedding requests

* add embedding handler for vertex

* use async mode for vertex embedding tests

* use vertexAI textEmbeddingConfig

* fix linting

* add sync and async mode testing for vertex ai embeddings
This commit is contained in:
Ishaan Jaff 2024-10-01 14:16:21 -07:00 committed by GitHub
parent 18a28ef977
commit eef9bad9a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 497 additions and 300 deletions

View file

@@ -129,9 +129,6 @@ class PassThroughEndpointLogging:
from litellm.llms.vertex_ai_and_google_ai_studio.image_generation.image_generation_handler import (
VertexImageGeneration,
)
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_embeddings.embedding_handler import (
transform_vertex_response_to_openai,
)
from litellm.types.utils import PassthroughCallTypes
vertex_image_generation_class = VertexImageGeneration()
@@ -157,7 +154,7 @@ class PassThroughEndpointLogging:
PassthroughCallTypes.passthrough_image_generation.value
)
else:
litellm_prediction_response = await transform_vertex_response_to_openai(
litellm_prediction_response = litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
response=_json_response,
model=model,
model_response=litellm.EmbeddingResponse(),