Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
fix(vertex_ai.py): add async embedding support for vertex ai
commit 0ffdf57dec (parent 5bf51a6058)

3 changed files with 102 additions and 0 deletions
@@ -945,6 +945,7 @@ def embedding(
     encoding=None,
     vertex_project=None,
     vertex_location=None,
+    aembedding=False,
 ):
     # logic for parsing in - calling - parsing out model embedding calls
     try:
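The new aembedding flag makes the synchronous embedding entrypoint dual-mode: when the flag is set, the function returns an un-awaited coroutine instead of a finished response, and the async caller awaits it. A minimal self-contained sketch of that pattern (names here are illustrative, not litellm's):

import asyncio

async def _embed_async(texts):
    # stand-in for a real async SDK call
    return [[0.1, 0.2, 0.3] for _ in texts]

def embed(texts, aembedding=False):
    if aembedding:
        # hand back the un-awaited coroutine; the async caller awaits it
        return _embed_async(texts)
    # unchanged synchronous path
    return [[0.1, 0.2, 0.3] for _ in texts]

async def main():
    vectors = await embed(["hello"], aembedding=True)
    print(vectors)

asyncio.run(main())

Returning the coroutine from the sync function keeps a single code path for argument parsing while still letting the event loop drive the network call.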
@@ -972,9 +973,95 @@ def embedding(
 
     try:
         llm_model = TextEmbeddingModel.from_pretrained(model)
+    except Exception as e:
+        raise VertexAIError(status_code=422, message=str(e))
+
+    if aembedding == True:
+        return async_embedding(
+            model=model,
+            client=llm_model,
+            input=input,
+            logging_obj=logging_obj,
+            model_response=model_response,
+            optional_params=optional_params,
+            encoding=encoding,
+        )
+
+    request_str = f"""embeddings = llm_model.get_embeddings({input})"""
+    ## LOGGING PRE-CALL
+    logging_obj.pre_call(
+        input=input,
+        api_key=None,
+        additional_args={
+            "complete_input_dict": optional_params,
+            "request_str": request_str,
+        },
+    )
+
+    try:
         embeddings = llm_model.get_embeddings(input)
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
+
+    ## LOGGING POST-CALL
+    logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
+    ## Populate OpenAI compliant dictionary
+    embedding_response = []
+    for idx, embedding in enumerate(embeddings):
+        embedding_response.append(
+            {
+                "object": "embedding",
+                "index": idx,
+                "embedding": embedding.values,
+            }
+        )
+    model_response["object"] = "list"
+    model_response["data"] = embedding_response
+    model_response["model"] = model
+    input_tokens = 0
+
+    input_str = "".join(input)
+
+    input_tokens += len(encoding.encode(input_str))
+
+    usage = Usage(
+        prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
+    )
+    model_response.usage = usage
+
+    return model_response
+
+
+async def async_embedding(
+    model: str,
+    input: Union[list, str],
+    logging_obj=None,
+    model_response=None,
+    optional_params=None,
+    encoding=None,
+    client=None,
+):
+    """
+    Async embedding implementation
+    """
+    request_str = f"""embeddings = llm_model.get_embeddings({input})"""
+    ## LOGGING PRE-CALL
+    logging_obj.pre_call(
+        input=input,
+        api_key=None,
+        additional_args={
+            "complete_input_dict": optional_params,
+            "request_str": request_str,
+        },
+    )
+
+    try:
+        embeddings = await client.get_embeddings_async(input)
+    except Exception as e:
+        raise VertexAIError(status_code=500, message=str(e))
+
+    ## LOGGING POST-CALL
+    logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
     ## Populate OpenAI compliant dictionary
     embedding_response = []
     for idx, embedding in enumerate(embeddings):
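For reference, a standalone sketch of the SDK surface this new path relies on. It assumes google-cloud-aiplatform is installed and Google Cloud credentials are configured; the model name is the one used by the commit's test, and get_embeddings_async is the awaitable counterpart of the get_embeddings call on the sync path (both appear in the diff above):

import asyncio
from vertexai.language_models import TextEmbeddingModel

async def main():
    # same model class the patched code loads via from_pretrained
    llm_model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
    # awaitable counterpart of get_embeddings, as used in async_embedding
    embeddings = await llm_model.get_embeddings_async(
        ["good morning from litellm", "this is another item"]
    )
    for idx, embedding in enumerate(embeddings):
        # embedding.values is the raw float vector that the
        # OpenAI-compliant response loop above packs into "data"
        print(idx, len(embedding.values))

asyncio.run(main())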
@@ -2211,6 +2211,7 @@ async def aembedding(*args, **kwargs):
         or custom_llm_provider == "deepinfra"
         or custom_llm_provider == "perplexity"
         or custom_llm_provider == "ollama"
+        or custom_llm_provider == "vertex_ai"
     ): # currently implemented aiohttp calls for just azure and openai, soon all.
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
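vertex_ai.embedding is still a synchronous function, so the router runs it in a thread executor; with aembedding=True the executor hands back a coroutine rather than a finished response, which then has to be awaited on the event loop. A minimal sketch of that two-step dispatch, using hypothetical names rather than litellm's internals:

import asyncio
import functools

def provider_embedding(aembedding=False):
    async def _async_impl():
        return {"object": "list", "data": []}
    # mirrors the patched vertex_ai.embedding: a coroutine when aembedding=True
    return _async_impl() if aembedding else {"object": "list", "data": []}

async def aembedding_router():
    loop = asyncio.get_running_loop()
    func_with_context = functools.partial(provider_embedding, aembedding=True)
    # Await normally: run the blocking entrypoint in the default executor
    init_response = await loop.run_in_executor(None, func_with_context)
    # the provider handed back an un-awaited coroutine; finish it on this loop
    if asyncio.iscoroutine(init_response):
        return await init_response
    return init_response

print(asyncio.run(aembedding_router()))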
@@ -2549,6 +2550,7 @@ def embedding(
             model_response=EmbeddingResponse(),
             vertex_project=vertex_ai_project,
             vertex_location=vertex_ai_location,
+            aembedding=aembedding,
         )
     elif custom_llm_provider == "oobabooga":
         response = oobabooga.embedding(
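The synchronous path is unchanged: litellm.embedding forwards aembedding (false by default), so vertex_ai.embedding takes the blocking get_embeddings branch. A usage sketch mirroring the existing sync test, assuming Vertex AI credentials and project settings are configured for litellm:

import litellm

# synchronous call: aembedding is not set, so the provider's blocking
# get_embeddings branch runs and returns a finished EmbeddingResponse
response = litellm.embedding(
    model="textembedding-gecko@001",
    input=["good morning from litellm", "this is another item"],
)
print(response)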
@@ -243,6 +243,19 @@ def test_vertexai_embedding():
         pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.asyncio
+async def test_vertexai_aembedding():
+    try:
+        # litellm.set_verbose=True
+        response = await litellm.aembedding(
+            model="textembedding-gecko@001",
+            input=["good morning from litellm", "this is another item"],
+        )
+        print(f"response: {response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 
 def test_bedrock_embedding_titan():
     try:
         # this tests if we support str input for bedrock embedding
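The new test drives the whole chain end to end; the @pytest.mark.asyncio marker assumes the pytest-asyncio plugin is installed. Outside pytest, the same call can be driven with a plain event loop:

import asyncio
import litellm

async def main():
    # identical call to the new test; requires Vertex AI credentials
    response = await litellm.aembedding(
        model="textembedding-gecko@001",
        input=["good morning from litellm", "this is another item"],
    )
    print(f"response: {response}")

asyncio.run(main())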