mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
add initial support for multimodal_embedding vertex
This commit is contained in:
parent
710ae63957
commit
be6eb52036
2 changed files with 279 additions and 13 deletions
|
@ -38,12 +38,15 @@ from litellm.types.llms.vertex_ai import (
|
||||||
FunctionDeclaration,
|
FunctionDeclaration,
|
||||||
GenerateContentResponseBody,
|
GenerateContentResponseBody,
|
||||||
GenerationConfig,
|
GenerationConfig,
|
||||||
|
Instance,
|
||||||
|
InstanceVideo,
|
||||||
PartType,
|
PartType,
|
||||||
RequestBody,
|
RequestBody,
|
||||||
SafetSettingsConfig,
|
SafetSettingsConfig,
|
||||||
SystemInstructions,
|
SystemInstructions,
|
||||||
ToolConfig,
|
ToolConfig,
|
||||||
Tools,
|
Tools,
|
||||||
|
VertexMultimodalEmbeddingRequest,
|
||||||
)
|
)
|
||||||
from litellm.types.utils import GenericStreamingChunk
|
from litellm.types.utils import GenericStreamingChunk
|
||||||
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
||||||
|
@ -1537,6 +1540,253 @@ class VertexLLM(BaseLLM):
|
||||||
|
|
||||||
return model_response
|
return model_response
|
||||||
|
|
||||||
|
def multimodal_embedding(
    self,
    model: str,
    input: Union[list, str],
    print_verbose,
    model_response: litellm.EmbeddingResponse,
    optional_params: dict,
    api_key: Optional[str] = None,
    logging_obj=None,
    encoding=None,
    vertex_project=None,
    vertex_location=None,
    vertex_credentials=None,
    aembedding=False,
    timeout=300,
    client=None,
):
    """Request a multimodal embedding from the Vertex AI ``:predict`` endpoint.

    A single request instance is built from ``optional_params`` (for example
    ``image`` / ``video`` entries supplied by the caller); when ``input`` is a
    string it is attached to that instance as ``text``. The instance is POSTed
    synchronously and the raw ``predictions`` list from the response is stored
    on ``model_response``.

    Args:
        model: Publisher model name, interpolated into the request URL.
        input: Text to embed. Only a ``str`` is forwarded as ``text``.
            NOTE(review): a ``list`` input is currently not placed into the
            request at all - confirm whether that is intended.
        print_verbose: Not used by this method.
        model_response: Response object that is populated and returned.
        optional_params: Keyword arguments used to construct the request
            ``Instance``; ``None``/empty is treated as ``{}``.
        api_key: Not used by this method.
        logging_obj: Optional logger; when provided its ``pre_call`` hook is
            invoked once before the HTTP request is sent.
        encoding: Not used by this method.
        vertex_project: Project id used in the URL and for the access token.
        vertex_location: Region used in the URL host and path.
        vertex_credentials: Credentials forwarded to ``_ensure_access_token``.
        aembedding: Not used yet - the call is always synchronous.
            TODO: add an async code path.
        timeout: Timeout for a newly created HTTP handler. Ignored when
            ``client`` is supplied.
        client: Optional pre-built HTTP handler to reuse.

    Returns:
        ``model_response`` with ``data`` set to the response's ``predictions``
        and ``model`` set to ``model``.

    Raises:
        Exception: If the endpoint answers with a non-200 status code.
        litellm.InternalServerError: If the JSON response has no
            ``predictions`` key.
    """
    if client is None:
        _params = {}
        if timeout is None:
            # No timeout requested: fall back to a generous default.
            _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
        elif isinstance(timeout, (float, int)):
            _params["timeout"] = httpx.Timeout(timeout)
        # NOTE(review): any other timeout type is not forwarded, so the
        # handler's own default applies - confirm this is intended.

        sync_handler: HTTPHandler = HTTPHandler(**_params)  # type: ignore
    else:
        sync_handler = client  # type: ignore

    url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"

    auth_header, _ = self._ensure_access_token(
        credentials=vertex_credentials, project_id=vertex_project
    )
    optional_params = optional_params or {}

    # One instance per request: caller-supplied fields come from
    # optional_params, and a plain-string input is sent as the text field.
    request_data = VertexMultimodalEmbeddingRequest()
    vertex_request_instance = Instance(**optional_params)

    if isinstance(input, str):
        vertex_request_instance["text"] = input

    request_data["instances"] = [vertex_request_instance]

    # Only the first 10 characters of the bearer token are shown in the logged
    # curl command; the rest is masked.
    request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""

    # Log the outgoing request exactly once, and only when a logger was given
    # (logging_obj defaults to None).
    if logging_obj is not None:
        logging_obj.pre_call(
            input=input,
            api_key=None,
            additional_args={
                "complete_input_dict": optional_params,
                "request_str": request_str,
            },
        )

    response = sync_handler.post(
        url=url,
        headers={
            "Content-Type": "application/json; charset=utf-8",
            "Authorization": f"Bearer {auth_header}",
        },
        data=json.dumps(request_data),
    )

    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code} {response.text}")

    # The response is expected to carry a top-level "predictions" list; its
    # entries are passed through to the caller unmodified.
    _json_response = response.json()
    if "predictions" not in _json_response:
        raise litellm.InternalServerError(
            message=f"embedding response does not contain 'predictions', got {_json_response}",
            llm_provider="vertex_ai",
            model=model,
        )

    model_response.data = _json_response["predictions"]
    model_response.model = model

    return model_response
|
||||||
|
|
||||||
|
# async def aimage_generation(
|
||||||
|
# self,
|
||||||
|
# prompt: str,
|
||||||
|
# vertex_project: Optional[str],
|
||||||
|
# vertex_location: Optional[str],
|
||||||
|
# vertex_credentials: Optional[str],
|
||||||
|
# model_response: litellm.ImageResponse,
|
||||||
|
# model: Optional[
|
||||||
|
# str
|
||||||
|
# ] = "imagegeneration", # vertex ai uses imagegeneration as the default model
|
||||||
|
# client: Optional[AsyncHTTPHandler] = None,
|
||||||
|
# optional_params: Optional[dict] = None,
|
||||||
|
# timeout: Optional[int] = None,
|
||||||
|
# logging_obj=None,
|
||||||
|
# ):
|
||||||
|
# response = None
|
||||||
|
# if client is None:
|
||||||
|
# _params = {}
|
||||||
|
# if timeout is not None:
|
||||||
|
# if isinstance(timeout, float) or isinstance(timeout, int):
|
||||||
|
# _httpx_timeout = httpx.Timeout(timeout)
|
||||||
|
# _params["timeout"] = _httpx_timeout
|
||||||
|
# else:
|
||||||
|
# _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
|
||||||
|
|
||||||
|
# self.async_handler = AsyncHTTPHandler(**_params) # type: ignore
|
||||||
|
# else:
|
||||||
|
# self.async_handler = client # type: ignore
|
||||||
|
|
||||||
|
# # make POST request to
|
||||||
|
# # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
|
||||||
|
# url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
|
||||||
|
|
||||||
|
# """
|
||||||
|
# Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
|
||||||
|
# curl -X POST \
|
||||||
|
# -H "Authorization: Bearer $(gcloud auth print-access-token)" \
|
||||||
|
# -H "Content-Type: application/json; charset=utf-8" \
|
||||||
|
# -d {
|
||||||
|
# "instances": [
|
||||||
|
# {
|
||||||
|
# "prompt": "a cat"
|
||||||
|
# }
|
||||||
|
# ],
|
||||||
|
# "parameters": {
|
||||||
|
# "sampleCount": 1
|
||||||
|
# }
|
||||||
|
# } \
|
||||||
|
# "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
|
||||||
|
# """
|
||||||
|
# auth_header, _ = self._ensure_access_token(
|
||||||
|
# credentials=vertex_credentials, project_id=vertex_project
|
||||||
|
# )
|
||||||
|
# optional_params = optional_params or {
|
||||||
|
# "sampleCount": 1
|
||||||
|
# } # default optional params
|
||||||
|
|
||||||
|
# request_data = {
|
||||||
|
# "instances": [{"prompt": prompt}],
|
||||||
|
# "parameters": optional_params,
|
||||||
|
# }
|
||||||
|
|
||||||
|
# request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
|
||||||
|
# logging_obj.pre_call(
|
||||||
|
# input=prompt,
|
||||||
|
# api_key=None,
|
||||||
|
# additional_args={
|
||||||
|
# "complete_input_dict": optional_params,
|
||||||
|
# "request_str": request_str,
|
||||||
|
# },
|
||||||
|
# )
|
||||||
|
|
||||||
|
# response = await self.async_handler.post(
|
||||||
|
# url=url,
|
||||||
|
# headers={
|
||||||
|
# "Content-Type": "application/json; charset=utf-8",
|
||||||
|
# "Authorization": f"Bearer {auth_header}",
|
||||||
|
# },
|
||||||
|
# data=json.dumps(request_data),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# if response.status_code != 200:
|
||||||
|
# raise Exception(f"Error: {response.status_code} {response.text}")
|
||||||
|
# """
|
||||||
|
# Vertex AI Image generation response example:
|
||||||
|
# {
|
||||||
|
# "predictions": [
|
||||||
|
# {
|
||||||
|
# "bytesBase64Encoded": "BASE64_IMG_BYTES",
|
||||||
|
# "mimeType": "image/png"
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# "mimeType": "image/png",
|
||||||
|
# "bytesBase64Encoded": "BASE64_IMG_BYTES"
|
||||||
|
# }
|
||||||
|
# ]
|
||||||
|
# }
|
||||||
|
# """
|
||||||
|
|
||||||
|
# _json_response = response.json()
|
||||||
|
|
||||||
|
# if "predictions" not in _json_response:
|
||||||
|
# raise litellm.InternalServerError(
|
||||||
|
# message=f"image generation response does not contain 'predictions', got {_json_response}",
|
||||||
|
# llm_provider="vertex_ai",
|
||||||
|
# model=model,
|
||||||
|
# )
|
||||||
|
|
||||||
|
# _predictions = _json_response["predictions"]
|
||||||
|
|
||||||
|
# _response_data: List[Image] = []
|
||||||
|
# for _prediction in _predictions:
|
||||||
|
# _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
|
||||||
|
# image_object = Image(b64_json=_bytes_base64_encoded)
|
||||||
|
# _response_data.append(image_object)
|
||||||
|
|
||||||
|
# model_response.data = _response_data
|
||||||
|
|
||||||
|
# return model_response
|
||||||
|
|
||||||
|
|
||||||
class ModelResponseIterator:
|
class ModelResponseIterator:
|
||||||
def __init__(self, streaming_response, sync_stream: bool):
|
def __init__(self, streaming_response, sync_stream: bool):
|
||||||
|
|
|
@ -3477,19 +3477,35 @@ def embedding(
|
||||||
or get_secret("VERTEX_CREDENTIALS")
|
or get_secret("VERTEX_CREDENTIALS")
|
||||||
)
|
)
|
||||||
|
|
||||||
response = vertex_ai.embedding(
|
if "image" in optional_params or "video" in optional_params:
|
||||||
model=model,
|
# multimodal embedding is supported on vertex httpx
|
||||||
input=input,
|
response = vertex_chat_completion.multimodal_embedding(
|
||||||
encoding=encoding,
|
model=model,
|
||||||
logging_obj=logging,
|
input=input,
|
||||||
optional_params=optional_params,
|
encoding=encoding,
|
||||||
model_response=EmbeddingResponse(),
|
logging_obj=logging,
|
||||||
vertex_project=vertex_ai_project,
|
optional_params=optional_params,
|
||||||
vertex_location=vertex_ai_location,
|
model_response=EmbeddingResponse(),
|
||||||
vertex_credentials=vertex_credentials,
|
vertex_project=vertex_ai_project,
|
||||||
aembedding=aembedding,
|
vertex_location=vertex_ai_location,
|
||||||
print_verbose=print_verbose,
|
vertex_credentials=vertex_credentials,
|
||||||
)
|
aembedding=aembedding,
|
||||||
|
print_verbose=print_verbose,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
response = vertex_ai.embedding(
|
||||||
|
model=model,
|
||||||
|
input=input,
|
||||||
|
encoding=encoding,
|
||||||
|
logging_obj=logging,
|
||||||
|
optional_params=optional_params,
|
||||||
|
model_response=EmbeddingResponse(),
|
||||||
|
vertex_project=vertex_ai_project,
|
||||||
|
vertex_location=vertex_ai_location,
|
||||||
|
vertex_credentials=vertex_credentials,
|
||||||
|
aembedding=aembedding,
|
||||||
|
print_verbose=print_verbose,
|
||||||
|
)
|
||||||
elif custom_llm_provider == "oobabooga":
|
elif custom_llm_provider == "oobabooga":
|
||||||
response = oobabooga.embedding(
|
response = oobabooga.embedding(
|
||||||
model=model,
|
model=model,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue