Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
refactor vtx image gen
commit aa13977136
parent 6393d2391e
3 changed files with 226 additions and 244 deletions
@@ -13,7 +13,6 @@ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

 import httpx  # type: ignore
 import requests  # type: ignore
-from openai.types.image import Image

 import litellm
 import litellm.litellm_core_utils
@@ -1488,248 +1487,6 @@ class VertexLLM(BaseLLM):
             encoding=encoding,
         )

-    def image_generation(
-        self,
-        prompt: str,
-        vertex_project: Optional[str],
-        vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
-        model_response: litellm.ImageResponse,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
-        client: Optional[Any] = None,
-        optional_params: Optional[dict] = None,
-        timeout: Optional[int] = None,
-        logging_obj=None,
-        aimg_generation=False,
-    ):
-        if aimg_generation is True:
-            return self.aimage_generation(
-                prompt=prompt,
-                vertex_project=vertex_project,
-                vertex_location=vertex_location,
-                vertex_credentials=vertex_credentials,
-                model=model,
-                client=client,
-                optional_params=optional_params,
-                timeout=timeout,
-                logging_obj=logging_obj,
-                model_response=model_response,
-            )
-
-        if client is None:
-            _params = {}
-            if timeout is not None:
-                if isinstance(timeout, float) or isinstance(timeout, int):
-                    _httpx_timeout = httpx.Timeout(timeout)
-                    _params["timeout"] = _httpx_timeout
-            else:
-                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
-
-            sync_handler: HTTPHandler = HTTPHandler(**_params)  # type: ignore
-        else:
-            sync_handler = client  # type: ignore
-
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
-
-        auth_header, _ = self._ensure_access_token(
-            credentials=vertex_credentials, project_id=vertex_project
-        )
-        optional_params = optional_params or {
-            "sampleCount": 1
-        }  # default optional params
-
-        request_data = {
-            "instances": [{"prompt": prompt}],
-            "parameters": optional_params,
-        }
-
-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
-
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
-
-        response = sync_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
-            data=json.dumps(request_data),
-        )
-
-        if response.status_code != 200:
-            raise Exception(f"Error: {response.status_code} {response.text}")
-        """
-        Vertex AI Image generation response example:
-        {
-            "predictions": [
-                {
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES",
-                    "mimeType": "image/png"
-                },
-                {
-                    "mimeType": "image/png",
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES"
-                }
-            ]
-        }
-        """
-
-        _json_response = response.json()
-        if "predictions" not in _json_response:
-            raise litellm.InternalServerError(
-                message=f"image generation response does not contain 'predictions', got {_json_response}",
-                llm_provider="vertex_ai",
-                model=model,
-            )
-        _predictions = _json_response["predictions"]
-
-        _response_data: List[Image] = []
-        for _prediction in _predictions:
-            _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
-            image_object = Image(b64_json=_bytes_base64_encoded)
-            _response_data.append(image_object)
-
-        model_response.data = _response_data
-
-        return model_response
-
-    async def aimage_generation(
-        self,
-        prompt: str,
-        vertex_project: Optional[str],
-        vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
-        model_response: litellm.ImageResponse,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
-        client: Optional[AsyncHTTPHandler] = None,
-        optional_params: Optional[dict] = None,
-        timeout: Optional[int] = None,
-        logging_obj=None,
-    ):
-        response = None
-        if client is None:
-            _params = {}
-            if timeout is not None:
-                if isinstance(timeout, float) or isinstance(timeout, int):
-                    _httpx_timeout = httpx.Timeout(timeout)
-                    _params["timeout"] = _httpx_timeout
-            else:
-                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
-
-            self.async_handler = AsyncHTTPHandler(**_params)  # type: ignore
-        else:
-            self.async_handler = client  # type: ignore
-
-        # make POST request to
-        # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
-
-        """
-        Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
-        curl -X POST \
-        -H "Authorization: Bearer $(gcloud auth print-access-token)" \
-        -H "Content-Type: application/json; charset=utf-8" \
-        -d {
-            "instances": [
-                {
-                    "prompt": "a cat"
-                }
-            ],
-            "parameters": {
-                "sampleCount": 1
-            }
-        } \
-        "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
-        """
-        auth_header, _ = self._ensure_access_token(
-            credentials=vertex_credentials, project_id=vertex_project
-        )
-        optional_params = optional_params or {
-            "sampleCount": 1
-        }  # default optional params
-
-        request_data = {
-            "instances": [{"prompt": prompt}],
-            "parameters": optional_params,
-        }
-
-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
-
-        response = await self.async_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
-            data=json.dumps(request_data),
-        )
-
-        if response.status_code != 200:
-            raise Exception(f"Error: {response.status_code} {response.text}")
-        """
-        Vertex AI Image generation response example:
-        {
-            "predictions": [
-                {
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES",
-                    "mimeType": "image/png"
-                },
-                {
-                    "mimeType": "image/png",
-                    "bytesBase64Encoded": "BASE64_IMG_BYTES"
-                }
-            ]
-        }
-        """
-
-        _json_response = response.json()
-
-        if "predictions" not in _json_response:
-            raise litellm.InternalServerError(
-                message=f"image generation response does not contain 'predictions', got {_json_response}",
-                llm_provider="vertex_ai",
-                model=model,
-            )
-
-        _predictions = _json_response["predictions"]
-
-        _response_data: List[Image] = []
-        for _prediction in _predictions:
-            _bytes_base64_encoded = _prediction["bytesBase64Encoded"]
-            image_object = Image(b64_json=_bytes_base64_encoded)
-            _response_data.append(image_object)
-
-        model_response.data = _response_data
-
-        return model_response
-
-
 class ModelResponseIterator:
     def __init__(self, streaming_response, sync_stream: bool):
@@ -0,0 +1,225 @@
+import json
+from typing import Any, Dict, List, Optional
+
+import httpx
+from openai.types.image import Image
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
+    VertexLLM,
+)
+
+
+class VertexImageGeneration(VertexLLM):
+    def process_image_generation_response(
+        self,
+        json_response: Dict[str, Any],
+        model_response: litellm.ImageResponse,
+        model: str,
+    ) -> litellm.ImageResponse:
+        if "predictions" not in json_response:
+            raise litellm.InternalServerError(
+                message=f"image generation response does not contain 'predictions', got {json_response}",
+                llm_provider="vertex_ai",
+                model=model,
+            )
+
+        predictions = json_response["predictions"]
+        response_data: List[Image] = []
+
+        for prediction in predictions:
+            bytes_base64_encoded = prediction["bytesBase64Encoded"]
+            image_object = Image(b64_json=bytes_base64_encoded)
+            response_data.append(image_object)
+
+        model_response.data = response_data
+        return model_response
+
+    def image_generation(
+        self,
+        prompt: str,
+        vertex_project: Optional[str],
+        vertex_location: Optional[str],
+        vertex_credentials: Optional[str],
+        model_response: litellm.ImageResponse,
+        model: Optional[
+            str
+        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        client: Optional[Any] = None,
+        optional_params: Optional[dict] = None,
+        timeout: Optional[int] = None,
+        logging_obj=None,
+        aimg_generation=False,
+    ):
+        if aimg_generation is True:
+            return self.aimage_generation(
+                prompt=prompt,
+                vertex_project=vertex_project,
+                vertex_location=vertex_location,
+                vertex_credentials=vertex_credentials,
+                model=model,
+                client=client,
+                optional_params=optional_params,
+                timeout=timeout,
+                logging_obj=logging_obj,
+                model_response=model_response,
+            )
+
+        if client is None:
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    _httpx_timeout = httpx.Timeout(timeout)
+                    _params["timeout"] = _httpx_timeout
+            else:
+                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+
+            sync_handler: HTTPHandler = HTTPHandler(**_params)  # type: ignore
+        else:
+            sync_handler = client  # type: ignore
+
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+        auth_header, _ = self._ensure_access_token(
+            credentials=vertex_credentials, project_id=vertex_project
+        )
+        optional_params = optional_params or {
+            "sampleCount": 1
+        }  # default optional params
+
+        request_data = {
+            "instances": [{"prompt": prompt}],
+            "parameters": optional_params,
+        }
+
+        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
+        response = sync_handler.post(
+            url=url,
+            headers={
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {auth_header}",
+            },
+            data=json.dumps(request_data),
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Error: {response.status_code} {response.text}")
+
+        json_response = response.json()
+        return self.process_image_generation_response(
+            json_response, model_response, model
+        )
+
+    async def aimage_generation(
+        self,
+        prompt: str,
+        vertex_project: Optional[str],
+        vertex_location: Optional[str],
+        vertex_credentials: Optional[str],
+        model_response: litellm.ImageResponse,
+        model: Optional[
+            str
+        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        client: Optional[AsyncHTTPHandler] = None,
+        optional_params: Optional[dict] = None,
+        timeout: Optional[int] = None,
+        logging_obj=None,
+    ):
+        response = None
+        if client is None:
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    _httpx_timeout = httpx.Timeout(timeout)
+                    _params["timeout"] = _httpx_timeout
+            else:
+                _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
+
+            self.async_handler = AsyncHTTPHandler(**_params)  # type: ignore
+        else:
+            self.async_handler = client  # type: ignore
+
+        # make POST request to
+        # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+
+        """
+        Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
+        curl -X POST \
+        -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+        -H "Content-Type: application/json; charset=utf-8" \
+        -d {
+            "instances": [
+                {
+                    "prompt": "a cat"
+                }
+            ],
+            "parameters": {
+                "sampleCount": 1
+            }
+        } \
+        "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
+        """
+        auth_header, _ = self._ensure_access_token(
+            credentials=vertex_credentials, project_id=vertex_project
+        )
+        optional_params = optional_params or {
+            "sampleCount": 1
+        }  # default optional params
+
+        request_data = {
+            "instances": [{"prompt": prompt}],
+            "parameters": optional_params,
+        }
+
+        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
+        response = await self.async_handler.post(
+            url=url,
+            headers={
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {auth_header}",
+            },
+            data=json.dumps(request_data),
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Error: {response.status_code} {response.text}")
+
+        json_response = response.json()
+        return self.process_image_generation_response(
+            json_response, model_response, model
+        )
+
+    def is_image_generation_response(self, json_response: Dict[str, Any]) -> bool:
+        if "predictions" in json_response:
+            if "bytesBase64Encoded" in json_response["predictions"][0]:
+                return True
+        return False
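
For reference, a minimal usage sketch of the new VertexImageGeneration handler follows; it is not part of the diff. The import path is assumed (the diff does not show where the new file lives), the project, location, and prompt values are placeholders, and a no-op stand-in replaces litellm's logging object, since the handler calls logging_obj.pre_call() before issuing the HTTP request.

import litellm
from litellm.llms.vertex_ai_and_google_ai_studio.image_generation.image_generation_handler import (  # assumed path
    VertexImageGeneration,
)


class _NoOpLogging:
    # Stand-in for litellm's Logging object; the handler only needs pre_call() here.
    def pre_call(self, input, api_key, additional_args=None):
        pass


handler = VertexImageGeneration()
image_response = handler.image_generation(
    prompt="a watercolor painting of a lighthouse",  # placeholder prompt
    vertex_project="my-gcp-project",  # placeholder project id
    vertex_location="us-central1",  # placeholder region
    vertex_credentials=None,  # None falls back to ambient gcloud credentials
    model_response=litellm.ImageResponse(),
    optional_params={"sampleCount": 2},  # forwarded as the Vertex "parameters" body
    logging_obj=_NoOpLogging(),
)
print(len(image_response.data or []))  # one Image(b64_json=...) per prediction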
@@ -844,7 +844,7 @@ async def _PROXY_track_cost_callback(
                 kwargs["stream"] == True and "complete_streaming_response" in kwargs
             ):
                 raise Exception(
-                    f"Model not in litellm model cost map. Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
+                    f"Model not in litellm model cost map. Passed model = {kwargs.get('model')} - Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
                 )
     except Exception as e:
         error_msg = f"error in tracking cost callback - {traceback.format_exc()}"
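
As an illustration of the proxy change (also not part of the diff), the updated f-string now surfaces the model that failed the cost-map lookup; with a stand-in kwargs value it renders as follows.

kwargs = {"model": "vertex_ai/imagegeneration"}  # stand-in value for illustration
print(
    f"Model not in litellm model cost map. Passed model = {kwargs.get('model')} - "
    "Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
)
# Model not in litellm model cost map. Passed model = vertex_ai/imagegeneration - Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing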