Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00

refactor: add black formatting

This commit is contained in:
  parent b87d630b0a
  commit 4905929de3

156 changed files with 19723 additions and 10869 deletions
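For context: Black is an opinionated Python code formatter, and the hunks below are purely stylistic rewrites (double quotes, spaces around "=" in annotated defaults, trailing commas, re-wrapped call sites) with no behavioral change. A minimal sketch of the kind of rewrite Black applies; the function below is illustrative and not taken from this commit, and the invocation assumes Black's default settings (88-character line length):

    # before: single quotes, no spaces around "=" in annotated defaults
    def greet(name: str='world', excited: bool=False) -> str:
        return 'Hello, ' + name + ('!' if excited else '.')

    # after running `black .` from the repository root
    def greet(name: str = "world", excited: bool = False) -> str:
        return "Hello, " + name + ("!" if excited else ".")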
@@ -8,17 +8,21 @@ from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
 import litellm
 import httpx
 
+
 class VertexAIError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
-        self.request = httpx.Request(method="POST", url=" https://cloud.google.com/vertex-ai/")
+        self.request = httpx.Request(
+            method="POST", url=" https://cloud.google.com/vertex-ai/"
+        )
         self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
 
-class VertexAIConfig():
+
+class VertexAIConfig:
     """
     Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
 
@@ -34,28 +38,42 @@ class VertexAIConfig():
 
     Note: Please make sure to modify the default parameters as required for your use case.
     """
-    temperature: Optional[float]=None
-    max_output_tokens: Optional[int]=None
-    top_p: Optional[float]=None
-    top_k: Optional[int]=None
-
-    def __init__(self,
-                 temperature: Optional[float]=None,
-                 max_output_tokens: Optional[int]=None,
-                 top_p: Optional[float]=None,
-                 top_k: Optional[int]=None) -> None:
-
+    temperature: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+
+    def __init__(
+        self,
+        temperature: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+    ) -> None:
         locals_ = locals()
         for key, value in locals_.items():
-            if key != 'self' and value is not None:
+            if key != "self" and value is not None:
                 setattr(self.__class__, key, value)
 
 
     @classmethod
     def get_config(cls):
-        return {k: v for k, v in cls.__dict__.items()
-                if not k.startswith('__')
-                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
-                and v is not None}
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
 
 
 def _get_image_bytes_from_url(image_url: str) -> bytes:
     try:
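A note on how the VertexAIConfig class above is used (the merge loop appears in the completion() hunks further down): instantiating VertexAIConfig stores the non-None arguments as class attributes, and get_config() returns every non-dunder, non-callable, non-None class attribute, which completion() then copies into optional_params for any key the caller did not set. A rough sketch under those assumptions; the parameter values are illustrative:

    import litellm

    # set provider-level defaults once
    litellm.VertexAIConfig(temperature=0.2, max_output_tokens=256)

    # later, completion() merges them into the per-call params
    optional_params = {"temperature": 0.7}  # caller-supplied value wins
    for k, v in litellm.VertexAIConfig.get_config().items():
        if k not in optional_params:
            optional_params[k] = v
    # optional_params is now {"temperature": 0.7, "max_output_tokens": 256}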
@@ -65,7 +83,7 @@ def _get_image_bytes_from_url(image_url: str) -> bytes:
         return image_bytes
     except requests.exceptions.RequestException as e:
         # Handle any request exceptions (e.g., connection error, timeout)
-        return b'' # Return an empty bytes object or handle the error as needed
+        return b"" # Return an empty bytes object or handle the error as needed
 
 
 def _load_image_from_url(image_url: str):
@@ -78,13 +96,18 @@ def _load_image_from_url(image_url: str):
     Returns:
         Image: The loaded image.
     """
-    from vertexai.preview.generative_models import GenerativeModel, Part, GenerationConfig, Image
+    from vertexai.preview.generative_models import (
+        GenerativeModel,
+        Part,
+        GenerationConfig,
+        Image,
+    )
 
     image_bytes = _get_image_bytes_from_url(image_url)
     return Image.from_bytes(image_bytes)
 
-def _gemini_vision_convert_messages(
-    messages: list
-):
+
+def _gemini_vision_convert_messages(messages: list):
     """
     Converts given messages for GPT-4 Vision to Gemini format.
 
@@ -95,7 +118,7 @@ def _gemini_vision_convert_messages(
 
     Returns:
         tuple: A tuple containing the prompt (a string) and the processed images (a list of objects representing the images).
-
+
     Raises:
         VertexAIError: If the import of the 'vertexai' module fails, indicating that 'google-cloud-aiplatform' needs to be installed.
         Exception: If any other exception occurs during the execution of the function.
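For reference, the GPT-4 Vision style input this helper converts looks roughly like the following; the shape is an assumption based on the OpenAI chat format and is not shown in this diff. Text parts end up in the returned prompt string, while image parts are loaded into Vertex AI Image objects:

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this picture?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ]
    # prompt, images = _gemini_vision_convert_messages(messages=messages)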
@@ -115,11 +138,23 @@ def _gemini_vision_convert_messages(
     try:
         import vertexai
     except:
-        raise VertexAIError(status_code=400,message="vertexai import failed please run `pip install google-cloud-aiplatform`")
-    try:
-        from vertexai.preview.language_models import ChatModel, CodeChatModel, InputOutputTextPair
+        raise VertexAIError(
+            status_code=400,
+            message="vertexai import failed please run `pip install google-cloud-aiplatform`",
+        )
+    try:
+        from vertexai.preview.language_models import (
+            ChatModel,
+            CodeChatModel,
+            InputOutputTextPair,
+        )
         from vertexai.language_models import TextGenerationModel, CodeGenerationModel
-        from vertexai.preview.generative_models import GenerativeModel, Part, GenerationConfig, Image
+        from vertexai.preview.generative_models import (
+            GenerativeModel,
+            Part,
+            GenerationConfig,
+            Image,
+        )
 
         # given messages for gpt-4 vision, convert them for gemini
         # https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_python.ipynb
@@ -159,6 +194,7 @@ def _gemini_vision_convert_messages(
     except Exception as e:
         raise e
 
+
 def completion(
     model: str,
     messages: list,
@@ -171,30 +207,38 @@ def completion(
     optional_params=None,
     litellm_params=None,
     logger_fn=None,
-    acompletion: bool=False
+    acompletion: bool = False,
 ):
     try:
         import vertexai
     except:
-        raise VertexAIError(status_code=400,message="vertexai import failed please run `pip install google-cloud-aiplatform`")
-    try:
-        from vertexai.preview.language_models import ChatModel, CodeChatModel, InputOutputTextPair
+        raise VertexAIError(
+            status_code=400,
+            message="vertexai import failed please run `pip install google-cloud-aiplatform`",
+        )
+    try:
+        from vertexai.preview.language_models import (
+            ChatModel,
+            CodeChatModel,
+            InputOutputTextPair,
+        )
         from vertexai.language_models import TextGenerationModel, CodeGenerationModel
-        from vertexai.preview.generative_models import GenerativeModel, Part, GenerationConfig
+        from vertexai.preview.generative_models import (
+            GenerativeModel,
+            Part,
+            GenerationConfig,
+        )
         from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types
 
-
-        vertexai.init(
-            project=vertex_project, location=vertex_location
-        )
+        vertexai.init(project=vertex_project, location=vertex_location)
 
         ## Load Config
         config = litellm.VertexAIConfig.get_config()
-        for k, v in config.items():
-            if k not in optional_params:
+        for k, v in config.items():
+            if k not in optional_params:
                 optional_params[k] = v
 
-        ## Process safety settings into format expected by vertex AI
+        ## Process safety settings into format expected by vertex AI
         safety_settings = None
         if "safety_settings" in optional_params:
             safety_settings = optional_params.pop("safety_settings")
@@ -202,17 +246,25 @@ def completion(
                 raise ValueError("safety_settings must be a list")
             if len(safety_settings) > 0 and not isinstance(safety_settings[0], dict):
                 raise ValueError("safety_settings must be a list of dicts")
-            safety_settings=[gapic_content_types.SafetySetting(x) for x in safety_settings]
+            safety_settings = [
+                gapic_content_types.SafetySetting(x) for x in safety_settings
+            ]
 
         # vertexai does not use an API key, it looks for credentials.json in the environment
 
-        prompt = " ".join([message["content"] for message in messages if isinstance(message["content"], str)])
+        prompt = " ".join(
+            [
+                message["content"]
+                for message in messages
+                if isinstance(message["content"], str)
+            ]
+        )
 
-        mode = ""
+        mode = ""
 
         request_str = ""
         response_obj = None
-        if model in litellm.vertex_language_models:
+        if model in litellm.vertex_language_models:
             llm_model = GenerativeModel(model)
             mode = ""
             request_str += f"llm_model = GenerativeModel({model})\n"
@@ -232,31 +284,76 @@ def completion(
             llm_model = CodeGenerationModel.from_pretrained(model)
             mode = "text"
             request_str += f"llm_model = CodeGenerationModel.from_pretrained({model})\n"
-        else: # vertex_code_llm_models
+        else: # vertex_code_llm_models
             llm_model = CodeChatModel.from_pretrained(model)
             mode = "chat"
             request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
 
-        if acompletion == True: # [TODO] expand support to vertex ai chat + text models
-            if optional_params.get("stream", False) is True:
-
+        if acompletion == True: # [TODO] expand support to vertex ai chat + text models
+            if optional_params.get("stream", False) is True:
                 # async streaming
-                return async_streaming(llm_model=llm_model, mode=mode, prompt=prompt, logging_obj=logging_obj, request_str=request_str, model=model, model_response=model_response, messages=messages, print_verbose=print_verbose, **optional_params)
-            return async_completion(llm_model=llm_model, mode=mode, prompt=prompt, logging_obj=logging_obj, request_str=request_str, model=model, model_response=model_response, encoding=encoding, messages=messages,print_verbose=print_verbose,**optional_params)
+                return async_streaming(
+                    llm_model=llm_model,
+                    mode=mode,
+                    prompt=prompt,
+                    logging_obj=logging_obj,
+                    request_str=request_str,
+                    model=model,
+                    model_response=model_response,
+                    messages=messages,
+                    print_verbose=print_verbose,
+                    **optional_params,
+                )
+            return async_completion(
+                llm_model=llm_model,
+                mode=mode,
+                prompt=prompt,
+                logging_obj=logging_obj,
+                request_str=request_str,
+                model=model,
+                model_response=model_response,
+                encoding=encoding,
+                messages=messages,
+                print_verbose=print_verbose,
+                **optional_params,
+            )
 
         if mode == "":
 
             if "stream" in optional_params and optional_params["stream"] == True:
                 stream = optional_params.pop("stream")
                 request_str += f"llm_model.generate_content({prompt}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
                 ## LOGGING
-                logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-                model_response = llm_model.generate_content(prompt, generation_config=GenerationConfig(**optional_params), safety_settings=safety_settings, stream=stream)
+                logging_obj.pre_call(
+                    input=prompt,
+                    api_key=None,
+                    additional_args={
+                        "complete_input_dict": optional_params,
+                        "request_str": request_str,
+                    },
+                )
+                model_response = llm_model.generate_content(
+                    prompt,
+                    generation_config=GenerationConfig(**optional_params),
+                    safety_settings=safety_settings,
+                    stream=stream,
+                )
                 optional_params["stream"] = True
                 return model_response
             request_str += f"llm_model.generate_content({prompt}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}).text\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-            response_obj = llm_model.generate_content(prompt, generation_config=GenerationConfig(**optional_params), safety_settings=safety_settings)
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+            response_obj = llm_model.generate_content(
+                prompt,
+                generation_config=GenerationConfig(**optional_params),
+                safety_settings=safety_settings,
+            )
             completion_response = response_obj.text
             response_obj = response_obj._raw_response
         elif mode == "vision":
@@ -268,21 +365,35 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 stream = optional_params.pop("stream")
                 request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
-                logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-
+                logging_obj.pre_call(
+                    input=prompt,
+                    api_key=None,
+                    additional_args={
+                        "complete_input_dict": optional_params,
+                        "request_str": request_str,
+                    },
+                )
+
                 model_response = llm_model.generate_content(
                     contents=content,
                     generation_config=GenerationConfig(**optional_params),
                     safety_settings=safety_settings,
-                    stream=True
+                    stream=True,
                 )
                 optional_params["stream"] = True
                 return model_response
 
             request_str += f"response = llm_model.generate_content({content})\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+
             ## LLM Call
             response = llm_model.generate_content(
                 contents=content,
@@ -293,88 +404,150 @@ def completion(
             response_obj = response._raw_response
         elif mode == "chat":
             chat = llm_model.start_chat()
-            request_str+= f"chat = llm_model.start_chat()\n"
+            request_str += f"chat = llm_model.start_chat()\n"
 
             if "stream" in optional_params and optional_params["stream"] == True:
                 # NOTE: VertexAI does not accept stream=True as a param and raises an error,
                 # we handle this by removing 'stream' from optional params and sending the request
                 # after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format
-                optional_params.pop("stream", None) # vertex ai raises an error when passing stream in optional params
-                request_str += f"chat.send_message_streaming({prompt}, **{optional_params})\n"
+                optional_params.pop(
+                    "stream", None
+                ) # vertex ai raises an error when passing stream in optional params
+                request_str += (
+                    f"chat.send_message_streaming({prompt}, **{optional_params})\n"
+                )
                 ## LOGGING
-                logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+                logging_obj.pre_call(
+                    input=prompt,
+                    api_key=None,
+                    additional_args={
+                        "complete_input_dict": optional_params,
+                        "request_str": request_str,
+                    },
+                )
                 model_response = chat.send_message_streaming(prompt, **optional_params)
                 optional_params["stream"] = True
                 return model_response
 
             request_str += f"chat.send_message({prompt}, **{optional_params}).text\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
             completion_response = chat.send_message(prompt, **optional_params).text
         elif mode == "text":
             if "stream" in optional_params and optional_params["stream"] == True:
-                optional_params.pop("stream", None) # See note above on handling streaming for vertex ai
-                request_str += f"llm_model.predict_streaming({prompt}, **{optional_params})\n"
+                optional_params.pop(
+                    "stream", None
+                ) # See note above on handling streaming for vertex ai
+                request_str += (
+                    f"llm_model.predict_streaming({prompt}, **{optional_params})\n"
+                )
                 ## LOGGING
-                logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+                logging_obj.pre_call(
+                    input=prompt,
+                    api_key=None,
+                    additional_args={
+                        "complete_input_dict": optional_params,
+                        "request_str": request_str,
+                    },
+                )
                 model_response = llm_model.predict_streaming(prompt, **optional_params)
                 optional_params["stream"] = True
                 return model_response
 
             request_str += f"llm_model.predict({prompt}, **{optional_params}).text\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
             completion_response = llm_model.predict(prompt, **optional_params).text
 
-
         ## LOGGING
         logging_obj.post_call(
             input=prompt, api_key=None, original_response=completion_response
         )
 
         ## RESPONSE OBJECT
-        if len(str(completion_response)) > 0:
-            model_response["choices"][0]["message"][
-                "content"
-            ] = str(completion_response)
+        if len(str(completion_response)) > 0:
+            model_response["choices"][0]["message"]["content"] = str(
+                completion_response
+            )
         model_response["choices"][0]["message"]["content"] = str(completion_response)
         model_response["created"] = int(time.time())
         model_response["model"] = model
         ## CALCULATING USAGE
         if model in litellm.vertex_language_models and response_obj is not None:
-            model_response["choices"][0].finish_reason = response_obj.candidates[0].finish_reason.name
-            usage = Usage(prompt_tokens=response_obj.usage_metadata.prompt_token_count,
-                          completion_tokens=response_obj.usage_metadata.candidates_token_count,
-                          total_tokens=response_obj.usage_metadata.total_token_count)
-        else:
-            prompt_tokens = len(
-                encoding.encode(prompt)
-            )
+            model_response["choices"][0].finish_reason = response_obj.candidates[
+                0
+            ].finish_reason.name
+            usage = Usage(
+                prompt_tokens=response_obj.usage_metadata.prompt_token_count,
+                completion_tokens=response_obj.usage_metadata.candidates_token_count,
+                total_tokens=response_obj.usage_metadata.total_token_count,
+            )
+        else:
+            prompt_tokens = len(encoding.encode(prompt))
             completion_tokens = len(
-                encoding.encode(model_response["choices"][0]["message"].get("content", ""))
+                encoding.encode(
+                    model_response["choices"][0]["message"].get("content", "")
+                )
             )
             usage = Usage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=prompt_tokens + completion_tokens
-            )
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=prompt_tokens + completion_tokens,
+            )
         model_response.usage = usage
         return model_response
-    except Exception as e:
+    except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
 
-async def async_completion(llm_model, mode: str, prompt: str, model: str, model_response: ModelResponse, logging_obj=None, request_str=None, encoding=None, messages = None, print_verbose = None, **optional_params):
+
+async def async_completion(
+    llm_model,
+    mode: str,
+    prompt: str,
+    model: str,
+    model_response: ModelResponse,
+    logging_obj=None,
+    request_str=None,
+    encoding=None,
+    messages=None,
+    print_verbose=None,
+    **optional_params,
+):
     """
    Add support for acompletion calls for gemini-pro
     """
-    try:
+    try:
         from vertexai.preview.generative_models import GenerationConfig
 
         if mode == "":
             # gemini-pro
             chat = llm_model.start_chat()
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-            response_obj = await chat.send_message_async(prompt, generation_config=GenerationConfig(**optional_params))
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+            response_obj = await chat.send_message_async(
+                prompt, generation_config=GenerationConfig(**optional_params)
+            )
             completion_response = response_obj.text
             response_obj = response_obj._raw_response
         elif mode == "vision":
@@ -386,12 +559,18 @@ async def async_completion(llm_model, mode: str, prompt: str, model: str, model_
 
             request_str += f"response = llm_model.generate_content({content})\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+
             ## LLM Call
             response = await llm_model._generate_content_async(
-                contents=content,
-                generation_config=GenerationConfig(**optional_params)
+                contents=content, generation_config=GenerationConfig(**optional_params)
             )
             completion_response = response.text
             response_obj = response._raw_response
@@ -399,14 +578,28 @@ async def async_completion(llm_model, mode: str, prompt: str, model: str, model_
             # chat-bison etc.
             chat = llm_model.start_chat()
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
             response_obj = await chat.send_message_async(prompt, **optional_params)
             completion_response = response_obj.text
         elif mode == "text":
             # gecko etc.
             request_str += f"llm_model.predict({prompt}, **{optional_params}).text\n"
             ## LOGGING
-            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
             response_obj = await llm_model.predict_async(prompt, **optional_params)
             completion_response = response_obj.text
 
@@ -416,51 +609,77 @@ async def async_completion(llm_model, mode: str, prompt: str, model: str, model_
         )
 
         ## RESPONSE OBJECT
-        if len(str(completion_response)) > 0:
-            model_response["choices"][0]["message"][
-                "content"
-            ] = str(completion_response)
+        if len(str(completion_response)) > 0:
+            model_response["choices"][0]["message"]["content"] = str(
+                completion_response
+            )
         model_response["choices"][0]["message"]["content"] = str(completion_response)
         model_response["created"] = int(time.time())
         model_response["model"] = model
         ## CALCULATING USAGE
         if model in litellm.vertex_language_models and response_obj is not None:
-            model_response["choices"][0].finish_reason = response_obj.candidates[0].finish_reason.name
-            usage = Usage(prompt_tokens=response_obj.usage_metadata.prompt_token_count,
-                          completion_tokens=response_obj.usage_metadata.candidates_token_count,
-                          total_tokens=response_obj.usage_metadata.total_token_count)
+            model_response["choices"][0].finish_reason = response_obj.candidates[
+                0
+            ].finish_reason.name
+            usage = Usage(
+                prompt_tokens=response_obj.usage_metadata.prompt_token_count,
+                completion_tokens=response_obj.usage_metadata.candidates_token_count,
+                total_tokens=response_obj.usage_metadata.total_token_count,
+            )
         else:
-            prompt_tokens = len(
-                encoding.encode(prompt)
-            )
+            prompt_tokens = len(encoding.encode(prompt))
             completion_tokens = len(
-                encoding.encode(model_response["choices"][0]["message"].get("content", ""))
+                encoding.encode(
+                    model_response["choices"][0]["message"].get("content", "")
+                )
             )
             usage = Usage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=prompt_tokens + completion_tokens
-            )
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=prompt_tokens + completion_tokens,
+            )
         model_response.usage = usage
         return model_response
-    except Exception as e:
+    except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
 
-async def async_streaming(llm_model, mode: str, prompt: str, model: str, model_response: ModelResponse, logging_obj=None, request_str=None, messages = None, print_verbose = None, **optional_params):
+
+async def async_streaming(
+    llm_model,
+    mode: str,
+    prompt: str,
+    model: str,
+    model_response: ModelResponse,
+    logging_obj=None,
+    request_str=None,
+    messages=None,
+    print_verbose=None,
+    **optional_params,
+):
     """
     Add support for async streaming calls for gemini-pro
     """
     from vertexai.preview.generative_models import GenerationConfig
-    if mode == "":
+
+    if mode == "":
         # gemini-pro
         chat = llm_model.start_chat()
         stream = optional_params.pop("stream")
         request_str += f"chat.send_message_async({prompt},generation_config=GenerationConfig(**{optional_params}), stream={stream})\n"
         ## LOGGING
-        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-        response = await chat.send_message_async(prompt, generation_config=GenerationConfig(**optional_params), stream=stream)
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+        response = await chat.send_message_async(
+            prompt, generation_config=GenerationConfig(**optional_params), stream=stream
+        )
         optional_params["stream"] = True
-    elif mode == "vision":
+    elif mode == "vision":
         stream = optional_params.pop("stream")
 
         print_verbose("\nMaking VertexAI Gemini Pro Vision Call")
@@ -470,33 +689,68 @@ async def async_streaming(llm_model, mode: str, prompt: str, model: str, model_r
         content = [prompt] + images
         stream = optional_params.pop("stream")
         request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), stream={stream})\n"
-        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
+
         response = llm_model._generate_content_streaming_async(
             contents=content,
             generation_config=GenerationConfig(**optional_params),
-            stream=True
+            stream=True,
         )
         optional_params["stream"] = True
     elif mode == "chat":
         chat = llm_model.start_chat()
-        optional_params.pop("stream", None) # vertex ai raises an error when passing stream in optional params
-        request_str += f"chat.send_message_streaming_async({prompt}, **{optional_params})\n"
+        optional_params.pop(
+            "stream", None
+        ) # vertex ai raises an error when passing stream in optional params
+        request_str += (
+            f"chat.send_message_streaming_async({prompt}, **{optional_params})\n"
+        )
         ## LOGGING
-        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
         response = chat.send_message_streaming_async(prompt, **optional_params)
         optional_params["stream"] = True
     elif mode == "text":
-        optional_params.pop("stream", None) # See note above on handling streaming for vertex ai
-        request_str += f"llm_model.predict_streaming_async({prompt}, **{optional_params})\n"
+        optional_params.pop(
+            "stream", None
+        ) # See note above on handling streaming for vertex ai
+        request_str += (
+            f"llm_model.predict_streaming_async({prompt}, **{optional_params})\n"
+        )
         ## LOGGING
-        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+        logging_obj.pre_call(
+            input=prompt,
+            api_key=None,
+            additional_args={
+                "complete_input_dict": optional_params,
+                "request_str": request_str,
+            },
+        )
         response = llm_model.predict_streaming_async(prompt, **optional_params)
 
-    streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="vertex_ai",logging_obj=logging_obj)
+    streamwrapper = CustomStreamWrapper(
+        completion_stream=response,
+        model=model,
+        custom_llm_provider="vertex_ai",
+        logging_obj=logging_obj,
+    )
     async for transformed_chunk in streamwrapper:
         yield transformed_chunk
 
+
 def embedding():
     # logic for parsing in - calling - parsing out model embedding calls
     pass
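A usage note on the async streaming path above: async_streaming is an async generator that wraps the provider stream in CustomStreamWrapper and yields OpenAI-format chunks, so callers consume it with `async for`. A minimal sketch, assuming it is reached via litellm.acompletion with stream=True and that Vertex AI credentials are already configured in the environment (the diff notes vertexai reads credentials rather than an API key); the model name and prompt are illustrative:

    import asyncio
    import litellm

    async def main():
        response = await litellm.acompletion(
            model="vertex_ai/gemini-pro",
            messages=[{"role": "user", "content": "Say hi"}],
            stream=True,
        )
        async for chunk in response:
            print(chunk.choices[0].delta.content or "", end="")

    asyncio.run(main())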