============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.2.0, pluggy-1.5.0
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.6, mock-3.14.0, anyio-4.2.0
asyncio: mode=Mode.STRICT
collected 1 item

test_amazing_vertex_completion.py F [100%]

=================================== FAILURES ===================================
____________________________ test_gemini_pro_vision ____________________________

model = 'gemini-1.5-flash-preview-0514'
messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}]
model_response = ModelResponse(id='chatcmpl-722df0e7-4e2d-44e6-9e2c-49823faa0189', choices=[Choices(finish_reason='stop', index=0, mess... role='assistant'))], created=1716145725, model=None, object='chat.completion', system_fingerprint=None, usage=Usage())
print_verbose = <function print_verbose at 0x10c6c20c0>
encoding = <Encoding 'cl100k_base'>
logging_obj = <litellm.utils.Logging object at 0x1059c53d0>
vertex_project = None, vertex_location = None, vertex_credentials = None
optional_params = {}
litellm_params = {'acompletion': False, 'api_base': '', 'api_key': None, 'completion_call_id': None, ...}
logger_fn = None, acompletion = False

def completion(
model: str,
messages: list,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
logging_obj,
vertex_project=None,
vertex_location=None,
vertex_credentials=None,
optional_params=None,
litellm_params=None,
logger_fn=None,
acompletion: bool = False,
):
try:
import vertexai
except:
raise VertexAIError(
status_code=400,
message="vertexai import failed please run `pip install google-cloud-aiplatform`",
)

if not (
hasattr(vertexai, "preview") or hasattr(vertexai.preview, "language_models")
):
raise VertexAIError(
status_code=400,
message="""Upgrade vertex ai. Run `pip install "google-cloud-aiplatform>=1.38"`""",
)
try:
from vertexai.preview.language_models import (
ChatModel,
CodeChatModel,
InputOutputTextPair,
)
from vertexai.language_models import TextGenerationModel, CodeGenerationModel
from vertexai.preview.generative_models import (
GenerativeModel,
Part,
GenerationConfig,
)
from google.cloud import aiplatform # type: ignore
from google.protobuf import json_format # type: ignore
from google.protobuf.struct_pb2 import Value # type: ignore
from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore
import google.auth # type: ignore
import proto # type: ignore

## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
print_verbose(
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
)
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account

json_obj = json.loads(vertex_credentials)

creds = google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
else:
creds, _ = google.auth.default(quota_project_id=vertex_project)
print_verbose(
f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
)
vertexai.init(
project=vertex_project, location=vertex_location, credentials=creds
)

## Load Config
config = litellm.VertexAIConfig.get_config()
for k, v in config.items():
if k not in optional_params:
optional_params[k] = v

## Process safety settings into format expected by vertex AI
safety_settings = None
if "safety_settings" in optional_params:
safety_settings = optional_params.pop("safety_settings")
if not isinstance(safety_settings, list):
raise ValueError("safety_settings must be a list")
if len(safety_settings) > 0 and not isinstance(safety_settings[0], dict):
raise ValueError("safety_settings must be a list of dicts")
safety_settings = [
gapic_content_types.SafetySetting(x) for x in safety_settings
]

# vertexai does not use an API key, it looks for credentials.json in the environment

prompt = " ".join(
|
||
[
|
||
message["content"]
|
||
for message in messages
|
||
if isinstance(message["content"], str)
|
||
]
|
||
)
|
||
|
||
mode = ""
|
||
|
||
request_str = ""
|
||
response_obj = None
|
||
async_client = None
|
||
instances = None
|
||
client_options = {
|
||
"api_endpoint": f"{vertex_location}-aiplatform.googleapis.com"
|
||
}
|
||
if (
|
||
model in litellm.vertex_language_models
|
||
or model in litellm.vertex_vision_models
|
||
):
|
||
llm_model = GenerativeModel(model)
|
||
mode = "vision"
|
||
request_str += f"llm_model = GenerativeModel({model})\n"
|
||
elif model in litellm.vertex_chat_models:
|
||
llm_model = ChatModel.from_pretrained(model)
|
||
mode = "chat"
|
||
request_str += f"llm_model = ChatModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_text_models:
|
||
llm_model = TextGenerationModel.from_pretrained(model)
|
||
mode = "text"
|
||
request_str += f"llm_model = TextGenerationModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_code_text_models:
|
||
llm_model = CodeGenerationModel.from_pretrained(model)
|
||
mode = "text"
|
||
request_str += f"llm_model = CodeGenerationModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_code_chat_models: # vertex_code_llm_models
|
||
llm_model = CodeChatModel.from_pretrained(model)
|
||
mode = "chat"
|
||
request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
|
||
elif model == "private":
|
||
mode = "private"
|
||
model = optional_params.pop("model_id", None)
|
||
# private endpoint requires a dict instead of JSON
|
||
instances = [optional_params.copy()]
|
||
instances[0]["prompt"] = prompt
|
||
llm_model = aiplatform.PrivateEndpoint(
|
||
endpoint_name=model,
|
||
project=vertex_project,
|
||
location=vertex_location,
|
||
)
|
||
request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n"
|
||
else: # assume vertex model garden on public endpoint
|
||
mode = "custom"
|
||
|
||
instances = [optional_params.copy()]
|
||
instances[0]["prompt"] = prompt
|
||
instances = [
|
||
json_format.ParseDict(instance_dict, Value())
|
||
for instance_dict in instances
|
||
]
|
||
# Will determine the API used based on async parameter
|
||
llm_model = None
|
||
|
||
# NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
if acompletion == True:
data = {
"llm_model": llm_model,
"mode": mode,
"prompt": prompt,
"logging_obj": logging_obj,
"request_str": request_str,
"model": model,
"model_response": model_response,
"encoding": encoding,
"messages": messages,
"print_verbose": print_verbose,
"client_options": client_options,
"instances": instances,
"vertex_location": vertex_location,
"vertex_project": vertex_project,
"safety_settings": safety_settings,
**optional_params,
}
if optional_params.get("stream", False) is True:
# async streaming
return async_streaming(**data)

return async_completion(**data)

if mode == "vision":
|
||
print_verbose("\nMaking VertexAI Gemini Pro / Pro Vision Call")
|
||
print_verbose(f"\nProcessing input messages = {messages}")
|
||
tools = optional_params.pop("tools", None)
|
||
content = _gemini_convert_messages_text(messages=messages)
|
||
stream = optional_params.pop("stream", False)
|
||
if stream == True:
|
||
request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
|
||
model_response = llm_model.generate_content(
|
||
contents={"content": content},
|
||
generation_config=optional_params,
|
||
safety_settings=safety_settings,
|
||
stream=True,
|
||
tools=tools,
|
||
)
|
||
|
||
return model_response
|
||
|
||
request_str += f"response = llm_model.generate_content({content})\n"
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
|
||
## LLM Call
|
||
> response = llm_model.generate_content(
|
||
contents=content,
|
||
generation_config=optional_params,
|
||
safety_settings=safety_settings,
|
||
tools=tools,
|
||
)
|
||
|
||
../llms/vertex_ai.py:740:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:405: in generate_content
return self._generate_content(
../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:487: in _generate_content
request = self._prepare_request(
../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:274: in _prepare_request
contents = [
../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:275: in <listcomp>
gapic_content_types.Content(content_dict) for content_dict in contents
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <[AttributeError('Unknown field for Content: _pb') raised in repr()] Content object at 0x1646aaa90>
mapping = {'parts': [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
], 'role': 'user'}
ignore_unknown_fields = False, kwargs = {}
params = {'parts': [text: "Whats in this image?"
, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
], 'role': 'user'}
marshal = <proto.marshal.marshal.Marshal object at 0x10c6a3190>, key = 'parts'
value = [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]
pb_value = [text: "Whats in this image?"
, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]

def __init__(
self,
mapping=None,
*,
ignore_unknown_fields=False,
**kwargs,
):
# We accept several things for `mapping`:
# * An instance of this class.
# * An instance of the underlying protobuf descriptor class.
# * A dict
# * Nothing (keyword arguments only).
if mapping is None:
if not kwargs:
# Special fast path for empty construction.
super().__setattr__("_pb", self._meta.pb())
return

mapping = kwargs
elif isinstance(mapping, self._meta.pb):
# Make a copy of the mapping.
# This is a constructor for a new object, so users will assume
# that it will not have side effects on the arguments being
# passed in.
#
# The `wrap` method on the metaclass is the public API for taking
# ownership of the passed in protobuf object.
mapping = copy.deepcopy(mapping)
if kwargs:
mapping.MergeFrom(self._meta.pb(**kwargs))

super().__setattr__("_pb", mapping)
return
elif isinstance(mapping, type(self)):
# Just use the above logic on mapping's underlying pb.
self.__init__(mapping=mapping._pb, **kwargs)
return
elif isinstance(mapping, collections.abc.Mapping):
# Can't have side effects on mapping.
mapping = copy.copy(mapping)
# kwargs entries take priority for duplicate keys.
mapping.update(kwargs)
else:
# Sanity check: Did we get something not a map? Error if so.
raise TypeError(
"Invalid constructor input for %s: %r"
% (
self.__class__.__name__,
mapping,
)
)

params = {}
# Update the mapping to address any values that need to be
# coerced.
marshal = self._meta.marshal
for key, value in mapping.items():
(key, pb_type) = self._get_pb_type_from_key(key)
if pb_type is None:
if ignore_unknown_fields:
continue

raise ValueError(
"Unknown field for {}: {}".format(self.__class__.__name__, key)
)

try:
pb_value = marshal.to_proto(pb_type, value)
except ValueError:
# Underscores may be appended to field names
# that collide with python or proto-plus keywords.
# In case a key only exists with a `_` suffix, coerce the key
# to include the `_` suffix. It's not possible to
# natively define the same field with a trailing underscore in protobuf.
# See related issue
# https://github.com/googleapis/python-api-core/issues/227
if isinstance(value, dict):
if _upb:
# In UPB, pb_type is MessageMeta which doesn't expose attrs like it used to in Python/CPP.
keys_to_update = [
item
for item in value
if item not in pb_type.DESCRIPTOR.fields_by_name
and f"{item}_" in pb_type.DESCRIPTOR.fields_by_name
]
else:
keys_to_update = [
item
for item in value
if not hasattr(pb_type, item)
and hasattr(pb_type, f"{item}_")
]
for item in keys_to_update:
value[f"{item}_"] = value.pop(item)

pb_value = marshal.to_proto(pb_type, value)

if pb_value is not None:
params[key] = pb_value

# Create the internal protocol buffer.
> super().__setattr__("_pb", self._meta.pb(**params))
E TypeError: Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.

../proxy/myenv/lib/python3.11/site-packages/proto/message.py:615: TypeError

During handling of the above exception, another exception occurred:

model = 'gemini-1.5-flash-preview-0514'
messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}]
timeout = 600.0, temperature = None, top_p = None, n = None, stream = None
stream_options = None, stop = None, max_tokens = None, presence_penalty = None
frequency_penalty = None, logit_bias = None, user = None, response_format = None
seed = None, tools = None, tool_choice = None, logprobs = None
top_logprobs = None, deployment_id = None, extra_headers = None
functions = None, function_call = None, base_url = None, api_version = None
api_key = None, model_list = None
kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': <litellm.utils.Logging object at 0x1059c53d0>}
args = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...}
api_base = None, mock_response = None, force_timeout = 600, logger_fn = None
verbose = False, custom_llm_provider = 'vertex_ai'

@client
|
||
def completion(
|
||
model: str,
|
||
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
|
||
messages: List = [],
|
||
timeout: Optional[Union[float, str, httpx.Timeout]] = None,
|
||
temperature: Optional[float] = None,
|
||
top_p: Optional[float] = None,
|
||
n: Optional[int] = None,
|
||
stream: Optional[bool] = None,
|
||
stream_options: Optional[dict] = None,
|
||
stop=None,
|
||
max_tokens: Optional[int] = None,
|
||
presence_penalty: Optional[float] = None,
|
||
frequency_penalty: Optional[float] = None,
|
||
logit_bias: Optional[dict] = None,
|
||
user: Optional[str] = None,
|
||
# openai v1.0+ new params
|
||
response_format: Optional[dict] = None,
|
||
seed: Optional[int] = None,
|
||
tools: Optional[List] = None,
|
||
tool_choice: Optional[str] = None,
|
||
logprobs: Optional[bool] = None,
|
||
top_logprobs: Optional[int] = None,
|
||
deployment_id=None,
|
||
extra_headers: Optional[dict] = None,
|
||
# soon to be deprecated params by OpenAI
|
||
functions: Optional[List] = None,
|
||
function_call: Optional[str] = None,
|
||
# set api_base, api_version, api_key
|
||
base_url: Optional[str] = None,
|
||
api_version: Optional[str] = None,
|
||
api_key: Optional[str] = None,
|
||
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
|
||
# Optional liteLLM function params
|
||
**kwargs,
|
||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||
"""
|
||
Perform a completion() using any of litellm supported llms (example gpt-4, gpt-3.5-turbo, claude-2, command-nightly)
|
||
Parameters:
|
||
model (str): The name of the language model to use for text completion. see all supported LLMs: https://docs.litellm.ai/docs/providers/
|
||
messages (List): A list of message objects representing the conversation context (default is an empty list).
|
||
|
||
OPTIONAL PARAMS
|
||
functions (List, optional): A list of functions to apply to the conversation messages (default is an empty list).
|
||
function_call (str, optional): The name of the function to call within the conversation (default is an empty string).
|
||
temperature (float, optional): The temperature parameter for controlling the randomness of the output (default is 1.0).
|
||
top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
|
||
n (int, optional): The number of completions to generate (default is 1).
|
||
stream (bool, optional): If True, return a streaming response (default is False).
|
||
stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
|
||
stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
|
||
max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
|
||
presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
|
||
frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
|
||
logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion.
|
||
user (str, optional): A unique identifier representing your end-user. This can help the LLM provider to monitor and detect abuse.
|
||
logprobs (bool, optional): Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message
|
||
top_logprobs (int, optional): An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.
|
||
metadata (dict, optional): Pass in additional metadata to tag your completion calls - eg. prompt version, details, etc.
|
||
api_base (str, optional): Base URL for the API (default is None).
|
||
api_version (str, optional): API version (default is None).
|
||
api_key (str, optional): API key (default is None).
|
||
model_list (list, optional): List of api base, version, keys
|
||
extra_headers (dict, optional): Additional headers to include in the request.
|
||
|
||
LITELLM Specific Params
|
||
mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None).
|
||
custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock"
|
||
max_retries (int, optional): The number of retries to attempt (default is 0).
|
||
Returns:
|
||
ModelResponse: A response object containing the generated completion and associated metadata.
|
||
|
||
Note:
|
||
- This function is used to perform completions() using the specified language model.
|
||
- It supports various optional parameters for customizing the completion behavior.
|
||
- If 'mock_response' is provided, a mock completion response is returned for testing or debugging.
|
||
"""
|
||
######### unpacking kwargs #####################
|
||
args = locals()
|
||
api_base = kwargs.get("api_base", None)
|
||
mock_response = kwargs.get("mock_response", None)
|
||
force_timeout = kwargs.get("force_timeout", 600) ## deprecated
|
||
logger_fn = kwargs.get("logger_fn", None)
|
||
verbose = kwargs.get("verbose", False)
|
||
custom_llm_provider = kwargs.get("custom_llm_provider", None)
|
||
litellm_logging_obj = kwargs.get("litellm_logging_obj", None)
|
||
id = kwargs.get("id", None)
|
||
metadata = kwargs.get("metadata", None)
|
||
model_info = kwargs.get("model_info", None)
|
||
proxy_server_request = kwargs.get("proxy_server_request", None)
|
||
fallbacks = kwargs.get("fallbacks", None)
|
||
headers = kwargs.get("headers", None) or extra_headers
|
||
num_retries = kwargs.get("num_retries", None) ## deprecated
|
||
max_retries = kwargs.get("max_retries", None)
|
||
context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None)
|
||
organization = kwargs.get("organization", None)
|
||
### CUSTOM MODEL COST ###
|
||
input_cost_per_token = kwargs.get("input_cost_per_token", None)
|
||
output_cost_per_token = kwargs.get("output_cost_per_token", None)
|
||
input_cost_per_second = kwargs.get("input_cost_per_second", None)
|
||
output_cost_per_second = kwargs.get("output_cost_per_second", None)
|
||
### CUSTOM PROMPT TEMPLATE ###
|
||
initial_prompt_value = kwargs.get("initial_prompt_value", None)
|
||
roles = kwargs.get("roles", None)
|
||
final_prompt_value = kwargs.get("final_prompt_value", None)
|
||
bos_token = kwargs.get("bos_token", None)
|
||
eos_token = kwargs.get("eos_token", None)
|
||
preset_cache_key = kwargs.get("preset_cache_key", None)
|
||
hf_model_name = kwargs.get("hf_model_name", None)
|
||
supports_system_message = kwargs.get("supports_system_message", None)
|
||
### TEXT COMPLETION CALLS ###
|
||
text_completion = kwargs.get("text_completion", False)
|
||
atext_completion = kwargs.get("atext_completion", False)
|
||
### ASYNC CALLS ###
|
||
acompletion = kwargs.get("acompletion", False)
|
||
client = kwargs.get("client", None)
|
||
### Admin Controls ###
|
||
no_log = kwargs.get("no-log", False)
|
||
######## end of unpacking kwargs ###########
|
||
openai_params = [
|
||
"functions",
|
||
"function_call",
|
||
"temperature",
|
||
"temperature",
|
||
"top_p",
|
||
"n",
|
||
"stream",
|
||
"stream_options",
|
||
"stop",
|
||
"max_tokens",
|
||
"presence_penalty",
|
||
"frequency_penalty",
|
||
"logit_bias",
|
||
"user",
|
||
"request_timeout",
|
||
"api_base",
|
||
"api_version",
|
||
"api_key",
|
||
"deployment_id",
|
||
"organization",
|
||
"base_url",
|
||
"default_headers",
|
||
"timeout",
|
||
"response_format",
|
||
"seed",
|
||
"tools",
|
||
"tool_choice",
|
||
"max_retries",
|
||
"logprobs",
|
||
"top_logprobs",
|
||
"extra_headers",
|
||
]
|
||
litellm_params = [
|
||
"metadata",
|
||
"acompletion",
|
||
"atext_completion",
|
||
"text_completion",
|
||
"caching",
|
||
"mock_response",
|
||
"api_key",
|
||
"api_version",
|
||
"api_base",
|
||
"force_timeout",
|
||
"logger_fn",
|
||
"verbose",
|
||
"custom_llm_provider",
|
||
"litellm_logging_obj",
|
||
"litellm_call_id",
|
||
"use_client",
|
||
"id",
|
||
"fallbacks",
|
||
"azure",
|
||
"headers",
|
||
"model_list",
|
||
"num_retries",
|
||
"context_window_fallback_dict",
|
||
"retry_policy",
|
||
"roles",
|
||
"final_prompt_value",
|
||
"bos_token",
|
||
"eos_token",
|
||
"request_timeout",
|
||
"complete_response",
|
||
"self",
|
||
"client",
|
||
"rpm",
|
||
"tpm",
|
||
"max_parallel_requests",
|
||
"input_cost_per_token",
|
||
"output_cost_per_token",
|
||
"input_cost_per_second",
|
||
"output_cost_per_second",
|
||
"hf_model_name",
|
||
"model_info",
|
||
"proxy_server_request",
|
||
"preset_cache_key",
|
||
"caching_groups",
|
||
"ttl",
|
||
"cache",
|
||
"no-log",
|
||
"base_model",
|
||
"stream_timeout",
|
||
"supports_system_message",
|
||
"region_name",
|
||
"allowed_model_region",
|
||
"model_config",
|
||
]
|
||
|
||
default_params = openai_params + litellm_params
|
||
non_default_params = {
|
||
k: v for k, v in kwargs.items() if k not in default_params
|
||
} # model-specific params - pass them straight to the model/provider
|
||
|
||
try:
|
||
if base_url is not None:
|
||
api_base = base_url
|
||
if max_retries is not None: # openai allows openai.OpenAI(max_retries=3)
|
||
num_retries = max_retries
|
||
logging = litellm_logging_obj
|
||
fallbacks = fallbacks or litellm.model_fallbacks
|
||
if fallbacks is not None:
|
||
return completion_with_fallbacks(**args)
|
||
if model_list is not None:
|
||
deployments = [
|
||
m["litellm_params"] for m in model_list if m["model_name"] == model
|
||
]
|
||
return batch_completion_models(deployments=deployments, **args)
|
||
if litellm.model_alias_map and model in litellm.model_alias_map:
|
||
model = litellm.model_alias_map[
|
||
model
|
||
] # update the model to the actual value if an alias has been passed in
|
||
model_response = ModelResponse()
|
||
setattr(model_response, "usage", litellm.Usage())
|
||
if (
|
||
kwargs.get("azure", False) == True
|
||
): # don't remove flag check, to remain backwards compatible for repos like Codium
|
||
custom_llm_provider = "azure"
|
||
if deployment_id != None: # azure llms
|
||
model = deployment_id
|
||
custom_llm_provider = "azure"
|
||
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(
|
||
model=model,
|
||
custom_llm_provider=custom_llm_provider,
|
||
api_base=api_base,
|
||
api_key=api_key,
|
||
)
|
||
if model_response is not None and hasattr(model_response, "_hidden_params"):
|
||
model_response._hidden_params["custom_llm_provider"] = custom_llm_provider
|
||
model_response._hidden_params["region_name"] = kwargs.get(
|
||
"aws_region_name", None
|
||
) # support region-based pricing for bedrock
|
||
|
||
### TIMEOUT LOGIC ###
|
||
timeout = timeout or kwargs.get("request_timeout", 600) or 600
|
||
# set timeout for 10 minutes by default
|
||
if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout(
|
||
custom_llm_provider
|
||
):
|
||
timeout = timeout.read or 600 # default 10 min timeout
|
||
elif not isinstance(timeout, httpx.Timeout):
|
||
timeout = float(timeout) # type: ignore
|
||
|
||
### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
|
||
if input_cost_per_token is not None and output_cost_per_token is not None:
|
||
litellm.register_model(
|
||
{
|
||
f"{custom_llm_provider}/{model}": {
|
||
"input_cost_per_token": input_cost_per_token,
|
||
"output_cost_per_token": output_cost_per_token,
|
||
"litellm_provider": custom_llm_provider,
|
||
},
|
||
model: {
|
||
"input_cost_per_token": input_cost_per_token,
|
||
"output_cost_per_token": output_cost_per_token,
|
||
"litellm_provider": custom_llm_provider,
|
||
},
|
||
}
|
||
)
|
||
elif (
|
||
input_cost_per_second is not None
|
||
): # time based pricing just needs cost in place
|
||
output_cost_per_second = output_cost_per_second
|
||
litellm.register_model(
|
||
{
|
||
f"{custom_llm_provider}/{model}": {
|
||
"input_cost_per_second": input_cost_per_second,
|
||
"output_cost_per_second": output_cost_per_second,
|
||
"litellm_provider": custom_llm_provider,
|
||
},
|
||
model: {
|
||
"input_cost_per_second": input_cost_per_second,
|
||
"output_cost_per_second": output_cost_per_second,
|
||
"litellm_provider": custom_llm_provider,
|
||
},
|
||
}
|
||
)
|
||
### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###
|
||
custom_prompt_dict = {} # type: ignore
|
||
if (
|
||
initial_prompt_value
|
||
or roles
|
||
or final_prompt_value
|
||
or bos_token
|
||
or eos_token
|
||
):
|
||
custom_prompt_dict = {model: {}}
|
||
if initial_prompt_value:
|
||
custom_prompt_dict[model]["initial_prompt_value"] = initial_prompt_value
|
||
if roles:
|
||
custom_prompt_dict[model]["roles"] = roles
|
||
if final_prompt_value:
|
||
custom_prompt_dict[model]["final_prompt_value"] = final_prompt_value
|
||
if bos_token:
|
||
custom_prompt_dict[model]["bos_token"] = bos_token
|
||
if eos_token:
|
||
custom_prompt_dict[model]["eos_token"] = eos_token
|
||
|
||
if (
|
||
supports_system_message is not None
|
||
and isinstance(supports_system_message, bool)
|
||
and supports_system_message == False
|
||
):
|
||
messages = map_system_message_pt(messages=messages)
|
||
model_api_key = get_api_key(
|
||
llm_provider=custom_llm_provider, dynamic_api_key=api_key
|
||
) # get the api key from the environment if required for the model
|
||
|
||
if dynamic_api_key is not None:
|
||
api_key = dynamic_api_key
|
||
# check if user passed in any of the OpenAI optional params
|
||
optional_params = get_optional_params(
|
||
functions=functions,
|
||
function_call=function_call,
|
||
temperature=temperature,
|
||
top_p=top_p,
|
||
n=n,
|
||
stream=stream,
|
||
stream_options=stream_options,
|
||
stop=stop,
|
||
max_tokens=max_tokens,
|
||
presence_penalty=presence_penalty,
|
||
frequency_penalty=frequency_penalty,
|
||
logit_bias=logit_bias,
|
||
user=user,
|
||
# params to identify the model
|
||
model=model,
|
||
custom_llm_provider=custom_llm_provider,
|
||
response_format=response_format,
|
||
seed=seed,
|
||
tools=tools,
|
||
tool_choice=tool_choice,
|
||
max_retries=max_retries,
|
||
logprobs=logprobs,
|
||
top_logprobs=top_logprobs,
|
||
extra_headers=extra_headers,
|
||
**non_default_params,
|
||
)
|
||
|
||
if litellm.add_function_to_prompt and optional_params.get(
|
||
"functions_unsupported_model", None
|
||
): # if user opts to add it to prompt, when API doesn't support function calling
|
||
functions_unsupported_model = optional_params.pop(
|
||
"functions_unsupported_model"
|
||
)
|
||
messages = function_call_prompt(
|
||
messages=messages, functions=functions_unsupported_model
|
||
)
|
||
|
||
# For logging - save the values of the litellm-specific params passed in
|
||
litellm_params = get_litellm_params(
|
||
acompletion=acompletion,
|
||
api_key=api_key,
|
||
force_timeout=force_timeout,
|
||
logger_fn=logger_fn,
|
||
verbose=verbose,
|
||
custom_llm_provider=custom_llm_provider,
|
||
api_base=api_base,
|
||
litellm_call_id=kwargs.get("litellm_call_id", None),
|
||
model_alias_map=litellm.model_alias_map,
|
||
completion_call_id=id,
|
||
metadata=metadata,
|
||
model_info=model_info,
|
||
proxy_server_request=proxy_server_request,
|
||
preset_cache_key=preset_cache_key,
|
||
no_log=no_log,
|
||
input_cost_per_second=input_cost_per_second,
|
||
input_cost_per_token=input_cost_per_token,
|
||
output_cost_per_second=output_cost_per_second,
|
||
output_cost_per_token=output_cost_per_token,
|
||
)
|
||
logging.update_environment_variables(
|
||
model=model,
|
||
user=user,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
)
|
||
if mock_response:
|
||
return mock_completion(
|
||
model,
|
||
messages,
|
||
stream=stream,
|
||
mock_response=mock_response,
|
||
logging=logging,
|
||
acompletion=acompletion,
|
||
)
|
||
if custom_llm_provider == "azure":
|
||
# azure configs
|
||
api_type = get_secret("AZURE_API_TYPE") or "azure"
|
||
|
||
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")
|
||
|
||
api_version = (
|
||
api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
|
||
)
|
||
|
||
api_key = (
|
||
api_key
|
||
or litellm.api_key
|
||
or litellm.azure_key
|
||
or get_secret("AZURE_OPENAI_API_KEY")
|
||
or get_secret("AZURE_API_KEY")
|
||
)
|
||
|
||
azure_ad_token = optional_params.get("extra_body", {}).pop(
|
||
"azure_ad_token", None
|
||
) or get_secret("AZURE_AD_TOKEN")
|
||
|
||
headers = headers or litellm.headers
|
||
|
||
## LOAD CONFIG - if set
|
||
config = litellm.AzureOpenAIConfig.get_config()
|
||
for k, v in config.items():
|
||
if (
|
||
k not in optional_params
|
||
): # completion(top_k=3) > azure_config(top_k=3) <- allows for dynamic variables to be passed in
|
||
optional_params[k] = v
|
||
|
||
## COMPLETION CALL
|
||
response = azure_chat_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
headers=headers,
|
||
api_key=api_key,
|
||
api_base=api_base,
|
||
api_version=api_version,
|
||
api_type=api_type,
|
||
azure_ad_token=azure_ad_token,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
timeout=timeout, # type: ignore
|
||
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
|
||
)
|
||
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=response,
|
||
additional_args={
|
||
"headers": headers,
|
||
"api_version": api_version,
|
||
"api_base": api_base,
|
||
},
|
||
)
|
||
elif custom_llm_provider == "azure_text":
|
||
# azure configs
|
||
api_type = get_secret("AZURE_API_TYPE") or "azure"
|
||
|
||
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")
|
||
|
||
api_version = (
|
||
api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
|
||
)
|
||
|
||
api_key = (
|
||
api_key
|
||
or litellm.api_key
|
||
or litellm.azure_key
|
||
or get_secret("AZURE_OPENAI_API_KEY")
|
||
or get_secret("AZURE_API_KEY")
|
||
)
|
||
|
||
azure_ad_token = optional_params.get("extra_body", {}).pop(
|
||
"azure_ad_token", None
|
||
) or get_secret("AZURE_AD_TOKEN")
|
||
|
||
headers = headers or litellm.headers
|
||
|
||
## LOAD CONFIG - if set
|
||
config = litellm.AzureOpenAIConfig.get_config()
|
||
for k, v in config.items():
|
||
if (
|
||
k not in optional_params
|
||
): # completion(top_k=3) > azure_config(top_k=3) <- allows for dynamic variables to be passed in
|
||
optional_params[k] = v
|
||
|
||
## COMPLETION CALL
|
||
response = azure_text_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
headers=headers,
|
||
api_key=api_key,
|
||
api_base=api_base,
|
||
api_version=api_version,
|
||
api_type=api_type,
|
||
azure_ad_token=azure_ad_token,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
timeout=timeout,
|
||
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
|
||
)
|
||
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=response,
|
||
additional_args={
|
||
"headers": headers,
|
||
"api_version": api_version,
|
||
"api_base": api_base,
|
||
},
|
||
)
|
||
elif (
|
||
model in litellm.open_ai_chat_completion_models
|
||
or custom_llm_provider == "custom_openai"
|
||
or custom_llm_provider == "deepinfra"
|
||
or custom_llm_provider == "perplexity"
|
||
or custom_llm_provider == "groq"
|
||
or custom_llm_provider == "deepseek"
|
||
or custom_llm_provider == "anyscale"
|
||
or custom_llm_provider == "mistral"
|
||
or custom_llm_provider == "openai"
|
||
or custom_llm_provider == "together_ai"
|
||
or custom_llm_provider in litellm.openai_compatible_providers
|
||
or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo
|
||
): # allow user to make an openai call with a custom base
|
||
# note: if a user sets a custom base - we should ensure this works
|
||
# allow for the setting of dynamic and stateful api-bases
|
||
api_base = (
|
||
api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||
or litellm.api_base
|
||
or get_secret("OPENAI_API_BASE")
|
||
or "https://api.openai.com/v1"
|
||
)
|
||
openai.organization = (
|
||
organization
|
||
or litellm.organization
|
||
or get_secret("OPENAI_ORGANIZATION")
|
||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||
)
|
||
# set API KEY
|
||
api_key = (
|
||
api_key
|
||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||
or litellm.openai_key
|
||
or get_secret("OPENAI_API_KEY")
|
||
)
|
||
|
||
headers = headers or litellm.headers
|
||
|
||
## LOAD CONFIG - if set
|
||
config = litellm.OpenAIConfig.get_config()
|
||
for k, v in config.items():
|
||
if (
|
||
k not in optional_params
|
||
): # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
|
||
optional_params[k] = v
|
||
|
||
## COMPLETION CALL
|
||
try:
|
||
response = openai_chat_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
headers=headers,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
api_key=api_key,
|
||
api_base=api_base,
|
||
acompletion=acompletion,
|
||
logging_obj=logging,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
timeout=timeout, # type: ignore
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
client=client, # pass AsyncOpenAI, OpenAI client
|
||
organization=organization,
|
||
custom_llm_provider=custom_llm_provider,
|
||
)
|
||
except Exception as e:
|
||
## LOGGING - log the original exception returned
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=str(e),
|
||
additional_args={"headers": headers},
|
||
)
|
||
raise e
|
||
|
||
if optional_params.get("stream", False):
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=response,
|
||
additional_args={"headers": headers},
|
||
)
|
||
elif (
|
||
custom_llm_provider == "text-completion-openai"
|
||
or "ft:babbage-002" in model
|
||
or "ft:davinci-002" in model # support for finetuned completion models
|
||
):
|
||
openai.api_type = "openai"
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("OPENAI_API_BASE")
|
||
or "https://api.openai.com/v1"
|
||
)
|
||
|
||
openai.api_version = None
|
||
# set API KEY
|
||
|
||
api_key = (
|
||
api_key
|
||
or litellm.api_key
|
||
or litellm.openai_key
|
||
or get_secret("OPENAI_API_KEY")
|
||
)
|
||
|
||
headers = headers or litellm.headers
|
||
|
||
## LOAD CONFIG - if set
|
||
config = litellm.OpenAITextCompletionConfig.get_config()
|
||
for k, v in config.items():
|
||
if (
|
||
k not in optional_params
|
||
): # completion(top_k=3) > openai_text_config(top_k=3) <- allows for dynamic variables to be passed in
|
||
optional_params[k] = v
|
||
if litellm.organization:
|
||
openai.organization = litellm.organization
|
||
|
||
if (
|
||
len(messages) > 0
|
||
and "content" in messages[0]
|
||
and type(messages[0]["content"]) == list
|
||
):
|
||
# text-davinci-003 can accept a string or array, if it's an array, assume the array is set in messages[0]['content']
|
||
# https://platform.openai.com/docs/api-reference/completions/create
|
||
prompt = messages[0]["content"]
|
||
else:
|
||
prompt = " ".join([message["content"] for message in messages]) # type: ignore
|
||
|
||
## COMPLETION CALL
|
||
_response = openai_text_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
api_key=api_key,
|
||
api_base=api_base,
|
||
acompletion=acompletion,
|
||
client=client, # pass AsyncOpenAI, OpenAI client
|
||
logging_obj=logging,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
timeout=timeout, # type: ignore
|
||
)
|
||
|
||
if (
|
||
optional_params.get("stream", False) == False
|
||
and acompletion == False
|
||
and text_completion == False
|
||
):
|
||
# convert to chat completion response
|
||
_response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
|
||
response_object=_response, model_response_object=model_response
|
||
)
|
||
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=_response,
|
||
additional_args={"headers": headers},
|
||
)
|
||
response = _response
|
||
elif (
|
||
"replicate" in model
|
||
or custom_llm_provider == "replicate"
|
||
or model in litellm.replicate_models
|
||
):
|
||
# Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
|
||
replicate_key = None
|
||
replicate_key = (
|
||
api_key
|
||
or litellm.replicate_key
|
||
or litellm.api_key
|
||
or get_secret("REPLICATE_API_KEY")
|
||
or get_secret("REPLICATE_API_TOKEN")
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("REPLICATE_API_BASE")
|
||
or "https://api.replicate.com/v1"
|
||
)
|
||
|
||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||
|
||
model_response = replicate.completion( # type: ignore
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding, # for calculating input/output tokens
|
||
api_key=replicate_key,
|
||
logging_obj=logging,
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
acompletion=acompletion,
|
||
)
|
||
|
||
if optional_params.get("stream", False) == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=replicate_key,
|
||
original_response=model_response,
|
||
)
|
||
|
||
response = model_response
|
||
elif (
|
||
"clarifai" in model
|
||
or custom_llm_provider == "clarifai"
|
||
or model in litellm.clarifai_models
|
||
):
|
||
clarifai_key = None
|
||
clarifai_key = (
|
||
api_key
|
||
or litellm.clarifai_key
|
||
or litellm.api_key
|
||
or get_secret("CLARIFAI_API_KEY")
|
||
or get_secret("CLARIFAI_API_TOKEN")
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("CLARIFAI_API_BASE")
|
||
or "https://api.clarifai.com/v2"
|
||
)
|
||
|
||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||
model_response = clarifai.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
acompletion=acompletion,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding, # for calculating input/output tokens
|
||
api_key=clarifai_key,
|
||
logging_obj=logging,
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=model_response,
|
||
)
|
||
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=clarifai_key,
|
||
original_response=model_response,
|
||
)
|
||
response = model_response
|
||
|
||
elif custom_llm_provider == "anthropic":
|
||
api_key = (
|
||
api_key
|
||
or litellm.anthropic_key
|
||
or litellm.api_key
|
||
or os.environ.get("ANTHROPIC_API_KEY")
|
||
)
|
||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||
|
||
if (model == "claude-2") or (model == "claude-instant-1"):
|
||
# call anthropic /completion, only use this route for claude-2, claude-instant-1
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("ANTHROPIC_API_BASE")
|
||
or "https://api.anthropic.com/v1/complete"
|
||
)
|
||
response = anthropic_text_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
acompletion=acompletion,
|
||
custom_prompt_dict=litellm.custom_prompt_dict,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding, # for calculating input/output tokens
|
||
api_key=api_key,
|
||
logging_obj=logging,
|
||
headers=headers,
|
||
)
|
||
else:
|
||
# call /messages
|
||
# default route for all anthropic models
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("ANTHROPIC_API_BASE")
|
||
or "https://api.anthropic.com/v1/messages"
|
||
)
|
||
response = anthropic_chat_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
acompletion=acompletion,
|
||
custom_prompt_dict=litellm.custom_prompt_dict,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding, # for calculating input/output tokens
|
||
api_key=api_key,
|
||
logging_obj=logging,
|
||
headers=headers,
|
||
)
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=response,
|
||
)
|
||
response = response
|
||
elif custom_llm_provider == "nlp_cloud":
|
||
nlp_cloud_key = (
|
||
api_key
|
||
or litellm.nlp_cloud_key
|
||
or get_secret("NLP_CLOUD_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("NLP_CLOUD_API_BASE")
|
||
or "https://api.nlpcloud.io/v1/gpu/"
|
||
)
|
||
|
||
response = nlp_cloud.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=nlp_cloud_key,
|
||
logging_obj=logging,
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
response,
|
||
model,
|
||
custom_llm_provider="nlp_cloud",
|
||
logging_obj=logging,
|
||
)
|
||
|
||
if optional_params.get("stream", False) or acompletion == True:
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages,
|
||
api_key=api_key,
|
||
original_response=response,
|
||
)
|
||
|
||
response = response
|
||
elif custom_llm_provider == "aleph_alpha":
|
||
aleph_alpha_key = (
|
||
api_key
|
||
or litellm.aleph_alpha_key
|
||
or get_secret("ALEPH_ALPHA_API_KEY")
|
||
or get_secret("ALEPHALPHA_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("ALEPH_ALPHA_API_BASE")
|
||
or "https://api.aleph-alpha.com/complete"
|
||
)
|
||
|
||
model_response = aleph_alpha.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
default_max_tokens_to_sample=litellm.max_tokens,
|
||
api_key=aleph_alpha_key,
|
||
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="aleph_alpha",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "cohere":
|
||
cohere_key = (
|
||
api_key
|
||
or litellm.cohere_key
|
||
or get_secret("COHERE_API_KEY")
|
||
or get_secret("CO_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("COHERE_API_BASE")
|
||
or "https://api.cohere.ai/v1/generate"
|
||
)
|
||
|
||
model_response = cohere.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=cohere_key,
|
||
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="cohere",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "cohere_chat":
|
||
cohere_key = (
|
||
api_key
|
||
or litellm.cohere_key
|
||
or get_secret("COHERE_API_KEY")
|
||
or get_secret("CO_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("COHERE_API_BASE")
|
||
or "https://api.cohere.ai/v1/chat"
|
||
)
|
||
|
||
model_response = cohere_chat.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=cohere_key,
|
||
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="cohere_chat",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "maritalk":
|
||
maritalk_key = (
|
||
api_key
|
||
or litellm.maritalk_key
|
||
or get_secret("MARITALK_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("MARITALK_API_BASE")
|
||
or "https://chat.maritaca.ai/api/chat/inference"
|
||
)
|
||
|
||
model_response = maritalk.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=maritalk_key,
|
||
logging_obj=logging,
|
||
)
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="maritalk",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "huggingface":
|
||
custom_llm_provider = "huggingface"
|
||
huggingface_key = (
|
||
api_key
|
||
or litellm.huggingface_key
|
||
or os.environ.get("HF_TOKEN")
|
||
or os.environ.get("HUGGINGFACE_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
hf_headers = headers or litellm.headers
|
||
|
||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||
model_response = huggingface.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base, # type: ignore
|
||
headers=hf_headers,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=huggingface_key,
|
||
acompletion=acompletion,
|
||
logging_obj=logging,
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
timeout=timeout, # type: ignore
|
||
)
|
||
if (
|
||
"stream" in optional_params
|
||
and optional_params["stream"] == True
|
||
and acompletion is False
|
||
):
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="huggingface",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "oobabooga":
|
||
custom_llm_provider = "oobabooga"
|
||
model_response = oobabooga.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
api_base=api_base, # type: ignore
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
api_key=None,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
logging_obj=logging,
|
||
)
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="oobabooga",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "openrouter":
|
||
api_base = api_base or litellm.api_base or "https://openrouter.ai/api/v1"
|
||
|
||
api_key = (
|
||
api_key
|
||
or litellm.api_key
|
||
or litellm.openrouter_key
|
||
or get_secret("OPENROUTER_API_KEY")
|
||
or get_secret("OR_API_KEY")
|
||
)
|
||
|
||
openrouter_site_url = get_secret("OR_SITE_URL") or "https://litellm.ai"
|
||
|
||
openrouter_app_name = get_secret("OR_APP_NAME") or "liteLLM"
|
||
|
||
headers = (
|
||
headers
|
||
or litellm.headers
|
||
or {
|
||
"HTTP-Referer": openrouter_site_url,
|
||
"X-Title": openrouter_app_name,
|
||
}
|
||
)
|
||
|
||
## Load Config
|
||
config = openrouter.OpenrouterConfig.get_config()
|
||
for k, v in config.items():
|
||
if k == "extra_body":
|
||
# we use openai 'extra_body' to pass openrouter specific params - transforms, route, models
|
||
if "extra_body" in optional_params:
|
||
optional_params[k].update(v)
|
||
else:
|
||
optional_params[k] = v
|
||
elif k not in optional_params:
|
||
optional_params[k] = v
|
||
|
||
data = {"model": model, "messages": messages, **optional_params}
|
||
|
||
## COMPLETION CALL
|
||
response = openai_chat_completions.completion(
|
||
model=model,
|
||
messages=messages,
|
||
headers=headers,
|
||
api_key=api_key,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
timeout=timeout, # type: ignore
|
||
)
|
||
## LOGGING
|
||
logging.post_call(
|
||
input=messages, api_key=openai.api_key, original_response=response
|
||
)
|
||
elif (
|
||
custom_llm_provider == "together_ai"
|
||
or ("togethercomputer" in model)
|
||
or (model in litellm.together_ai_models)
|
||
):
|
||
"""
|
||
Deprecated. We now do together ai calls via the openai client - https://docs.together.ai/docs/openai-api-compatibility
|
||
"""
|
||
custom_llm_provider = "together_ai"
|
||
together_ai_key = (
|
||
api_key
|
||
or litellm.togetherai_api_key
|
||
or get_secret("TOGETHER_AI_TOKEN")
|
||
or get_secret("TOGETHERAI_API_KEY")
|
||
or litellm.api_key
|
||
)
|
||
|
||
api_base = (
|
||
api_base
|
||
or litellm.api_base
|
||
or get_secret("TOGETHERAI_API_BASE")
|
||
or "https://api.together.xyz/inference"
|
||
)
|
||
|
||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||
|
||
model_response = together_ai.completion(
|
||
model=model,
|
||
messages=messages,
|
||
api_base=api_base,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=together_ai_key,
|
||
logging_obj=logging,
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
)
|
||
if (
|
||
"stream_tokens" in optional_params
|
||
and optional_params["stream_tokens"] == True
|
||
):
|
||
# don't try to access stream object,
|
||
response = CustomStreamWrapper(
|
||
model_response,
|
||
model,
|
||
custom_llm_provider="together_ai",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "palm":
|
||
palm_api_key = api_key or get_secret("PALM_API_KEY") or litellm.api_key
|
||
|
||
# palm does not support streaming as yet :(
|
||
model_response = palm.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=palm_api_key,
|
||
logging_obj=logging,
|
||
)
|
||
# fake palm streaming
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# fake streaming for palm
|
||
resp_string = model_response["choices"][0]["message"]["content"]
|
||
response = CustomStreamWrapper(
|
||
resp_string, model, custom_llm_provider="palm", logging_obj=logging
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "gemini":
|
||
gemini_api_key = (
|
||
api_key
|
||
or get_secret("GEMINI_API_KEY")
|
||
or get_secret("PALM_API_KEY") # older palm api key should also work
|
||
or litellm.api_key
|
||
)
|
||
|
||
# palm does not support streaming as yet :(
|
||
model_response = gemini.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=optional_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
api_key=gemini_api_key,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
custom_prompt_dict=custom_prompt_dict,
|
||
)
|
||
if (
|
||
"stream" in optional_params
|
||
and optional_params["stream"] == True
|
||
and acompletion == False
|
||
):
|
||
response = CustomStreamWrapper(
|
||
iter(model_response),
|
||
model,
|
||
custom_llm_provider="gemini",
|
||
logging_obj=logging,
|
||
)
|
||
return response
|
||
response = model_response
|
||
elif custom_llm_provider == "vertex_ai":
|
||
vertex_ai_project = (
|
||
optional_params.pop("vertex_project", None)
|
||
or optional_params.pop("vertex_ai_project", None)
|
||
or litellm.vertex_project
|
||
or get_secret("VERTEXAI_PROJECT")
|
||
)
|
||
vertex_ai_location = (
|
||
optional_params.pop("vertex_location", None)
|
||
or optional_params.pop("vertex_ai_location", None)
|
||
or litellm.vertex_location
|
||
or get_secret("VERTEXAI_LOCATION")
|
||
)
|
||
vertex_credentials = (
|
||
optional_params.pop("vertex_credentials", None)
|
||
or optional_params.pop("vertex_ai_credentials", None)
|
||
or get_secret("VERTEXAI_CREDENTIALS")
|
||
)
|
||
new_params = deepcopy(optional_params)
|
||
if "claude-3" in model:
|
||
model_response = vertex_ai_anthropic.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=new_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
vertex_location=vertex_ai_location,
|
||
vertex_project=vertex_ai_project,
|
||
vertex_credentials=vertex_credentials,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
)
|
||
else:
|
||
> model_response = vertex_ai.completion(
|
||
model=model,
|
||
messages=messages,
|
||
model_response=model_response,
|
||
print_verbose=print_verbose,
|
||
optional_params=new_params,
|
||
litellm_params=litellm_params,
|
||
logger_fn=logger_fn,
|
||
encoding=encoding,
|
||
vertex_location=vertex_ai_location,
|
||
vertex_project=vertex_ai_project,
|
||
vertex_credentials=vertex_credentials,
|
||
logging_obj=logging,
|
||
acompletion=acompletion,
|
||
)
|
||
|
||
../main.py:1824:
|
||
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||
|
||
model = 'gemini-1.5-flash-preview-0514'
|
||
messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}]
|
||
model_response = ModelResponse(id='chatcmpl-722df0e7-4e2d-44e6-9e2c-49823faa0189', choices=[Choices(finish_reason='stop', index=0, mess... role='assistant'))], created=1716145725, model=None, object='chat.completion', system_fingerprint=None, usage=Usage())
|
||
print_verbose = <function print_verbose at 0x10c6c20c0>
|
||
encoding = <Encoding 'cl100k_base'>
|
||
logging_obj = <litellm.utils.Logging object at 0x1059c53d0>
|
||
vertex_project = None, vertex_location = None, vertex_credentials = None
|
||
optional_params = {}
|
||
litellm_params = {'acompletion': False, 'api_base': '', 'api_key': None, 'completion_call_id': None, ...}
|
||
logger_fn = None, acompletion = False
|
||
|
||
def completion(
|
||
model: str,
|
||
messages: list,
|
||
model_response: ModelResponse,
|
||
print_verbose: Callable,
|
||
encoding,
|
||
logging_obj,
|
||
vertex_project=None,
|
||
vertex_location=None,
|
||
vertex_credentials=None,
|
||
optional_params=None,
|
||
litellm_params=None,
|
||
logger_fn=None,
|
||
acompletion: bool = False,
|
||
):
|
||
try:
|
||
import vertexai
|
||
except:
|
||
raise VertexAIError(
|
||
status_code=400,
|
||
message="vertexai import failed please run `pip install google-cloud-aiplatform`",
|
||
)
|
||
|
||
if not (
|
||
hasattr(vertexai, "preview") or hasattr(vertexai.preview, "language_models")
|
||
):
|
||
raise VertexAIError(
|
||
status_code=400,
|
||
message="""Upgrade vertex ai. Run `pip install "google-cloud-aiplatform>=1.38"`""",
|
||
)
|
||
try:
|
||
from vertexai.preview.language_models import (
|
||
ChatModel,
|
||
CodeChatModel,
|
||
InputOutputTextPair,
|
||
)
|
||
from vertexai.language_models import TextGenerationModel, CodeGenerationModel
|
||
from vertexai.preview.generative_models import (
|
||
GenerativeModel,
|
||
Part,
|
||
GenerationConfig,
|
||
)
|
||
from google.cloud import aiplatform # type: ignore
|
||
from google.protobuf import json_format # type: ignore
|
||
from google.protobuf.struct_pb2 import Value # type: ignore
|
||
from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore
|
||
import google.auth # type: ignore
|
||
import proto # type: ignore
|
||
|
||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||
print_verbose(
|
||
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
|
||
)
|
||
if vertex_credentials is not None and isinstance(vertex_credentials, str):
|
||
import google.oauth2.service_account
|
||
|
||
json_obj = json.loads(vertex_credentials)
|
||
|
||
creds = google.oauth2.service_account.Credentials.from_service_account_info(
|
||
json_obj,
|
||
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
||
)
|
||
else:
|
||
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
||
print_verbose(
|
||
f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
|
||
)
|
||
vertexai.init(
|
||
project=vertex_project, location=vertex_location, credentials=creds
|
||
)
|
||
|
||
## Load Config
|
||
config = litellm.VertexAIConfig.get_config()
|
||
for k, v in config.items():
|
||
if k not in optional_params:
|
||
optional_params[k] = v
|
||
|
||
## Process safety settings into format expected by vertex AI
|
||
safety_settings = None
|
||
if "safety_settings" in optional_params:
|
||
safety_settings = optional_params.pop("safety_settings")
|
||
if not isinstance(safety_settings, list):
|
||
raise ValueError("safety_settings must be a list")
|
||
if len(safety_settings) > 0 and not isinstance(safety_settings[0], dict):
|
||
raise ValueError("safety_settings must be a list of dicts")
|
||
safety_settings = [
|
||
gapic_content_types.SafetySetting(x) for x in safety_settings
|
||
]
|
||
|
||
# vertexai does not use an API key, it looks for credentials.json in the environment
|
||
|
||
prompt = " ".join(
|
||
[
|
||
message["content"]
|
||
for message in messages
|
||
if isinstance(message["content"], str)
|
||
]
|
||
)
|
||
|
||
mode = ""
|
||
|
||
request_str = ""
|
||
response_obj = None
|
||
async_client = None
|
||
instances = None
|
||
client_options = {
|
||
"api_endpoint": f"{vertex_location}-aiplatform.googleapis.com"
|
||
}
|
||
if (
|
||
model in litellm.vertex_language_models
|
||
or model in litellm.vertex_vision_models
|
||
):
|
||
llm_model = GenerativeModel(model)
|
||
mode = "vision"
|
||
request_str += f"llm_model = GenerativeModel({model})\n"
|
||
elif model in litellm.vertex_chat_models:
|
||
llm_model = ChatModel.from_pretrained(model)
|
||
mode = "chat"
|
||
request_str += f"llm_model = ChatModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_text_models:
|
||
llm_model = TextGenerationModel.from_pretrained(model)
|
||
mode = "text"
|
||
request_str += f"llm_model = TextGenerationModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_code_text_models:
|
||
llm_model = CodeGenerationModel.from_pretrained(model)
|
||
mode = "text"
|
||
request_str += f"llm_model = CodeGenerationModel.from_pretrained({model})\n"
|
||
elif model in litellm.vertex_code_chat_models: # vertex_code_llm_models
|
||
llm_model = CodeChatModel.from_pretrained(model)
|
||
mode = "chat"
|
||
request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
|
||
elif model == "private":
|
||
mode = "private"
|
||
model = optional_params.pop("model_id", None)
|
||
# private endpoint requires a dict instead of JSON
|
||
instances = [optional_params.copy()]
|
||
instances[0]["prompt"] = prompt
|
||
llm_model = aiplatform.PrivateEndpoint(
|
||
endpoint_name=model,
|
||
project=vertex_project,
|
||
location=vertex_location,
|
||
)
|
||
request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n"
|
||
else: # assume vertex model garden on public endpoint
|
||
mode = "custom"
|
||
|
||
instances = [optional_params.copy()]
|
||
instances[0]["prompt"] = prompt
|
||
instances = [
|
||
json_format.ParseDict(instance_dict, Value())
|
||
for instance_dict in instances
|
||
]
|
||
# Will determine the API used based on async parameter
|
||
llm_model = None
|
||
|
||
# NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
|
||
if acompletion == True:
|
||
data = {
|
||
"llm_model": llm_model,
|
||
"mode": mode,
|
||
"prompt": prompt,
|
||
"logging_obj": logging_obj,
|
||
"request_str": request_str,
|
||
"model": model,
|
||
"model_response": model_response,
|
||
"encoding": encoding,
|
||
"messages": messages,
|
||
"print_verbose": print_verbose,
|
||
"client_options": client_options,
|
||
"instances": instances,
|
||
"vertex_location": vertex_location,
|
||
"vertex_project": vertex_project,
|
||
"safety_settings": safety_settings,
|
||
**optional_params,
|
||
}
|
||
if optional_params.get("stream", False) is True:
|
||
# async streaming
|
||
return async_streaming(**data)
|
||
|
||
return async_completion(**data)
|
||
|
||
if mode == "vision":
|
||
print_verbose("\nMaking VertexAI Gemini Pro / Pro Vision Call")
|
||
print_verbose(f"\nProcessing input messages = {messages}")
|
||
tools = optional_params.pop("tools", None)
|
||
content = _gemini_convert_messages_text(messages=messages)
|
||
stream = optional_params.pop("stream", False)
|
||
if stream == True:
|
||
request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
|
||
model_response = llm_model.generate_content(
|
||
contents={"content": content},
|
||
generation_config=optional_params,
|
||
safety_settings=safety_settings,
|
||
stream=True,
|
||
tools=tools,
|
||
)
|
||
|
||
return model_response
|
||
|
||
request_str += f"response = llm_model.generate_content({content})\n"
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
|
||
## LLM Call
|
||
response = llm_model.generate_content(
|
||
contents=content,
|
||
generation_config=optional_params,
|
||
safety_settings=safety_settings,
|
||
tools=tools,
|
||
)
|
||
|
||
if tools is not None and bool(
|
||
getattr(response.candidates[0].content.parts[0], "function_call", None)
|
||
):
|
||
function_call = response.candidates[0].content.parts[0].function_call
|
||
args_dict = {}
|
||
|
||
# Check if it's a RepeatedComposite instance
|
||
for key, val in function_call.args.items():
|
||
if isinstance(
|
||
val, proto.marshal.collections.repeated.RepeatedComposite
|
||
):
|
||
# If so, convert to list
|
||
args_dict[key] = [v for v in val]
|
||
else:
|
||
args_dict[key] = val
|
||
|
||
try:
|
||
args_str = json.dumps(args_dict)
|
||
except Exception as e:
|
||
raise VertexAIError(status_code=422, message=str(e))
|
||
message = litellm.Message(
|
||
content=None,
|
||
tool_calls=[
|
||
{
|
||
"id": f"call_{str(uuid.uuid4())}",
|
||
"function": {
|
||
"arguments": args_str,
|
||
"name": function_call.name,
|
||
},
|
||
"type": "function",
|
||
}
|
||
],
|
||
)
|
||
completion_response = message
|
||
else:
|
||
completion_response = response.text
|
||
response_obj = response._raw_response
|
||
optional_params["tools"] = tools
|
||
elif mode == "chat":
|
||
chat = llm_model.start_chat()
|
||
request_str += f"chat = llm_model.start_chat()\n"
|
||
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
# NOTE: VertexAI does not accept stream=True as a param and raises an error,
|
||
# we handle this by removing 'stream' from optional params and sending the request
|
||
# after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format
|
||
optional_params.pop(
|
||
"stream", None
|
||
) # vertex ai raises an error when passing stream in optional params
|
||
request_str += (
|
||
f"chat.send_message_streaming({prompt}, **{optional_params})\n"
|
||
)
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
model_response = chat.send_message_streaming(prompt, **optional_params)
|
||
|
||
return model_response
|
||
|
||
request_str += f"chat.send_message({prompt}, **{optional_params}).text\n"
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
completion_response = chat.send_message(prompt, **optional_params).text
|
||
elif mode == "text":
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
optional_params.pop(
|
||
"stream", None
|
||
) # See note above on handling streaming for vertex ai
|
||
request_str += (
|
||
f"llm_model.predict_streaming({prompt}, **{optional_params})\n"
|
||
)
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
model_response = llm_model.predict_streaming(prompt, **optional_params)
|
||
|
||
return model_response
|
||
|
||
request_str += f"llm_model.predict({prompt}, **{optional_params}).text\n"
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
completion_response = llm_model.predict(prompt, **optional_params).text
|
||
elif mode == "custom":
|
||
"""
|
||
Vertex AI Model Garden
|
||
"""
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
llm_model = aiplatform.gapic.PredictionServiceClient(
|
||
client_options=client_options
|
||
)
|
||
request_str += f"llm_model = aiplatform.gapic.PredictionServiceClient(client_options={client_options})\n"
|
||
endpoint_path = llm_model.endpoint_path(
|
||
project=vertex_project, location=vertex_location, endpoint=model
|
||
)
|
||
request_str += (
|
||
f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n"
|
||
)
|
||
response = llm_model.predict(
|
||
endpoint=endpoint_path, instances=instances
|
||
).predictions
|
||
|
||
completion_response = response[0]
|
||
if (
|
||
isinstance(completion_response, str)
|
||
and "\nOutput:\n" in completion_response
|
||
):
|
||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
response = TextStreamer(completion_response)
|
||
return response
|
||
elif mode == "private":
|
||
"""
|
||
Vertex AI Model Garden deployed on private endpoint
|
||
"""
|
||
## LOGGING
|
||
logging_obj.pre_call(
|
||
input=prompt,
|
||
api_key=None,
|
||
additional_args={
|
||
"complete_input_dict": optional_params,
|
||
"request_str": request_str,
|
||
},
|
||
)
|
||
request_str += f"llm_model.predict(instances={instances})\n"
|
||
response = llm_model.predict(instances=instances).predictions
|
||
|
||
completion_response = response[0]
|
||
if (
|
||
isinstance(completion_response, str)
|
||
and "\nOutput:\n" in completion_response
|
||
):
|
||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||
if "stream" in optional_params and optional_params["stream"] == True:
|
||
response = TextStreamer(completion_response)
|
||
return response
|
||
|
||
## LOGGING
|
||
logging_obj.post_call(
|
||
input=prompt, api_key=None, original_response=completion_response
|
||
)
|
||
|
||
## RESPONSE OBJECT
|
||
if isinstance(completion_response, litellm.Message):
|
||
model_response["choices"][0]["message"] = completion_response
|
||
elif len(str(completion_response)) > 0:
|
||
model_response["choices"][0]["message"]["content"] = str(
|
||
completion_response
|
||
)
|
||
model_response["created"] = int(time.time())
|
||
model_response["model"] = model
|
||
## CALCULATING USAGE
|
||
if model in litellm.vertex_language_models and response_obj is not None:
|
||
model_response["choices"][0].finish_reason = map_finish_reason(
|
||
response_obj.candidates[0].finish_reason.name
|
||
)
|
||
usage = Usage(
|
||
prompt_tokens=response_obj.usage_metadata.prompt_token_count,
|
||
completion_tokens=response_obj.usage_metadata.candidates_token_count,
|
||
total_tokens=response_obj.usage_metadata.total_token_count,
|
||
)
|
||
else:
|
||
# init prompt tokens
|
||
# this block attempts to get usage from response_obj if it exists, if not it uses the litellm token counter
|
||
prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
|
||
if response_obj is not None:
|
||
if hasattr(response_obj, "usage_metadata") and hasattr(
|
||
response_obj.usage_metadata, "prompt_token_count"
|
||
):
|
||
prompt_tokens = response_obj.usage_metadata.prompt_token_count
|
||
completion_tokens = (
|
||
response_obj.usage_metadata.candidates_token_count
|
||
)
|
||
else:
|
||
prompt_tokens = len(encoding.encode(prompt))
|
||
completion_tokens = len(
|
||
encoding.encode(
|
||
model_response["choices"][0]["message"].get("content", "")
|
||
)
|
||
)
|
||
|
||
usage = Usage(
|
||
prompt_tokens=prompt_tokens,
|
||
completion_tokens=completion_tokens,
|
||
total_tokens=prompt_tokens + completion_tokens,
|
||
)
|
||
setattr(model_response, "usage", usage)
|
||
return model_response
|
||
except Exception as e:
|
||
if isinstance(e, VertexAIError):
|
||
raise e
|
||
> raise VertexAIError(status_code=500, message=str(e))
|
||
E litellm.llms.vertex_ai.VertexAIError: Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.
|
||
|
||
../llms/vertex_ai.py:971: VertexAIError
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
args = ()
|
||
kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': <litellm.utils.Logging object at 0x...i/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}], 'model': 'vertex_ai/gemini-1.5-flash-preview-0514', ...}
|
||
result = None, start_time = datetime.datetime(2024, 5, 19, 12, 8, 45, 542377)
|
||
logging_obj = <litellm.utils.Logging object at 0x1059c53d0>
|
||
call_type = 'completion', model = 'vertex_ai/gemini-1.5-flash-preview-0514'
|
||
k = 'litellm_logging_obj'
|
||
|
||
@wraps(original_function)
|
||
def wrapper(*args, **kwargs):
|
||
# DO NOT MOVE THIS. It always needs to run first
|
||
# Check if this is an async function. If so only execute the async function
|
||
if (
|
||
kwargs.get("acompletion", False) == True
|
||
or kwargs.get("aembedding", False) == True
|
||
or kwargs.get("aimg_generation", False) == True
|
||
or kwargs.get("amoderation", False) == True
|
||
or kwargs.get("atext_completion", False) == True
|
||
or kwargs.get("atranscription", False) == True
|
||
):
|
||
# [OPTIONAL] CHECK MAX RETRIES / REQUEST
|
||
if litellm.num_retries_per_request is not None:
|
||
# check if previous_models passed in as ['litellm_params']['metadata]['previous_models']
|
||
previous_models = kwargs.get("metadata", {}).get(
|
||
"previous_models", None
|
||
)
|
||
if previous_models is not None:
|
||
if litellm.num_retries_per_request <= len(previous_models):
|
||
raise Exception(f"Max retries per request hit!")
|
||
|
||
# MODEL CALL
|
||
result = original_function(*args, **kwargs)
|
||
if "stream" in kwargs and kwargs["stream"] == True:
|
||
if (
|
||
"complete_response" in kwargs
|
||
and kwargs["complete_response"] == True
|
||
):
|
||
chunks = []
|
||
for idx, chunk in enumerate(result):
|
||
chunks.append(chunk)
|
||
return litellm.stream_chunk_builder(
|
||
chunks, messages=kwargs.get("messages", None)
|
||
)
|
||
else:
|
||
return result
|
||
|
||
return result
|
||
|
||
# Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print
|
||
print_args_passed_to_litellm(original_function, args, kwargs)
|
||
start_time = datetime.datetime.now()
|
||
result = None
|
||
logging_obj = kwargs.get("litellm_logging_obj", None)
|
||
|
||
# only set litellm_call_id if its not in kwargs
|
||
call_type = original_function.__name__
|
||
if "litellm_call_id" not in kwargs:
|
||
kwargs["litellm_call_id"] = str(uuid.uuid4())
|
||
try:
|
||
model = args[0] if len(args) > 0 else kwargs["model"]
|
||
except:
|
||
model = None
|
||
if (
|
||
call_type != CallTypes.image_generation.value
|
||
and call_type != CallTypes.text_completion.value
|
||
):
|
||
raise ValueError("model param not passed in.")
|
||
|
||
try:
|
||
if logging_obj is None:
|
||
logging_obj, kwargs = function_setup(
|
||
original_function.__name__, rules_obj, start_time, *args, **kwargs
|
||
)
|
||
kwargs["litellm_logging_obj"] = logging_obj
|
||
|
||
# CHECK FOR 'os.environ/' in kwargs
|
||
for k, v in kwargs.items():
|
||
if v is not None and isinstance(v, str) and v.startswith("os.environ/"):
|
||
kwargs[k] = litellm.get_secret(v)
|
||
# [OPTIONAL] CHECK BUDGET
|
||
if litellm.max_budget:
|
||
if litellm._current_cost > litellm.max_budget:
|
||
raise BudgetExceededError(
|
||
current_cost=litellm._current_cost,
|
||
max_budget=litellm.max_budget,
|
||
)
|
||
|
||
# [OPTIONAL] CHECK MAX RETRIES / REQUEST
|
||
if litellm.num_retries_per_request is not None:
|
||
# check if previous_models passed in as ['litellm_params']['metadata]['previous_models']
|
||
previous_models = kwargs.get("metadata", {}).get(
|
||
"previous_models", None
|
||
)
|
||
if previous_models is not None:
|
||
if litellm.num_retries_per_request <= len(previous_models):
|
||
raise Exception(f"Max retries per request hit!")
|
||
|
||
# [OPTIONAL] CHECK CACHE
|
||
print_verbose(
|
||
f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}"
|
||
)
|
||
# if caching is false or cache["no-cache"]==True, don't run this
|
||
if (
|
||
(
|
||
(
|
||
(
|
||
kwargs.get("caching", None) is None
|
||
and litellm.cache is not None
|
||
)
|
||
or kwargs.get("caching", False) == True
|
||
)
|
||
and kwargs.get("cache", {}).get("no-cache", False) != True
|
||
)
|
||
and kwargs.get("aembedding", False) != True
|
||
and kwargs.get("atext_completion", False) != True
|
||
and kwargs.get("acompletion", False) != True
|
||
and kwargs.get("aimg_generation", False) != True
|
||
and kwargs.get("atranscription", False) != True
|
||
): # allow users to control returning cached responses from the completion function
|
||
# checking cache
|
||
print_verbose(f"INSIDE CHECKING CACHE")
|
||
if (
|
||
litellm.cache is not None
|
||
and str(original_function.__name__)
|
||
in litellm.cache.supported_call_types
|
||
):
|
||
print_verbose(f"Checking Cache")
|
||
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
|
||
kwargs["preset_cache_key"] = (
|
||
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
|
||
)
|
||
cached_result = litellm.cache.get_cache(*args, **kwargs)
|
||
if cached_result != None:
|
||
if "detail" in cached_result:
|
||
# implies an error occurred
|
||
pass
|
||
else:
|
||
call_type = original_function.__name__
|
||
print_verbose(
|
||
f"Cache Response Object routing: call_type - {call_type}; cached_result instace: {type(cached_result)}"
|
||
)
|
||
if call_type == CallTypes.completion.value and isinstance(
|
||
cached_result, dict
|
||
):
|
||
cached_result = convert_to_model_response_object(
|
||
response_object=cached_result,
|
||
model_response_object=ModelResponse(),
|
||
stream=kwargs.get("stream", False),
|
||
)
|
||
|
||
if kwargs.get("stream", False) == True:
|
||
cached_result = CustomStreamWrapper(
|
||
completion_stream=cached_result,
|
||
model=model,
|
||
custom_llm_provider="cached_response",
|
||
logging_obj=logging_obj,
|
||
)
|
||
elif call_type == CallTypes.embedding.value and isinstance(
|
||
cached_result, dict
|
||
):
|
||
cached_result = convert_to_model_response_object(
|
||
response_object=cached_result,
|
||
response_type="embedding",
|
||
)
|
||
|
||
# LOG SUCCESS
|
||
cache_hit = True
|
||
end_time = datetime.datetime.now()
|
||
(
|
||
model,
|
||
custom_llm_provider,
|
||
dynamic_api_key,
|
||
api_base,
|
||
) = litellm.get_llm_provider(
|
||
model=model,
|
||
custom_llm_provider=kwargs.get(
|
||
"custom_llm_provider", None
|
||
),
|
||
api_base=kwargs.get("api_base", None),
|
||
api_key=kwargs.get("api_key", None),
|
||
)
|
||
print_verbose(
|
||
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
|
||
)
|
||
logging_obj.update_environment_variables(
|
||
model=model,
|
||
user=kwargs.get("user", None),
|
||
optional_params={},
|
||
litellm_params={
|
||
"logger_fn": kwargs.get("logger_fn", None),
|
||
"acompletion": False,
|
||
"metadata": kwargs.get("metadata", {}),
|
||
"model_info": kwargs.get("model_info", {}),
|
||
"proxy_server_request": kwargs.get(
|
||
"proxy_server_request", None
|
||
),
|
||
"preset_cache_key": kwargs.get(
|
||
"preset_cache_key", None
|
||
),
|
||
"stream_response": kwargs.get(
|
||
"stream_response", {}
|
||
),
|
||
},
|
||
input=kwargs.get("messages", ""),
|
||
api_key=kwargs.get("api_key", None),
|
||
original_response=str(cached_result),
|
||
additional_args=None,
|
||
stream=kwargs.get("stream", False),
|
||
)
|
||
threading.Thread(
|
||
target=logging_obj.success_handler,
|
||
args=(cached_result, start_time, end_time, cache_hit),
|
||
).start()
|
||
return cached_result
|
||
|
||
# CHECK MAX TOKENS
|
||
if (
|
||
kwargs.get("max_tokens", None) is not None
|
||
and model is not None
|
||
and litellm.modify_params
|
||
== True # user is okay with params being modified
|
||
and (
|
||
call_type == CallTypes.acompletion.value
|
||
or call_type == CallTypes.completion.value
|
||
)
|
||
):
|
||
try:
|
||
base_model = model
|
||
if kwargs.get("hf_model_name", None) is not None:
|
||
base_model = f"huggingface/{kwargs.get('hf_model_name')}"
|
||
max_output_tokens = (
|
||
get_max_tokens(model=base_model) or 4096
|
||
) # assume min context window is 4k tokens
|
||
user_max_tokens = kwargs.get("max_tokens")
|
||
## Scenario 1: User limit + prompt > model limit
|
||
messages = None
|
||
if len(args) > 1:
|
||
messages = args[1]
|
||
elif kwargs.get("messages", None):
|
||
messages = kwargs["messages"]
|
||
input_tokens = token_counter(model=base_model, messages=messages)
|
||
input_tokens += max(
|
||
0.1 * input_tokens, 10
|
||
) # give at least a 10 token buffer. token counting can be imprecise.
|
||
if input_tokens > max_output_tokens:
|
||
pass # allow call to fail normally
|
||
elif user_max_tokens + input_tokens > max_output_tokens:
|
||
user_max_tokens = max_output_tokens - input_tokens
|
||
print_verbose(f"user_max_tokens: {user_max_tokens}")
|
||
kwargs["max_tokens"] = int(
|
||
round(user_max_tokens)
|
||
) # make sure max tokens is always an int
|
||
except Exception as e:
|
||
print_verbose(f"Error while checking max token limit: {str(e)}")
|
||
# MODEL CALL
|
||
> result = original_function(*args, **kwargs)
|
||
|
||
../utils.py:3211:
|
||
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||
../main.py:2368: in completion
|
||
raise exception_type(
|
||
../utils.py:9709: in exception_type
|
||
raise e
|
||
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||
|
||
model = 'gemini-1.5-flash-preview-0514'
|
||
original_exception = VertexAIError("Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.")
|
||
custom_llm_provider = 'vertex_ai'
|
||
completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...}
|
||
extra_kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': <litellm.utils.Logging object at 0x1059c53d0>}
|
||
|
||
def exception_type(
|
||
model,
|
||
original_exception,
|
||
custom_llm_provider,
|
||
completion_kwargs={},
|
||
extra_kwargs={},
|
||
):
|
||
global user_logger_fn, liteDebuggerClient
|
||
exception_mapping_worked = False
|
||
if litellm.suppress_debug_info is False:
|
||
print() # noqa
|
||
print( # noqa
|
||
"\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa
|
||
) # noqa
|
||
print( # noqa
|
||
"LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." # noqa
|
||
) # noqa
|
||
print() # noqa
|
||
try:
|
||
if model:
|
||
error_str = str(original_exception)
|
||
if isinstance(original_exception, BaseException):
|
||
exception_type = type(original_exception).__name__
|
||
else:
|
||
exception_type = ""
|
||
|
||
################################################################################
|
||
# Common Extra information needed for all providers
|
||
# We pass num retries, api_base, vertex_deployment etc to the exception here
|
||
################################################################################
|
||
extra_information = ""
|
||
try:
|
||
_api_base = litellm.get_api_base(
|
||
model=model, optional_params=extra_kwargs
|
||
)
|
||
messages = litellm.get_first_chars_messages(kwargs=completion_kwargs)
|
||
_vertex_project = extra_kwargs.get("vertex_project")
|
||
_vertex_location = extra_kwargs.get("vertex_location")
|
||
_metadata = extra_kwargs.get("metadata", {}) or {}
|
||
_model_group = _metadata.get("model_group")
|
||
_deployment = _metadata.get("deployment")
|
||
extra_information = f"\nModel: {model}"
|
||
if _api_base:
|
||
extra_information += f"\nAPI Base: {_api_base}"
|
||
if messages and len(messages) > 0:
|
||
extra_information += f"\nMessages: {messages}"
|
||
|
||
if _model_group is not None:
|
||
extra_information += f"\nmodel_group: {_model_group}\n"
|
||
if _deployment is not None:
|
||
extra_information += f"\ndeployment: {_deployment}\n"
|
||
if _vertex_project is not None:
|
||
extra_information += f"\nvertex_project: {_vertex_project}\n"
|
||
if _vertex_location is not None:
|
||
extra_information += f"\nvertex_location: {_vertex_location}\n"
|
||
|
||
# on litellm proxy add key name + team to exceptions
|
||
extra_information = _add_key_name_and_team_to_alert(
|
||
request_info=extra_information, metadata=_metadata
|
||
)
|
||
except:
|
||
# DO NOT LET this Block raising the original exception
|
||
pass
|
||
|
||
################################################################################
|
||
# End of Common Extra information Needed for all providers
|
||
################################################################################
|
||
|
||
################################################################################
|
||
#################### Start of Provider Exception mapping ####################
|
||
################################################################################
|
||
|
||
if "Request Timeout Error" in error_str or "Request timed out" in error_str:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
|
||
if (
|
||
custom_llm_provider == "openai"
|
||
or custom_llm_provider == "text-completion-openai"
|
||
or custom_llm_provider == "custom_openai"
|
||
or custom_llm_provider in litellm.openai_compatible_providers
|
||
):
|
||
# custom_llm_provider is openai, make it OpenAI
|
||
if hasattr(original_exception, "message"):
|
||
message = original_exception.message
|
||
else:
|
||
message = str(original_exception)
|
||
if message is not None and isinstance(message, str):
|
||
message = message.replace("OPENAI", custom_llm_provider.upper())
|
||
message = message.replace("openai", custom_llm_provider)
|
||
message = message.replace("OpenAI", custom_llm_provider)
|
||
if custom_llm_provider == "openai":
|
||
exception_provider = "OpenAI" + "Exception"
|
||
else:
|
||
exception_provider = (
|
||
custom_llm_provider[0].upper()
|
||
+ custom_llm_provider[1:]
|
||
+ "Exception"
|
||
)
|
||
|
||
if "This model's maximum context length is" in error_str:
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "model_not_found" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "content_policy_violation" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContentPolicyViolationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "Incorrect API key provided" not in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "Request too large" in error_str:
|
||
raise RateLimitError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "Mistral API raised a streaming error" in error_str:
|
||
exception_mapping_worked = True
|
||
_request = httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1"
|
||
)
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
request=_request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
exception_mapping_worked = True
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 404:
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 422:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 503:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 504: # gateway timeout error
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
else:
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=original_exception.status_code,
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
request=original_exception.request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
else:
|
||
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
||
raise APIConnectionError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
litellm_debug_info=extra_information,
|
||
request=httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1/"
|
||
),
|
||
)
|
||
elif custom_llm_provider == "anthropic": # one of the anthropics
|
||
if hasattr(original_exception, "message"):
|
||
if (
|
||
"prompt is too long" in original_exception.message
|
||
or "prompt: length" in original_exception.message
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=original_exception.message,
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
if "Invalid API Key" in original_exception.message:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=original_exception.message,
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
if hasattr(original_exception, "status_code"):
|
||
print_verbose(f"status_code: {original_exception.status_code}")
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
original_exception.status_code == 400
|
||
or original_exception.status_code == 413
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"AnthropicException - {original_exception.message}. Handle with `litellm.APIError`.",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
request=original_exception.request,
|
||
)
|
||
elif custom_llm_provider == "replicate":
|
||
if "Incorrect authentication token" in error_str:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"ReplicateException - {error_str}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif "input is too long" in error_str:
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"ReplicateException - {error_str}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif exception_type == "ModelError":
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"ReplicateException - {error_str}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif "Request was throttled" in error_str:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"ReplicateException - {error_str}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
original_exception.status_code == 400
|
||
or original_exception.status_code == 422
|
||
or original_exception.status_code == 413
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"ReplicateException - {str(original_exception)}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url="https://api.replicate.com/v1/deployments",
|
||
),
|
||
)
|
||
elif custom_llm_provider == "watsonx":
|
||
if "token_quota_reached" in error_str:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"WatsonxException: Rate Limit Errror - {error_str}",
|
||
llm_provider="watsonx",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif custom_llm_provider == "predibase":
|
||
if "authorization denied for" in error_str:
|
||
exception_mapping_worked = True
|
||
|
||
# Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception
|
||
if (
|
||
error_str is not None
|
||
and isinstance(error_str, str)
|
||
and "bearer" in error_str.lower()
|
||
):
|
||
# only keep the first 10 chars after the occurnence of "bearer"
|
||
_bearer_token_start_index = error_str.lower().find("bearer")
|
||
error_str = error_str[: _bearer_token_start_index + 14]
|
||
error_str += "XXXXXXX" + '"'
|
||
|
||
raise AuthenticationError(
|
||
message=f"PredibaseException: Authentication Error - {error_str}",
|
||
llm_provider="predibase",
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif custom_llm_provider == "bedrock":
|
||
if (
|
||
"too many tokens" in error_str
|
||
or "expected maxLength:" in error_str
|
||
or "Input is too long" in error_str
|
||
or "prompt: length: 1.." in error_str
|
||
or "Too many input tokens" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"BedrockException: Context Window Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif "Malformed input request" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Unable to locate credentials" in error_str
|
||
or "The security token included in the request is invalid"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"BedrockException Invalid Authentication - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif "AccessDeniedException" in error_str:
|
||
exception_mapping_worked = True
|
||
raise PermissionDeniedError(
|
||
message=f"BedrockException PermissionDeniedError - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"throttlingException" in error_str
|
||
or "ThrottlingException" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"BedrockException: Rate Limit Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Connect timeout on endpoint URL" in error_str
|
||
or "timed out" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException: Timeout Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=httpx.Response(
|
||
status_code=500,
|
||
request=httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1/"
|
||
),
|
||
),
|
||
)
|
||
elif original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 400:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 404:
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 422:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 503:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 504: # gateway timeout error
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif custom_llm_provider == "sagemaker":
|
||
if "Unable to locate credentials" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"SagemakerException - {error_str}",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Input validation error: `best_of` must be > 0 and <= 2"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"`inputs` tokens + `max_new_tokens` must be <=" in error_str
|
||
or "instance type with more CPU capacity or memory" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"SagemakerException - {error_str}",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif custom_llm_provider == "vertex_ai":
|
||
if (
|
||
"Vertex AI API has not been used in project" in error_str
|
||
or "Unable to find your project" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"None Unknown Error." in error_str
|
||
or "Content has no parts." in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
message=f"VertexAIException - {error_str}",
|
||
status_code=500,
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
request=original_exception.request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "403" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "The response was blocked." in error_str:
|
||
exception_mapping_worked = True
|
||
raise UnprocessableEntityError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=httpx.Response(
|
||
status_code=429,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url=" https://cloud.google.com/vertex-ai/",
|
||
),
|
||
),
|
||
)
|
||
elif (
|
||
"429 Quota exceeded" in error_str
|
||
or "IndexError: list index out of range" in error_str
|
||
or "429 Unable to submit request because the service is temporarily out of capacity."
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=httpx.Response(
|
||
status_code=429,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url=" https://cloud.google.com/vertex-ai/",
|
||
),
|
||
),
|
||
)
|
||
if hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 400:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=original_exception.response,
|
||
)
|
||
if original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
> raise APIError(
|
||
message=f"VertexAIException - {error_str}",
|
||
status_code=500,
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
request=original_exception.request,
|
||
E litellm.exceptions.APIError: VertexAIException - Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.
|
||
|
||
../utils.py:8922: APIError
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
def test_gemini_pro_vision():
try:
load_vertex_ai_credentials()
litellm.set_verbose = True
litellm.num_retries = 3
> resp = litellm.completion(
model="vertex_ai/gemini-1.5-flash-preview-0514",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
},
},
],
}
],
)

test_amazing_vertex_completion.py:510:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../utils.py:3289: in wrapper
return litellm.completion_with_retries(*args, **kwargs)
../main.py:2401: in completion_with_retries
return retryer(original_function, *args, **kwargs)
../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:379: in __call__
do = self.iter(retry_state=retry_state)
../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:325: in iter
raise retry_exc.reraise()
../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:158: in reraise
raise self.last_attempt.result()
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:449: in result
return self.__get_result()
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:401: in __get_result
raise self._exception
../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:382: in __call__
result = fn(*args, **kwargs)
../utils.py:3317: in wrapper
raise e
../utils.py:3211: in wrapper
result = original_function(*args, **kwargs)
../main.py:2368: in completion
raise exception_type(
../utils.py:9709: in exception_type
raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

model = 'gemini-1.5-flash-preview-0514'
original_exception = VertexAIError("Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.")
custom_llm_provider = 'vertex_ai'
completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...}
extra_kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': <litellm.utils.Logging object at 0x1059c53d0>}

def exception_type(
|
||
model,
|
||
original_exception,
|
||
custom_llm_provider,
|
||
completion_kwargs={},
|
||
extra_kwargs={},
|
||
):
|
||
global user_logger_fn, liteDebuggerClient
|
||
exception_mapping_worked = False
|
||
if litellm.suppress_debug_info is False:
|
||
print() # noqa
|
||
print( # noqa
|
||
"\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa
|
||
) # noqa
|
||
print( # noqa
|
||
"LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." # noqa
|
||
) # noqa
|
||
print() # noqa
|
||
try:
|
||
if model:
|
||
error_str = str(original_exception)
|
||
if isinstance(original_exception, BaseException):
|
||
exception_type = type(original_exception).__name__
|
||
else:
|
||
exception_type = ""
|
||
|
||
################################################################################
|
||
# Common Extra information needed for all providers
|
||
# We pass num retries, api_base, vertex_deployment etc to the exception here
|
||
################################################################################
|
||
extra_information = ""
|
||
try:
|
||
_api_base = litellm.get_api_base(
|
||
model=model, optional_params=extra_kwargs
|
||
)
|
||
messages = litellm.get_first_chars_messages(kwargs=completion_kwargs)
|
||
_vertex_project = extra_kwargs.get("vertex_project")
|
||
_vertex_location = extra_kwargs.get("vertex_location")
|
||
_metadata = extra_kwargs.get("metadata", {}) or {}
|
||
_model_group = _metadata.get("model_group")
|
||
_deployment = _metadata.get("deployment")
|
||
extra_information = f"\nModel: {model}"
|
||
if _api_base:
|
||
extra_information += f"\nAPI Base: {_api_base}"
|
||
if messages and len(messages) > 0:
|
||
extra_information += f"\nMessages: {messages}"
|
||
|
||
if _model_group is not None:
|
||
extra_information += f"\nmodel_group: {_model_group}\n"
|
||
if _deployment is not None:
|
||
extra_information += f"\ndeployment: {_deployment}\n"
|
||
if _vertex_project is not None:
|
||
extra_information += f"\nvertex_project: {_vertex_project}\n"
|
||
if _vertex_location is not None:
|
||
extra_information += f"\nvertex_location: {_vertex_location}\n"
|
||
|
||
# on litellm proxy add key name + team to exceptions
|
||
extra_information = _add_key_name_and_team_to_alert(
|
||
request_info=extra_information, metadata=_metadata
|
||
)
|
||
except:
|
||
# DO NOT LET this Block raising the original exception
|
||
pass
|
||
|
||
################################################################################
|
||
# End of Common Extra information Needed for all providers
|
||
################################################################################
|
||
|
||
################################################################################
|
||
#################### Start of Provider Exception mapping ####################
|
||
################################################################################
|
||
|
||
if "Request Timeout Error" in error_str or "Request timed out" in error_str:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
|
||
if (
|
||
custom_llm_provider == "openai"
|
||
or custom_llm_provider == "text-completion-openai"
|
||
or custom_llm_provider == "custom_openai"
|
||
or custom_llm_provider in litellm.openai_compatible_providers
|
||
):
|
||
# custom_llm_provider is openai, make it OpenAI
|
||
if hasattr(original_exception, "message"):
|
||
message = original_exception.message
|
||
else:
|
||
message = str(original_exception)
|
||
if message is not None and isinstance(message, str):
|
||
message = message.replace("OPENAI", custom_llm_provider.upper())
|
||
message = message.replace("openai", custom_llm_provider)
|
||
message = message.replace("OpenAI", custom_llm_provider)
|
||
if custom_llm_provider == "openai":
|
||
exception_provider = "OpenAI" + "Exception"
|
||
else:
|
||
exception_provider = (
|
||
custom_llm_provider[0].upper()
|
||
+ custom_llm_provider[1:]
|
||
+ "Exception"
|
||
)
|
||
|
||
if "This model's maximum context length is" in error_str:
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "model_not_found" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "content_policy_violation" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContentPolicyViolationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"invalid_request_error" in error_str
|
||
and "Incorrect API key provided" not in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "Request too large" in error_str:
|
||
raise RateLimitError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "Mistral API raised a streaming error" in error_str:
|
||
exception_mapping_worked = True
|
||
_request = httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1"
|
||
)
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
request=_request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
exception_mapping_worked = True
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 404:
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 422:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 503:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 504: # gateway timeout error
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"{exception_provider} - {message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
else:
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=original_exception.status_code,
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
request=original_exception.request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
else:
|
||
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
||
raise APIConnectionError(
|
||
message=f"{exception_provider} - {message}",
|
||
llm_provider=custom_llm_provider,
|
||
model=model,
|
||
litellm_debug_info=extra_information,
|
||
request=httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1/"
|
||
),
|
||
)
|
||
elif custom_llm_provider == "anthropic": # one of the anthropics
|
||
if hasattr(original_exception, "message"):
|
||
if (
|
||
"prompt is too long" in original_exception.message
|
||
or "prompt: length" in original_exception.message
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=original_exception.message,
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
if "Invalid API Key" in original_exception.message:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=original_exception.message,
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
if hasattr(original_exception, "status_code"):
|
||
print_verbose(f"status_code: {original_exception.status_code}")
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
original_exception.status_code == 400
|
||
or original_exception.status_code == 413
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="anthropic",
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"AnthropicException - {original_exception.message}",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"AnthropicException - {original_exception.message}. Handle with `litellm.APIError`.",
|
||
llm_provider="anthropic",
|
||
model=model,
|
||
request=original_exception.request,
|
||
)
|
||
elif custom_llm_provider == "replicate":
|
||
if "Incorrect authentication token" in error_str:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"ReplicateException - {error_str}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif "input is too long" in error_str:
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"ReplicateException - {error_str}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif exception_type == "ModelError":
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"ReplicateException - {error_str}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif "Request was throttled" in error_str:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"ReplicateException - {error_str}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
original_exception.status_code == 400
|
||
or original_exception.status_code == 422
|
||
or original_exception.status_code == 413
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider="replicate",
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"ReplicateException - {original_exception.message}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
status_code=500,
|
||
message=f"ReplicateException - {str(original_exception)}",
|
||
llm_provider="replicate",
|
||
model=model,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url="https://api.replicate.com/v1/deployments",
|
||
),
|
||
)
|
||
elif custom_llm_provider == "watsonx":
|
||
if "token_quota_reached" in error_str:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"WatsonxException: Rate Limit Errror - {error_str}",
|
||
llm_provider="watsonx",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif custom_llm_provider == "predibase":
|
||
if "authorization denied for" in error_str:
|
||
exception_mapping_worked = True
|
||
|
||
# Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception
|
||
if (
|
||
error_str is not None
|
||
and isinstance(error_str, str)
|
||
and "bearer" in error_str.lower()
|
||
):
|
||
# only keep the first 10 chars after the occurnence of "bearer"
|
||
_bearer_token_start_index = error_str.lower().find("bearer")
|
||
error_str = error_str[: _bearer_token_start_index + 14]
|
||
error_str += "XXXXXXX" + '"'
|
||
|
||
raise AuthenticationError(
|
||
message=f"PredibaseException: Authentication Error - {error_str}",
|
||
llm_provider="predibase",
|
||
model=model,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif custom_llm_provider == "bedrock":
|
||
if (
|
||
"too many tokens" in error_str
|
||
or "expected maxLength:" in error_str
|
||
or "Input is too long" in error_str
|
||
or "prompt: length: 1.." in error_str
|
||
or "Too many input tokens" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"BedrockException: Context Window Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif "Malformed input request" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Unable to locate credentials" in error_str
|
||
or "The security token included in the request is invalid"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"BedrockException Invalid Authentication - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif "AccessDeniedException" in error_str:
|
||
exception_mapping_worked = True
|
||
raise PermissionDeniedError(
|
||
message=f"BedrockException PermissionDeniedError - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"throttlingException" in error_str
|
||
or "ThrottlingException" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"BedrockException: Rate Limit Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Connect timeout on endpoint URL" in error_str
|
||
or "timed out" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException: Timeout Error - {error_str}",
|
||
model=model,
|
||
llm_provider="bedrock",
|
||
)
|
||
elif hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=httpx.Response(
|
||
status_code=500,
|
||
request=httpx.Request(
|
||
method="POST", url="https://api.openai.com/v1/"
|
||
),
|
||
),
|
||
)
|
||
elif original_exception.status_code == 401:
|
||
exception_mapping_worked = True
|
||
raise AuthenticationError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 400:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 404:
|
||
exception_mapping_worked = True
|
||
raise NotFoundError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
llm_provider="bedrock",
|
||
model=model,
|
||
response=original_exception.response,
|
||
)
|
||
elif original_exception.status_code == 408:
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 422:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 429:
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 503:
|
||
exception_mapping_worked = True
|
||
raise ServiceUnavailableError(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif original_exception.status_code == 504: # gateway timeout error
|
||
exception_mapping_worked = True
|
||
raise Timeout(
|
||
message=f"BedrockException - {original_exception.message}",
|
||
model=model,
|
||
llm_provider=custom_llm_provider,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif custom_llm_provider == "sagemaker":
|
||
if "Unable to locate credentials" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"SagemakerException - {error_str}",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"Input validation error: `best_of` must be > 0 and <= 2"
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif (
|
||
"`inputs` tokens + `max_new_tokens` must be <=" in error_str
|
||
or "instance type with more CPU capacity or memory" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise ContextWindowExceededError(
|
||
message=f"SagemakerException - {error_str}",
|
||
model=model,
|
||
llm_provider="sagemaker",
|
||
response=original_exception.response,
|
||
)
|
||
elif custom_llm_provider == "vertex_ai":
|
||
if (
|
||
"Vertex AI API has not been used in project" in error_str
|
||
or "Unable to find your project" in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif (
|
||
"None Unknown Error." in error_str
|
||
or "Content has no parts." in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise APIError(
|
||
message=f"VertexAIException - {error_str}",
|
||
status_code=500,
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
request=original_exception.request,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "403" in error_str:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
response=original_exception.response,
|
||
litellm_debug_info=extra_information,
|
||
)
|
||
elif "The response was blocked." in error_str:
|
||
exception_mapping_worked = True
|
||
raise UnprocessableEntityError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=httpx.Response(
|
||
status_code=429,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url=" https://cloud.google.com/vertex-ai/",
|
||
),
|
||
),
|
||
)
|
||
elif (
|
||
"429 Quota exceeded" in error_str
|
||
or "IndexError: list index out of range" in error_str
|
||
or "429 Unable to submit request because the service is temporarily out of capacity."
|
||
in error_str
|
||
):
|
||
exception_mapping_worked = True
|
||
raise RateLimitError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=httpx.Response(
|
||
status_code=429,
|
||
request=httpx.Request(
|
||
method="POST",
|
||
url=" https://cloud.google.com/vertex-ai/",
|
||
),
|
||
),
|
||
)
|
||
if hasattr(original_exception, "status_code"):
|
||
if original_exception.status_code == 400:
|
||
exception_mapping_worked = True
|
||
raise BadRequestError(
|
||
message=f"VertexAIException - {error_str}",
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
response=original_exception.response,
|
||
)
|
||
if original_exception.status_code == 500:
|
||
exception_mapping_worked = True
|
||
> raise APIError(
|
||
message=f"VertexAIException - {error_str}",
|
||
status_code=500,
|
||
model=model,
|
||
llm_provider="vertex_ai",
|
||
litellm_debug_info=extra_information,
|
||
request=original_exception.request,
E litellm.exceptions.APIError: VertexAIException - Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.

../utils.py:8922: APIError

During handling of the above exception, another exception occurred:

def test_gemini_pro_vision():
try:
load_vertex_ai_credentials()
litellm.set_verbose = True
litellm.num_retries = 3
resp = litellm.completion(
model="vertex_ai/gemini-1.5-flash-preview-0514",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
},
},
],
}
],
)
print(resp)

prompt_tokens = resp.usage.prompt_tokens

# DO Not DELETE this ASSERT
# Google counts the prompt tokens for us, we should ensure we use the tokens from the orignal response
assert prompt_tokens == 263 # the gemini api returns 263 to us
except litellm.RateLimitError as e:
pass
except Exception as e:
if "500 Internal error encountered.'" in str(e):
pass
else:
> pytest.fail(f"An exception occurred - {str(e)}")
E Failed: An exception occurred - VertexAIException - Parameter to MergeFrom() must be instance of same class: expected <class 'Part'> got <class 'vertexai.generative_models._generative_models.Part'>.

test_amazing_vertex_completion.py:540: Failed
---------------------------- Captured stdout setup -----------------------------
<module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>
----------------------------- Captured stdout call -----------------------------
loading vertex ai credentials
Read vertexai file path


[92mRequest to litellm:[0m
[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}])[0m


self.optional_params: {}
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK
(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {}
Final returned optional params: {}
self.optional_params: {}
VERTEX AI: vertex_project=None; vertex_location=None
VERTEX AI: creds=<google.oauth2.service_account.Credentials object at 0x164696490>; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s

Making VertexAI Gemini Pro / Pro Vision Call

Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}]
[92m
Request Sent from LiteLLM:
llm_model = GenerativeModel(gemini-1.5-flash-preview-0514)
response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]}])
[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.

Logging Details: logger_fn - None | callable(logger_fn) - False


[92mRequest to litellm:[0m
[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=<litellm.utils.Logging object at 0x1059c53d0>)[0m


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK
(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {}
Final returned optional params: {}
self.optional_params: {}
VERTEX AI: vertex_project=None; vertex_location=None
VERTEX AI: creds=<google.oauth2.service_account.Credentials object at 0x164c00f10>; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s

Making VertexAI Gemini Pro / Pro Vision Call

Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}]
[92m
Request Sent from LiteLLM:
llm_model = GenerativeModel(gemini-1.5-flash-preview-0514)
response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]}])
[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.

Logging Details: logger_fn - None | callable(logger_fn) - False
Logging Details LiteLLM-Failure Call
self.failure_callback: []


[92mRequest to litellm:[0m
[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=<litellm.utils.Logging object at 0x1059c53d0>)[0m


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK
(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {}
Final returned optional params: {}
self.optional_params: {}
VERTEX AI: vertex_project=None; vertex_location=None
VERTEX AI: creds=<google.oauth2.service_account.Credentials object at 0x1635f7750>; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s

Making VertexAI Gemini Pro / Pro Vision Call

Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}]
[92m
Request Sent from LiteLLM:
llm_model = GenerativeModel(gemini-1.5-flash-preview-0514)
response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]}])
[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.

Logging Details: logger_fn - None | callable(logger_fn) - False
Logging Details LiteLLM-Failure Call
self.failure_callback: []


[92mRequest to litellm:[0m
[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=<litellm.utils.Logging object at 0x1059c53d0>)[0m


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK
(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {}
Final returned optional params: {}
self.optional_params: {}
VERTEX AI: vertex_project=None; vertex_location=None
VERTEX AI: creds=<google.oauth2.service_account.Credentials object at 0x1646c5d50>; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s

Making VertexAI Gemini Pro / Pro Vision Call

Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}]
[92m
Request Sent from LiteLLM:
llm_model = GenerativeModel(gemini-1.5-flash-preview-0514)
response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data {
mime_type: "image/jpeg"
file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
}
]}])
[0m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.

Logging Details: logger_fn - None | callable(logger_fn) - False
Logging Details LiteLLM-Failure Call
self.failure_callback: []
=============================== warnings summary ===============================
../proxy/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings
/Users/krrishdholakia/Documents/litellm/litellm/proxy/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)

../proxy/_types.py:255
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:255: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:342
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:342: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
extra = Extra.allow # Allow extra fields

../proxy/_types.py:345
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:345: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:374
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:374: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:421
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:421: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:490
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:490: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:510
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:510: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:523
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:523: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:568
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:568: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:605
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:605: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:923
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:923: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:950
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:950: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../proxy/_types.py:971
/Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:971: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
@root_validator(pre=True)

../utils.py:60
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED test_amazing_vertex_completion.py::test_gemini_pro_vision - Failed: An...
======================== 1 failed, 39 warnings in 2.09s ========================