Merge branch 'main' into litellm_bedrock_converse_api

Krish Dholakia 2024-06-07 08:49:52 -07:00 committed by GitHub
commit 26993c067e
82 changed files with 2540 additions and 1147 deletions


@@ -1374,8 +1374,12 @@ class Logging:
callback_func=callback,
)
except Exception as e:
traceback.print_exc()
print_verbose(
verbose_logger.error(
"litellm.Logging.pre_call(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
)
print_verbose(
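The pattern in this hunk repeats across the file: traceback.print_exc() is dropped in favour of an ERROR-level message through the shared logger, with the full traceback demoted to DEBUG. A self-contained sketch of that before/after shape (the logger construction below is a stand-in, not litellm's actual setup):

import logging
import traceback

verbose_logger = logging.getLogger("LiteLLM")  # stand-in for litellm's shared verbose_logger

def pre_call_logging():
    raise RuntimeError("integration failed")

try:
    pre_call_logging()
except Exception as e:
    # Old behaviour: traceback.print_exc() wrote to stderr regardless of log level.
    # New behaviour: message at ERROR, traceback only when DEBUG logging is enabled.
    verbose_logger.error(
        "litellm.Logging.pre_call(): Exception occurred - {}".format(str(e))
    )
    verbose_logger.debug(traceback.format_exc())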
@@ -4062,6 +4066,7 @@ def openai_token_counter(
for c in value:
if c["type"] == "text":
text += c["text"]
num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
elif c["type"] == "image_url":
if isinstance(c["image_url"], dict):
image_url_dict = c["image_url"]
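The one-line addition above makes text parts of multimodal content count toward num_tokens. A standalone sketch of that counting step (the encoding name and message shape are illustrative, not taken from this diff):

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")  # assumption: an OpenAI-style tokenizer

content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
]

num_tokens = 0
for c in content:
    if c["type"] == "text":
        # disallowed_special=() encodes special-token text literally instead of raising
        num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
print(num_tokens)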
@@ -6194,6 +6199,27 @@ def calculate_max_parallel_requests(
return None
def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
min_order = min(
(
deployment["litellm_params"]["order"]
for deployment in healthy_deployments
if "order" in deployment["litellm_params"]
),
default=None,
)
if min_order is not None:
filtered_deployments = [
deployment
for deployment in healthy_deployments
if deployment["litellm_params"].get("order") == min_order
]
return filtered_deployments
return healthy_deployments
def _get_model_region(
custom_llm_provider: str, litellm_params: LiteLLM_Params
) -> Optional[str]:
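A usage sketch for the new _get_order_filtered_deployments helper defined above (deployment dicts invented for illustration): only deployments sharing the lowest "order" value survive, and if no deployment sets "order" the list is returned untouched.

healthy_deployments = [
    {"litellm_params": {"model": "azure/gpt-4", "order": 2}},
    {"litellm_params": {"model": "openai/gpt-4", "order": 1}},
    {"litellm_params": {"model": "bedrock/claude-3"}},  # no "order" key set
]

filtered = _get_order_filtered_deployments(healthy_deployments)
# -> [{"litellm_params": {"model": "openai/gpt-4", "order": 1}}]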
@@ -7336,6 +7362,10 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
if custom_llm_provider == "databricks":
return litellm.DatabricksConfig().get_required_params()
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_required_params()
else:
return []
@@ -9782,8 +9812,7 @@ def exception_type(
elif custom_llm_provider == "azure":
if "Internal server error" in error_str:
exception_mapping_worked = True
raise APIError(
status_code=500,
raise litellm.InternalServerError(
message=f"AzureException Internal server error - {original_exception.message}",
llm_provider="azure",
model=model,
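With this change, Azure "Internal server error" responses map to litellm.InternalServerError instead of a bare 500 APIError, so callers can branch on the narrower type. A hedged usage sketch (model name and messages are invented):

import litellm

try:
    litellm.completion(
        model="azure/my-gpt-4-deployment",
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.InternalServerError:
    ...  # provider-side 500: safe to retry or fail over to another deployment
except litellm.APIError:
    ...  # other provider errors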
@@ -10033,6 +10062,8 @@ def get_secret(
):
key_management_system = litellm._key_management_system
key_management_settings = litellm._key_management_settings
args = locals()
if secret_name.startswith("os.environ/"):
secret_name = secret_name.replace("os.environ/", "")
@@ -10120,13 +10151,13 @@ def get_secret(
key_manager = "local"
if (
key_manager == KeyManagementSystem.AZURE_KEY_VAULT
key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
or type(client).__module__ + "." + type(client).__name__
== "azure.keyvault.secrets._client.SecretClient"
): # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
secret = client.get_secret(secret_name).value
elif (
key_manager == KeyManagementSystem.GOOGLE_KMS
key_manager == KeyManagementSystem.GOOGLE_KMS.value
or client.__class__.__name__ == "KeyManagementServiceClient"
):
encrypted_secret: Any = os.getenv(secret_name)
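The .value fixes above matter because key_manager can be a plain string (note the key_manager = "local" fallback earlier in this hunk), and a string never compares equal to an Enum member. A minimal illustration (the enum values shown are assumptions about litellm's definitions):

from enum import Enum

class KeyManagementSystem(Enum):
    AZURE_KEY_VAULT = "azure_key_vault"
    GOOGLE_KMS = "google_kms"

key_manager = "azure_key_vault"                                  # stored as a string
print(key_manager == KeyManagementSystem.AZURE_KEY_VAULT)        # False: str vs Enum member
print(key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value)  # True: str vs str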
@@ -10154,6 +10185,25 @@ def get_secret(
secret = response.plaintext.decode(
"utf-8"
) # assumes the original value was encoded with utf-8
elif key_manager == KeyManagementSystem.AWS_KMS.value:
"""
Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
"""
encrypted_value = os.getenv(secret_name, None)
if encrypted_value is None:
raise Exception("encrypted value for AWS KMS cannot be None.")
# Decode the base64 encoded ciphertext
ciphertext_blob = base64.b64decode(encrypted_value)
# Set up the parameters for the decrypt call
params = {"CiphertextBlob": ciphertext_blob}
# Perform the decryption
response = client.decrypt(**params)
# Extract and decode the plaintext
plaintext = response["Plaintext"]
secret = plaintext.decode("utf-8")
elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
try:
get_secret_value_response = client.get_secret_value(
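The new AWS_KMS branch reads a base64-encoded ciphertext from the environment and decrypts it through the KMS client. A minimal standalone sketch of that flow with boto3 (the environment variable name and region are assumptions):

import base64
import os
import boto3

client = boto3.client("kms", region_name="us-east-1")

encrypted_value = os.environ["LITELLM_SECRET_AWS_KMS"]        # base64-encoded ciphertext
ciphertext_blob = base64.b64decode(encrypted_value)           # back to raw bytes
response = client.decrypt(CiphertextBlob=ciphertext_blob)     # AWS KMS Decrypt call
secret = response["Plaintext"].decode("utf-8")                # plaintext bytes -> str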
@@ -10174,10 +10224,14 @@ def get_secret(
for k, v in secret_dict.items():
secret = v
print_verbose(f"secret: {secret}")
elif key_manager == "local":
secret = os.getenv(secret_name)
else: # assume the default is infisicial client
secret = client.get_secret(secret_name).secret_value
except Exception as e: # check if it's in os.environ
print_verbose(f"An exception occurred - {str(e)}")
verbose_logger.error(
f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
)
secret = os.getenv(secret_name)
try:
secret_value_as_bool = ast.literal_eval(secret)
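The trailing ast.literal_eval(secret) step turns boolean-looking secrets into real booleans; anything that is not a Python literal raises and, given the surrounding try, presumably leaves the string value untouched. A quick illustration:

import ast

ast.literal_eval("True")       # -> True (bool)
ast.literal_eval("False")      # -> False (bool)
ast.literal_eval("sk-abc123")  # raises ValueError: not a Python literal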
@@ -10511,7 +10565,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_huggingface_chunk(self, chunk):
@@ -10555,7 +10614,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_ai21_chunk(self, chunk): # fake streaming
@@ -10790,7 +10854,12 @@ class CustomStreamWrapper:
"usage": usage,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_azure_text_completion_chunk(self, chunk):
@@ -10871,7 +10940,12 @@ class CustomStreamWrapper:
else:
return ""
except:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def handle_cloudlfare_stream(self, chunk):
@@ -11070,7 +11144,12 @@ class CustomStreamWrapper:
"is_finished": True,
}
except:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def model_response_creator(self):
@@ -11557,7 +11636,12 @@ class CustomStreamWrapper:
tool["type"] = "function"
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
model_response.choices[0].delta = Delta()
else:
try: