forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_svc_logger
Commit: 2cf3133669
84 changed files with 3848 additions and 5302 deletions
litellm/utils.py (186 changed lines)
@@ -239,6 +239,8 @@ def map_finish_reason(
         return "length"
     elif finish_reason == "tool_use":  # anthropic
         return "tool_calls"
+    elif finish_reason == "content_filtered":
+        return "content_filter"
     return finish_reason
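For orientation, a hedged sketch of the normalized values this helper produces (assuming map_finish_reason is importable from litellm.utils, as in this file; expected outputs shown as comments):

from litellm.utils import map_finish_reason

print(map_finish_reason("tool_use"))          # "tool_calls" (anthropic)
print(map_finish_reason("content_filtered"))  # "content_filter"
print(map_finish_reason("some_new_reason"))   # unmapped values fall through unchanged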
@@ -1372,8 +1374,12 @@ class Logging:
                             callback_func=callback,
                         )
                 except Exception as e:
-                    traceback.print_exc()
-                    print_verbose(
+                    verbose_logger.error(
+                        "litellm.Logging.pre_call(): Exception occured - {}".format(
+                            str(e)
+                        )
+                    )
+                    verbose_logger.debug(
                         f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                     )
                     print_verbose(
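The traceback.print_exc() to verbose_logger refactor above repeats throughout this file. A minimal self-contained sketch of the pattern (stdlib logging stands in for litellm's verbose_logger, which is an assumption of this example):

import logging
import traceback

verbose_logger = logging.getLogger("sketch")  # stand-in for litellm's verbose_logger

def run_callback(callback):
    try:
        callback()
    except Exception as e:
        # short, searchable error line at ERROR level
        verbose_logger.error("pre_call(): Exception occured - {}".format(str(e)))
        # full traceback only at DEBUG level, replacing traceback.print_exc()
        verbose_logger.debug(
            f"[Non-Blocking] Exception occurred while input logging: {traceback.format_exc()}"
        )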
@@ -4060,6 +4066,9 @@ def openai_token_counter(
                 for c in value:
                     if c["type"] == "text":
                         text += c["text"]
+                        num_tokens += len(
+                            encoding.encode(c["text"], disallowed_special=())
+                        )
                     elif c["type"] == "image_url":
                         if isinstance(c["image_url"], dict):
                             image_url_dict = c["image_url"]
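The added lines count tokens for the text parts of multi-part (vision-style) message content. A hedged sketch of that counting with tiktoken (the cl100k_base encoding and the sample content are assumptions of this example):

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
content = [
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "low"}},
]

num_tokens = 0
for c in content:
    if c["type"] == "text":
        # same call the hunk adds: encode the text part and count its tokens
        num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
print(num_tokens)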
@@ -5634,19 +5643,29 @@ def get_optional_params(
             optional_params["stream"] = stream
         elif "anthropic" in model:
             _check_valid_arg(supported_params=supported_params)
-            # anthropic params on bedrock
-            # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
-            if model.startswith("anthropic.claude-3"):
-                optional_params = (
-                    litellm.AmazonAnthropicClaude3Config().map_openai_params(
-                        non_default_params=non_default_params,
-                        optional_params=optional_params,
-                    )
-                )
-            else:
-                optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
-                    non_default_params=non_default_params,
-                    optional_params=optional_params,
-                )
+            if "aws_bedrock_client" in passed_params:  # deprecated boto3.invoke route.
+                if model.startswith("anthropic.claude-3"):
+                    optional_params = (
+                        litellm.AmazonAnthropicClaude3Config().map_openai_params(
+                            non_default_params=non_default_params,
+                            optional_params=optional_params,
+                        )
+                    )
+                else:
+                    optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
+                        non_default_params=non_default_params,
+                        optional_params=optional_params,
+                    )
+            else:  # bedrock httpx route
+                optional_params = litellm.AmazonConverseConfig().map_openai_params(
+                    model=model,
+                    non_default_params=non_default_params,
+                    optional_params=optional_params,
+                    drop_params=(
+                        drop_params
+                        if drop_params is not None and isinstance(drop_params, bool)
+                        else False
+                    ),
+                )
         elif "amazon" in model:  # amazon titan llms
             _check_valid_arg(supported_params=supported_params)
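The net effect: unless a deprecated aws_bedrock_client is passed, Anthropic-on-Bedrock params now go through AmazonConverseConfig. A hedged usage sketch of that mapping call, reusing the signature shown in the hunk (the model id, values, and printed result are illustrative):

import litellm

optional_params = litellm.AmazonConverseConfig().map_openai_params(
    model="anthropic.claude-3-sonnet-20240229-v1:0",  # illustrative Bedrock model id
    non_default_params={"max_tokens": 100, "temperature": 0.2},
    optional_params={},
    drop_params=False,  # when True, unsupported OpenAI params are dropped instead of raising
)
print(optional_params)  # e.g. {"maxTokens": 100, "temperature": 0.2}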
@@ -6198,6 +6217,27 @@ def calculate_max_parallel_requests(
     return None


+def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
+    min_order = min(
+        (
+            deployment["litellm_params"]["order"]
+            for deployment in healthy_deployments
+            if "order" in deployment["litellm_params"]
+        ),
+        default=None,
+    )
+
+    if min_order is not None:
+        filtered_deployments = [
+            deployment
+            for deployment in healthy_deployments
+            if deployment["litellm_params"].get("order") == min_order
+        ]
+
+        return filtered_deployments
+    return healthy_deployments
+
+
 def _get_model_region(
     custom_llm_provider: str, litellm_params: LiteLLM_Params
 ) -> Optional[str]:
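A usage sketch for the new helper (assuming it is imported from litellm.utils; the deployment dicts are illustrative): only deployments whose litellm_params["order"] equals the lowest configured order are kept, and the list passes through unchanged when no deployment sets an order.

healthy_deployments = [
    {"model_name": "gpt-4", "litellm_params": {"order": 2}},
    {"model_name": "gpt-4-fallback", "litellm_params": {"order": 1}},
    {"model_name": "gpt-4-no-order", "litellm_params": {}},
]

filtered = _get_order_filtered_deployments(healthy_deployments)
# -> [{"model_name": "gpt-4-fallback", "litellm_params": {"order": 1}}]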
@@ -6403,20 +6443,7 @@ def get_supported_openai_params(
     - None if unmapped
     """
     if custom_llm_provider == "bedrock":
-        if model.startswith("anthropic.claude-3"):
-            return litellm.AmazonAnthropicClaude3Config().get_supported_openai_params()
-        elif model.startswith("anthropic"):
-            return litellm.AmazonAnthropicConfig().get_supported_openai_params()
-        elif model.startswith("ai21"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("amazon"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
-        elif model.startswith("meta"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("cohere"):
-            return ["stream", "temperature", "max_tokens"]
-        elif model.startswith("mistral"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+        return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "ollama":
         return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
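With the per-prefix lists removed, supported params for any Bedrock model are answered by AmazonConverseConfig. A hedged sketch of the public entry point (model id illustrative; the returned list depends on the litellm version):

import litellm

params = litellm.get_supported_openai_params(
    model="anthropic.claude-3-sonnet-20240229-v1:0",
    custom_llm_provider="bedrock",
)
print(params)  # e.g. ["max_tokens", "temperature", "top_p", "stream", "stop", "tools", ...]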
@@ -8516,7 +8543,11 @@ def exception_type(
             extra_information = f"\nModel: {model}"
             if _api_base:
                 extra_information += f"\nAPI Base: `{_api_base}`"
-            if messages and len(messages) > 0:
+            if (
+                messages
+                and len(messages) > 0
+                and litellm.redact_messages_in_exceptions is False
+            ):
                 extra_information += f"\nMessages: `{messages}`"

             if _model_group is not None:
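The extra condition means message contents are appended to exception text only when redaction is off. A minimal sketch of the flag (assuming it is set before the failing call):

import litellm

# keep prompts out of provider exception messages
litellm.redact_messages_in_exceptions = True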
@@ -9803,8 +9834,7 @@ def exception_type(
             elif custom_llm_provider == "azure":
                 if "Internal server error" in error_str:
                     exception_mapping_worked = True
-                    raise APIError(
-                        status_code=500,
+                    raise litellm.InternalServerError(
                         message=f"AzureException Internal server error - {original_exception.message}",
                         llm_provider="azure",
                         model=model,
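Callers that previously matched a generic 500 APIError can catch the narrower type instead. A hedged sketch (the deployment name is a placeholder and the call needs real Azure credentials):

import litellm

try:
    litellm.completion(
        model="azure/my-deployment",  # placeholder deployment name
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.InternalServerError as e:
    # the Azure mapping above now raises this for "Internal server error" responses
    print("retryable server-side failure:", e)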
@@ -10054,6 +10084,8 @@ def get_secret(
 ):
     key_management_system = litellm._key_management_system
+    key_management_settings = litellm._key_management_settings
+    args = locals()

     if secret_name.startswith("os.environ/"):
         secret_name = secret_name.replace("os.environ/", "")
@@ -10141,13 +10173,13 @@ def get_secret(
             key_manager = "local"

         if (
-            key_manager == KeyManagementSystem.AZURE_KEY_VAULT
+            key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
             or type(client).__module__ + "." + type(client).__name__
             == "azure.keyvault.secrets._client.SecretClient"
         ):  # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
             secret = client.get_secret(secret_name).value
         elif (
-            key_manager == KeyManagementSystem.GOOGLE_KMS
+            key_manager == KeyManagementSystem.GOOGLE_KMS.value
             or client.__class__.__name__ == "KeyManagementServiceClient"
         ):
             encrypted_secret: Any = os.getenv(secret_name)
@@ -10175,6 +10207,25 @@ def get_secret(
                 secret = response.plaintext.decode(
                     "utf-8"
                 )  # assumes the original value was encoded with utf-8
+            elif key_manager == KeyManagementSystem.AWS_KMS.value:
+                """
+                Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
+                """
+                encrypted_value = os.getenv(secret_name, None)
+                if encrypted_value is None:
+                    raise Exception("encrypted value for AWS KMS cannot be None.")
+                # Decode the base64 encoded ciphertext
+                ciphertext_blob = base64.b64decode(encrypted_value)
+
+                # Set up the parameters for the decrypt call
+                params = {"CiphertextBlob": ciphertext_blob}
+
+                # Perform the decryption
+                response = client.decrypt(**params)
+
+                # Extract and decode the plaintext
+                plaintext = response["Plaintext"]
+                secret = plaintext.decode("utf-8")
             elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
                 try:
                     get_secret_value_response = client.get_secret_value(
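The new AWS_KMS branch expects the environment variable to hold base64-encoded KMS ciphertext. A hedged sketch of producing such a value with boto3 (key alias, region, and secret are placeholders):

import base64
import boto3

kms = boto3.client("kms", region_name="us-west-2")

# Encrypt the plaintext secret, then base64-encode the ciphertext for storage in an env var.
resp = kms.encrypt(KeyId="alias/my-litellm-key", Plaintext=b"sk-my-secret")
encoded = base64.b64encode(resp["CiphertextBlob"]).decode("utf-8")
print(encoded)  # export this value; get_secret() base64-decodes it and calls client.decrypt()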
@@ -10195,10 +10246,14 @@ def get_secret(
                 for k, v in secret_dict.items():
                     secret = v
                 print_verbose(f"secret: {secret}")
+            elif key_manager == "local":
+                secret = os.getenv(secret_name)
             else:  # assume the default is infisicial client
                 secret = client.get_secret(secret_name).secret_value
         except Exception as e:  # check if it's in os.environ
-            print_verbose(f"An exception occurred - {str(e)}")
+            verbose_logger.error(
+                f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
+            )
             secret = os.getenv(secret_name)
     try:
         secret_value_as_bool = ast.literal_eval(secret)
@@ -10532,7 +10587,12 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_huggingface_chunk(self, chunk):
@@ -10576,7 +10636,12 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_ai21_chunk(self, chunk):  # fake streaming
@@ -10811,7 +10876,12 @@ class CustomStreamWrapper:
                 "usage": usage,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_azure_text_completion_chunk(self, chunk):
@@ -10892,7 +10962,12 @@ class CustomStreamWrapper:
             else:
                 return ""
         except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             return ""

     def handle_cloudlfare_stream(self, chunk):
@@ -11091,7 +11166,12 @@ class CustomStreamWrapper:
                 "is_finished": True,
             }
         except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             return ""

     def model_response_creator(self):
@@ -11332,12 +11412,27 @@ class CustomStreamWrapper:
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "bedrock":
+                from litellm.types.llms.bedrock import GenericStreamingChunk
+
                 if self.received_finish_reason is not None:
                     raise StopIteration
-                response_obj = self.handle_bedrock_stream(chunk)
+                response_obj: GenericStreamingChunk = chunk
                 completion_obj["content"] = response_obj["text"]
+
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
+
+                if (
+                    self.stream_options
+                    and self.stream_options.get("include_usage", False) is True
+                    and response_obj["usage"] is not None
+                ):
+                    self.sent_stream_usage = True
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"]["inputTokens"],
+                        completion_tokens=response_obj["usage"]["outputTokens"],
+                        total_tokens=response_obj["usage"]["totalTokens"],
+                    )
             elif self.custom_llm_provider == "sagemaker":
                 print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
                 response_obj = self.handle_sagemaker_stream(chunk)
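On the caller side, the Converse usage block only reaches the stream when stream_options asks for it. A hedged sketch (model id illustrative; requires Bedrock credentials):

import litellm

stream = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
    stream_options={"include_usage": True},  # surfaces inputTokens/outputTokens as litellm.Usage
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")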
@@ -11563,7 +11658,12 @@ class CustomStreamWrapper:
                             tool["type"] = "function"
                     model_response.choices[0].delta = Delta(**_json_delta)
                 except Exception as e:
-                    traceback.print_exc()
+                    verbose_logger.error(
+                        "litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
+                            str(e)
+                        )
+                    )
+                    verbose_logger.debug(traceback.format_exc())
                     model_response.choices[0].delta = Delta()
             else:
                 try:
@@ -11599,7 +11699,7 @@ class CustomStreamWrapper:
                 and hasattr(model_response, "usage")
                 and hasattr(model_response.usage, "prompt_tokens")
             ):
-                if self.sent_first_chunk == False:
+                if self.sent_first_chunk is False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
@@ -11767,6 +11867,8 @@ class CustomStreamWrapper:

     def __next__(self):
         try:
+            if self.completion_stream is None:
+                self.fetch_sync_stream()
             while True:
                 if (
                     isinstance(self.completion_stream, str)
@@ -11841,6 +11943,14 @@ class CustomStreamWrapper:
                 custom_llm_provider=self.custom_llm_provider,
             )

+    def fetch_sync_stream(self):
+        if self.completion_stream is None and self.make_call is not None:
+            # Call make_call to get the completion stream
+            self.completion_stream = self.make_call(client=litellm.module_level_client)
+            self._stream_iter = self.completion_stream.__iter__()
+
+        return self.completion_stream
+
     async def fetch_stream(self):
         if self.completion_stream is None and self.make_call is not None:
             # Call make_call to get the completion stream
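fetch_sync_stream mirrors the existing async fetch_stream: the underlying HTTP stream is created lazily, on first iteration. A generic self-contained sketch of that lazy-initialization pattern (class and names are hypothetical, not litellm's):

from typing import Callable, Iterator, Optional

class LazyStream:
    """Defer creating the underlying stream until someone iterates."""

    def __init__(self, make_call: Optional[Callable[[], Iterator[str]]] = None):
        self.make_call = make_call
        self.completion_stream: Optional[Iterator[str]] = None

    def fetch_sync_stream(self) -> Optional[Iterator[str]]:
        if self.completion_stream is None and self.make_call is not None:
            # only now is the (potentially expensive) call made
            self.completion_stream = self.make_call()
        return self.completion_stream

    def __iter__(self):
        return self

    def __next__(self):
        if self.completion_stream is None:
            self.fetch_sync_stream()
        return next(self.completion_stream)

print(list(LazyStream(lambda: iter(["a", "b"]))))  # ['a', 'b']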