mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Merge branch 'main' into litellm_security_fix
commit 718b547646
31 changed files with 2394 additions and 5332 deletions
litellm/utils.py
@@ -239,6 +239,8 @@ def map_finish_reason(
         return "length"
+    elif finish_reason == "tool_use": # anthropic
+        return "tool_calls"
     elif finish_reason == "content_filtered":
         return "content_filter"
     return finish_reason
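Note: this hunk extends map_finish_reason so Anthropic's "tool_use" stop reason is normalized to the OpenAI-style "tool_calls" value. A minimal standalone sketch of the added mapping (hypothetical helper name, not litellm's actual function):

    def normalize_finish_reason(finish_reason: str) -> str:
        if finish_reason == "tool_use":  # anthropic tool-call stop reason
            return "tool_calls"
        elif finish_reason == "content_filtered":
            return "content_filter"
        return finish_reason  # anything already OpenAI-style passes through

    assert normalize_finish_reason("tool_use") == "tool_calls"
    assert normalize_finish_reason("stop") == "stop"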
@@ -5655,19 +5657,29 @@ def get_optional_params(
                 optional_params["stream"] = stream
         elif "anthropic" in model:
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
-            if model.startswith("anthropic.claude-3"):
-                optional_params = (
-                    litellm.AmazonAnthropicClaude3Config().map_openai_params(
+            if "aws_bedrock_client" in passed_params: # deprecated boto3.invoke route.
+                if model.startswith("anthropic.claude-3"):
+                    optional_params = (
+                        litellm.AmazonAnthropicClaude3Config().map_openai_params(
+                            non_default_params=non_default_params,
+                            optional_params=optional_params,
+                        )
+                    )
+                else:
+                    optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
+                        non_default_params=non_default_params,
+                        optional_params=optional_params,
+                    )
-                )
-            else:
-                optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
+            else: # bedrock httpx route
+                optional_params = litellm.AmazonConverseConfig().map_openai_params(
+                    model=model,
+                    non_default_params=non_default_params,
+                    optional_params=optional_params,
+                    drop_params=(
+                        drop_params
+                        if drop_params is not None and isinstance(drop_params, bool)
+                        else False
+                    ),
+                )
         elif "amazon" in model: # amazon titan llms
             _check_valid_arg(supported_params=supported_params)
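Note: in get_optional_params, Bedrock Anthropic parameter mapping is now split by route. The deprecated boto3 invoke path (detected via an aws_bedrock_client entry in passed_params) keeps the old AmazonAnthropicClaude3Config / AmazonAnthropicConfig mapping, while everything else goes through AmazonConverseConfig on the httpx route, which also receives drop_params coerced to a plain bool. A standalone sketch of that coercion, mirroring the expression in the hunk:

    def coerce_drop_params(drop_params):
        # Only an explicit bool is honored; None or any other type falls back to False.
        return drop_params if drop_params is not None and isinstance(drop_params, bool) else False

    assert coerce_drop_params(True) is True
    assert coerce_drop_params(None) is False
    assert coerce_drop_params("yes") is False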
@@ -6445,20 +6457,7 @@ def get_supported_openai_params(
         - None if unmapped
     """
     if custom_llm_provider == "bedrock":
-        if model.startswith("anthropic.claude-3"):
-            return litellm.AmazonAnthropicClaude3Config().get_supported_openai_params()
-        elif model.startswith("anthropic"):
-            return litellm.AmazonAnthropicConfig().get_supported_openai_params()
-        elif model.startswith("ai21"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("amazon"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
-        elif model.startswith("meta"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("cohere"):
-            return ["stream", "temperature", "max_tokens"]
-        elif model.startswith("mistral"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+        return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "ollama":
         return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
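Note: the per-prefix lists for Bedrock models (anthropic, ai21, amazon, meta, cohere, mistral) are collapsed into a single AmazonConverseConfig lookup that decides supported OpenAI params per model. A hedged usage sketch (the model id is illustrative; exact return values depend on litellm's config for that model):

    import litellm

    params = litellm.get_supported_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        custom_llm_provider="bedrock",
    )
    print(params)  # e.g. ["max_tokens", "temperature", "top_p", "stream", ...]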
@@ -8558,7 +8557,11 @@ def exception_type(
             extra_information = f"\nModel: {model}"
             if _api_base:
                 extra_information += f"\nAPI Base: `{_api_base}`"
-            if messages and len(messages) > 0:
+            if (
+                messages
+                and len(messages) > 0
+                and litellm.redact_messages_in_exceptions is False
+            ):
                 extra_information += f"\nMessages: `{messages}`"
 
             if _model_group is not None:
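Note: raw request messages are now appended to the exception's debug information only when litellm.redact_messages_in_exceptions is False. A standalone sketch of the gate, with a local flag standing in for the module-level setting:

    redact_messages_in_exceptions = True  # stand-in for litellm.redact_messages_in_exceptions
    messages = [{"role": "user", "content": "my secret prompt"}]

    extra_information = "\nModel: gpt-4"
    if messages and len(messages) > 0 and redact_messages_in_exceptions is False:
        extra_information += f"\nMessages: `{messages}`"

    assert "secret" not in extra_information  # redaction keeps prompts out of error text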
@@ -9124,7 +9127,7 @@ def exception_type(
                 if "Unable to locate credentials" in error_str:
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"SagemakerException - {error_str}",
+                        message=f"litellm.BadRequestError: SagemakerException - {error_str}",
                         model=model,
                         llm_provider="sagemaker",
                         response=original_exception.response,
@@ -9158,10 +9161,16 @@ def exception_type(
                 ):
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"VertexAIException BadRequestError - {error_str}",
+                        message=f"litellm.BadRequestError: VertexAIException - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif (
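Note: instead of forwarding original_exception.response (which may not exist for Vertex AI errors), the mapped BadRequestError now carries a synthetic httpx.Response built around a placeholder request; the same pattern repeats in the hunks below. A standalone sketch of that fallback object (URL tidied here; the diff's string keeps a leading space):

    import httpx

    fallback_response = httpx.Response(
        status_code=429,  # placeholder status used by the diff
        request=httpx.Request(method="POST", url="https://cloud.google.com/vertex-ai/"),
    )
    print(fallback_response.status_code)  # 429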
@@ -9169,12 +9178,19 @@ def exception_type(
                     or "Content has no parts." in error_str
                 ):
                     exception_mapping_worked = True
-                    raise APIError(
-                        message=f"VertexAIException APIError - {error_str}",
+                    raise litellm.InternalServerError(
+                        message=f"litellm.InternalServerError: VertexAIException - {error_str}",
                         status_code=500,
                         model=model,
                         llm_provider="vertex_ai",
-                        request=original_exception.request,
+                        request=(
+                            original_exception.request
+                            if hasattr(original_exception, "request")
+                            else httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            )
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif "403" in error_str:
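Note: unparseable Vertex AI responses ("Content has no parts.") are now mapped to litellm.InternalServerError instead of a generic APIError, and the attached request falls back to a placeholder when the original exception has no .request attribute. A standalone sketch of that guard:

    import httpx

    original_exception = ValueError("Content has no parts.")  # illustrative; has no .request
    request = (
        original_exception.request
        if hasattr(original_exception, "request")
        else httpx.Request(method="POST", url="https://cloud.google.com/vertex-ai/")
    )
    print(request.method)  # POST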
@@ -9183,7 +9199,13 @@
                         message=f"VertexAIException BadRequestError - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif "The response was blocked." in error_str:
@@ -9230,12 +9252,18 @@
                         model=model,
                         llm_provider="vertex_ai",
                         litellm_debug_info=extra_information,
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                     )
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
-                    raise APIError(
-                        message=f"VertexAIException APIError - {error_str}",
+                    raise litellm.InternalServerError(
+                        message=f"VertexAIException InternalServerError - {error_str}",
                         status_code=500,
                         model=model,
                         llm_provider="vertex_ai",
@@ -11423,12 +11451,27 @@ class CustomStreamWrapper:
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "bedrock":
+                from litellm.types.llms.bedrock import GenericStreamingChunk
+
                 if self.received_finish_reason is not None:
                     raise StopIteration
-                response_obj = self.handle_bedrock_stream(chunk)
+                response_obj: GenericStreamingChunk = chunk
                 completion_obj["content"] = response_obj["text"]
+
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
+
+                if (
+                    self.stream_options
+                    and self.stream_options.get("include_usage", False) is True
+                    and response_obj["usage"] is not None
+                ):
+                    self.sent_stream_usage = True
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"]["inputTokens"],
+                        completion_tokens=response_obj["usage"]["outputTokens"],
+                        total_tokens=response_obj["usage"]["totalTokens"],
+                    )
             elif self.custom_llm_provider == "sagemaker":
                 print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
                 response_obj = self.handle_sagemaker_stream(chunk)
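Note: on the Bedrock httpx route the stream already yields GenericStreamingChunk dicts, so the wrapper reads text / is_finished / finish_reason / usage straight off the chunk instead of calling handle_bedrock_stream, and converts Bedrock's token counters into litellm.Usage when stream_options requests usage. A standalone sketch of that field mapping with an illustrative chunk:

    chunk = {
        "text": "Hello",
        "is_finished": True,
        "finish_reason": "stop",
        "usage": {"inputTokens": 12, "outputTokens": 5, "totalTokens": 17},
    }

    usage = {
        "prompt_tokens": chunk["usage"]["inputTokens"],
        "completion_tokens": chunk["usage"]["outputTokens"],
        "total_tokens": chunk["usage"]["totalTokens"],
    }
    assert usage["total_tokens"] == usage["prompt_tokens"] + usage["completion_tokens"]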
@@ -11695,7 +11738,7 @@ class CustomStreamWrapper:
                 and hasattr(model_response, "usage")
                 and hasattr(model_response.usage, "prompt_tokens")
             ):
-                if self.sent_first_chunk == False:
+                if self.sent_first_chunk is False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
@@ -11863,6 +11906,8 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
+            if self.completion_stream is None:
+                self.fetch_sync_stream()
             while True:
                 if (
                     isinstance(self.completion_stream, str)
@@ -11937,6 +11982,14 @@ class CustomStreamWrapper:
                 custom_llm_provider=self.custom_llm_provider,
             )
 
+    def fetch_sync_stream(self):
+        if self.completion_stream is None and self.make_call is not None:
+            # Call make_call to get the completion stream
+            self.completion_stream = self.make_call(client=litellm.module_level_client)
+            self._stream_iter = self.completion_stream.__iter__()
+
+        return self.completion_stream
+
     async def fetch_stream(self):
         if self.completion_stream is None and self.make_call is not None:
             # Call make_call to get the completion stream
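Note: the new fetch_sync_stream mirrors the async fetch_stream: __next__ lazily creates the sync stream through make_call the first time it is needed and caches the iterator. A standalone sketch of the pattern (simplified; the real method passes litellm.module_level_client to make_call):

    class LazyStream:
        def __init__(self, make_call):
            self.make_call = make_call
            self.completion_stream = None
            self._stream_iter = None

        def fetch_sync_stream(self):
            if self.completion_stream is None and self.make_call is not None:
                self.completion_stream = self.make_call()  # client arg omitted in this sketch
                self._stream_iter = iter(self.completion_stream)
            return self.completion_stream

        def __next__(self):
            if self.completion_stream is None:
                self.fetch_sync_stream()
            return next(self._stream_iter)

    stream = LazyStream(make_call=lambda: ["chunk-1", "chunk-2"])
    print(next(stream), next(stream))  # chunk-1 chunk-2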