mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Merge branch 'main' into litellm_security_fix
commit 718b547646
31 changed files with 2394 additions and 5332 deletions
litellm/utils.py
@@ -239,6 +239,8 @@ def map_finish_reason(
         return "length"
+    elif finish_reason == "tool_use": # anthropic
+        return "tool_calls"
     elif finish_reason == "content_filtered":
         return "content_filter"
     return finish_reason
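Note: this hunk extends map_finish_reason so Anthropic's "tool_use" stop reason is normalized to the OpenAI-style "tool_calls" value. A minimal standalone sketch of the added mapping (hypothetical helper name, not litellm's actual function):

    def normalize_finish_reason(finish_reason: str) -> str:
        if finish_reason == "tool_use":  # anthropic tool-call stop reason
            return "tool_calls"
        elif finish_reason == "content_filtered":
            return "content_filter"
        return finish_reason  # anything already OpenAI-style passes through

    assert normalize_finish_reason("tool_use") == "tool_calls"
    assert normalize_finish_reason("stop") == "stop"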
@@ -5655,19 +5657,29 @@ def get_optional_params(
                 optional_params["stream"] = stream
         elif "anthropic" in model:
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
-            if model.startswith("anthropic.claude-3"):
-                optional_params = (
-                    litellm.AmazonAnthropicClaude3Config().map_openai_params(
+            if "aws_bedrock_client" in passed_params: # deprecated boto3.invoke route.
+                if model.startswith("anthropic.claude-3"):
+                    optional_params = (
+                        litellm.AmazonAnthropicClaude3Config().map_openai_params(
+                            non_default_params=non_default_params,
+                            optional_params=optional_params,
+                        )
+                    )
+                else:
+                    optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
+                        non_default_params=non_default_params,
+                        optional_params=optional_params,
+                    )
-                )
-            else:
-                optional_params = litellm.AmazonAnthropicConfig().map_openai_params(
+            else: # bedrock httpx route
+                optional_params = litellm.AmazonConverseConfig().map_openai_params(
+                    model=model,
+                    non_default_params=non_default_params,
+                    optional_params=optional_params,
+                    drop_params=(
+                        drop_params
+                        if drop_params is not None and isinstance(drop_params, bool)
+                        else False
+                    ),
+                )
         elif "amazon" in model: # amazon titan llms
             _check_valid_arg(supported_params=supported_params)
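Note: in get_optional_params, Bedrock Anthropic parameter mapping is now split by route. The deprecated boto3 invoke path (detected via an aws_bedrock_client entry in passed_params) keeps the old AmazonAnthropicClaude3Config / AmazonAnthropicConfig mapping, while everything else goes through AmazonConverseConfig on the httpx route, which also receives drop_params coerced to a plain bool. A standalone sketch of that coercion, mirroring the expression in the hunk:

    def coerce_drop_params(drop_params):
        # Only an explicit bool is honored; None or any other type falls back to False.
        return drop_params if drop_params is not None and isinstance(drop_params, bool) else False

    assert coerce_drop_params(True) is True
    assert coerce_drop_params(None) is False
    assert coerce_drop_params("yes") is False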
@@ -6445,20 +6457,7 @@ def get_supported_openai_params(
         - None if unmapped
     """
     if custom_llm_provider == "bedrock":
-        if model.startswith("anthropic.claude-3"):
-            return litellm.AmazonAnthropicClaude3Config().get_supported_openai_params()
-        elif model.startswith("anthropic"):
-            return litellm.AmazonAnthropicConfig().get_supported_openai_params()
-        elif model.startswith("ai21"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("amazon"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
-        elif model.startswith("meta"):
-            return ["max_tokens", "temperature", "top_p", "stream"]
-        elif model.startswith("cohere"):
-            return ["stream", "temperature", "max_tokens"]
-        elif model.startswith("mistral"):
-            return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+        return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "ollama":
         return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
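Note: the per-prefix lists for Bedrock models (anthropic, ai21, amazon, meta, cohere, mistral) are collapsed into a single AmazonConverseConfig lookup that decides supported OpenAI params per model. A hedged usage sketch (the model id is illustrative; exact return values depend on litellm's config for that model):

    import litellm

    params = litellm.get_supported_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        custom_llm_provider="bedrock",
    )
    print(params)  # e.g. ["max_tokens", "temperature", "top_p", "stream", ...]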
@@ -8558,7 +8557,11 @@ def exception_type(
             extra_information = f"\nModel: {model}"
             if _api_base:
                 extra_information += f"\nAPI Base: `{_api_base}`"
-            if messages and len(messages) > 0:
+            if (
+                messages
+                and len(messages) > 0
+                and litellm.redact_messages_in_exceptions is False
+            ):
                 extra_information += f"\nMessages: `{messages}`"
 
             if _model_group is not None:
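Note: raw request messages are now appended to the exception's debug information only when litellm.redact_messages_in_exceptions is False. A standalone sketch of the gate, with a local flag standing in for the module-level setting:

    redact_messages_in_exceptions = True  # stand-in for litellm.redact_messages_in_exceptions
    messages = [{"role": "user", "content": "my secret prompt"}]

    extra_information = "\nModel: gpt-4"
    if messages and len(messages) > 0 and redact_messages_in_exceptions is False:
        extra_information += f"\nMessages: `{messages}`"

    assert "secret" not in extra_information  # redaction keeps prompts out of error text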
@@ -9124,7 +9127,7 @@ def exception_type(
                 if "Unable to locate credentials" in error_str:
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"SagemakerException - {error_str}",
+                        message=f"litellm.BadRequestError: SagemakerException - {error_str}",
                         model=model,
                         llm_provider="sagemaker",
                         response=original_exception.response,
@@ -9158,10 +9161,16 @@ def exception_type(
                 ):
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"VertexAIException BadRequestError - {error_str}",
+                        message=f"litellm.BadRequestError: VertexAIException - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif (
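Note: instead of forwarding original_exception.response (which may not exist for Vertex AI errors), the mapped BadRequestError now carries a synthetic httpx.Response built around a placeholder request; the same pattern repeats in the hunks below. A standalone sketch of that fallback object (URL tidied here; the diff's string keeps a leading space):

    import httpx

    fallback_response = httpx.Response(
        status_code=429,  # placeholder status used by the diff
        request=httpx.Request(method="POST", url="https://cloud.google.com/vertex-ai/"),
    )
    print(fallback_response.status_code)  # 429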
@@ -9169,12 +9178,19 @@ def exception_type(
                     or "Content has no parts." in error_str
                 ):
                     exception_mapping_worked = True
-                    raise APIError(
-                        message=f"VertexAIException APIError - {error_str}",
+                    raise litellm.InternalServerError(
+                        message=f"litellm.InternalServerError: VertexAIException - {error_str}",
                         status_code=500,
                         model=model,
                         llm_provider="vertex_ai",
-                        request=original_exception.request,
+                        request=(
+                            original_exception.request
+                            if hasattr(original_exception, "request")
+                            else httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            )
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif "403" in error_str:
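Note: unparseable Vertex AI responses ("Content has no parts.") are now mapped to litellm.InternalServerError instead of a generic APIError, and the attached request falls back to a placeholder when the original exception has no .request attribute. A standalone sketch of that guard:

    import httpx

    original_exception = ValueError("Content has no parts.")  # illustrative; has no .request
    request = (
        original_exception.request
        if hasattr(original_exception, "request")
        else httpx.Request(method="POST", url="https://cloud.google.com/vertex-ai/")
    )
    print(request.method)  # POST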
@@ -9183,7 +9199,13 @@
                         message=f"VertexAIException BadRequestError - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                         litellm_debug_info=extra_information,
                     )
                 elif "The response was blocked." in error_str:
@@ -9230,12 +9252,18 @@
                         model=model,
                         llm_provider="vertex_ai",
                         litellm_debug_info=extra_information,
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                     )
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
-                    raise APIError(
-                        message=f"VertexAIException APIError - {error_str}",
+                    raise litellm.InternalServerError(
+                        message=f"VertexAIException InternalServerError - {error_str}",
                         status_code=500,
                         model=model,
                         llm_provider="vertex_ai",
@@ -11423,12 +11451,27 @@ class CustomStreamWrapper:
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "bedrock":
+                from litellm.types.llms.bedrock import GenericStreamingChunk
+
                 if self.received_finish_reason is not None:
                     raise StopIteration
-                response_obj = self.handle_bedrock_stream(chunk)
+                response_obj: GenericStreamingChunk = chunk
                 completion_obj["content"] = response_obj["text"]
+
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
+
+                if (
+                    self.stream_options
+                    and self.stream_options.get("include_usage", False) is True
+                    and response_obj["usage"] is not None
+                ):
+                    self.sent_stream_usage = True
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"]["inputTokens"],
+                        completion_tokens=response_obj["usage"]["outputTokens"],
+                        total_tokens=response_obj["usage"]["totalTokens"],
+                    )
             elif self.custom_llm_provider == "sagemaker":
                 print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
                 response_obj = self.handle_sagemaker_stream(chunk)
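Note: on the Bedrock httpx route the stream already yields GenericStreamingChunk dicts, so the wrapper reads text / is_finished / finish_reason / usage straight off the chunk instead of calling handle_bedrock_stream, and converts Bedrock's token counters into litellm.Usage when stream_options requests usage. A standalone sketch of that field mapping with an illustrative chunk:

    chunk = {
        "text": "Hello",
        "is_finished": True,
        "finish_reason": "stop",
        "usage": {"inputTokens": 12, "outputTokens": 5, "totalTokens": 17},
    }

    usage = {
        "prompt_tokens": chunk["usage"]["inputTokens"],
        "completion_tokens": chunk["usage"]["outputTokens"],
        "total_tokens": chunk["usage"]["totalTokens"],
    }
    assert usage["total_tokens"] == usage["prompt_tokens"] + usage["completion_tokens"]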
@@ -11695,7 +11738,7 @@ class CustomStreamWrapper:
                 and hasattr(model_response, "usage")
                 and hasattr(model_response.usage, "prompt_tokens")
             ):
-                if self.sent_first_chunk == False:
+                if self.sent_first_chunk is False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
@@ -11863,6 +11906,8 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
+            if self.completion_stream is None:
+                self.fetch_sync_stream()
             while True:
                 if (
                     isinstance(self.completion_stream, str)
@@ -11937,6 +11982,14 @@ class CustomStreamWrapper:
                 custom_llm_provider=self.custom_llm_provider,
             )
 
+    def fetch_sync_stream(self):
+        if self.completion_stream is None and self.make_call is not None:
+            # Call make_call to get the completion stream
+            self.completion_stream = self.make_call(client=litellm.module_level_client)
+            self._stream_iter = self.completion_stream.__iter__()
+
+        return self.completion_stream
+
     async def fetch_stream(self):
         if self.completion_stream is None and self.make_call is not None:
             # Call make_call to get the completion stream
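Note: the new fetch_sync_stream mirrors the async fetch_stream: __next__ lazily creates the sync stream through make_call the first time it is needed and caches the iterator. A standalone sketch of the pattern (simplified; the real method passes litellm.module_level_client to make_call):

    class LazyStream:
        def __init__(self, make_call):
            self.make_call = make_call
            self.completion_stream = None
            self._stream_iter = None

        def fetch_sync_stream(self):
            if self.completion_stream is None and self.make_call is not None:
                self.completion_stream = self.make_call()  # client arg omitted in this sketch
                self._stream_iter = iter(self.completion_stream)
            return self.completion_stream

        def __next__(self):
            if self.completion_stream is None:
                self.fetch_sync_stream()
            return next(self._stream_iter)

    stream = LazyStream(make_call=lambda: ["chunk-1", "chunk-2"])
    print(next(stream), next(stream))  # chunk-1 chunk-2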