refactor(main.py): only route anthropic calls through converse api

v0 scope let's move function calling to converse api
2024-06-07 08:47:51 -07:00 · 2024-06-07 08:47:51 -07:00 · 35e4323095
commit 35e4323095
parent 51ba5652a0
6 changed files with 263 additions and 4332 deletions
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@ -1125,7 +1125,7 @@ class AmazonConverseConfig:
        maxTokens: Optional[int] = None,
        stopSequences: Optional[List[str]] = None,
        temperature: Optional[int] = None,
-        top_p: Optional[int] = None,
+        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
@ -1481,6 +1481,93 @@ class BedrockConverseLLM(BaseLLM):

            return session.get_credentials()

+    async def async_streaming(
+        self,
+        model: str,
+        messages: list,
+        api_base: str,
+        model_response: ModelResponse,
+        print_verbose: Callable,
+        data: str,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        encoding,
+        logging_obj,
+        stream,
+        optional_params: dict,
+        litellm_params=None,
+        logger_fn=None,
+        headers={},
+        client: Optional[AsyncHTTPHandler] = None,
+    ) -> CustomStreamWrapper:
+        streaming_response = CustomStreamWrapper(
+            completion_stream=None,
+            make_call=partial(
+                make_call,
+                client=client,
+                api_base=api_base,
+                headers=headers,
+                data=data,
+                model=model,
+                messages=messages,
+                logging_obj=logging_obj,
+            ),
+            model=model,
+            custom_llm_provider="bedrock",
+            logging_obj=logging_obj,
+        )
+        return streaming_response
+
+    async def async_completion(
+        self,
+        model: str,
+        messages: list,
+        api_base: str,
+        model_response: ModelResponse,
+        print_verbose: Callable,
+        data: str,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        encoding,
+        logging_obj,
+        stream,
+        optional_params: dict,
+        litellm_params=None,
+        logger_fn=None,
+        headers={},
+        client: Optional[AsyncHTTPHandler] = None,
+    ) -> Union[ModelResponse, CustomStreamWrapper]:
+        if client is None:
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    timeout = httpx.Timeout(timeout)
+                _params["timeout"] = timeout
+            client = AsyncHTTPHandler(**_params)  # type: ignore
+        else:
+            client = client  # type: ignore
+
+        try:
+            response = await client.post(api_base, headers=headers, data=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=err.response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
+
+        return self.process_response(
+            model=model,
+            response=response,
+            model_response=model_response,
+            stream=stream if isinstance(stream, bool) else False,
+            logging_obj=logging_obj,
+            api_key="",
+            data=data,
+            messages=messages,
+            print_verbose=print_verbose,
+            optional_params=optional_params,
+            encoding=encoding,
+        )
+
    def completion(
        self,
        model: str,
@ -1504,7 +1591,7 @@ class BedrockConverseLLM(BaseLLM):
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
            from botocore.credentials import Credentials
-        except ImportError as e:
+        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")

        ## SETUP ##
@ -1658,6 +1745,46 @@ class BedrockConverseLLM(BaseLLM):
        )

        ### ROUTING (ASYNC, STREAMING, SYNC)
+        if acompletion:
+            if isinstance(client, HTTPHandler):
+                client = None
+            if stream is True and provider != "ai21":
+                return self.async_streaming(
+                    model=model,
+                    messages=messages,
+                    data=data,
+                    api_base=prepped.url,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    encoding=encoding,
+                    logging_obj=logging_obj,
+                    optional_params=optional_params,
+                    stream=True,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    headers=prepped.headers,
+                    timeout=timeout,
+                    client=client,
+                )  # type: ignore
+            ### ASYNC COMPLETION
+            return self.async_completion(
+                model=model,
+                messages=messages,
+                data=data,
+                api_base=prepped.url,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                encoding=encoding,
+                logging_obj=logging_obj,
+                optional_params=optional_params,
+                stream=stream,  # type: ignore
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                headers=prepped.headers,
+                timeout=timeout,
+                client=client,
+            )  # type: ignore
+
        if (stream is not None and stream is True) and provider != "ai21":

            streaming_response = CustomStreamWrapper(
@ -1666,7 +1793,7 @@ class BedrockConverseLLM(BaseLLM):
                    make_sync_call,
                    client=None,
                    api_base=prepped.url,
-                    headers=prepped.headers,
+                    headers=prepped.headers,  # type: ignore
                    data=data,
                    model=model,
                    messages=messages,
@ -1702,7 +1829,7 @@ class BedrockConverseLLM(BaseLLM):
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise BedrockError(status_code=error_code, message=response.text)
-        except httpx.TimeoutException as e:
+        except httpx.TimeoutException:
            raise BedrockError(status_code=408, message="Timeout error occurred.")

        return self.process_response(
@ -1737,7 +1864,7 @@ class AWSEventStreamDecoder:
        self.model = model
        self.parser = EventStreamJSONParser()

-    def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
+    def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
        text = ""
        tool_str = ""
        is_finished = False
@ -1762,7 +1889,7 @@ class AWSEventStreamDecoder:
        )
        return response

-    def _old_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
+    def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
        text = ""
        is_finished = False
        finish_reason = ""
@ -1774,19 +1901,8 @@ class AWSEventStreamDecoder:
            is_finished = True
            finish_reason = "stop"
        ######## bedrock.anthropic mappings ###############
-        elif "completion" in chunk_data:  # not claude-3
-            text = chunk_data["completion"]  # bedrock.anthropic
-            stop_reason = chunk_data.get("stop_reason", None)
-            if stop_reason != None:
-                is_finished = True
-                finish_reason = stop_reason
        elif "delta" in chunk_data:
-            if chunk_data["delta"].get("text", None) is not None:
-                text = chunk_data["delta"]["text"]
-            stop_reason = chunk_data["delta"].get("stop_reason", None)
-            if stop_reason != None:
-                is_finished = True
-                finish_reason = stop_reason
+            return self.converse_chunk_parser(chunk_data=chunk_data)
        ######## bedrock.mistral mappings ###############
        elif "outputs" in chunk_data:
            if (
@ -1851,9 +1967,15 @@ class AWSEventStreamDecoder:

    def _parse_message_from_event(self, event) -> Optional[str]:
        response_dict = event.to_response_dict()
+        parsed_response = self.parser.parse(response_dict, get_response_stream_shape())
        if response_dict["status_code"] != 200:
            raise ValueError(f"Bad response code, expected 200: {response_dict}")
-
+        if "chunk" in parsed_response:
+            chunk = parsed_response.get("chunk")
+            if not chunk:
+                return None
+            return chunk.get("bytes").decode()  # type: ignore[no-any-return]
+        else:
            chunk = response_dict.get("body")
            if not chunk:
                return None
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@ -168,7 +168,6 @@ class HTTPHandler:
        return response

    def __del__(self) -> None:
-        traceback.print_stack()
        try:
            self.close()
        except Exception:
--- a/litellm/main.py
+++ b/litellm/main.py
@ -121,7 +121,8 @@ azure_text_completions = AzureTextCompletion()
 huggingface = Huggingface()
 predibase_chat_completions = PredibaseChatCompletion()
 triton_chat_completions = TritonChatCompletion()
-bedrock_chat_completion = BedrockConverseLLM()
+bedrock_chat_completion = BedrockLLM()
+bedrock_converse_chat_completion = BedrockConverseLLM()
 vertex_chat_completion = VertexLLM()
 ####### COMPLETION ENDPOINTS ################

@ -2096,6 +2097,24 @@ def completion(
                            custom_llm_provider="bedrock",
                            logging_obj=logging,
                        )
+            else:
+                if model.startswith("anthropic"):
+                    response = bedrock_converse_chat_completion.completion(
+                        model=model,
+                        messages=messages,
+                        custom_prompt_dict=custom_prompt_dict,
+                        model_response=model_response,
+                        print_verbose=print_verbose,
+                        optional_params=optional_params,
+                        litellm_params=litellm_params,
+                        logger_fn=logger_fn,
+                        encoding=encoding,
+                        logging_obj=logging,
+                        extra_headers=extra_headers,
+                        timeout=timeout,
+                        acompletion=acompletion,
+                        client=client,
+                    )
                else:
                    response = bedrock_chat_completion.completion(
                        model=model,
--- a/litellm/tests/log.txt
+++ b/litellm/tests/log.txt
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@ -1288,14 +1288,14 @@ async def test_completion_replicate_llama3_streaming(sync_mode):
@pytest.mark.parametrize(
    "model",
    [
-        # "bedrock/cohere.command-r-plus-v1:0",
-        # "anthropic.claude-3-sonnet-20240229-v1:0",
-        # "anthropic.claude-instant-v1",
-        # "bedrock/ai21.j2-mid",
-        # "mistral.mistral-7b-instruct-v0:2",
-        # "bedrock/amazon.titan-tg1-large",
-        # "meta.llama3-8b-instruct-v1:0",
-        "cohere.command-text-v14"
+        "bedrock/cohere.command-r-plus-v1:0",
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "anthropic.claude-instant-v1",
+        "bedrock/ai21.j2-mid",
+        "mistral.mistral-7b-instruct-v0:2",
+        "bedrock/amazon.titan-tg1-large",
+        "meta.llama3-8b-instruct-v1:0",
+        "cohere.command-text-v14",
    ],
 )
@pytest.mark.asyncio
@ -1324,8 +1324,6 @@ async def test_bedrock_httpx_streaming(sync_mode, model):
                raise Exception("finish reason not set")
            if complete_response.strip() == "":
                raise Exception("Empty response received")
-
-            assert False
        else:
            response: litellm.CustomStreamWrapper = await litellm.acompletion(  # type: ignore
                model=model,
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -5620,13 +5620,80 @@ def get_optional_params(
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
+        if "ai21" in model:
+            _check_valid_arg(supported_params=supported_params)
+            # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[],
+            # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
+            if max_tokens is not None:
+                optional_params["maxTokens"] = max_tokens
+            if temperature is not None:
+                optional_params["temperature"] = temperature
+            if top_p is not None:
+                optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
+        elif "anthropic" in model:
            _check_valid_arg(supported_params=supported_params)
            optional_params = litellm.AmazonConverseConfig().map_openai_params(
                model=model,
                non_default_params=non_default_params,
                optional_params=optional_params,
-            drop_params=drop_params,
+                drop_params=(
+                    drop_params
+                    if drop_params is not None and isinstance(drop_params, bool)
+                    else False
+                ),
            )
+        elif "amazon" in model:  # amazon titan llms
+            _check_valid_arg(supported_params=supported_params)
+            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
+            if max_tokens is not None:
+                optional_params["maxTokenCount"] = max_tokens
+            if temperature is not None:
+                optional_params["temperature"] = temperature
+            if stop is not None:
+                filtered_stop = _map_and_modify_arg(
+                    {"stop": stop}, provider="bedrock", model=model
+                )
+                optional_params["stopSequences"] = filtered_stop["stop"]
+            if top_p is not None:
+                optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
+        elif "meta" in model:  # amazon / meta llms
+            _check_valid_arg(supported_params=supported_params)
+            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
+            if max_tokens is not None:
+                optional_params["max_gen_len"] = max_tokens
+            if temperature is not None:
+                optional_params["temperature"] = temperature
+            if top_p is not None:
+                optional_params["top_p"] = top_p
+            if stream:
+                optional_params["stream"] = stream
+        elif "cohere" in model:  # cohere models on bedrock
+            _check_valid_arg(supported_params=supported_params)
+            # handle cohere params
+            if stream:
+                optional_params["stream"] = stream
+            if temperature is not None:
+                optional_params["temperature"] = temperature
+            if max_tokens is not None:
+                optional_params["max_tokens"] = max_tokens
+        elif "mistral" in model:
+            _check_valid_arg(supported_params=supported_params)
+            # mistral params on bedrock
+            # \"max_tokens\":400,\"temperature\":0.7,\"top_p\":0.7,\"stop\":[\"\\\\n\\\\nHuman:\"]}"
+            if max_tokens is not None:
+                optional_params["max_tokens"] = max_tokens
+            if temperature is not None:
+                optional_params["temperature"] = temperature
+            if top_p is not None:
+                optional_params["top_p"] = top_p
+            if stop is not None:
+                optional_params["stop"] = stop
+            if stream is not None:
+                optional_params["stream"] = stream
    elif custom_llm_provider == "aleph_alpha":
        supported_params = [
            "max_tokens",