From 4a3b08496129841597b1188820a3c45a01ee9abf Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 11 May 2024 13:43:08 -0700
Subject: [PATCH] feat(bedrock_httpx.py): moves to using httpx client for
 bedrock cohere calls

---
 .../generic_api_callback.py                   |   3 -
 litellm/integrations/aispend.py               |   2 -
 litellm/integrations/berrispend.py            |   1 -
 litellm/integrations/clickhouse.py            |   4 -
 litellm/integrations/custom_logger.py         |   2 -
 litellm/integrations/datadog.py               |   2 -
 litellm/integrations/dynamodb.py              |   2 -
 litellm/integrations/helicone.py              |   2 -
 litellm/integrations/langfuse.py              |   4 +-
 litellm/integrations/langsmith.py             |   2 -
 litellm/integrations/lunary.py                |   9 +-
 litellm/integrations/openmeter.py             |   2 -
 litellm/integrations/prometheus.py            |   2 -
 litellm/integrations/prometheus_services.py   |   2 -
 litellm/integrations/prompt_layer.py          |   2 -
 litellm/integrations/s3.py                    |   4 +-
 litellm/integrations/slack_alerting.py        |   2 -
 litellm/integrations/supabase.py              |   2 -
 litellm/integrations/weights_biases.py        |  11 +-
 litellm/llms/bedrock_httpx.py                 | 124 ++++++++++++++++++
 litellm/main.py                               |   3 +-
 .../proxy/example_config_yaml/custom_auth.py  |   3 -
 litellm/router_strategy/least_busy.py         |   2 -
 litellm/router_strategy/lowest_cost.py        |   3 +-
 litellm/router_strategy/lowest_latency.py     |   2 -
 litellm/router_strategy/lowest_tpm_rpm.py     |   2 -
 litellm/router_strategy/lowest_tpm_rpm_v2.py  |   2 -
 litellm/tests/test_completion.py              |   9 ++
 litellm/utils.py                              |   1 -
 29 files changed, 147 insertions(+), 64 deletions(-)
 create mode 100644 litellm/llms/bedrock_httpx.py

diff --git a/enterprise/enterprise_callbacks/generic_api_callback.py b/enterprise/enterprise_callbacks/generic_api_callback.py
index 076c13d5e..cf1d22e8f 100644
--- a/enterprise/enterprise_callbacks/generic_api_callback.py
+++ b/enterprise/enterprise_callbacks/generic_api_callback.py
@@ -10,7 +10,6 @@ from litellm.caching import DualCache
 
 from typing import Literal, Union
 
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
@@ -19,8 +18,6 @@ import traceback
 
 import dotenv, os
 import requests
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/aispend.py b/litellm/integrations/aispend.py
index a893f8923..2fe8ea0df 100644
--- a/litellm/integrations/aispend.py
+++ b/litellm/integrations/aispend.py
@@ -1,8 +1,6 @@
 #### What this does ####
 #    On success + failure, log events to aispend.io
 import dotenv, os
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime
 
diff --git a/litellm/integrations/berrispend.py b/litellm/integrations/berrispend.py
index 1f0ae4581..7d30b706c 100644
--- a/litellm/integrations/berrispend.py
+++ b/litellm/integrations/berrispend.py
@@ -3,7 +3,6 @@
 import dotenv, os
 import requests  # type: ignore
 
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime
 
diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py
index 7d1fb37d9..0c38b8626 100644
--- a/litellm/integrations/clickhouse.py
+++ b/litellm/integrations/clickhouse.py
@@ -8,8 +8,6 @@ from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
 
 from typing import Literal, Union
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
@@ -18,8 +16,6 @@ import traceback
 
 import dotenv, os
 import requests
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index 8a3e0f467..d50882592 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -6,8 +6,6 @@ from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
 
 from typing import Literal, Union, Optional
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py
index d969341fc..6d5e08faf 100644
--- a/litellm/integrations/datadog.py
+++ b/litellm/integrations/datadog.py
@@ -3,8 +3,6 @@
 
 import dotenv, os
 import requests  # type: ignore
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/dynamodb.py b/litellm/integrations/dynamodb.py
index b5462ee7f..21ccabe4b 100644
--- a/litellm/integrations/dynamodb.py
+++ b/litellm/integrations/dynamodb.py
@@ -3,8 +3,6 @@
 
 import dotenv, os
 import requests  # type: ignore
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py
index c8c107541..85e73258e 100644
--- a/litellm/integrations/helicone.py
+++ b/litellm/integrations/helicone.py
@@ -3,8 +3,6 @@
 import dotenv, os
 import requests  # type: ignore
 import litellm
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 1e957dfcf..f27d19968 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -1,8 +1,6 @@
 #### What this does ####
 #    On success, logs events to Langfuse
-import dotenv, os
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
+import os
 import copy
 import traceback
 from packaging.version import Version
diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py
index 8a0fb3852..92e440215 100644
--- a/litellm/integrations/langsmith.py
+++ b/litellm/integrations/langsmith.py
@@ -3,8 +3,6 @@
 import dotenv, os  # type: ignore
 import requests  # type: ignore
 from datetime import datetime
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import asyncio
 import types
diff --git a/litellm/integrations/lunary.py b/litellm/integrations/lunary.py
index 6ddf2ca59..52316f315 100644
--- a/litellm/integrations/lunary.py
+++ b/litellm/integrations/lunary.py
@@ -2,14 +2,11 @@
 #    On success + failure, log events to lunary.ai
 from datetime import datetime, timezone
 import traceback
-import dotenv
 import importlib
 import sys
 
 import packaging
 
-dotenv.load_dotenv()
-
 
 # convert to {completion: xx, tokens: xx}
 def parse_usage(usage):
@@ -62,14 +59,16 @@ class LunaryLogger:
             version = importlib.metadata.version("lunary")
             # if version < 0.1.43 then raise ImportError
             if packaging.version.Version(version) < packaging.version.Version("0.1.43"):
-                print(
+                print(  # noqa
                     "Lunary version outdated. Required: >= 0.1.43. Upgrade via 'pip install lunary --upgrade'"
                 )
                 raise ImportError
 
             self.lunary_client = lunary
         except ImportError:
-            print("Lunary not installed. Please install it using 'pip install lunary'")
+            print(  # noqa
+                "Lunary not installed. Please install it using 'pip install lunary'"
+            )  # noqa
             raise ImportError
 
     def log_event(
diff --git a/litellm/integrations/openmeter.py b/litellm/integrations/openmeter.py
index a454739d5..2c470d6f4 100644
--- a/litellm/integrations/openmeter.py
+++ b/litellm/integrations/openmeter.py
@@ -3,8 +3,6 @@
 
 import dotenv, os, json
 import litellm
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 577946ce1..6fbc6ca4c 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -4,8 +4,6 @@
 
 import dotenv, os
 import requests  # type: ignore
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py
index d276bb85b..8fce8930d 100644
--- a/litellm/integrations/prometheus_services.py
+++ b/litellm/integrations/prometheus_services.py
@@ -5,8 +5,6 @@
 
 import dotenv, os
 import requests  # type: ignore
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/prompt_layer.py b/litellm/integrations/prompt_layer.py
index ce610e1ef..531ed75fe 100644
--- a/litellm/integrations/prompt_layer.py
+++ b/litellm/integrations/prompt_layer.py
@@ -3,8 +3,6 @@
 import dotenv, os
 import requests  # type: ignore
 from pydantic import BaseModel
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py
index d31b15840..d131e44f0 100644
--- a/litellm/integrations/s3.py
+++ b/litellm/integrations/s3.py
@@ -1,9 +1,7 @@
 #### What this does ####
 #    On success + failure, log events to Supabase
 
-import dotenv, os
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
+import os
 import traceback
 import datetime, subprocess, sys
 import litellm, uuid
diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 07c3585f0..d03922bc1 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -2,8 +2,6 @@
 #    Class for sending Slack Alerts #
 import dotenv, os
 from litellm.proxy._types import UserAPIKeyAuth
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 from litellm._logging import verbose_logger, verbose_proxy_logger
 import litellm, threading
 from typing import List, Literal, Any, Union, Optional, Dict
diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py
index 58beba8a3..4e6bf517f 100644
--- a/litellm/integrations/supabase.py
+++ b/litellm/integrations/supabase.py
@@ -3,8 +3,6 @@
 
 import dotenv, os
 import requests  # type: ignore
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import datetime, subprocess, sys
 import litellm
diff --git a/litellm/integrations/weights_biases.py b/litellm/integrations/weights_biases.py
index 53e6070a5..a56233b22 100644
--- a/litellm/integrations/weights_biases.py
+++ b/litellm/integrations/weights_biases.py
@@ -21,11 +21,11 @@ try:
         # contains a (known) object attribute
         object: Literal["chat.completion", "edit", "text_completion"]
 
-        def __getitem__(self, key: K) -> V:
-            ...  # pragma: no cover
+        def __getitem__(self, key: K) -> V: ...  # noqa
 
-        def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
-            ...  # pragma: no cover
+        def get(  # noqa
+            self, key: K, default: Optional[V] = None
+        ) -> Optional[V]: ...  # pragma: no cover
 
     class OpenAIRequestResponseResolver:
         def __call__(
@@ -173,12 +173,11 @@ except:
 
 #### What this does ####
 #    On success, logs events to Langfuse
-import dotenv, os
+import os
 import requests
 import requests
 from datetime import datetime
 
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
new file mode 100644
index 000000000..c6b0327e6
--- /dev/null
+++ b/litellm/llms/bedrock_httpx.py
@@ -0,0 +1,124 @@
+# What is this?
+## Initial implementation of calling bedrock via httpx client (allows for async calls).
+## V0 - just covers cohere command-r support
+
+import os, types
+import json
+from enum import Enum
+import requests, copy  # type: ignore
+import time
+from typing import Callable, Optional, List, Literal, Union
+from litellm.utils import (
+    ModelResponse,
+    Usage,
+    map_finish_reason,
+    CustomStreamWrapper,
+    Message,
+    Choices,
+    get_secret,
+)
+import litellm
+from .prompt_templates.factory import prompt_factory, custom_prompt
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from .base import BaseLLM
+import httpx  # type: ignore
+from .bedrock import BedrockError
+
+
+class BedrockLLM(BaseLLM):
+    """
+    Example call
+
+    ```
+    curl --location --request POST 'https://bedrock-runtime.{aws_region_name}.amazonaws.com/model/{bedrock_model_name}/invoke' \
+        --header 'Content-Type: application/json' \
+        --header 'Accept: application/json' \
+        --user "$AWS_ACCESS_KEY_ID":"$AWS_SECRET_ACCESS_KEY" \
+        --aws-sigv4 "aws:amz:us-east-1:bedrock" \
+        --data-raw '{
+        "prompt": "Hi",
+        "temperature": 0,
+        "p": 0.9,
+        "max_tokens": 4096
+        }'
+    ```
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def get_credentials(
+        self,
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_region_name: Optional[str] = None,
+        aws_session_name: Optional[str] = None,
+        aws_profile_name: Optional[str] = None,
+        aws_role_name: Optional[str] = None,
+    ):
+        """
+        Return AWS credentials (botocore Credentials object; raw STS 'Credentials' dict if a role is assumed)
+        """
+        import boto3
+
+        ## CHECK IF 'os.environ/' is passed in
+        params_to_check: List[Optional[str]] = [
+            aws_access_key_id,
+            aws_secret_access_key,
+            aws_region_name,
+            aws_session_name,
+            aws_profile_name,
+            aws_role_name,
+        ]
+
+        # Iterate over parameters and update if needed
+        for i, param in enumerate(params_to_check):
+            if param and param.startswith("os.environ/"):
+                _v = get_secret(param)
+                if _v is not None and isinstance(_v, str):
+                    params_to_check[i] = _v
+        # Assign updated values back to parameters
+        (
+            aws_access_key_id,
+            aws_secret_access_key,
+            aws_region_name,
+            aws_session_name,
+            aws_profile_name,
+            aws_role_name,
+        ) = params_to_check
+
+        ### CHECK STS ###
+        if aws_role_name is not None and aws_session_name is not None:
+            sts_client = boto3.client(
+                "sts",
+                aws_access_key_id=aws_access_key_id,  # [OPTIONAL]
+                aws_secret_access_key=aws_secret_access_key,  # [OPTIONAL]
+            )
+
+            sts_response = sts_client.assume_role(
+                RoleArn=aws_role_name, RoleSessionName=aws_session_name
+            )
+
+            return sts_response["Credentials"]
+        elif aws_profile_name is not None:  ### CHECK SESSION ###
+            # uses auth values from AWS profile usually stored in ~/.aws/credentials
+            client = boto3.Session(profile_name=aws_profile_name)
+
+            return client.get_credentials()
+        else:
+            session = boto3.Session(
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key=aws_secret_access_key,
+                region_name=aws_region_name,
+            )
+
+            return session.get_credentials()
+
+    def completion(self, *args, **kwargs) -> Union[ModelResponse, CustomStreamWrapper]:
+        ## TODO: get credentials
+        ## TODO: generate signature
+        ## TODO: make request (not implemented yet - currently defers to BaseLLM.completion)
+        return super().completion(*args, **kwargs)
+
+    def embedding(self, *args, **kwargs):
+        return super().embedding(*args, **kwargs)
diff --git a/litellm/main.py b/litellm/main.py
index 9afdc7da2..8be71de0b 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -75,6 +75,7 @@ from .llms.anthropic import AnthropicChatCompletion
 from .llms.anthropic_text import AnthropicTextCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.predibase import PredibaseChatCompletion
+from .llms.bedrock_httpx import BedrockLLM
 from .llms.triton import TritonChatCompletion
 from .llms.prompt_templates.factory import (
     prompt_factory,
@@ -104,7 +105,6 @@ from litellm.utils import (
 )
 
 ####### ENVIRONMENT VARIABLES ###################
-dotenv.load_dotenv()  # Loading env variables using dotenv
 openai_chat_completions = OpenAIChatCompletion()
 openai_text_completions = OpenAITextCompletion()
 anthropic_chat_completions = AnthropicChatCompletion()
@@ -114,6 +114,7 @@ azure_text_completions = AzureTextCompletion()
 huggingface = Huggingface()
 predibase_chat_completions = PredibaseChatCompletion()
 triton_chat_completions = TritonChatCompletion()
+bedrock_chat_completion = BedrockLLM()
 ####### COMPLETION ENDPOINTS ################
 
 
diff --git a/litellm/proxy/example_config_yaml/custom_auth.py b/litellm/proxy/example_config_yaml/custom_auth.py
index a764a647a..6cecf466c 100644
--- a/litellm/proxy/example_config_yaml/custom_auth.py
+++ b/litellm/proxy/example_config_yaml/custom_auth.py
@@ -1,10 +1,7 @@
 from litellm.proxy._types import UserAPIKeyAuth, GenerateKeyRequest
 from fastapi import Request
-from dotenv import load_dotenv
 import os
 
-load_dotenv()
-
 
 async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     try:
diff --git a/litellm/router_strategy/least_busy.py b/litellm/router_strategy/least_busy.py
index 54d44b41d..417651fb3 100644
--- a/litellm/router_strategy/least_busy.py
+++ b/litellm/router_strategy/least_busy.py
@@ -8,8 +8,6 @@
 
 import dotenv, os, requests, random  # type: ignore
 from typing import Optional
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py
index 279af2ae9..fde7781b9 100644
--- a/litellm/router_strategy/lowest_cost.py
+++ b/litellm/router_strategy/lowest_cost.py
@@ -1,12 +1,11 @@
 #### What this does ####
 #   picks based on response time (for streaming, this is time to first token)
 from pydantic import BaseModel, Extra, Field, root_validator
-import dotenv, os, requests, random  # type: ignore
+import os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random
 
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py
index afdfc1779..a7b93d344 100644
--- a/litellm/router_strategy/lowest_latency.py
+++ b/litellm/router_strategy/lowest_latency.py
@@ -5,8 +5,6 @@ import dotenv, os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
diff --git a/litellm/router_strategy/lowest_tpm_rpm.py b/litellm/router_strategy/lowest_tpm_rpm.py
index 0a7773a84..625db7048 100644
--- a/litellm/router_strategy/lowest_tpm_rpm.py
+++ b/litellm/router_strategy/lowest_tpm_rpm.py
@@ -4,8 +4,6 @@
 import dotenv, os, requests, random
 from typing import Optional, Union, List, Dict
 from datetime import datetime
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 from litellm import token_counter
 from litellm.caching import DualCache
diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py
index f7a55d970..23e55f4a3 100644
--- a/litellm/router_strategy/lowest_tpm_rpm_v2.py
+++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py
@@ -5,8 +5,6 @@ import dotenv, os, requests, random
 from typing import Optional, Union, List, Dict
 import datetime as datetime_og
 from datetime import datetime
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback, asyncio, httpx
 import litellm
 from litellm import token_counter
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 04f4cc511..214dc105b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2584,6 +2584,15 @@ def test_completion_chat_sagemaker_mistral():
 # test_completion_chat_sagemaker_mistral()
 
 
+def test_completion_bedrock_command_r():
+    response = completion(
+        model="bedrock/cohere.command-r-plus-v1:0",
+        messages=[{"role": "user", "content": "Hey! how's it going?"}],
+    )
+
+    print(f"response: {response}")
+
+
 def test_completion_bedrock_titan_null_response():
     try:
         response = completion(
diff --git a/litellm/utils.py b/litellm/utils.py
index 9218f92a3..0fd7963ae 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -117,7 +117,6 @@ MAX_THREADS = 100
 
 # Create a ThreadPoolExecutor
 executor = ThreadPoolExecutor(max_workers=MAX_THREADS)
-dotenv.load_dotenv()  # Loading env variables using dotenv
 sentry_sdk_instance = None
 capture_exception = None
 add_breadcrumb = None