From 4a3b08496129841597b1188820a3c45a01ee9abf Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 11 May 2024 13:43:08 -0700 Subject: [PATCH] feat(bedrock_httpx.py): moves to using httpx client for bedrock cohere calls --- .../generic_api_callback.py | 3 - litellm/integrations/aispend.py | 2 - litellm/integrations/berrispend.py | 1 - litellm/integrations/clickhouse.py | 4 - litellm/integrations/custom_logger.py | 2 - litellm/integrations/datadog.py | 2 - litellm/integrations/dynamodb.py | 2 - litellm/integrations/helicone.py | 2 - litellm/integrations/langfuse.py | 4 +- litellm/integrations/langsmith.py | 2 - litellm/integrations/lunary.py | 9 +- litellm/integrations/openmeter.py | 2 - litellm/integrations/prometheus.py | 2 - litellm/integrations/prometheus_services.py | 2 - litellm/integrations/prompt_layer.py | 2 - litellm/integrations/s3.py | 4 +- litellm/integrations/slack_alerting.py | 2 - litellm/integrations/supabase.py | 2 - litellm/integrations/weights_biases.py | 11 +- litellm/llms/bedrock_httpx.py | 124 ++++++++++++++++++ litellm/main.py | 3 +- .../proxy/example_config_yaml/custom_auth.py | 3 - litellm/router_strategy/least_busy.py | 2 - litellm/router_strategy/lowest_cost.py | 3 +- litellm/router_strategy/lowest_latency.py | 2 - litellm/router_strategy/lowest_tpm_rpm.py | 2 - litellm/router_strategy/lowest_tpm_rpm_v2.py | 2 - litellm/tests/test_completion.py | 9 ++ litellm/utils.py | 1 - 29 files changed, 147 insertions(+), 64 deletions(-) create mode 100644 litellm/llms/bedrock_httpx.py diff --git a/enterprise/enterprise_callbacks/generic_api_callback.py b/enterprise/enterprise_callbacks/generic_api_callback.py index 076c13d5e..cf1d22e8f 100644 --- a/enterprise/enterprise_callbacks/generic_api_callback.py +++ b/enterprise/enterprise_callbacks/generic_api_callback.py @@ -10,7 +10,6 @@ from litellm.caching import DualCache from typing import Literal, Union -dotenv.load_dotenv() # Loading env variables using dotenv import traceback @@ -19,8 +18,6 @@ import traceback import dotenv, os import requests - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/aispend.py b/litellm/integrations/aispend.py index a893f8923..2fe8ea0df 100644 --- a/litellm/integrations/aispend.py +++ b/litellm/integrations/aispend.py @@ -1,8 +1,6 @@ #### What this does #### # On success + failure, log events to aispend.io import dotenv, os - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime diff --git a/litellm/integrations/berrispend.py b/litellm/integrations/berrispend.py index 1f0ae4581..7d30b706c 100644 --- a/litellm/integrations/berrispend.py +++ b/litellm/integrations/berrispend.py @@ -3,7 +3,6 @@ import dotenv, os import requests # type: ignore -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index 7d1fb37d9..0c38b8626 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -8,8 +8,6 @@ from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache from typing import Literal, Union - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback @@ -18,8 +16,6 @@ import traceback import dotenv, os import requests - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index 8a3e0f467..d50882592 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -6,8 +6,6 @@ from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache from typing import Literal, Union, Optional - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index d969341fc..6d5e08faf 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -3,8 +3,6 @@ import dotenv, os import requests # type: ignore - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/dynamodb.py b/litellm/integrations/dynamodb.py index b5462ee7f..21ccabe4b 100644 --- a/litellm/integrations/dynamodb.py +++ b/litellm/integrations/dynamodb.py @@ -3,8 +3,6 @@ import dotenv, os import requests # type: ignore - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index c8c107541..85e73258e 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -3,8 +3,6 @@ import dotenv, os import requests # type: ignore import litellm - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index 1e957dfcf..f27d19968 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -1,8 +1,6 @@ #### What this does #### # On success, logs events to Langfuse -import dotenv, os - -dotenv.load_dotenv() # Loading env variables using dotenv +import os import copy import traceback from packaging.version import Version diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py index 8a0fb3852..92e440215 100644 --- a/litellm/integrations/langsmith.py +++ b/litellm/integrations/langsmith.py @@ -3,8 +3,6 @@ import dotenv, os # type: ignore import requests # type: ignore from datetime import datetime - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import asyncio import types diff --git a/litellm/integrations/lunary.py b/litellm/integrations/lunary.py index 6ddf2ca59..52316f315 100644 --- a/litellm/integrations/lunary.py +++ b/litellm/integrations/lunary.py @@ -2,14 +2,11 @@ # On success + failure, log events to lunary.ai from datetime import datetime, timezone import traceback -import dotenv import importlib import sys import packaging -dotenv.load_dotenv() - # convert to {completion: xx, tokens: xx} def parse_usage(usage): @@ -62,14 +59,16 @@ class LunaryLogger: version = importlib.metadata.version("lunary") # if version < 0.1.43 then raise ImportError if packaging.version.Version(version) < packaging.version.Version("0.1.43"): - print( + print( # noqa "Lunary version outdated. Required: >= 0.1.43. Upgrade via 'pip install lunary --upgrade'" ) raise ImportError self.lunary_client = lunary except ImportError: - print("Lunary not installed. Please install it using 'pip install lunary'") + print( # noqa + "Lunary not installed. Please install it using 'pip install lunary'" + ) # noqa raise ImportError def log_event( diff --git a/litellm/integrations/openmeter.py b/litellm/integrations/openmeter.py index a454739d5..2c470d6f4 100644 --- a/litellm/integrations/openmeter.py +++ b/litellm/integrations/openmeter.py @@ -3,8 +3,6 @@ import dotenv, os, json import litellm - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback from litellm.integrations.custom_logger import CustomLogger from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 577946ce1..6fbc6ca4c 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -4,8 +4,6 @@ import dotenv, os import requests # type: ignore - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index d276bb85b..8fce8930d 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -5,8 +5,6 @@ import dotenv, os import requests # type: ignore - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/prompt_layer.py b/litellm/integrations/prompt_layer.py index ce610e1ef..531ed75fe 100644 --- a/litellm/integrations/prompt_layer.py +++ b/litellm/integrations/prompt_layer.py @@ -3,8 +3,6 @@ import dotenv, os import requests # type: ignore from pydantic import BaseModel - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index d31b15840..d131e44f0 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -1,9 +1,7 @@ #### What this does #### # On success + failure, log events to Supabase -import dotenv, os - -dotenv.load_dotenv() # Loading env variables using dotenv +import os import traceback import datetime, subprocess, sys import litellm, uuid diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 07c3585f0..d03922bc1 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -2,8 +2,6 @@ # Class for sending Slack Alerts # import dotenv, os from litellm.proxy._types import UserAPIKeyAuth - -dotenv.load_dotenv() # Loading env variables using dotenv from litellm._logging import verbose_logger, verbose_proxy_logger import litellm, threading from typing import List, Literal, Any, Union, Optional, Dict diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py index 58beba8a3..4e6bf517f 100644 --- a/litellm/integrations/supabase.py +++ b/litellm/integrations/supabase.py @@ -3,8 +3,6 @@ import dotenv, os import requests # type: ignore - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime, subprocess, sys import litellm diff --git a/litellm/integrations/weights_biases.py b/litellm/integrations/weights_biases.py index 53e6070a5..a56233b22 100644 --- a/litellm/integrations/weights_biases.py +++ b/litellm/integrations/weights_biases.py @@ -21,11 +21,11 @@ try: # contains a (known) object attribute object: Literal["chat.completion", "edit", "text_completion"] - def __getitem__(self, key: K) -> V: - ... # pragma: no cover + def __getitem__(self, key: K) -> V: ... # noqa - def get(self, key: K, default: Optional[V] = None) -> Optional[V]: - ... # pragma: no cover + def get( # noqa + self, key: K, default: Optional[V] = None + ) -> Optional[V]: ... # pragma: no cover class OpenAIRequestResponseResolver: def __call__( @@ -173,12 +173,11 @@ except: #### What this does #### # On success, logs events to Langfuse -import dotenv, os +import os import requests import requests from datetime import datetime -dotenv.load_dotenv() # Loading env variables using dotenv import traceback diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py new file mode 100644 index 000000000..c6b0327e6 --- /dev/null +++ b/litellm/llms/bedrock_httpx.py @@ -0,0 +1,124 @@ +# What is this? +## Initial implementation of calling bedrock via httpx client (allows for async calls). +## V0 - just covers cohere command-r support + +import os, types +import json +from enum import Enum +import requests, copy # type: ignore +import time +from typing import Callable, Optional, List, Literal, Union +from litellm.utils import ( + ModelResponse, + Usage, + map_finish_reason, + CustomStreamWrapper, + Message, + Choices, + get_secret, +) +import litellm +from .prompt_templates.factory import prompt_factory, custom_prompt +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from .base import BaseLLM +import httpx # type: ignore +from .bedrock import BedrockError + + +class BedrockLLM(BaseLLM): + """ + Example call + + ``` + curl --location --request POST 'https://bedrock-runtime.{aws_region_name}.amazonaws.com/model/{bedrock_model_name}/invoke' \ + --header 'Content-Type: application/json' \ + --header 'Accept: application/json' \ + --user "$AWS_ACCESS_KEY_ID":"$AWS_SECRET_ACCESS_KEY" \ + --aws-sigv4 "aws:amz:us-east-1:bedrock" \ + --data-raw '{ + "prompt": "Hi", + "temperature": 0, + "p": 0.9, + "max_tokens": 4096 + }' + ``` + """ + + def __init__(self) -> None: + super().__init__() + + def get_credentials( + self, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + aws_region_name: Optional[str] = None, + aws_session_name: Optional[str] = None, + aws_profile_name: Optional[str] = None, + aws_role_name: Optional[str] = None, + ): + """ + Return a boto3.Credentials object + """ + import boto3 + + ## CHECK IS 'os.environ/' passed in + params_to_check: List[Optional[str]] = [ + aws_access_key_id, + aws_secret_access_key, + aws_region_name, + aws_session_name, + aws_profile_name, + aws_role_name, + ] + + # Iterate over parameters and update if needed + for i, param in enumerate(params_to_check): + if param and param.startswith("os.environ/"): + _v = get_secret(param) + if _v is not None and isinstance(_v, str): + params_to_check[i] = _v + # Assign updated values back to parameters + ( + aws_access_key_id, + aws_secret_access_key, + aws_region_name, + aws_session_name, + aws_profile_name, + aws_role_name, + ) = params_to_check + + ### CHECK STS ### + if aws_role_name is not None and aws_session_name is not None: + sts_client = boto3.client( + "sts", + aws_access_key_id=aws_access_key_id, # [OPTIONAL] + aws_secret_access_key=aws_secret_access_key, # [OPTIONAL] + ) + + sts_response = sts_client.assume_role( + RoleArn=aws_role_name, RoleSessionName=aws_session_name + ) + + return sts_response["Credentials"] + elif aws_profile_name is not None: ### CHECK SESSION ### + # uses auth values from AWS profile usually stored in ~/.aws/credentials + client = boto3.Session(profile_name=aws_profile_name) + + return client.get_credentials() + else: + session = boto3.Session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + region_name=aws_region_name, + ) + + return session.get_credentials() + + def completion(self, *args, **kwargs) -> Union[ModelResponse, CustomStreamWrapper]: + ## get credentials + ## generate signature + ## make request + return super().completion(*args, **kwargs) + + def embedding(self, *args, **kwargs): + return super().embedding(*args, **kwargs) diff --git a/litellm/main.py b/litellm/main.py index 9afdc7da2..8be71de0b 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -75,6 +75,7 @@ from .llms.anthropic import AnthropicChatCompletion from .llms.anthropic_text import AnthropicTextCompletion from .llms.huggingface_restapi import Huggingface from .llms.predibase import PredibaseChatCompletion +from .llms.bedrock_httpx import BedrockLLM from .llms.triton import TritonChatCompletion from .llms.prompt_templates.factory import ( prompt_factory, @@ -104,7 +105,6 @@ from litellm.utils import ( ) ####### ENVIRONMENT VARIABLES ################### -dotenv.load_dotenv() # Loading env variables using dotenv openai_chat_completions = OpenAIChatCompletion() openai_text_completions = OpenAITextCompletion() anthropic_chat_completions = AnthropicChatCompletion() @@ -114,6 +114,7 @@ azure_text_completions = AzureTextCompletion() huggingface = Huggingface() predibase_chat_completions = PredibaseChatCompletion() triton_chat_completions = TritonChatCompletion() +bedrock_chat_completion = BedrockLLM() ####### COMPLETION ENDPOINTS ################ diff --git a/litellm/proxy/example_config_yaml/custom_auth.py b/litellm/proxy/example_config_yaml/custom_auth.py index a764a647a..6cecf466c 100644 --- a/litellm/proxy/example_config_yaml/custom_auth.py +++ b/litellm/proxy/example_config_yaml/custom_auth.py @@ -1,10 +1,7 @@ from litellm.proxy._types import UserAPIKeyAuth, GenerateKeyRequest from fastapi import Request -from dotenv import load_dotenv import os -load_dotenv() - async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: try: diff --git a/litellm/router_strategy/least_busy.py b/litellm/router_strategy/least_busy.py index 54d44b41d..417651fb3 100644 --- a/litellm/router_strategy/least_busy.py +++ b/litellm/router_strategy/least_busy.py @@ -8,8 +8,6 @@ import dotenv, os, requests, random # type: ignore from typing import Optional - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback from litellm.caching import DualCache from litellm.integrations.custom_logger import CustomLogger diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py index 279af2ae9..fde7781b9 100644 --- a/litellm/router_strategy/lowest_cost.py +++ b/litellm/router_strategy/lowest_cost.py @@ -1,12 +1,11 @@ #### What this does #### # picks based on response time (for streaming, this is time to first token) from pydantic import BaseModel, Extra, Field, root_validator -import dotenv, os, requests, random # type: ignore +import os, requests, random # type: ignore from typing import Optional, Union, List, Dict from datetime import datetime, timedelta import random -dotenv.load_dotenv() # Loading env variables using dotenv import traceback from litellm.caching import DualCache from litellm.integrations.custom_logger import CustomLogger diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index afdfc1779..a7b93d344 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -5,8 +5,6 @@ import dotenv, os, requests, random # type: ignore from typing import Optional, Union, List, Dict from datetime import datetime, timedelta import random - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback from litellm.caching import DualCache from litellm.integrations.custom_logger import CustomLogger diff --git a/litellm/router_strategy/lowest_tpm_rpm.py b/litellm/router_strategy/lowest_tpm_rpm.py index 0a7773a84..625db7048 100644 --- a/litellm/router_strategy/lowest_tpm_rpm.py +++ b/litellm/router_strategy/lowest_tpm_rpm.py @@ -4,8 +4,6 @@ import dotenv, os, requests, random from typing import Optional, Union, List, Dict from datetime import datetime - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback from litellm import token_counter from litellm.caching import DualCache diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py index f7a55d970..23e55f4a3 100644 --- a/litellm/router_strategy/lowest_tpm_rpm_v2.py +++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py @@ -5,8 +5,6 @@ import dotenv, os, requests, random from typing import Optional, Union, List, Dict import datetime as datetime_og from datetime import datetime - -dotenv.load_dotenv() # Loading env variables using dotenv import traceback, asyncio, httpx import litellm from litellm import token_counter diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 04f4cc511..214dc105b 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -2584,6 +2584,15 @@ def test_completion_chat_sagemaker_mistral(): # test_completion_chat_sagemaker_mistral() +def test_completion_bedrock_command_r(): + response = completion( + model="bedrock/cohere.command-r-plus-v1:0", + messages=[{"role": "user", "content": "Hey! how's it going?"}], + ) + + print(f"response: {response}") + + def test_completion_bedrock_titan_null_response(): try: response = completion( diff --git a/litellm/utils.py b/litellm/utils.py index 9218f92a3..0fd7963ae 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -117,7 +117,6 @@ MAX_THREADS = 100 # Create a ThreadPoolExecutor executor = ThreadPoolExecutor(max_workers=MAX_THREADS) -dotenv.load_dotenv() # Loading env variables using dotenv sentry_sdk_instance = None capture_exception = None add_breadcrumb = None