Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
refactor(bedrock.py-+-cohere.py): making bedrock and cohere compatible with openai v1 sdk
This commit is contained in:
parent 39c2597c33
commit 547598a134

7 changed files with 82 additions and 74 deletions
@@ -6,11 +6,14 @@ from typing import Callable, Optional
 import litellm
 from litellm.utils import ModelResponse, get_secret
 from .prompt_templates.factory import prompt_factory, custom_prompt
+import httpx
 
 class BedrockError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs

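Context for the hunk above: the OpenAI v1 Python SDK attaches an httpx.Request and httpx.Response to its exception types, so callers inspect err.request and err.response rather than provider-specific fields. A minimal sketch, not part of this commit, of how the new BedrockError satisfies that interface; the raise/except handler is illustrative:

import httpx

class BedrockError(Exception):
    # mirrors the class in the diff: the request/response objects are
    # synthetic, no real HTTP exchange is captured here
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock")
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(self.message)

try:
    raise BedrockError(status_code=429, message="Rate limit exceeded")
except BedrockError as err:
    # code written against OpenAI v1 SDK exceptions can read the same fields
    print(err.status_code, err.response.status_code, err.request.url)
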
@@ -6,11 +6,14 @@ import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message
 import litellm
+import httpx
 
 class CohereError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.request = httpx.Request(method="POST", url="https://api.cohere.ai/v1/generate")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs

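Because CohereError now carries a response object, callers can branch on HTTP status the same way they would for an OpenAI v1 SDK APIStatusError. A sketch of the caller's side; the should_retry policy is illustrative, not litellm's:

import httpx

class CohereError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(method="POST", url="https://api.cohere.ai/v1/generate")
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(self.message)

def should_retry(err: CohereError) -> bool:
    # illustrative policy: retry on rate limits and server errors only
    return err.response.status_code == 429 or err.response.status_code >= 500

print(should_retry(CohereError(429, "Too many requests")))  # True
print(should_retry(CohereError(401, "Invalid API key")))    # False
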
@@ -2,7 +2,7 @@
 import os, copy, types
 import json
 from enum import Enum
-import requests
+import httpx, requests
 import time
 import litellm
 from typing import Callable, Dict, List, Any

@@ -14,6 +14,8 @@ class HuggingfaceError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.response = httpx.Response(status_code=status_code)
+        self.request = self.response.request
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs

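One detail in the HuggingfaceError hunk: unlike BedrockError and CohereError, it builds the httpx.Response first and derives .request from it, and in recent httpx releases Response.request raises RuntimeError when no request was attached. A defensive variant, an assumption about httpx behavior rather than a change in this commit, would attach the request explicitly (the URL below is a placeholder):

import httpx

class HuggingfaceError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # attach a synthetic request up front; reading .request on a
        # Response constructed without one raises RuntimeError in httpx
        request = httpx.Request(method="POST", url="https://api-inference.huggingface.co")
        self.response = httpx.Response(status_code=status_code, request=request)
        self.request = self.response.request
        super().__init__(self.message)
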
@@ -28,7 +28,7 @@ def test_async_response():
     user_message = "Hello, how are you?"
     messages = [{"content": user_message, "role": "user"}]
     try:
-        response = await acompletion(model="azure/chatgpt-v-2", messages=messages)
+        response = await acompletion(model="command-nightly", messages=messages)
         print(f"response: {response}")
     except Exception as e:
         pytest.fail(f"An exception occurred: {e}")

@@ -42,7 +42,7 @@ def test_get_response_streaming():
     user_message = "write a short poem in one sentence"
     messages = [{"content": user_message, "role": "user"}]
     try:
-        response = await acompletion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+        response = await acompletion(model="command-nightly", messages=messages, stream=True)
         print(type(response))
 
         import inspect

@@ -65,7 +65,7 @@ def test_get_response_streaming():
     asyncio.run(test_async_call())
 
 
-test_get_response_streaming()
+# test_get_response_streaming()
 
 def test_get_response_non_openai_streaming():
     import asyncio

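The async tests above all follow the same pattern: run the acompletion coroutine and fail the test if anything raises. A minimal standalone sketch of that pattern; the model name and prompt are illustrative:

import asyncio
from litellm import acompletion

async def main():
    # non-streaming call; returns an OpenAI-style ModelResponse
    response = await acompletion(
        model="command-nightly",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
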
@@ -395,7 +395,7 @@ def test_completion_cohere(): # commenting for now as the cohere endpoint is bei
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test_completion_cohere() #
+# test_completion_cohere()
 
 
 def test_completion_openai():

@@ -634,7 +634,7 @@ def test_completion_azure():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_azure()
+# test_completion_azure()
 def test_completion_azure2():
     # test if we can pass api_base, api_version and api_key in compleition()
     try:

@@ -941,7 +941,7 @@ def test_completion_bedrock_claude():
         pass
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-# test_completion_bedrock_claude()
+test_completion_bedrock_claude()
 
 def test_completion_bedrock_cohere():
     print("calling bedrock cohere")

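test_completion_bedrock_claude is re-enabled above because Bedrock responses now come back in the OpenAI shape. A hedged sketch of what that test exercises, assuming AWS credentials are set in the environment; the access paths shown assume the OpenAI-v1-style response object:

from litellm import completion

# requires AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_REGION_NAME in env
response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)
# the same access path works regardless of provider
print(response.choices[0].message.content)
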
@@ -42,9 +42,10 @@ models = ["command-nightly"]
 # Test 1: Context Window Errors
 @pytest.mark.parametrize("model", models)
 def test_context_window(model):
-    sample_text = "Say error 50 times" * 10000
+    sample_text = "Say error 50 times" * 1000000
     messages = [{"content": sample_text, "role": "user"}]
     try:
         litellm.set_verbose = False
         response = completion(model=model, messages=messages)
         print(f"response: {response}")
+        print("FAILED!")

@@ -67,8 +68,8 @@ def test_context_window_with_fallbacks(model):
 
 # for model in litellm.models_by_provider["bedrock"]:
 # test_context_window(model=model)
-# test_context_window(model="azure/chatgpt-v-2")
-# test_context_window_with_fallbacks(model="azure/chatgpt-v-2")
+# test_context_window(model="command-nightly")
+# test_context_window_with_fallbacks(model="command-nightly")
 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
 def invalid_auth(model): # set the model key to an invalid key, depending on the model

@@ -78,7 +79,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
     if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct":
         temporary_key = os.environ["OPENAI_API_KEY"]
         os.environ["OPENAI_API_KEY"] = "bad-key"
-    elif model == "bedrock/anthropic.claude-v2":
+    elif "bedrock" in model:
         temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"]
         os.environ["AWS_ACCESS_KEY_ID"] = "bad-key"
         temporary_aws_region_name = os.environ["AWS_REGION_NAME"]

@@ -163,7 +164,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
 
 # for model in litellm.models_by_provider["bedrock"]:
 # invalid_auth(model=model)
-# invalid_auth(model="azure/chatgpt-v-2")
+# invalid_auth(model="command-nightly")
 
 # Test 3: Invalid Request Error
 @pytest.mark.parametrize("model", models)

@@ -173,7 +174,7 @@ def test_invalid_request_error(model):
     with pytest.raises(BadRequestError):
         completion(model=model, messages=messages, max_tokens="hello world")
 
-# test_invalid_request_error(model="azure/chatgpt-v-2")
+# test_invalid_request_error(model="command-nightly")
 # Test 3: Rate Limit Errors
 # def test_model_call(model):
 #     try:

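These exception tests depend on litellm mapping provider failures to OpenAI-style exception classes, which is what the synthetic request/response wiring above enables. A condensed sketch of the context-window check, assuming litellm exposes ContextWindowExceededError (the original test prints rather than asserts; the pytest.raises form here is illustrative):

import pytest
import litellm
from litellm import completion

def test_context_window(model="command-nightly"):
    # a prompt far beyond any context window should surface as a typed error
    sample_text = "Say error 50 times" * 1000000
    messages = [{"content": sample_text, "role": "user"}]
    with pytest.raises(litellm.ContextWindowExceededError):
        completion(model=model, messages=messages)
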
@@ -136,37 +136,37 @@ def streaming_format_tests(idx, chunk):
     print(f"extracted chunk: {extracted_chunk}")
     return extracted_chunk, finished
 
-# def test_completion_cohere_stream():
-#     try:
-#         messages = [
-#             {"role": "system", "content": "You are a helpful assistant."},
-#             {
-#                 "role": "user",
-#                 "content": "how does a court case get to the Supreme Court?",
-#             },
-#         ]
-#         response = completion(
-#             model="command-nightly", messages=messages, stream=True, max_tokens=50,
-#         )
-#         complete_response = ""
-#         # Add any assertions here to check the response
-#         has_finish_reason = False
-#         for idx, chunk in enumerate(response):
-#             chunk, finished = streaming_format_tests(idx, chunk)
-#             has_finish_reason = finished
-#             if finished:
-#                 break
-#             complete_response += chunk
-#         if has_finish_reason is False:
-#             raise Exception("Finish reason not in final chunk")
-#         if complete_response.strip() == "":
-#             raise Exception("Empty response received")
-#         print(f"completion_response: {complete_response}")
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_cohere_stream():
+    # this is a flaky test due to the cohere API endpoint being unstable
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="command-nightly", messages=messages, stream=True, max_tokens=50,
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        has_finish_reason = False
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("Finish reason not in final chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
-# test_completion_cohere_stream()
+test_completion_cohere_stream()
 
 def test_completion_cohere_stream_bad_key():
     try:

@@ -372,7 +372,7 @@ def test_completion_azure_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_azure_stream()
+# test_completion_azure_stream()
 
 def test_completion_claude_stream():
     try:

@@ -634,40 +634,39 @@ def test_completion_replicate_stream_bad_key():
 
 # test_completion_replicate_stream_bad_key()
 
-# def test_completion_bedrock_claude_stream():
-#     try:
-#         litellm.set_verbose=False
-#         response = completion(
-#             model="bedrock/anthropic.claude-instant-v1",
-#             messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-#             temperature=1,
-#             max_tokens=20,
-#             stream=True,
-#         )
-#         print(response)
-#         complete_response = ""
-#         has_finish_reason = False
-#         # Add any assertions here to check the response
-#         for idx, chunk in enumerate(response):
-#             # print
-#             chunk, finished = streaming_format_tests(idx, chunk)
-#             has_finish_reason = finished
-#             complete_response += chunk
-#             if finished:
-#                 break
-#         if has_finish_reason is False:
-#             raise Exception("finish reason not set for last chunk")
-#         if complete_response.strip() == "":
-#             raise Exception("Empty response received")
-#         print(f"completion_response: {complete_response}")
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_bedrock_claude_stream():
+    try:
+        litellm.set_verbose=False
+        response = completion(
+            model="bedrock/anthropic.claude-instant-v1",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=20,
+            stream=True,
+        )
+        print(response)
+        complete_response = ""
+        has_finish_reason = False
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            # print
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            complete_response += chunk
+            if finished:
+                break
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 # test_completion_bedrock_claude_stream()
 
 
 # def test_completion_sagemaker_stream():
 #     try:
 #         response = completion(

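Both stream tests funnel every chunk through streaming_format_tests, which, per its use above, returns the extracted text plus a finished flag. A hedged sketch of such a checker; the field names assume the OpenAI-v1-style chunk shape, and this is not the test file's exact implementation:

def streaming_format_tests(idx, chunk):
    # returns (extracted_text, finished) for an OpenAI-v1-style stream chunk
    finished = chunk.choices[0].finish_reason is not None
    extracted_chunk = chunk.choices[0].delta.content or ""
    if idx == 0:
        # the first chunk is expected to carry the assistant role
        assert chunk.choices[0].delta.role == "assistant"
    return extracted_chunk, finished
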