fixes to testing

Krrish Dholakia 2023-09-16 11:39:49 -07:00
parent 09d63a6e73
commit ce827faa93
7 changed files with 91 additions and 167 deletions

View file

@@ -132,6 +132,7 @@ def completion(
     # model specific optional params
     top_k=40,# used by text-bison only
     task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
+    return_full_text: bool = False, # used by huggingface TGI
     remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
         custom_llm_provider=custom_llm_provider,
         top_k=top_k,
         task=task,
-        remove_input=remove_input
+        remove_input=remove_input,
+        return_full_text=return_full_text
     )
     # For logging - save the values of the litellm-specific params passed in
     litellm_params = get_litellm_params(
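
A minimal sketch of how a caller would use the new flag, assuming a Hugging Face TGI deployment is configured (the model name and endpoint below are placeholders, not taken from this commit). With return_full_text=False, the TGI backend should return only the generated continuation instead of echoing the prompt back in the completion text:

import litellm

response = litellm.completion(
    model="huggingface/bigcode/starcoder",            # placeholder model
    messages=[{"role": "user", "content": "def fib(n):"}],
    task="text-generation-inference",
    return_full_text=False,                           # new flag added above
    api_base="https://my-tgi-endpoint.example.com",   # placeholder endpoint
)
print(response["choices"][0]["message"]["content"])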

View file

@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
         pytest.fail(f"Error occurred: {e}")

-def test_completion_claude_stream():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="claude-2", messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_claude_stream()
 def test_completion_nlp_cloud():
     try:
         messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_nlp_cloud_streaming()
-# test_completion_nlp_cloud_streaming()
 # def test_completion_hf_api():
 #     try:
 #         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")

-def test_completion_openai_with_stream():
-    try:
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=messages,
-            temperature=0.5,
-            top_p=0.1,
-            n=2,
-            max_tokens=150,
-            presence_penalty=0.5,
-            stream=True,
-            frequency_penalty=-0.5,
-            logit_bias={27000: 5},
-            user="ishaan_dev@berri.ai",
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_openai_with_stream()
-
-def test_completion_openai_with_functions():
-    function1 = [
-        {
-            "name": "get_current_weather",
-            "description": "Get the current weather in a given location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city and state, e.g. San Francisco, CA",
-                    },
-                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                },
-                "required": ["location"],
-            },
-        }
-    ]
-    try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_openai_with_functions()
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-# test_completion_replicate_vicuna()
-
-def test_completion_replicate_llama_stream():
-    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
-    try:
-        response = completion(model=model_name, messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_replicate_llama_stream()
 # def test_completion_replicate_stability_stream():
 #     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 #     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-def test_completion_bedrock_ai21_stream():
-    try:
-        litellm.set_verbose = False
-        response = completion(
-            model="bedrock/amazon.titan-tg1-large",
-            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-            temperature=1,
-            max_tokens=4096,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_bedrock_ai21_stream()
-# test_completion_sagemaker()
 ######## Test VLLM ########
 # def test_completion_vllm():
 #     try:

View file

@@ -213,7 +213,31 @@ def test_completion_cohere_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+def test_completion_bedrock_ai21_stream():
+    try:
+        litellm.set_verbose = False
+        response = completion(
+            model="bedrock/amazon.titan-tg1-large",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=4096,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
 # test_completion_cohere_stream()
 # test on openai completion call
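
The relocated streaming tests call a streaming_format_tests(idx, chunk) helper that already lives in test_streaming.py and is not shown in this diff. Purely as an illustration of the kind of per-chunk check such a helper performs, a hypothetical version might look like this (an assumption, not the repository's actual implementation):

def streaming_format_tests(idx, chunk):
    # Hypothetical sketch: validate the OpenAI-style streaming chunk shape
    # and return (extracted_text, finished).
    extracted_chunk = ""
    finished = False
    assert "choices" in chunk and len(chunk["choices"]) > 0
    delta = chunk["choices"][0]["delta"]
    if idx == 0:
        # the first chunk is expected to carry the assistant role
        # (see the CustomStreamWrapper sent_first_chunk logic further down)
        assert delta["role"] == "assistant"
    if len(delta.keys()) > 0 and "content" in delta:
        extracted_chunk = delta["content"] or ""
    if chunk["choices"][0]["finish_reason"] is not None:
        finished = True
    return extracted_chunk, finished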
@@ -301,34 +325,66 @@ def test_together_ai_completion_call_starcoder():
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass

-# test_together_ai_completion_call_starcoder()
-# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
-# def test_aleph_alpha_call():
-#     try:
-#         start_time = time.time()
-#         response = completion(
-#             model="luminous-base",
-#             messages=messages,
-#             logger_fn=logger_fn,
-#             stream=True,
-#         )
-#         complete_response = ""
-#         print(f"returned response object: {response}")
-#         for chunk in response:
-#             chunk_time = time.time()
-#             complete_response += (
-#                 chunk["choices"][0]["delta"]["content"]
-#                 if len(chunk["choices"][0]["delta"].keys()) > 0
-#                 else ""
-#             )
-#             if len(complete_response) > 0:
-#                 print(complete_response)
-#         if complete_response == "":
-#             raise Exception("Empty response received")
-#     except:
-#         print(f"error occurred: {traceback.format_exc()}")
-#         pass
-#### Test Async streaming
+def test_completion_nlp_cloud_streaming():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+#### Test Function calling + streaming ####
+
+def test_completion_openai_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_openai_with_functions()
+
+#### Test Async streaming ####
 # # test on ai21 completion call
 async def ai21_async_completion_call():

View file

@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
     model=None,
     custom_llm_provider="",
     top_k=40,
+    return_full_text=False,
     task=None
 ):
     optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
             optional_params["max_new_tokens"] = max_tokens
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
+        optional_params["return_full_text"] = return_full_text
         optional_params["details"] = True
         optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
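
To make the effect of the hunk above concrete: for a huggingface provider call, get_optional_params now includes return_full_text in the generation-parameter dict it builds, alongside the parameters already visible in the surrounding context. Roughly, with placeholder values for whatever the caller passed:

# Sketch of the resulting optional_params for a Hugging Face TGI request
# after this change (temperature/top_p values are placeholders):
optional_params = {
    "temperature": 0.7,
    "top_p": 0.9,
    "max_new_tokens": 256,
    "return_full_text": False,  # new: forwarded from completion()'s kwarg
    "details": True,
    "task": "text-generation-inference",
}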
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
-            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta.role = "assistant"
                 self.sent_first_chunk = True
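
This hunk only drops a debug print; the surrounding sent_first_chunk logic still stamps the assistant role on the first streamed chunk only. A stripped-down sketch of that pattern for context (illustrative only, not the full wrapper, which also extracts provider-specific text):

class StreamWrapperSketch:
    def __init__(self, model):
        self.model = model
        self.sent_first_chunk = False

    def wrap_chunk(self, text):
        chunk = {"choices": [{"delta": {"content": text}}]}
        if not self.sent_first_chunk:
            # only the very first chunk announces the assistant role
            chunk["choices"][0]["delta"]["role"] = "assistant"
            self.sent_first_chunk = True
        return chunk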

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.677"
+version = "0.1.678"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"