refactor: move all testing to top-level of repo
Closes https://github.com/BerriAI/litellm/issues/486
parent 5403c5828c
commit 3560f0ef2c
213 changed files with 74 additions and 217 deletions
351 tests/local_testing/test_async_fn.py Normal file
@@ -0,0 +1,351 @@
#### What this tests ####
# This tests the acompletion function #

import asyncio
import logging
import os
import sys
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
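# NOTE: with the suite moved to tests/local_testing/, "../.." resolves to the
# repository root, which is what lets `import litellm` below work from a checkout.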
import litellm
from litellm import acompletion, acreate, completion

litellm.num_retries = 3


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_sync_response_anyscale():
|
||||
litellm.set_verbose = False
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = completion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
timeout=5,
|
||||
)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
|
||||
# test_sync_response_anyscale()
|
||||
|
||||
|
||||
def test_async_response_openai():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA",
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                            },
                        },
                        "required": ["location"],
                    },
                },
            }
        ]
        try:
            response = await acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                tools=tools,
                parallel_tool_calls=True,
                timeout=5,
            )
            print(f"response: {response}")
            print(f"response ms: {response._response_ms}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_openai()


def test_async_response_azure():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "What do you know?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="azure/gpt-turbo",
                messages=messages,
                base_url=os.getenv("CLOUDFLARE_AZURE_BASE_URL"),
                api_key=os.getenv("AZURE_FRANCE_API_KEY"),
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except litellm.InternalServerError:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_azure()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_async_anyscale_response():
|
||||
import asyncio
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
async def test_get_response():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
timeout=5,
|
||||
)
|
||||
# response = await response
|
||||
print(f"response: {response}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
asyncio.run(test_get_response())
|
||||
|
||||
|
||||
# test_async_anyscale_response()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Flaky test-cloudflare is very unstable")
|
||||
def test_async_completion_cloudflare():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
||||
async def test():
|
||||
response = await litellm.acompletion(
|
||||
model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
|
||||
messages=[{"content": "what llm are you", "role": "user"}],
|
||||
max_tokens=5,
|
||||
num_retries=3,
|
||||
)
|
||||
print(response)
|
||||
return response
|
||||
|
||||
response = asyncio.run(test())
|
||||
text_response = response["choices"][0]["message"]["content"]
|
||||
assert len(text_response) > 1 # more than 1 chars in response
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_async_completion_cloudflare()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Flaky test")
|
||||
def test_get_cloudflare_response_streaming():
|
||||
import asyncio
|
||||
|
||||
async def test_async_call():
|
||||
user_message = "write a short poem in one sentence"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
response = await acompletion(
|
||||
model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
num_retries=3, # cloudflare ai workers is EXTREMELY UNSTABLE
|
||||
)
|
||||
print(type(response))
|
||||
|
||||
import inspect
|
||||
|
||||
is_async_generator = inspect.isasyncgen(response)
|
||||
print(is_async_generator)
|
||||
|
||||
output = ""
|
||||
i = 0
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
token = chunk["choices"][0]["delta"].get("content", "")
|
||||
if token == None:
|
||||
continue # openai v1.0.0 returns content=None
|
||||
output += token
|
||||
assert output is not None, "output cannot be None."
|
||||
assert isinstance(output, str), "output needs to be of type str"
|
||||
assert len(output) > 0, "Length of output needs to be greater than 0."
|
||||
print(f"output: {output}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
asyncio.run(test_async_call())
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_hf_completion_tgi():
    # litellm.set_verbose=True
    try:
        response = await acompletion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.APIError as e:
        print("got an api error")
        pass
    except litellm.Timeout as e:
        print("got a timeout error")
        pass
    except litellm.RateLimitError as e:
        # this will catch the model is overloaded error
        print("got a rate limit error")
        pass
    except Exception as e:
        if "Model is overloaded" in str(e):
            pass
        else:
            pytest.fail(f"Error occurred: {e}")


# test_get_cloudflare_response_streaming()


@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_sagemaker():
|
||||
# litellm.set_verbose=True
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
|
||||
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_get_response_streaming():
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = True
            response = await acompletion(
                model="gpt-3.5-turbo", messages=messages, stream=True, timeout=5
            )
            print(type(response))

            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())


# test_get_response_streaming()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_get_response_non_openai_streaming():
|
||||
import asyncio
|
||||
|
||||
litellm.set_verbose = True
|
||||
litellm.num_retries = 0
|
||||
|
||||
async def test_async_call():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
timeout=5,
|
||||
)
|
||||
print(type(response))
|
||||
|
||||
import inspect
|
||||
|
||||
is_async_generator = inspect.isasyncgen(response)
|
||||
print(is_async_generator)
|
||||
|
||||
output = ""
|
||||
i = 0
|
||||
async for chunk in response:
|
||||
token = chunk["choices"][0]["delta"].get("content", None)
|
||||
if token == None:
|
||||
continue
|
||||
print(token)
|
||||
output += token
|
||||
print(f"output: {output}")
|
||||
assert output is not None, "output cannot be None."
|
||||
assert isinstance(output, str), "output needs to be of type str"
|
||||
assert len(output) > 0, "Length of output needs to be greater than 0."
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
return response
|
||||
|
||||
asyncio.run(test_async_call())
|
||||
|
||||
|
||||
# test_get_response_non_openai_streaming()
|