#### What this tests ####
#    This tests the acompletion function #

import asyncio
import logging
import os
import sys
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import acompletion, acreate, completion

litellm.num_retries = 3


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
def test_sync_response_anyscale():
    litellm.set_verbose = False
    user_message = "Hello, how are you?"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = completion(
            model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
            messages=messages,
            timeout=5,
        )
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred: {e}")


# test_sync_response_anyscale()


def test_async_response_openai():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        # OpenAI-style tool schema, used to exercise parallel tool calling
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA",
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                            },
                        },
                        "required": ["location"],
                    },
                },
            }
        ]
        try:
            response = await acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                tools=tools,
                parallel_tool_calls=True,
                timeout=5,
            )
            print(f"response: {response}")
            print(f"response ms: {response._response_ms}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            print(e)  # log before failing; pytest.fail raises, so nothing after it runs
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_openai()


def test_async_response_azure():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "What do you know?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="azure/gpt-turbo",
                messages=messages,
                base_url=os.getenv("CLOUDFLARE_AZURE_BASE_URL"),
                api_key=os.getenv("AZURE_FRANCE_API_KEY"),
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except litellm.InternalServerError:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_azure()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
def test_async_anyscale_response():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
                messages=messages,
                timeout=5,
            )
            # response = await response
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_anyscale_response()


@pytest.mark.skip(reason="Flaky test-cloudflare is very unstable")
def test_async_completion_cloudflare():
    try:
        litellm.set_verbose = True

        async def test():
            response = await litellm.acompletion(
                model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
                messages=[{"content": "what llm are you", "role": "user"}],
                max_tokens=5,
                num_retries=3,
            )
            print(response)
            return response

        response = asyncio.run(test())
        text_response = response["choices"][0]["message"]["content"]
        assert len(text_response) > 1  # more than 1 char in the response
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_async_completion_cloudflare()


@pytest.mark.skip(reason="Flaky test")
def test_get_cloudflare_response_streaming():
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = False
            response = await acompletion(
                model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
                messages=messages,
                stream=True,
                num_retries=3,  # cloudflare ai workers is EXTREMELY UNSTABLE
            )
            print(type(response))

            import inspect

            # streaming responses should come back as an async generator
            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            async for chunk in response:
                print(chunk)
                token = chunk["choices"][0]["delta"].get("content", "")
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())


@pytest.mark.asyncio
async def test_hf_completion_tgi():
    # litellm.set_verbose=True
    try:
        response = await acompletion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.APIError as e:
        print("got an api error")
        pass
    except litellm.Timeout as e:
        print("got a timeout error")
        pass
    except litellm.RateLimitError as e:
        # this will catch the model is overloaded error
        print("got a rate limit error")
        pass
    except Exception as e:
        if "Model is overloaded" in str(e):
            pass
        else:
            pytest.fail(f"Error occurred: {e}")


# test_get_cloudflare_response_streaming()


@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
    # litellm.set_verbose=True
    try:
        response = await acompletion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_get_response_streaming():
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = True
            response = await acompletion(
                model="gpt-3.5-turbo", messages=messages, stream=True, timeout=5
            )
            print(type(response))

            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())


# test_get_response_streaming()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
def test_get_response_non_openai_streaming():
    import asyncio

    litellm.set_verbose = True
    litellm.num_retries = 0

    async def test_async_call():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
                messages=messages,
                stream=True,
                timeout=5,
            )
            print(type(response))

            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", None)
                if token is None:
                    continue
                print(token)
                output += token
            print(f"output: {output}")
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")
        return response

    asyncio.run(test_async_call())


# test_get_response_non_openai_streaming()
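
# A minimal way to run one of these checks directly (outside pytest) while
# debugging, mirroring the commented-out calls above. This is a sketch, not
# part of the test suite: it assumes OPENAI_API_KEY is set in the environment,
# and any of the test functions above can be swapped in.
if __name__ == "__main__":
    test_get_response_streaming()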