diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 485e86e7f..e2e364b5e 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -130,6 +130,30 @@ def test_async_anyscale_response():
 
 
 # test_async_anyscale_response()
 
+def test_async_completion_cloudflare():
+    try:
+        litellm.set_verbose = True
+
+        async def test():
+            response = await litellm.acompletion(
+                model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
+                messages=[{"content": "what llm are you", "role": "user"}],
+                max_tokens=50,
+            )
+            print(response)
+            return response
+
+        response = asyncio.run(test())
+        text_response = response["choices"][0]["message"]["content"]
+        assert len(text_response) > 5  # more than 5 chars in response
+
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+test_async_completion_cloudflare()
+
+
 def test_get_response_streaming():
     import asyncio
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 20f9a6380..dd7cf8f2b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2009,6 +2009,7 @@ def test_completion_cloudflare():
         response = completion(
             model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
             messages=[{"content": "what llm are you", "role": "user"}],
+            max_tokens=15,
         )
         print(response)
 
@@ -2018,13 +2019,6 @@ def test_completion_cloudflare():
 
 
 test_completion_cloudflare()
-# async def get_response(generator):
-#     async for elem in generator:
-#         print(elem)
-#         return
-
-# test_completion_together_ai_stream()
-
 
 def test_moderation():
     import openai