diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 80642ea01..d0f4426b6 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -11,46 +11,46 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from litellm import acompletion, acreate

-@pytest.mark.asyncio
-async def test_get_response():
-    user_message = "Hello, how are you?"
-    messages = [{"content": user_message, "role": "user"}]
-    try:
-        response = await acompletion(model="gpt-3.5-turbo", messages=messages)
-    except Exception as e:
-        pass
+def test_async_response():
+    import asyncio
+    async def test_get_response():
+        user_message = "Hello, how are you?"
+        messages = [{"content": user_message, "role": "user"}]
+        try:
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages)
+        except Exception as e:
+            pass

-response = asyncio.run(test_get_response())
+    response = asyncio.run(test_get_response())
 # print(response)

-@pytest.mark.asyncio
-async def test_get_response_streaming():
-    user_message = "Hello, how are you?"
-    messages = [{"content": user_message, "role": "user"}]
-    try:
-        response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
-        print(type(response))
+def test_get_response_streaming():
+    import asyncio
+    async def test_async_call():
+        user_message = "Hello, how are you?"
+        messages = [{"content": user_message, "role": "user"}]
+        try:
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
+            print(type(response))

-        import inspect
+            import inspect

-        is_async_generator = inspect.isasyncgen(response)
-        print(is_async_generator)
+            is_async_generator = inspect.isasyncgen(response)
+            print(is_async_generator)

-        output = ""
-        async for chunk in response:
-            token = chunk["choices"][0]["delta"].get("content", "")
-            output += token
-        print(output)
+            output = ""
+            async for chunk in response:
+                token = chunk["choices"][0]["delta"].get("content", "")
+                output += token
+            print(output)

-        assert output is not None, "output cannot be None."
-        assert isinstance(output, str), "output needs to be of type str"
-        assert len(output) > 0, "Length of output needs to be greater than 0."
+            assert output is not None, "output cannot be None."
+            assert isinstance(output, str), "output needs to be of type str"
+            assert len(output) > 0, "Length of output needs to be greater than 0."

-    except Exception as e:
-        pass
-    return response
-
-# response = asyncio.run(test_get_response_streaming())
-# print(response)
+        except Exception as e:
+            pass
+        return response
+    asyncio.run(test_async_call())
diff --git a/litellm/utils.py b/litellm/utils.py
index cf0ec1c60..278325a4a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -20,7 +20,6 @@ import aiohttp
 import logging
 import asyncio
 from tokenizers import Tokenizer
-import pkg_resources
 from dataclasses import (
     dataclass,
     field,
@@ -875,6 +874,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):

 def _select_tokenizer(model: str):
     # cohere
+    import pkg_resources
     if model in litellm.cohere_models:
         tokenizer = Tokenizer.from_pretrained("Cohere/command-nightly")
         return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
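The test-file hunks replace @pytest.mark.asyncio coroutines with plain synchronous tests that drive a nested coroutine via asyncio.run(), removing the need for the pytest-asyncio plugin; the utils.py hunks move "import pkg_resources" from module scope into _select_tokenizer, deferring that import cost until the function is actually called. A minimal sketch of the sync-wrapper pattern, assuming litellm is installed and an OpenAI API key is configured (the test name and assertion here are illustrative, not from the diff):

import asyncio

from litellm import acompletion


def test_completion_sync_wrapper():
    # The real test logic lives in a nested coroutine.
    async def _run():
        messages = [{"content": "Hello, how are you?", "role": "user"}]
        return await acompletion(model="gpt-3.5-turbo", messages=messages)

    # asyncio.run() creates an event loop, runs the coroutine to
    # completion, and tears the loop down, so plain pytest can collect
    # and run this test with no async plugin.
    response = asyncio.run(_run())
    assert response is not None

One trade-off of this pattern: each test pays the cost of spinning up a fresh event loop, and exceptions raised inside the coroutine surface only when asyncio.run() is actually called.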