diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 80642ea01..d0f4426b6 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -11,46 +11,46 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from litellm import acompletion, acreate

-@pytest.mark.asyncio
-async def test_get_response():
-    user_message = "Hello, how are you?"
-    messages = [{"content": user_message, "role": "user"}]
-    try:
-        response = await acompletion(model="gpt-3.5-turbo", messages=messages)
-    except Exception as e:
-        pass
+def test_async_response():
+    import asyncio
+    async def test_get_response():
+        user_message = "Hello, how are you?"
+        messages = [{"content": user_message, "role": "user"}]
+        try:
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages)
+        except Exception as e:
+            pass

-response = asyncio.run(test_get_response())
+    response = asyncio.run(test_get_response())
 # print(response)

-@pytest.mark.asyncio
-async def test_get_response_streaming():
-    user_message = "Hello, how are you?"
-    messages = [{"content": user_message, "role": "user"}]
-    try:
-        response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
-        print(type(response))
+def test_get_response_streaming():
+    import asyncio
+    async def test_async_call():
+        user_message = "Hello, how are you?"
+        messages = [{"content": user_message, "role": "user"}]
+        try:
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
+            print(type(response))

-        import inspect
+            import inspect

-        is_async_generator = inspect.isasyncgen(response)
-        print(is_async_generator)
+            is_async_generator = inspect.isasyncgen(response)
+            print(is_async_generator)

-        output = ""
-        async for chunk in response:
-            token = chunk["choices"][0]["delta"].get("content", "")
-            output += token
-        print(output)
+            output = ""
+            async for chunk in response:
+                token = chunk["choices"][0]["delta"].get("content", "")
+                output += token
+            print(output)

-        assert output is not None, "output cannot be None."
-        assert isinstance(output, str), "output needs to be of type str"
-        assert len(output) > 0, "Length of output needs to be greater than 0."
+            assert output is not None, "output cannot be None."
+            assert isinstance(output, str), "output needs to be of type str"
+            assert len(output) > 0, "Length of output needs to be greater than 0."

-    except Exception as e:
-        pass
-    return response
-
-# response = asyncio.run(test_get_response_streaming())
-# print(response)
+        except Exception as e:
+            pass
+        return response
+    asyncio.run(test_async_call())
diff --git a/litellm/utils.py b/litellm/utils.py
index cf0ec1c60..278325a4a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -20,7 +20,6 @@ import aiohttp
 import logging
 import asyncio
 from tokenizers import Tokenizer
-import pkg_resources
 from dataclasses import (
     dataclass,
     field,
@@ -875,6 +874,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):

 def _select_tokenizer(model: str):
     # cohere
+    import pkg_resources
     if model in litellm.cohere_models:
         tokenizer = Tokenizer.from_pretrained("Cohere/command-nightly")
         return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
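The test-file hunks replace @pytest.mark.asyncio coroutines with plain synchronous tests that drive a nested coroutine via asyncio.run(), removing the need for the pytest-asyncio plugin; the utils.py hunks move "import pkg_resources" from module scope into _select_tokenizer, deferring that import cost until the function is actually called. A minimal sketch of the sync-wrapper pattern, assuming litellm is installed and an OpenAI API key is configured (the test name and assertion here are illustrative, not from the diff):

import asyncio

from litellm import acompletion


def test_completion_sync_wrapper():
    # The real test logic lives in a nested coroutine.
    async def _run():
        messages = [{"content": "Hello, how are you?", "role": "user"}]
        return await acompletion(model="gpt-3.5-turbo", messages=messages)

    # asyncio.run() creates an event loop, runs the coroutine to
    # completion, and tears the loop down, so plain pytest can collect
    # and run this test with no async plugin.
    response = asyncio.run(_run())
    assert response is not None

One trade-off of this pattern: each test pays the cost of spinning up a fresh event loop, and exceptions raised inside the coroutine surface only when asyncio.run() is actually called.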