diff --git a/tests/llm_translation/Readme.md b/tests/llm_translation/Readme.md
index 174c81b4e..db84e7c33 100644
--- a/tests/llm_translation/Readme.md
+++ b/tests/llm_translation/Readme.md
@@ -1 +1,3 @@
-More tests under `litellm/litellm/tests/*`.
\ No newline at end of file
+Unit tests for individual LLM providers.
+
+Each test file is named after the LLM provider it tests - e.g. `test_openai.py` is for OpenAI.
\ No newline at end of file
diff --git a/tests/llm_translation/test_max_completion_tokens.py b/tests/llm_translation/test_max_completion_tokens.py
index 093bafa9a..6ac681b80 100644
--- a/tests/llm_translation/test_max_completion_tokens.py
+++ b/tests/llm_translation/test_max_completion_tokens.py
@@ -42,7 +42,6 @@ def return_mocked_response(model: str):
         "bedrock/mistral.mistral-large-2407-v1:0",
     ],
 )
-@pytest.mark.respx
 @pytest.mark.asyncio()
 async def test_bedrock_max_completion_tokens(model: str):
     """
@@ -87,7 +86,6 @@
     "model",
     ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
 )
-@pytest.mark.respx
 @pytest.mark.asyncio()
 async def test_anthropic_api_max_completion_tokens(model: str):
     """
diff --git a/tests/llm_translation/test_nvidia_nim.py b/tests/llm_translation/test_nvidia_nim.py
index 52ef1043f..ca0374d45 100644
--- a/tests/llm_translation/test_nvidia_nim.py
+++ b/tests/llm_translation/test_nvidia_nim.py
@@ -19,7 +19,6 @@
 from litellm import Choices, Message, ModelResponse, EmbeddingResponse, Usage
 from litellm import completion
 
 
-@pytest.mark.respx
 def test_completion_nvidia_nim():
     from openai import OpenAI
diff --git a/tests/llm_translation/test_openai_prediction_param.py b/tests/llm_translation/test_openai.py
similarity index 57%
rename from tests/llm_translation/test_openai_prediction_param.py
rename to tests/llm_translation/test_openai.py
index ebfdf061f..82f8009fb 100644
--- a/tests/llm_translation/test_openai_prediction_param.py
+++ b/tests/llm_translation/test_openai.py
@@ -2,7 +2,7 @@ import json
 import os
 import sys
 from datetime import datetime
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, patch
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )
@@ -63,8 +63,7 @@ def test_openai_prediction_param():
 
 
 @pytest.mark.asyncio
-@pytest.mark.respx
-async def test_openai_prediction_param_mock(respx_mock: MockRouter):
+async def test_openai_prediction_param_mock():
     """
     Tests that prediction parameter is correctly passed to the API
     """
@@ -92,60 +91,36 @@ async def test_openai_prediction_param_mock(respx_mock: MockRouter):
         public string Username { get; set; }
     }
     """
+    from openai import AsyncOpenAI
 
-    mock_response = ModelResponse(
-        id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
-        choices=[
-            Choices(
-                message=Message(
-                    content=code.replace("Username", "Email").replace(
-                        "username", "email"
-                    ),
-                    role="assistant",
-                )
+    client = AsyncOpenAI(api_key="fake-api-key")
+
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_client:
+        try:
+            await litellm.acompletion(
+                model="gpt-4o-mini",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
+                    },
+                    {"role": "user", "content": code},
+                ],
+                prediction={"type": "content", "content": code},
+                client=client,
             )
-        ],
-        created=int(datetime.now().timestamp()),
-        model="gpt-4o-mini-2024-07-18",
-        usage={
-            "completion_tokens": 207,
-            "prompt_tokens": 175,
-            "total_tokens": 382,
-            "completion_tokens_details": {
-                "accepted_prediction_tokens": 0,
-                "reasoning_tokens": 0,
-                "rejected_prediction_tokens": 80,
-            },
-        },
-    )
+        except Exception as e:
+            print(f"Error: {e}")
 
-    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
-        return_value=httpx.Response(200, json=mock_response.dict())
-    )
+        mock_client.assert_called_once()
+        request_body = mock_client.call_args.kwargs
 
-    completion = await litellm.acompletion(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
-            },
-            {"role": "user", "content": code},
-        ],
-        prediction={"type": "content", "content": code},
-    )
-
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-
-    # Verify the request contains the prediction parameter
-    assert "prediction" in request_body
-    # verify prediction is correctly sent to the API
-    assert request_body["prediction"] == {"type": "content", "content": code}
-
-    # Verify the completion tokens details
-    assert completion.usage.completion_tokens_details.accepted_prediction_tokens == 0
-    assert completion.usage.completion_tokens_details.rejected_prediction_tokens == 80
+        # Verify the request contains the prediction parameter
+        assert "prediction" in request_body
+        # verify prediction is correctly sent to the API
+        assert request_body["prediction"] == {"type": "content", "content": code}
 
 
 @pytest.mark.asyncio
@@ -223,3 +198,80 @@ async def test_openai_prediction_param_with_caching():
     )
 
     assert completion_response_3.id != completion_response_1.id
+
+
+@pytest.mark.asyncio()
+@pytest.mark.respx
+async def test_vision_with_custom_model(respx_mock: MockRouter):
+    """
+    Tests that an OpenAI-compatible endpoint, when sent an image, receives the image in the request
+
+    """
+    import base64
+    import requests
+
+    litellm.set_verbose = True
+    api_base = "https://my-custom.api.openai.com"
+
+    # Fetch and encode a test image
+    url = "https://dummyimage.com/100/100/fff&text=Test+image"
+    response = requests.get(url)
+    file_data = response.content
+    encoded_file = base64.b64encode(file_data).decode("utf-8")
+    base64_image = f"data:image/png;base64,{encoded_file}"
+
+    mock_response = ModelResponse(
+        id="cmpl-mock",
+        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
+        created=int(datetime.now().timestamp()),
+        model="my-custom-model",
+    )
+
+    mock_request = respx_mock.post(f"{api_base}/chat/completions").mock(
+        return_value=httpx.Response(200, json=mock_response.dict())
+    )
+
+    response = await litellm.acompletion(
+        model="openai/my-custom-model",
+        max_tokens=10,
+        api_base=api_base,  # use the mock api
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What's in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": base64_image},
+                    },
+                ],
+            }
+        ],
+    )
+
+    assert mock_request.called
+    request_body = json.loads(mock_request.calls[0].request.content)
+
+    print("request_body: ", request_body)
+
+    assert request_body == {
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What's in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
+                        },
+                    },
+                ],
+            }
+        ],
+        "model": "my-custom-model",
+        "max_tokens": 10,
+    }
+
+    print(f"response: {response}")
+    assert isinstance(response, ModelResponse)
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index fd4b1ea5a..2bb82c6a2 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -2,7 +2,7 @@ import json
 import os
 import sys
 from datetime import datetime
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, patch, MagicMock
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )
@@ -18,87 +18,75 @@ from litellm import Choices, Message, ModelResponse
 
 
 @pytest.mark.asyncio
-@pytest.mark.respx
-async def test_o1_handle_system_role(respx_mock: MockRouter):
+async def test_o1_handle_system_role():
     """
     Tests that:
     - max_tokens is translated to 'max_completion_tokens'
     - role 'system' is translated to 'user'
     """
+    from openai import AsyncOpenAI
+
     litellm.set_verbose = True
 
-    mock_response = ModelResponse(
-        id="cmpl-mock",
-        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
-        created=int(datetime.now().timestamp()),
-        model="o1-preview",
-    )
+    client = AsyncOpenAI(api_key="fake-api-key")
 
-    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
-        return_value=httpx.Response(200, json=mock_response.dict())
-    )
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_client:
+        try:
+            await litellm.acompletion(
+                model="o1-preview",
+                max_tokens=10,
+                messages=[{"role": "system", "content": "Hello!"}],
+                client=client,
+            )
+        except Exception as e:
+            print(f"Error: {e}")
 
-    response = await litellm.acompletion(
-        model="o1-preview",
-        max_tokens=10,
-        messages=[{"role": "system", "content": "Hello!"}],
-    )
+        mock_client.assert_called_once()
+        request_body = mock_client.call_args.kwargs
 
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
+        print("request_body: ", request_body)
 
-    print("request_body: ", request_body)
-
-    assert request_body == {
-        "model": "o1-preview",
-        "max_completion_tokens": 10,
-        "messages": [{"role": "user", "content": "Hello!"}],
-    }
-
-    print(f"response: {response}")
-    assert isinstance(response, ModelResponse)
+        assert request_body["model"] == "o1-preview"
+        assert request_body["max_completion_tokens"] == 10
+        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
 
 
 @pytest.mark.asyncio
-@pytest.mark.respx
 @pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
-async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
+async def test_o1_max_completion_tokens(model: str):
     """
     Tests that:
     - max_completion_tokens is passed directly to OpenAI chat completion models
     """
+    from openai import AsyncOpenAI
+
     litellm.set_verbose = True
 
-    mock_response = ModelResponse(
-        id="cmpl-mock",
-        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
-        created=int(datetime.now().timestamp()),
-        model=model,
-    )
+    client = AsyncOpenAI(api_key="fake-api-key")
 
-    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
-        return_value=httpx.Response(200, json=mock_response.dict())
-    )
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_client:
+        try:
+            await litellm.acompletion(
+                model=model,
+                max_completion_tokens=10,
+                messages=[{"role": "user", "content": "Hello!"}],
+                client=client,
+            )
+        except Exception as e:
+            print(f"Error: {e}")
 
-    response = await litellm.acompletion(
-        model=model,
-        max_completion_tokens=10,
-        messages=[{"role": "user", "content": "Hello!"}],
-    )
+        mock_client.assert_called_once()
+        request_body = mock_client.call_args.kwargs
 
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
+        print("request_body: ", request_body)
 
-    print("request_body: ", request_body)
-
-    assert request_body == {
-        "model": model,
-        "max_completion_tokens": 10,
-        "messages": [{"role": "user", "content": "Hello!"}],
-    }
-
-    print(f"response: {response}")
-    assert isinstance(response, ModelResponse)
+        assert request_body["model"] == model
+        assert request_body["max_completion_tokens"] == 10
+        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
 
 
 def test_litellm_responses():
diff --git a/tests/llm_translation/test_supports_vision.py b/tests/llm_translation/test_supports_vision.py
deleted file mode 100644
index 01188d3b9..000000000
--- a/tests/llm_translation/test_supports_vision.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import json
-import os
-import sys
-from datetime import datetime
-from unittest.mock import AsyncMock
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-
-
-import httpx
-import pytest
-from respx import MockRouter
-
-import litellm
-from litellm import Choices, Message, ModelResponse
-
-
-@pytest.mark.asyncio()
-@pytest.mark.respx
-async def test_vision_with_custom_model(respx_mock: MockRouter):
-    """
-    Tests that an OpenAI compatible endpoint when sent an image will receive the image in the request
-
-    """
-    import base64
-    import requests
-
-    litellm.set_verbose = True
-    api_base = "https://my-custom.api.openai.com"
-
-    # Fetch and encode a test image
-    url = "https://dummyimage.com/100/100/fff&text=Test+image"
-    response = requests.get(url)
-    file_data = response.content
-    encoded_file = base64.b64encode(file_data).decode("utf-8")
-    base64_image = f"data:image/png;base64,{encoded_file}"
-
-    mock_response = ModelResponse(
-        id="cmpl-mock",
-        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
-        created=int(datetime.now().timestamp()),
-        model="my-custom-model",
-    )
-
-    mock_request = respx_mock.post(f"{api_base}/chat/completions").mock(
-        return_value=httpx.Response(200, json=mock_response.dict())
-    )
-
-    response = await litellm.acompletion(
-        model="openai/my-custom-model",
-        max_tokens=10,
-        api_base=api_base,  # use the mock api
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "What's in this image?"},
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": base64_image},
-                    },
-                ],
-            }
-        ],
-    )
-
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-
-    print("request_body: ", request_body)
-
-    assert request_body == {
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "What's in this image?"},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
-                        },
-                    },
-                ],
-            }
-        ],
-        "model": "my-custom-model",
-        "max_tokens": 10,
-    }
-
-    print(f"response: {response}")
-    assert isinstance(response, ModelResponse)
diff --git a/tests/llm_translation/test_text_completion_unit_tests.py b/tests/llm_translation/test_text_completion_unit_tests.py
index 9d5359a4a..ca239ebd4 100644
--- a/tests/llm_translation/test_text_completion_unit_tests.py
+++ b/tests/llm_translation/test_text_completion_unit_tests.py
@@ -6,6 +6,7 @@ from unittest.mock import AsyncMock
 import pytest
 import httpx
 from respx import MockRouter
+from unittest.mock import patch, MagicMock, AsyncMock
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )
@@ -68,13 +69,16 @@ def test_convert_dict_to_text_completion_response():
     assert response.choices[0].logprobs.top_logprobs == [None, {",": -2.1568563}]
 
 
+@pytest.mark.skip(
+    reason="need to migrate huggingface to support httpx client being passed in"
+)
 @pytest.mark.asyncio
 @pytest.mark.respx
-async def test_huggingface_text_completion_logprobs(respx_mock: MockRouter):
+async def test_huggingface_text_completion_logprobs():
     """Test text completion with Hugging Face, focusing on logprobs structure"""
     litellm.set_verbose = True
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 
-    # Mock the raw response from Hugging Face
     mock_response = [
         {
             "generated_text": ",\n\nI have a question...",  # truncated for brevity
             "details": {
                 "finish_reason": "length",
                 "generated_tokens": 100,
                 "seed": None,
                 "prefill": [],
                 "tokens": [
                     {"id": 28725, "text": ",", "logprob": -1.7626953, "special": False},
                     {
                         "id": 13,
                         "text": "\n",
                         "logprob": -1.7314453,
                         "special": False,
                     },
                 ]
             }
         }
     ]
@@ -91,46 +95,48 @@
-    # Mock the API request
-    mock_request = respx_mock.post(
-        "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
-    ).mock(return_value=httpx.Response(200, json=mock_response))
+    return_val = AsyncMock()
 
-    response = await litellm.atext_completion(
-        model="huggingface/mistralai/Mistral-7B-v0.1",
-        prompt="good morning",
-    )
+    return_val.json.return_value = mock_response
 
-    # Verify the request
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-    assert request_body == {
-        "inputs": "good morning",
-        "parameters": {"details": True, "return_full_text": False},
-        "stream": False,
-    }
+    client = AsyncHTTPHandler()
+    with patch.object(client, "post", return_value=return_val) as mock_post:
+        response = await litellm.atext_completion(
+            model="huggingface/mistralai/Mistral-7B-v0.1",
+            prompt="good morning",
+            client=client,
+        )
 
-    print("response=", response)
+        # Verify the request
+        mock_post.assert_called_once()
+        request_body = json.loads(mock_post.call_args.kwargs["data"])
+        assert request_body == {
+            "inputs": "good morning",
+            "parameters": {"details": True, "return_full_text": False},
+            "stream": False,
+        }
 
-    # Verify response structure
-    assert isinstance(response, TextCompletionResponse)
-    assert response.object == "text_completion"
-    assert response.model == "mistralai/Mistral-7B-v0.1"
+        print("response=", response)
 
-    # Verify logprobs structure
-    choice = response.choices[0]
-    assert choice.finish_reason == "length"
-    assert choice.index == 0
-    assert isinstance(choice.logprobs.tokens, list)
-    assert isinstance(choice.logprobs.token_logprobs, list)
-    assert isinstance(choice.logprobs.text_offset, list)
-    assert isinstance(choice.logprobs.top_logprobs, list)
-    assert choice.logprobs.tokens == [",", "\n"]
-    assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
-    assert choice.logprobs.text_offset == [0, 1]
-    assert choice.logprobs.top_logprobs == [{}, {}]
+        # Verify response structure
+        assert isinstance(response, TextCompletionResponse)
+        assert response.object == "text_completion"
+        assert response.model == "mistralai/Mistral-7B-v0.1"
 
-    # Verify usage
-    assert response.usage["completion_tokens"] > 0
-    assert response.usage["prompt_tokens"] > 0
-    assert response.usage["total_tokens"] > 0
+        # Verify logprobs structure
+        choice = response.choices[0]
+        assert choice.finish_reason == "length"
+        assert choice.index == 0
+        assert isinstance(choice.logprobs.tokens, list)
+        assert isinstance(choice.logprobs.token_logprobs, list)
+        assert isinstance(choice.logprobs.text_offset, list)
+        assert isinstance(choice.logprobs.top_logprobs, list)
+        assert choice.logprobs.tokens == [",", "\n"]
+        assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
+        assert choice.logprobs.text_offset == [0, 1]
+        assert choice.logprobs.top_logprobs == [{}, {}]
+
+        # Verify usage
+        assert response.usage["completion_tokens"] > 0
+        assert response.usage["prompt_tokens"] > 0
+        assert response.usage["total_tokens"] > 0
diff --git a/tests/local_testing/test_azure_openai.py b/tests/local_testing/test_azure_openai.py
index e82419c17..fa4226b14 100644
--- a/tests/local_testing/test_azure_openai.py
+++ b/tests/local_testing/test_azure_openai.py
@@ -33,7 +33,7 @@ from litellm.router import Router
 
 
 @pytest.mark.asyncio()
 @pytest.mark.respx()
-async def test_azure_tenant_id_auth(respx_mock: MockRouter):
+async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
     """
     Tests when we set tenant_id, client_id, client_secret they don't get sent with the request