diff --git a/tests/llm_translation/test_vertex.py b/tests/llm_translation/test_vertex.py
index e4f8af6ac..73960020d 100644
--- a/tests/llm_translation/test_vertex.py
+++ b/tests/llm_translation/test_vertex.py
@@ -17,7 +17,6 @@ import litellm
 from litellm import get_optional_params
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
 import httpx
-from respx import MockRouter
 
 
 def test_completion_pydantic_obj_2():
@@ -1355,69 +1354,3 @@ def test_vertex_embedding_url(model, expected_url):
 
     assert url == expected_url
     assert endpoint == "predict"
-
-
-@pytest.mark.asyncio
-@pytest.mark.respx
-async def test_vertexai_embedding(respx_mock: MockRouter):
-    """
-    Tests that:
-    - Request URL and body are correctly formatted for Vertex AI embeddings
-    - Response is properly parsed into litellm's embedding response format
-    """
-    litellm.set_verbose = True
-
-    # Test input
-    input_text = ["good morning from litellm", "this is another item"]
-
-    # Expected request/response
-    expected_url = "https://us-central1-aiplatform.googleapis.com/v1/projects/633608382793/locations/us-central1/endpoints/1004708436694269952:predict"
-    expected_request = {
-        "instances": [
-            {"inputs": "good morning from litellm"},
-            {"inputs": "this is another item"},
-        ],
-        "parameters": {},
-    }
-
-    mock_response = {
-        "predictions": [
-            [[-0.000431762, -0.04416759, -0.03443353]],  # Truncated embedding vector
-            [[-0.000431762, -0.04416759, -0.03443353]],  # Truncated embedding vector
-        ],
-        "deployedModelId": "2275167734310371328",
-        "model": "projects/633608382793/locations/us-central1/models/snowflake-arctic-embed-m-long-1731622468876",
-        "modelDisplayName": "snowflake-arctic-embed-m-long-1731622468876",
-        "modelVersionId": "1",
-    }
-
-    # Setup mock request
-    mock_request = respx_mock.post(expected_url).mock(
-        return_value=httpx.Response(200, json=mock_response)
-    )
-
-    # Make request
-    response = await litellm.aembedding(
-        vertex_project="633608382793",
-        model="vertex_ai/1004708436694269952",
-        input=input_text,
-    )
-
-    # Assert request was made correctly
-    assert mock_request.called
-    request_body = json.loads(mock_request.calls[0].request.content)
-    print("\n\nrequest_body", request_body)
-    print("\n\nexpected_request", expected_request)
-    assert request_body == expected_request
-
-    # Assert response structure
-    assert response is not None
-    assert hasattr(response, "data")
-    assert len(response.data) == len(input_text)
-
-    # Assert embedding structure
-    for embedding in response.data:
-        assert "embedding" in embedding
-        assert isinstance(embedding["embedding"], list)
-        assert len(embedding["embedding"]) > 0
-        assert all(isinstance(x, float) for x in embedding["embedding"])
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py
index 2de53696f..081e67a5c 100644
--- a/tests/local_testing/test_amazing_vertex_completion.py
+++ b/tests/local_testing/test_amazing_vertex_completion.py
@@ -18,6 +18,8 @@ import json
 import os
 import tempfile
 from unittest.mock import AsyncMock, MagicMock, patch
+from respx import MockRouter
+import httpx
 
 import pytest
 
@@ -3051,3 +3053,70 @@ def test_custom_api_base(api_base):
         assert url == api_base + ":"
     else:
         assert url == test_endpoint
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_vertexai_embedding_finetuned(respx_mock: MockRouter):
+    """
+    Mocked check of litellm.aembedding for a model given as "vertex_ai/<numeric id>":
+    - the POST to the endpoint's ":predict" URL carries the expected JSON body
+    - the mocked predictions come back as one non-empty list of floats per input
+    """
+    load_vertex_ai_credentials()  # defined outside this hunk; presumably sets up Vertex credentials (confirm)
+    litellm.set_verbose = True  # module-level flag; this test does not reset it
+
+    # Two inputs, so the per-input request/response mapping is exercised
+    input_text = ["good morning from litellm", "this is another item"]
+
+    # URL the call is expected to hit, and the JSON body it is expected to send
+    expected_url = "https://us-central1-aiplatform.googleapis.com/v1/projects/633608382793/locations/us-central1/endpoints/1004708436694269952:predict"
+    expected_request = {
+        "instances": [
+            {"inputs": "good morning from litellm"},
+            {"inputs": "this is another item"},
+        ],
+        "parameters": {},
+    }
+
+    mock_response = {
+        "predictions": [
+            [[-0.000431762, -0.04416759, -0.03443353]],  # Shortened sample vector, nested one level deep
+            [[-0.000431762, -0.04416759, -0.03443353]],  # Shortened sample vector, nested one level deep
+        ],
+        "deployedModelId": "2275167734310371328",
+        "model": "projects/633608382793/locations/us-central1/models/snowflake-arctic-embed-m-long-1731622468876",
+        "modelDisplayName": "snowflake-arctic-embed-m-long-1731622468876",
+        "modelVersionId": "1",
+    }
+
+    # Register the mocked route: a POST to expected_url is answered with mock_response and a 200
+    mock_request = respx_mock.post(expected_url).mock(
+        return_value=httpx.Response(200, json=mock_response)
+    )
+
+    # Call under test; the project and endpoint ids passed here also appear in expected_url
+    response = await litellm.aembedding(
+        vertex_project="633608382793",
+        model="vertex_ai/1004708436694269952",
+        input=input_text,
+    )
+
+    # The mocked route must have been hit, and the first call's body must equal expected_request
+    assert mock_request.called
+    request_body = json.loads(mock_request.calls[0].request.content)
+    print("\n\nrequest_body", request_body)
+    print("\n\nexpected_request", expected_request)
+    assert request_body == expected_request
+
+    # response.data must hold one entry per input string
+    assert response is not None
+    assert hasattr(response, "data")
+    assert len(response.data) == len(input_text)
+
+    # Each entry must hold a non-empty list of floats under "embedding"
+    for embedding in response.data:
+        assert "embedding" in embedding
+        assert isinstance(embedding["embedding"], list)
+        assert len(embedding["embedding"]) > 0
+        assert all(isinstance(x, float) for x in embedding["embedding"])