diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md
index 03190c839..744c5e3ff 100644
--- a/docs/my-website/docs/pass_through/vertex_ai.md
+++ b/docs/my-website/docs/pass_through/vertex_ai.md
@@ -4,17 +4,9 @@ import TabItem from '@theme/TabItem';
 
 # Vertex AI SDK
 
-Use VertexAI SDK to call endpoints on LiteLLM Gateway (native provider format)
-
-:::tip
-
-Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../providers/vertex.md)
-
-:::
-
 Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format (no translation).
 
-Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex-ai`
+Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai`
 
 
 #### **Example Usage**
@@ -23,9 +15,9 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE
 <TabItem value="curl" label="curl">
 
 ```bash
-curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \
+curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-1234" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
   -d '{
     "contents":[{
       "role": "user", 
@@ -43,7 +35,7 @@ const { VertexAI } = require('@google-cloud/vertexai');
 const vertexAI = new VertexAI({
     project: 'your-project-id', // enter your vertex project id
     location: 'us-central1', // enter your vertex region
-    apiEndpoint: "localhost:4000/vertex-ai" // <proxy-server-url>/vertex-ai # note, do not include 'https://' in the url
+    apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
 });
 
 const model = vertexAI.getGenerativeModel({
@@ -87,7 +79,7 @@ generateContent();
 - Tuning API
 - CountTokens API
 
-## Authentication to Vertex AI
+#### Authentication to Vertex AI
 
 LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
 
@@ -116,9 +108,9 @@ from vertexai.preview.generative_models import GenerativeModel
 LITE_LLM_ENDPOINT = "http://localhost:4000"
 
 vertexai.init(
-    project="<your-vertex-ai-project-id>", # enter your project id
-    location="<your-vertex-ai-location>", # enter your region
-    api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai", # route on litellm
+    project="<your-vertex_ai-project-id>", # enter your project id
+    location="<your-vertex_ai-location>", # enter your region
+    api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
     api_transport="rest",
 )
 
@@ -158,7 +150,7 @@ from google.auth.credentials import Credentials
 from vertexai.generative_models import GenerativeModel
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -219,7 +211,7 @@ import vertexai
 from vertexai.generative_models import GenerativeModel
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 vertexai.init(
     project="adroit-crow-413218",
@@ -247,7 +239,7 @@ from google.auth.credentials import Credentials
 from vertexai.generative_models import GenerativeModel
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -297,9 +289,9 @@ print(response.text)
 <TabItem value="Curl" label="Curl">
 
 ```shell
-curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
+curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-1234" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
   -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
 ```
 
@@ -320,7 +312,7 @@ import vertexai
 from vertexai.generative_models import GenerativeModel
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -358,7 +350,7 @@ from google.auth.credentials import Credentials
 from vertexai.generative_models import GenerativeModel
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -413,9 +405,9 @@ def embed_text(
 <TabItem value="curl" label="Curl">
 
 ```shell
-curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
+curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-1234" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
   -d '{"instances":[{"content": "gm"}]}'
 ```
 
@@ -437,7 +429,7 @@ import vertexai
 from google.auth.credentials import Credentials
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -482,7 +474,7 @@ import vertexai
 from google.auth.credentials import Credentials
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -547,9 +539,9 @@ print(f"Created output image using {len(images[0]._image_bytes)} bytes")
 <TabItem value="curl" label="Curl">
 
 ```shell
-curl http://localhost:4000/vertex-ai/publishers/google/models/imagen-3.0-generate-001:predict \
+curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-1234" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
   -d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
 ```
 
@@ -571,7 +563,7 @@ from vertexai.generative_models import GenerativeModel
 import vertexai
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -614,7 +606,7 @@ import vertexai
 from google.auth.credentials import Credentials
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -677,9 +669,9 @@ print(f"Total Token Count: {usage_metadata.total_token_count}")
 
 
 ```shell
-curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
+curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-1234" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
   -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
 ```
 
@@ -700,7 +692,7 @@ from vertexai.preview.tuning import sft
 import vertexai
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 
 vertexai.init(
@@ -741,7 +733,7 @@ import vertexai
 from google.auth.credentials import Credentials
 
 LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
+LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
 
 import datetime
 
@@ -801,9 +793,9 @@ print(sft_tuning_job.experiment)
 <TabItem value="curl" label="Curl">
 
 ```shell
-curl http://localhost:4000/vertex-ai/tuningJobs \
+curl http://localhost:4000/vertex_ai/tuningJobs \
       -H "Content-Type: application/json" \
-      -H "Authorization: Bearer sk-1234" \
+      -H "x-litellm-api-key: Bearer sk-1234" \
       -d '{
   "baseModel": "gemini-1.0-pro-002",
   "supervisedTuningSpec" : {
@@ -872,8 +864,8 @@ httpx_client = httpx.Client(timeout=30)
 
 print("Creating cached content")
 create_cache = httpx_client.post(
-    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
-    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
+    url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents",
+    headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"},
     json={
         "model": "gemini-1.5-pro-001",
         "contents": [
@@ -920,5 +912,130 @@ response = client.chat.completions.create(
 print("Response from proxy:", response)
 ```
 
+</TabItem>
+</Tabs>
+
+
+## Advanced
+
+Pre-requisites
+- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
+
+Use this to avoid giving developers your raw Vertex AI credentials, while still letting them use Vertex AI endpoints.
+
+### Use with Virtual Keys 
+
+1. Setup environment
+
+```bash
+export DATABASE_URL=""
+export LITELLM_MASTER_KEY=""
+```
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+2. Generate virtual key 
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'x-litellm-api-key: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{}'
+```
+
+Expected Response 
+
+```bash
+{
+    ...
+    "key": "sk-1234ewknldferwedojwojw"
+}
+```
+
+3. Test it! 
+
+
+```bash
+curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
+  -H "Content-Type: application/json" \
+  -H "x-litellm-api-key: Bearer sk-1234ewknldferwedojwojw" \
+  -d '{
+    "contents":[{
+      "role": "user", 
+      "parts":[{"text": "How are you doing today?"}]
+    }]
+  }'
+```
+
+### Send `tags` in request headers
+
+Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.
+
+Pass `tags` in the request headers as a comma-separated list. In the example below, the following tags will be tracked:
+
+```
+tags: ["vertex-js-sdk", "pass-through-endpoint"]
+```
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
+  -H "Content-Type: application/json" \
+  -H "x-litellm-api-key: Bearer sk-1234" \
+  -H "tags: vertex-js-sdk,pass-through-endpoint" \
+  -d '{
+    "contents":[{
+      "role": "user", 
+      "parts":[{"text": "How are you doing today?"}]
+    }]
+  }'
+```
+
+</TabItem>
+<TabItem value="js" label="Vertex Node.js SDK">
+
+```javascript
+const { VertexAI } = require('@google-cloud/vertexai');
+
+const vertexAI = new VertexAI({
+    project: 'your-project-id', // enter your vertex project id
+    location: 'us-central1', // enter your vertex region
+    apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
+});
+
+const model = vertexAI.getGenerativeModel({
+    model: 'gemini-1.0-pro'
+}, {
+    customHeaders: {
+        "x-litellm-api-key": "sk-1234", // Your litellm Virtual Key
+        "tags": "vertex-js-sdk,pass-through-endpoint"
+    }
+});
+
+async function generateContent() {
+    try {
+        const prompt = {
+            contents: [{
+                role: 'user',
+                parts: [{ text: 'How are you doing today?' }]
+            }]
+        };
+
+        const response = await model.generateContent(prompt);
+        console.log('Response:', response);
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+generateContent();
+```
+
 </TabItem>
 </Tabs>
\ No newline at end of file
diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index f60fd0166..77e723679 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -388,6 +388,7 @@ async def pass_through_request(  # noqa: PLR0915
             _parsed_body=_parsed_body,
             passthrough_logging_payload=passthrough_logging_payload,
             litellm_call_id=litellm_call_id,
+            request=request,
         )
         # done for supporting 'parallel_request_limiter.py' with pass-through endpoints
         logging_obj.update_environment_variables(
@@ -567,6 +568,7 @@ async def pass_through_request(  # noqa: PLR0915
 
 
 def _init_kwargs_for_pass_through_endpoint(
+    request: Request,
     user_api_key_dict: UserAPIKeyAuth,
     passthrough_logging_payload: PassthroughStandardLoggingPayload,
     _parsed_body: Optional[dict] = None,
@@ -582,6 +584,12 @@ def _init_kwargs_for_pass_through_endpoint(
     }
     if _litellm_metadata:
         _metadata.update(_litellm_metadata)
+
+    _metadata = _update_metadata_with_tags_in_header(
+        request=request,
+        metadata=_metadata,
+    )
+
     kwargs = {
         "litellm_params": {
             "metadata": _metadata,
@@ -593,6 +601,32 @@ def _init_kwargs_for_pass_through_endpoint(
     return kwargs
 
 
+def _update_metadata_with_tags_in_header(request: Request, metadata: dict) -> dict:
+    """
+    Add tags from the `tags` request header to `metadata`.
+
+    The header is a comma separated list, e.g. `tags: tag1,tag2`. Whitespace
+    around each tag is stripped and empty entries are dropped, so `tag1, tag2`
+    and `tag1,tag2,` both produce `["tag1", "tag2"]`.
+
+    Args:
+        request: incoming request; only `request.headers` is read.
+        metadata: logging metadata dict, updated in place.
+
+    Returns:
+        The same `metadata` dict. `metadata["tags"]` is set (replacing any
+        existing value) only when the header contains at least one tag.
+    """
+    _tags_header = request.headers.get("tags")
+    if not _tags_header:
+        return metadata
+    # Tolerate "a, b" style headers: trim each tag and skip empty pieces.
+    _tags = [_tag.strip() for _tag in _tags_header.split(",") if _tag.strip()]
+    if _tags:
+        metadata["tags"] = _tags
+    return metadata
+
+
 def create_pass_through_route(
     endpoint,
     target: str,
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
index fbf37ce8d..1a0d09a88 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -113,7 +113,12 @@ def construct_target_url(
 
 
 @router.api_route(
-    "/vertex-ai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
+    "/vertex-ai/{endpoint:path}",
+    methods=["GET", "POST", "PUT", "DELETE"],
+    include_in_schema=False,
+)
+@router.api_route(
+    "/vertex_ai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
 )
 async def vertex_proxy_route(
     endpoint: str,
diff --git a/tests/pass_through_tests/test_local_vertex.js b/tests/pass_through_tests/test_local_vertex.js
index 7ae9b942a..9ee603e7a 100644
--- a/tests/pass_through_tests/test_local_vertex.js
+++ b/tests/pass_through_tests/test_local_vertex.js
@@ -1,31 +1,22 @@
 const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
 
 
-// Import fetch if the SDK uses it
-const originalFetch = global.fetch || require('node-fetch');
-
-// Monkey-patch the fetch used internally
-global.fetch = async function patchedFetch(url, options) {
-    // Modify the URL to use HTTP instead of HTTPS
-    if (url.startsWith('https://localhost:4000')) {
-        url = url.replace('https://', 'http://');
-    }
-    console.log('Patched fetch sending request to:', url);
-    return originalFetch(url, options);
-};
 
 const vertexAI = new VertexAI({
     project: 'adroit-crow-413218',
     location: 'us-central1',
-    apiEndpoint: "localhost:4000/vertex-ai"
+    apiEndpoint: "127.0.0.1:4000/vertex-ai"
 });
 
+// Create customHeaders using Headers
+const customHeaders = new Headers({
+    "X-Litellm-Api-Key": "sk-1234",
+    tags: "vertexjs,test-2"
+});
 
 // Use customHeaders in RequestOptions
 const requestOptions = {
-    customHeaders: new Headers({
-        "x-litellm-api-key": "sk-1234"
-    })
+    customHeaders: customHeaders,
 };
 
 const generativeModel = vertexAI.getGenerativeModel(
@@ -33,7 +24,7 @@ const generativeModel = vertexAI.getGenerativeModel(
     requestOptions
 );
 
-async function streamingResponse() {
+async function testModel() {
     try {
         const request = {
             contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
@@ -49,20 +40,4 @@ async function streamingResponse() {
     }
 }
 
-
-async function nonStreamingResponse() {
-    try {
-        const request = {
-            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
-          };
-        const response = await generativeModel.generateContent(request);
-        console.log('non streaming response: ', JSON.stringify(response));
-    } catch (error) {
-        console.error('Error:', error);
-    }
-}
-
-
-
-streamingResponse();
-nonStreamingResponse();
\ No newline at end of file
+testModel();
\ No newline at end of file
diff --git a/tests/pass_through_tests/test_vertex_ai.py b/tests/pass_through_tests/test_vertex_ai.py
index dee0d59eb..99b513e82 100644
--- a/tests/pass_through_tests/test_vertex_ai.py
+++ b/tests/pass_through_tests/test_vertex_ai.py
@@ -99,7 +99,7 @@ async def test_basic_vertex_ai_pass_through_with_spendlog():
     vertexai.init(
         project="adroit-crow-413218",
         location="us-central1",
-        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
+        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
         api_transport="rest",
     )
 
@@ -131,7 +131,7 @@ async def test_basic_vertex_ai_pass_through_streaming_with_spendlog():
     vertexai.init(
         project="adroit-crow-413218",
         location="us-central1",
-        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
+        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
         api_transport="rest",
     )
 
@@ -170,7 +170,7 @@ async def test_vertex_ai_pass_through_endpoint_context_caching():
     vertexai.init(
         project="adroit-crow-413218",
         location="us-central1",
-        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
+        api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
         api_transport="rest",
     )
 
diff --git a/tests/pass_through_tests/test_vertex_with_spend.test.js b/tests/pass_through_tests/test_vertex_with_spend.test.js
new file mode 100644
index 000000000..8a5b91557
--- /dev/null
+++ b/tests/pass_through_tests/test_vertex_with_spend.test.js
@@ -0,0 +1,194 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const { writeFileSync } = require('fs');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+let lastCallId;
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Rewrite https:// to http:// for requests aimed at 127.0.0.1:4000 (assumes url is a string)
+    if (url.startsWith('https://127.0.0.1:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    
+    const response = await originalFetch(url, options);
+    
+    // Record this response's 'x-litellm-call-id' header on every call (null when the header is absent)
+    lastCallId = response.headers.get('x-litellm-call-id');
+        
+    return response;
+};
+
+function loadVertexAiCredentials() {
+    console.log("loading vertex ai credentials");
+    const filepath = path.dirname(__filename);
+    const vertexKeyPath = path.join(filepath, "vertex_key.json");
+
+    // Start from an empty object; fields from vertex_key.json (next to this file) are loaded if available
+    let serviceAccountKeyData = {};
+
+    // Try to read existing vertex_key.json
+    try {
+        const content = fs.readFileSync(vertexKeyPath, 'utf8');
+        if (content && content.trim()) {
+            serviceAccountKeyData = JSON.parse(content);
+        }
+    } catch (error) {
+        // Read or JSON.parse failure is deliberately ignored; continue with the empty object
+    }
+
+    // Override key id / private key from env vars ("" when unset); literal "\n" sequences become real newlines
+    const privateKeyId = process.env.VERTEX_AI_PRIVATE_KEY_ID || "";
+    const privateKey = (process.env.VERTEX_AI_PRIVATE_KEY || "").replace(/\\n/g, "\n");
+    
+    serviceAccountKeyData.private_key_id = privateKeyId;
+    serviceAccountKeyData.private_key = privateKey;
+
+    // Write the merged credentials to a timestamp-named file in the OS temp dir (not removed by this function)
+    const tempFilePath = path.join(os.tmpdir(), `vertex-credentials-${Date.now()}.json`);
+    writeFileSync(tempFilePath, JSON.stringify(serviceAccountKeyData, null, 2));
+    
+    // Point GOOGLE_APPLICATION_CREDENTIALS at the generated file
+    process.env.GOOGLE_APPLICATION_CREDENTIALS = tempFilePath;
+}
+
+// Credential loading is currently disabled; uncomment to run it before the tests
+// beforeAll(() => {
+//     loadVertexAiCredentials();
+// });
+
+
+
+describe('Vertex AI Tests', () => {
+    test('should successfully generate non-streaming content with tags', async () => {
+        const vertexAI = new VertexAI({
+            project: 'adroit-crow-413218',
+            location: 'us-central1',
+            apiEndpoint: "127.0.0.1:4000/vertex_ai"
+        });
+
+        const customHeaders = new Headers({
+            "x-litellm-api-key": "sk-1234",
+            "tags": "vertex-js-sdk,pass-through-endpoint"
+        });
+
+        const requestOptions = {
+            customHeaders: customHeaders
+        };
+
+        const generativeModel = vertexAI.getGenerativeModel(
+            { model: 'gemini-1.0-pro' },
+            requestOptions
+        );
+
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'Say "hello test" and nothing else'}]}]
+        };
+
+        const result = await generativeModel.generateContent(request);
+        expect(result).toBeDefined();
+        
+        // Read the call ID now: patchedFetch overwrites lastCallId on every response, including the spend-log fetch below
+        const callId = lastCallId;
+        console.log("Captured Call ID:", callId);
+
+        // Fixed 15s pause before querying the spend logs
+        await new Promise(resolve => setTimeout(resolve, 15000));
+
+        // Look up the spend log entry recorded for this call ID
+        const spendResponse = await fetch(
+            `http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
+            {
+                headers: {
+                    'Authorization': 'Bearer sk-1234'
+                }
+            }
+        );
+        
+        const spendData = await spendResponse.json();
+        console.log("spendData", spendData)
+        expect(spendData).toBeDefined();
+        expect(spendData[0].request_id).toBe(callId);
+        expect(spendData[0].call_type).toBe('pass_through_endpoint');
+        expect(spendData[0].request_tags).toEqual(['vertex-js-sdk', 'pass-through-endpoint']); // the two values sent in the `tags` header
+        expect(spendData[0].metadata).toHaveProperty('user_api_key');
+        expect(spendData[0].model).toContain('gemini');
+        expect(spendData[0].spend).toBeGreaterThan(0);
+    }, 25000); // per-test timeout in ms; has to cover the 15s pause above
+
+    test('should successfully generate streaming content with tags', async () => {
+        const vertexAI = new VertexAI({
+            project: 'adroit-crow-413218',
+            location: 'us-central1',
+            apiEndpoint: "127.0.0.1:4000/vertex_ai"
+        });
+
+        const customHeaders = new Headers({
+            "x-litellm-api-key": "sk-1234",
+            "tags": "vertex-js-sdk,pass-through-endpoint"
+        });
+
+        const requestOptions = {
+            customHeaders: customHeaders
+        };
+
+        const generativeModel = vertexAI.getGenerativeModel(
+            { model: 'gemini-1.0-pro' },
+            requestOptions
+        );
+
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'Say "hello test" and nothing else'}]}]
+        };
+
+        const streamingResult = await generativeModel.generateContentStream(request);
+        expect(streamingResult).toBeDefined();
+
+
+        // NOTE(review): this repeats the toBeDefined() check made just above
+        expect(streamingResult).toBeDefined();
+        
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk:', JSON.stringify(item));
+            expect(item).toBeDefined();
+        }
+
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response:', JSON.stringify(aggregatedResponse));
+        expect(aggregatedResponse).toBeDefined();
+
+        // Read the call ID now: patchedFetch overwrites lastCallId on every response, including the spend-log fetch below
+        const callId = lastCallId;
+        console.log("Captured Call ID:", callId);
+
+        // Fixed 15s pause before querying the spend logs
+        await new Promise(resolve => setTimeout(resolve, 15000));
+
+        // Look up the spend log entry recorded for this call ID
+        const spendResponse = await fetch(
+            `http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
+            {
+                headers: {
+                    'Authorization': 'Bearer sk-1234'
+                }
+            }
+        );
+        
+        const spendData = await spendResponse.json();
+        console.log("spendData", spendData)
+        expect(spendData).toBeDefined();
+        expect(spendData[0].request_id).toBe(callId);
+        expect(spendData[0].call_type).toBe('pass_through_endpoint');
+        expect(spendData[0].request_tags).toEqual(['vertex-js-sdk', 'pass-through-endpoint']); // the two values sent in the `tags` header
+        expect(spendData[0].metadata).toHaveProperty('user_api_key');
+        expect(spendData[0].model).toContain('gemini');
+        expect(spendData[0].spend).toBeGreaterThan(0);
+    }, 25000); // per-test timeout in ms; has to cover the 15s pause above
+});
\ No newline at end of file
diff --git a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
new file mode 100644
index 000000000..c55bdc7a8
--- /dev/null
+++ b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
@@ -0,0 +1,165 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock, Mock, patch, MagicMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import httpx
+import pytest
+import litellm
+from typing import AsyncGenerator
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy.pass_through_endpoints.types import EndpointType
+from litellm.proxy.pass_through_endpoints.success_handler import (
+    PassThroughEndpointLogging,
+)
+from litellm.proxy.pass_through_endpoints.streaming_handler import (
+    PassThroughStreamingHandler,
+)
+
+from fastapi import Request
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+    _init_kwargs_for_pass_through_endpoint,
+    _update_metadata_with_tags_in_header,
+)
+from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload
+
+
+@pytest.fixture
+def mock_request():
+    # Factory fixture: returns the MockRequest class itself, a stand-in that exposes only `.headers`
+    class MockRequest:
+        def __init__(self, headers=None):
+            self.headers = headers or {}  # plain dict; None (or any falsy value) becomes {}
+
+    return MockRequest
+
+
+@pytest.fixture
+def mock_user_api_key_dict():  # auth object whose key / user / team ids the metadata tests assert on
+    return UserAPIKeyAuth(
+        api_key="test-key",
+        user_id="test-user",
+        team_id="test-team",
+    )
+
+
+def test_update_metadata_with_tags_in_header_no_tags(mock_request):
+    """
+    No tags should be added to metadata if they do not exist in headers
+    """
+    # Empty headers: the helper must return the metadata unchanged
+    request = mock_request(headers={})
+    metadata = {"existing": "value"}
+
+    result = _update_metadata_with_tags_in_header(request=request, metadata=metadata)
+
+    assert result == {"existing": "value"}
+    assert "tags" not in result
+
+
+def test_update_metadata_with_tags_in_header_with_tags(mock_request):
+    """
+    Tags should be added to metadata if they exist in headers
+    """
+    # Comma separated header value should become a list under metadata["tags"]
+    request = mock_request(headers={"tags": "tag1,tag2,tag3"})
+    metadata = {"existing": "value"}
+
+    result = _update_metadata_with_tags_in_header(request=request, metadata=metadata)
+
+    assert result == {"existing": "value", "tags": ["tag1", "tag2", "tag3"]}
+
+
+def test_init_kwargs_for_pass_through_endpoint_basic(
+    mock_request, mock_user_api_key_dict
+):
+    """
+    Basic test for init_kwargs_for_pass_through_endpoint
+
+    - metadata should contain user_api_key, user_api_key_user_id, user_api_key_team_id, user_api_key_end_user_id  from `mock_user_api_key_dict`
+    """
+    request = mock_request()
+    passthrough_payload = PassthroughStandardLoggingPayload(
+        url="https://test.com",
+        request_body={},
+    )
+
+    result = _init_kwargs_for_pass_through_endpoint(
+        request=request,
+        user_api_key_dict=mock_user_api_key_dict,
+        passthrough_logging_payload=passthrough_payload,
+        litellm_call_id="test-call-id",
+    )
+
+    assert result["call_type"] == "pass_through_endpoint"
+    assert result["litellm_call_id"] == "test-call-id"
+    assert result["passthrough_logging_payload"] == passthrough_payload
+
+    # No litellm_metadata and no tags header: metadata must equal exactly these auth-derived fields
+    expected_metadata = {
+        "user_api_key": "test-key",
+        "user_api_key_user_id": "test-user",
+        "user_api_key_team_id": "test-team",
+        "user_api_key_end_user_id": "test-user",  # NOTE(review): same value as user_id - confirm intended
+    }
+    assert result["litellm_params"]["metadata"] == expected_metadata
+
+
+def test_init_kwargs_with_litellm_metadata(mock_request, mock_user_api_key_dict):
+    """
+    Expected behavior: litellm_metadata should be merged with default metadata
+
+    see usage example here: https://docs.litellm.ai/docs/pass_through/anthropic_completion#send-litellm_metadata-tags
+    """
+    request = mock_request()
+    parsed_body = {
+        "litellm_metadata": {"custom_field": "custom_value", "tags": ["tag1", "tag2"]}
+    }
+    passthrough_payload = PassthroughStandardLoggingPayload(
+        url="https://test.com",
+        request_body={},
+    )
+
+    result = _init_kwargs_for_pass_through_endpoint(
+        request=request,
+        user_api_key_dict=mock_user_api_key_dict,
+        passthrough_logging_payload=passthrough_payload,
+        _parsed_body=parsed_body,
+        litellm_call_id="test-call-id",
+    )
+
+    # Body-supplied fields (custom_field, tags) must sit alongside the auth-derived user_api_key
+    metadata = result["litellm_params"]["metadata"]
+    print("metadata", metadata)  # debug output only
+    assert metadata["custom_field"] == "custom_value"
+    assert metadata["tags"] == ["tag1", "tag2"]
+    assert metadata["user_api_key"] == "test-key"
+
+
+def test_init_kwargs_with_tags_in_header(mock_request, mock_user_api_key_dict):
+    """
+    Tags should be added to metadata if they exist in headers
+    """
+    request = mock_request(headers={"tags": "tag1,tag2"})
+    passthrough_payload = PassthroughStandardLoggingPayload(
+        url="https://test.com",
+        request_body={},
+    )
+
+    result = _init_kwargs_for_pass_through_endpoint(
+        request=request,
+        user_api_key_dict=mock_user_api_key_dict,
+        passthrough_logging_payload=passthrough_payload,
+        litellm_call_id="test-call-id",
+    )
+
+    # The `tags` header (no request body given) must surface as a list in the returned metadata
+    metadata = result["litellm_params"]["metadata"]
+    print("metadata", metadata)  # debug output only
+    assert metadata["tags"] == ["tag1", "tag2"]