From b78043f904eefe7fcc42d00e4cd029e1cd49a0be Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 29 Jun 2024 19:14:19 -0700
Subject: [PATCH] test(test_amazing_vertex_completion.py): reduce vertex tests
 - quota exhaustion

---
 .../tests/test_amazing_vertex_completion.py   | 42 -------------------
 1 file changed, 42 deletions(-)

diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 5788d331c..3a48bcb6c 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -1111,48 +1111,6 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
         assert "hello" in mock_call.call_args.kwargs["headers"]
 
 
-@pytest.mark.parametrize("sync_mode", [True, False])
-@pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
-@pytest.mark.asyncio
-async def test_gemini_pro_httpx_custom_api_base_streaming_real_call(
-    provider, sync_mode
-):
-    load_vertex_ai_credentials()
-    import random
-
-    litellm.set_verbose = True
-    messages = [
-        {
-            "role": "user",
-            "content": "Hey, how's it going?",
-        }
-    ]
-
-    vertex_region = random.sample(["asia-southeast1", "us-central1"], k=1)[0]
-    if sync_mode is True:
-        response = completion(
-            model="vertex_ai_beta/gemini-1.5-flash",
-            messages=messages,
-            api_base="https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash",
-            stream=True,
-            vertex_region=vertex_region,
-        )
-
-        for chunk in response:
-            print(chunk)
-    else:
-        response = await litellm.acompletion(
-            model="vertex_ai_beta/gemini-1.5-flash",
-            messages=messages,
-            api_base="https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash",
-            stream=True,
-            vertex_region=vertex_region,
-        )
-
-        async for chunk in response:
-            print(chunk)
-
-
 @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
 @pytest.mark.parametrize("sync_mode", [True])
 @pytest.mark.parametrize("provider", ["vertex_ai"])