From 1ab5d49132bbeb54c31046882165b86d03a2c3b3 Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Mar 2025 23:46:20 -0700
Subject: [PATCH 01/11] Support `litellm.api_base` for vertex_ai + gemini/
 across completion, embedding, image_generation (#9516)

* test(tests): add unit testing for litellm_proxy integration

* fix(cost_calculator.py): fix tracking cost in sdk when calling proxy

* fix(main.py): respect litellm.api_base on `vertex_ai/` and `gemini/` routes

* fix(main.py): consistently support custom api base across gemini + vertexai on embedding + completion

* feat(vertex_ai/): test

* fix: fix linting error

* test: set api base as None before starting loadtest
---
 litellm/cost_calculator.py                    |  9 ++-
 litellm/llms/vertex_ai/common_utils.py        | 14 +++-
 .../image_generation_handler.py               | 81 +++++++++++--------
 .../vertex_embeddings/embedding_handler.py    |  2 +-
 litellm/main.py                               | 28 +++++++
 litellm/proxy/_new_secret_config.yaml         |  2 +-
 tests/litellm/test_cost_calculator.py         |  6 +-
 .../test_litellm_proxy_provider.py            | 77 +++++++++++++++++-
 .../test_vertex_embeddings_load_test.py       |  3 +-
 .../test_amazing_vertex_completion.py         | 44 ++++++++++
 10 files changed, 223 insertions(+), 43 deletions(-)

diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 55736772af..f5731618a3 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -828,11 +828,14 @@ def get_response_cost_from_hidden_params(
         _hidden_params_dict = hidden_params
 
     additional_headers = _hidden_params_dict.get("additional_headers", {})
-    if additional_headers and "x-litellm-response-cost" in additional_headers:
-        response_cost = additional_headers["x-litellm-response-cost"]
+    if (
+        additional_headers
+        and "llm_provider-x-litellm-response-cost" in additional_headers
+    ):
+        response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
         if response_cost is None:
             return None
-        return float(additional_headers["x-litellm-response-cost"])
+        return float(additional_headers["llm_provider-x-litellm-response-cost"])
     return None
 
 
diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py
index a3f91fbacc..0d792527b4 100644
--- a/litellm/llms/vertex_ai/common_utils.py
+++ b/litellm/llms/vertex_ai/common_utils.py
@@ -55,7 +55,9 @@ def get_supports_response_schema(
 
 from typing import Literal, Optional
 
-all_gemini_url_modes = Literal["chat", "embedding", "batch_embedding"]
+all_gemini_url_modes = Literal[
+    "chat", "embedding", "batch_embedding", "image_generation"
+]
 
 
 def _get_vertex_url(
@@ -91,7 +93,11 @@ def _get_vertex_url(
         if model.isdigit():
             # https://us-central1-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/us-central1/endpoints/$ENDPOINT_ID:predict
             url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
-
+    elif mode == "image_generation":
+        endpoint = "predict"
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+        if model.isdigit():
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
     if not url or not endpoint:
         raise ValueError(f"Unable to get vertex url/endpoint for mode: {mode}")
     return url, endpoint
@@ -127,6 +133,10 @@ def _get_gemini_url(
         url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
             _gemini_model_name, endpoint, gemini_api_key
         )
+    elif mode == "image_generation":
+        raise ValueError(
+            "LiteLLM's `gemini/` route does not support image generation yet. Let us know if you need this feature by opening an issue at https://github.com/BerriAI/litellm/issues"
+        )
 
     return url, endpoint
 
diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
index 1d5322c08d..e83f4b6f03 100644
--- a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
+++ b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
@@ -43,22 +43,23 @@ class VertexImageGeneration(VertexLLM):
     def image_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[Any] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
         aimg_generation=False,
+        extra_headers: Optional[dict] = None,
     ) -> ImageResponse:
         if aimg_generation is True:
             return self.aimage_generation(  # type: ignore
                 prompt=prompt,
+                api_base=api_base,
                 vertex_project=vertex_project,
                 vertex_location=vertex_location,
                 vertex_credentials=vertex_credentials,
@@ -83,13 +84,27 @@ class VertexImageGeneration(VertexLLM):
         else:
             sync_handler = client  # type: ignore
 
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+        # The request URL is resolved by self._get_token_and_url() below, which receives the caller's api_base.
 
+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params
@@ -99,31 +114,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }
 
-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
 
         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )
 
         response = sync_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )
 
@@ -138,17 +143,17 @@ class VertexImageGeneration(VertexLLM):
     async def aimage_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[AsyncHTTPHandler] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
+        extra_headers: Optional[dict] = None,
     ):
         response = None
         if client is None:
@@ -169,7 +174,6 @@ class VertexImageGeneration(VertexLLM):
 
         # make POST request to
         # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
 
         """
         Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218
@@ -188,11 +192,25 @@ class VertexImageGeneration(VertexLLM):
         } \
         "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
         """
+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params
@@ -202,22 +220,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }
 
-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
+
         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )
 
         response = await self.async_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )
 
diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
index 3ef40703e8..2e8051d4d2 100644
--- a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
+++ b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
@@ -111,7 +111,7 @@ class VertexEmbedding(VertexBase):
         )
 
         try:
-            response = client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
+            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
diff --git a/litellm/main.py b/litellm/main.py
index 3d4152d634..94e19aab0c 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2350,6 +2350,8 @@ def completion(  # type: ignore # noqa: PLR0915
                 or litellm.api_key
             )
 
+            api_base = api_base or litellm.api_base or get_secret("GEMINI_API_BASE")
+
             new_params = deepcopy(optional_params)
             response = vertex_chat_completion.completion(  # type: ignore
                 model=model,
@@ -2392,6 +2394,8 @@ def completion(  # type: ignore # noqa: PLR0915
                 or get_secret("VERTEXAI_CREDENTIALS")
             )
 
+            api_base = api_base or litellm.api_base or get_secret("VERTEXAI_API_BASE")
+
             new_params = deepcopy(optional_params)
             if (
                 model.startswith("meta/")
@@ -3657,6 +3661,8 @@ def embedding(  # noqa: PLR0915
                 api_key or get_secret_str("GEMINI_API_KEY") or litellm.api_key
             )
 
+            api_base = api_base or litellm.api_base or get_secret_str("GEMINI_API_BASE")
+
             response = google_batch_embeddings.batch_embeddings(  # type: ignore
                 model=model,
                 input=input,
@@ -3671,6 +3677,8 @@ def embedding(  # noqa: PLR0915
                 print_verbose=print_verbose,
                 custom_llm_provider="gemini",
                 api_key=gemini_api_key,
+                api_base=api_base,
+                client=client,
             )
 
         elif custom_llm_provider == "vertex_ai":
@@ -3695,6 +3703,13 @@ def embedding(  # noqa: PLR0915
                 or get_secret_str("VERTEX_CREDENTIALS")
             )
 
+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("VERTEXAI_API_BASE")
+                or get_secret_str("VERTEX_API_BASE")
+            )
+
             if (
                 "image" in optional_params
                 or "video" in optional_params
@@ -3716,6 +3731,7 @@ def embedding(  # noqa: PLR0915
                     print_verbose=print_verbose,
                     custom_llm_provider="vertex_ai",
                     client=client,
+                    api_base=api_base,
                 )
             else:
                 response = vertex_embedding.embedding(
@@ -3733,6 +3749,8 @@ def embedding(  # noqa: PLR0915
                     aembedding=aembedding,
                     print_verbose=print_verbose,
                     api_key=api_key,
+                    api_base=api_base,
+                    client=client,
                 )
         elif custom_llm_provider == "oobabooga":
             response = oobabooga.embedding(
@@ -4695,6 +4713,14 @@ def image_generation(  # noqa: PLR0915
                 or optional_params.pop("vertex_ai_credentials", None)
                 or get_secret_str("VERTEXAI_CREDENTIALS")
             )
+
+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("VERTEXAI_API_BASE")
+                or get_secret_str("VERTEX_API_BASE")
+            )
+
             model_response = vertex_image_generation.image_generation(
                 model=model,
                 prompt=prompt,
@@ -4706,6 +4732,8 @@ def image_generation(  # noqa: PLR0915
                 vertex_location=vertex_ai_location,
                 vertex_credentials=vertex_credentials,
                 aimg_generation=aimg_generation,
+                api_base=api_base,
+                client=client,
             )
         elif (
             custom_llm_provider in litellm._custom_providers
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index cf09749d81..09db9f10ee 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,5 @@
 model_list:
- - model_name: "gpt-3.5-turbo"
+ - model_name: "gpt-4o"
    litellm_params:
     model: azure/chatgpt-v-2
     api_key: os.environ/AZURE_API_KEY
diff --git a/tests/litellm/test_cost_calculator.py b/tests/litellm/test_cost_calculator.py
index 9c9f6d9043..c0073e2c56 100644
--- a/tests/litellm/test_cost_calculator.py
+++ b/tests/litellm/test_cost_calculator.py
@@ -15,9 +15,11 @@ from pydantic import BaseModel
 from litellm.cost_calculator import response_cost_calculator
 
 
-def test_cost_calculator():
+def test_cost_calculator_with_response_cost_in_additional_headers():
     class MockResponse(BaseModel):
-        _hidden_params = {"additional_headers": {"x-litellm-response-cost": 1000}}
+        _hidden_params = {
+            "additional_headers": {"llm_provider-x-litellm-response-cost": 1000}
+        }
 
     result = response_cost_calculator(
         response_object=MockResponse(),
diff --git a/tests/llm_translation/test_litellm_proxy_provider.py b/tests/llm_translation/test_litellm_proxy_provider.py
index 8484a66dad..c38e386063 100644
--- a/tests/llm_translation/test_litellm_proxy_provider.py
+++ b/tests/llm_translation/test_litellm_proxy_provider.py
@@ -31,7 +31,7 @@ async def test_litellm_gateway_from_sdk():
     openai_client = OpenAI(api_key="fake-key")
 
     with patch.object(
-        openai_client.chat.completions, "create", new=MagicMock()
+        openai_client.chat.completions.with_raw_response, "create", new=MagicMock()
     ) as mock_call:
         try:
             completion(
@@ -374,3 +374,78 @@ async def test_litellm_gateway_from_sdk_rerank(is_async):
         assert request_body["query"] == "What is machine learning?"
         assert request_body["model"] == "rerank-english-v2.0"
         assert len(request_body["documents"]) == 2
+
+
+def test_litellm_gateway_from_sdk_with_response_cost_in_additional_headers():
+    litellm.set_verbose = True
+    litellm._turn_on_debug()
+
+    from openai import OpenAI
+
+    openai_client = OpenAI(api_key="fake-key")
+
+    # Create mock response object
+    mock_response = MagicMock()
+    mock_response.headers = {"x-litellm-response-cost": "120"}
+    mock_response.parse.return_value = litellm.ModelResponse(
+        **{
+            "id": "chatcmpl-BEkxQvRGp9VAushfAsOZCbhMFLsoy",
+            "choices": [
+                {
+                    "finish_reason": "stop",
+                    "index": 0,
+                    "logprobs": None,
+                    "message": {
+                        "content": "Hello! How can I assist you today?",
+                        "refusal": None,
+                        "role": "assistant",
+                        "annotations": [],
+                        "audio": None,
+                        "function_call": None,
+                        "tool_calls": None,
+                    },
+                }
+            ],
+            "created": 1742856796,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion",
+            "service_tier": "default",
+            "system_fingerprint": "fp_6ec83003ad",
+            "usage": {
+                "completion_tokens": 10,
+                "prompt_tokens": 9,
+                "total_tokens": 19,
+                "completion_tokens_details": {
+                    "accepted_prediction_tokens": 0,
+                    "audio_tokens": 0,
+                    "reasoning_tokens": 0,
+                    "rejected_prediction_tokens": 0,
+                },
+                "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
+            },
+        }
+    )
+
+    with patch.object(
+        openai_client.chat.completions.with_raw_response,
+        "create",
+        return_value=mock_response,
+    ) as mock_call:
+        response = litellm.completion(
+            model="litellm_proxy/gpt-4o",
+            messages=[{"role": "user", "content": "Hello world"}],
+            api_base="http://0.0.0.0:4000",
+            api_key="sk-PIp1h0RekR",
+            client=openai_client,
+        )
+
+        # Assert the headers were properly passed through
+        print(f"additional_headers: {response._hidden_params['additional_headers']}")
+        assert (
+            response._hidden_params["additional_headers"][
+                "llm_provider-x-litellm-response-cost"
+            ]
+            == "120"
+        )
+
+        assert response._hidden_params["response_cost"] == 120
diff --git a/tests/load_tests/test_vertex_embeddings_load_test.py b/tests/load_tests/test_vertex_embeddings_load_test.py
index eb440c9437..24543e29d0 100644
--- a/tests/load_tests/test_vertex_embeddings_load_test.py
+++ b/tests/load_tests/test_vertex_embeddings_load_test.py
@@ -109,12 +109,13 @@ def analyze_results(vertex_times):
 
 
 @pytest.mark.asyncio
-async def test_embedding_performance():
+async def test_embedding_performance(monkeypatch):
     """
     Run load test on vertex AI embeddings to ensure vertex median response time is less than 300ms
 
     20 RPS for 20 seconds
     """
+    monkeypatch.setattr(litellm, "api_base", None)
     duration_seconds = 20
     requests_per_second = 20
     vertex_times = await run_load_test(duration_seconds, requests_per_second)
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py
index 25993d6d5b..5e3ebf6a66 100644
--- a/tests/local_testing/test_amazing_vertex_completion.py
+++ b/tests/local_testing/test_amazing_vertex_completion.py
@@ -31,6 +31,7 @@ from litellm import (
     completion,
     completion_cost,
     embedding,
+    image_generation,
 )
 from litellm.llms.vertex_ai.gemini.transformation import (
     _gemini_convert_messages_with_history,
@@ -3327,3 +3328,46 @@ def test_signed_s3_url_with_format():
         json_str = json.dumps(mock_client.call_args.kwargs["json"])
         assert "image/jpeg" in json_str
         assert "image/png" not in json_str
+
+
+@pytest.mark.parametrize("provider", ["vertex_ai", "gemini"])
+@pytest.mark.parametrize("route", ["completion", "embedding", "image_generation"])
+def test_litellm_api_base(monkeypatch, provider, route):
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    import litellm
+
+    monkeypatch.setattr(litellm, "api_base", "https://litellm.com")
+
+    load_vertex_ai_credentials()
+
+    if route == "image_generation" and provider == "gemini":
+        pytest.skip("Gemini does not support image generation")
+
+    with patch.object(client, "post", new=MagicMock()) as mock_client:
+        try:
+            if route == "completion":
+                response = completion(
+                    model=f"{provider}/gemini-2.0-flash-001",
+                    messages=[{"role": "user", "content": "Hello, world!"}],
+                    client=client,
+                )
+            elif route == "embedding":
+                response = embedding(
+                    model=f"{provider}/gemini-2.0-flash-001",
+                    input=["Hello, world!"],
+                    client=client,
+                )
+            elif route == "image_generation":
+                response = image_generation(
+                    model=f"{provider}/gemini-2.0-flash-001",
+                    prompt="Hello, world!",
+                    client=client,
+                )
+        except Exception as e:
+            print(e)
+
+        mock_client.assert_called()
+        assert mock_client.call_args.kwargs["url"].startswith("https://litellm.com")

From 519d89593ea2311bee23512304ee52e69556604b Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 08:12:51 -0700
Subject: [PATCH 02/11] run ci/cd again

---
 tests/local_testing/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 59f5a38f08..dd8b26141c 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -11,7 +11,7 @@ import os
 
 sys.path.insert(
     0, os.path.abspath("../..")
-)  # Adds the parent directory to the system-path
+)  # Adds the parent directory to the system path
 
 
 import os

From ad5d26f664593f0c1c53ae62f895833a75b84e39 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 08:36:11 -0700
Subject: [PATCH 03/11] fix user_api_key_auth example config

---
 litellm/proxy/example_config_yaml/custom_auth.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/example_config_yaml/custom_auth.py b/litellm/proxy/example_config_yaml/custom_auth.py
index 7d797623c5..e59db22719 100644
--- a/litellm/proxy/example_config_yaml/custom_auth.py
+++ b/litellm/proxy/example_config_yaml/custom_auth.py
@@ -7,7 +7,7 @@ from litellm.proxy._types import GenerateKeyRequest, UserAPIKeyAuth
 
 async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     try:
-        modified_master_key = f"{os.getenv('PROXY_MASTER_KEY')}-1234"
+        modified_master_key = f"{os.getenv('LITELLM_MASTER_KEY')}-1234"
         if api_key == modified_master_key:
             return UserAPIKeyAuth(api_key=api_key)
         raise Exception

From 5618a2e7c282ca3c152b5860c1559900e32d3245 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 08:42:55 -0700
Subject: [PATCH 04/11] fix caching unit tests

---
 .circleci/config.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 304f96bdbb..6a87d8b1b2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -242,7 +242,6 @@ jobs:
 
     steps:
       - checkout
-      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |

From 571c4d97ad6f9d5eb1360b6a8b0d7fda6a34299c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 08:46:23 -0700
Subject: [PATCH 05/11] fix load_testing

---
 .circleci/config.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 6a87d8b1b2..38614e657e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -629,7 +629,6 @@ jobs:
 
     steps:
       - checkout
-      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |

From 4e9531042307f8e65125a4d064ff3b604564acf3 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 08:55:18 -0700
Subject: [PATCH 06/11] fix DNS resolution

---
 .circleci/config.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 38614e657e..67af43f8f8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,7 +9,11 @@ commands:
       - run:
           name: "Configure Google DNS"
           command: |
-            echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
+            # Backup original resolv.conf
+            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
+            # Add both local and Google DNS servers
+            echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
+            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
             echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
 
 jobs:
@@ -242,6 +246,7 @@ jobs:
 
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |

From 0d4d8d95f4fb1754ada34743d7666eef830d2909 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 26 Mar 2025 08:58:35 -0700
Subject: [PATCH 07/11] build(model_prices_and_context_window.json): add
 commercial rate limits for gemini 2.0 flash lite

---
 litellm/model_prices_and_context_window_backup.json | 2 ++
 model_prices_and_context_window.json                | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index f9eb63ccca..06f7238c01 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -4694,6 +4694,8 @@
         "output_cost_per_token": 0.0000003,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "tpm": 4000000,
+        "rpm": 4000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f9eb63ccca..06f7238c01 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -4694,6 +4694,8 @@
         "output_cost_per_token": 0.0000003,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "tpm": 4000000,
+        "rpm": 4000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,

From a7f4941cd703119abfc8c0e55b05573fdc858bd8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 09:25:08 -0700
Subject: [PATCH 08/11] DNS lookup for Redis host

---
 .circleci/config.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 67af43f8f8..b2c83e3845 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,7 +4,7 @@ orbs:
   node: circleci/node@5.1.0  # Add this line to declare the node orb
 
 commands:
-  setup_google_dns:
+  c:
     steps:
       - run:
           name: "Configure Google DNS"
@@ -247,6 +247,9 @@ jobs:
     steps:
       - checkout
       - setup_google_dns
+      - run:
+          name: DNS lookup for Redis host
+          command: dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |

From 228253f904a7399aabf5c2b30a4060fbc5c0ad56 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 09:26:33 -0700
Subject: [PATCH 09/11] restore setup_google_dns command name in CircleCI config

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index b2c83e3845..4faafdbd3d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,7 +4,7 @@ orbs:
   node: circleci/node@5.1.0  # Add this line to declare the node orb
 
 commands:
-  c:
+  setup_google_dns:
     steps:
       - run:
           name: "Configure Google DNS"

From e375d76cb95265ede2da6086ac7423145988b8c9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 09:28:59 -0700
Subject: [PATCH 10/11] install dnsutils before DNS lookup for Redis host

---
 .circleci/config.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4faafdbd3d..3b4005baab 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -249,7 +249,10 @@ jobs:
       - setup_google_dns
       - run:
           name: DNS lookup for Redis host
-          command: dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y dnsutils
+            dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |

From 868ab05c0a7dde03ca91fca35fba91d67ada3bb7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Mar 2025 10:25:50 -0700
Subject: [PATCH 11/11] fix vertex embedding perf test

---
 tests/load_tests/test_vertex_embeddings_load_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/load_tests/test_vertex_embeddings_load_test.py b/tests/load_tests/test_vertex_embeddings_load_test.py
index 24543e29d0..7c593fb549 100644
--- a/tests/load_tests/test_vertex_embeddings_load_test.py
+++ b/tests/load_tests/test_vertex_embeddings_load_test.py
@@ -59,7 +59,7 @@ def load_vertex_ai_credentials():
 
 async def create_async_vertex_embedding_task():
     load_vertex_ai_credentials()
-    base_url = "https://exampleopenaiendpoint-production.up.railway.app/v1/projects/pathrise-convert-1606954137718/locations/us-central1/publishers/google/models/embedding-gecko-001:predict"
+    base_url = "https://exampleopenaiendpoint-production.up.railway.app/v1/projects/pathrise-convert-1606954137718/locations/us-central1/publishers/google/models/textembedding-gecko@001"
     embedding_args = {
         "model": "vertex_ai/textembedding-gecko",
         "input": "This is a test sentence for embedding.",