From 90dd60fa711a8fb6b7e531d7555bfb51ff7ceff7 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Fri, 2 Aug 2024 12:41:13 -0700 Subject: [PATCH 01/35] fix(main.py): Handle bedrock tool calling in stream_chunk_builder Fixes #5022. The streaming chunks from Anthropic seem to violate an assumption that is implicit in the stream_chunk_builder implementation: that only tool_calls OR function_calls OR content will appear in a streamed response. The repro in #5022 shows that you can get content followed by tool calls. These changes properly handle these combinations by building separate lists of each type of chunk (note that in theory a chunk could appear in multiple lists, e.g. both delta.tool_calls and delta.content being present on one chunk). --- litellm/main.py | 53 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 67b935a55c..989e0b1106 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -5078,12 +5078,16 @@ def stream_chunk_builder( combined_content = "" combined_arguments = "" - if ( - "tool_calls" in chunks[0]["choices"][0]["delta"] - and chunks[0]["choices"][0]["delta"]["tool_calls"] is not None - ): + tool_call_chunks = [ + chunk + for chunk in chunks + if "tool_calls" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["tool_calls"] is not None + ] + + if len(tool_call_chunks) > 0: argument_list = [] - delta = chunks[0]["choices"][0]["delta"] + delta = tool_call_chunks[0]["choices"][0]["delta"] message = response["choices"][0]["message"] message["tool_calls"] = [] id = None @@ -5094,7 +5098,7 @@ def stream_chunk_builder( prev_id = None curr_id = None curr_index = 0 - for chunk in chunks: + for chunk in tool_call_chunks: choices = chunk["choices"] for choice in choices: delta = choice.get("delta", {}) @@ -5140,12 +5144,17 @@ def stream_chunk_builder( ) response["choices"][0]["message"]["content"] = None response["choices"][0]["message"]["tool_calls"] = tool_calls_list - elif ( - "function_call" in chunks[0]["choices"][0]["delta"] - and chunks[0]["choices"][0]["delta"]["function_call"] is not None - ): + + function_call_chunks = [ + chunk + for chunk in chunks + if "function_calls" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["function_calls"] is not None + ] + + if len(function_call_chunks) > 0: argument_list = [] - delta = chunks[0]["choices"][0]["delta"] + delta = function_call_chunks[0]["choices"][0]["delta"] function_call = delta.get("function_call", "") function_call_name = function_call.name @@ -5153,7 +5162,7 @@ def stream_chunk_builder( message["function_call"] = {} message["function_call"]["name"] = function_call_name - for chunk in chunks: + for chunk in function_call_chunks: choices = chunk["choices"] for choice in choices: delta = choice.get("delta", {}) @@ -5170,7 +5179,15 @@ def stream_chunk_builder( response["choices"][0]["message"]["function_call"][ "arguments" ] = combined_arguments - else: + + content_chunks = [ + chunk + for chunk in chunks + if "content" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["content"] is not None + ] + + if len(content_chunks) > 0: for chunk in chunks: choices = chunk["choices"] for choice in choices: @@ -5186,12 +5203,12 @@ def stream_chunk_builder( # Update the "content" field within the response dictionary response["choices"][0]["message"]["content"] = combined_content + completion_output = "" if len(combined_content) > 0: - completion_output = combined_content - elif 
len(combined_arguments) > 0: - completion_output = combined_arguments - else: - completion_output = "" + completion_output += combined_content + if len(combined_arguments) > 0: + completion_output += combined_arguments + # # Update usage information if needed prompt_tokens = 0 completion_tokens = 0 From 33f4411f17b73e62332ff658ebadcb9579b99783 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Fri, 2 Aug 2024 13:05:23 -0700 Subject: [PATCH 02/35] Fix tool call coalescing The previous code seemed to assume that the tool call index property started at 0, but Anthropic sometimes returns them starting at 1. This was causing an extra null-ish tool call to be materialized. --- litellm/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 989e0b1106..36267aec1d 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -5094,7 +5094,7 @@ def stream_chunk_builder( name = None type = None tool_calls_list = [] - prev_index = 0 + prev_index = None prev_id = None curr_id = None curr_index = 0 @@ -5120,6 +5120,8 @@ def stream_chunk_builder( name = tool_calls[0].function.name if tool_calls[0].type: type = tool_calls[0].type + if prev_index is None: + prev_index = curr_index if curr_index != prev_index: # new tool call combined_arguments = "".join(argument_list) tool_calls_list.append( @@ -5138,6 +5140,7 @@ def stream_chunk_builder( tool_calls_list.append( { "id": id, + "index": curr_index, "function": {"arguments": combined_arguments, "name": name}, "type": type, } From 1fbfc09b443337b531be863b8a631b8c3bd8e136 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Fri, 2 Aug 2024 20:51:03 -0700 Subject: [PATCH 03/35] Add unit test --- litellm/tests/stream_chunk_testdata.py | 543 +++++++++++++++++++++ litellm/tests/test_stream_chunk_builder.py | 23 + 2 files changed, 566 insertions(+) create mode 100644 litellm/tests/stream_chunk_testdata.py diff --git a/litellm/tests/stream_chunk_testdata.py b/litellm/tests/stream_chunk_testdata.py new file mode 100644 index 0000000000..6be9d1ebdf --- /dev/null +++ b/litellm/tests/stream_chunk_testdata.py @@ -0,0 +1,543 @@ +from litellm.types.utils import ( + ChatCompletionDeltaToolCall, + Delta, + Function, + ModelResponse, + StreamingChoices, +) + +chunks = [ + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="To answer", + role="assistant", + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" your", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" question about", + role=None, + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + 
index=0, + delta=Delta( + content=" how", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" many rows are in the ", + role=None, + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="'users' table, I", + role=None, + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="'ll", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" need to", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" run", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" a SQL query.", + role=None, + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" Let", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" me", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" ", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + 
created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="do that for", + role=None, + function_call=None, + tool_calls=None, + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content=" you.", role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id="toolu_01H3AjkLpRtGQrof13CBnWfK", + function=Function(arguments="", name="sql_query"), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments="", name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656356, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments='{"', name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments='query": ', name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments='"SELECT C', name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + 
finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments="OUNT(*", name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments=") ", name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments="FROM use", name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + content="", + role=None, + function_call=None, + tool_calls=[ + ChatCompletionDeltaToolCall( + id=None, + function=Function(arguments='rs;"}', name=None), + type="function", + index=1, + ) + ], + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), + ModelResponse( + id="chatcmpl-634a6ad3-483a-44a1-8cdd-3befbeb4ac2f", + choices=[ + StreamingChoices( + finish_reason="tool_calls", + index=0, + delta=Delta( + content=None, role=None, function_call=None, tool_calls=None + ), + logprobs=None, + ) + ], + created=1722656357, + model="claude-3-5-sonnet-20240620", + object="chat.completion.chunk", + system_fingerprint=None, + ), +] diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py index 342b070ae7..78d2617f1e 100644 --- a/litellm/tests/test_stream_chunk_builder.py +++ b/litellm/tests/test_stream_chunk_builder.py @@ -18,6 +18,8 @@ from openai import OpenAI import litellm from litellm import completion, stream_chunk_builder +import litellm.tests.stream_chunk_testdata + dotenv.load_dotenv() user_message = "What is the current weather in Boston?" @@ -196,3 +198,24 @@ def test_stream_chunk_builder_litellm_usage_chunks(): # assert prompt tokens are the same assert gemini_pt == stream_rebuilt_pt + + +def test_stream_chunk_builder_litellm_mixed_calls(): + response = stream_chunk_builder(litellm.tests.stream_chunk_testdata.chunks) + assert ( + response.choices[0].message.content + == "To answer your question about how many rows are in the 'users' table, I'll need to run a SQL query. Let me do that for you." 
+ ) + + print(response.choices[0].message.tool_calls[0].to_dict()) + + assert len(response.choices[0].message.tool_calls) == 1 + assert response.choices[0].message.tool_calls[0].to_dict() == { + "index": 1, + "function": { + "arguments": '{"query": "SELECT COUNT(*) FROM users;"}', + "name": "sql_query", + }, + "id": "toolu_01H3AjkLpRtGQrof13CBnWfK", + "type": "function", + } From 19474c3f0fc9d1e0d9328736657a66cf7994f71f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:46:39 -0700 Subject: [PATCH 04/35] support v1/projects/tuningJobs --- .../proxy/fine_tuning_endpoints/endpoints.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm/proxy/fine_tuning_endpoints/endpoints.py index cda226b5aa..c2d89dd251 100644 --- a/litellm/proxy/fine_tuning_endpoints/endpoints.py +++ b/litellm/proxy/fine_tuning_endpoints/endpoints.py @@ -429,3 +429,72 @@ async def retrieve_fine_tuning_job( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) + + +@router.post( + "/v1/projects/tuningJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["fine-tuning"], + summary="✨ (Enterprise) Create Fine-Tuning Jobs", +) +async def vertex_create_fine_tuning_job( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + from litellm.proxy.proxy_server import ( + add_litellm_data_to_request, + general_settings, + get_custom_headers, + premium_user, + proxy_config, + proxy_logging_obj, + version, + ) + + # get configs for custom_llm_provider + llm_provider_config = get_fine_tuning_provider_config( + custom_llm_provider="vertex_ai" + ) + + vertex_project = llm_provider_config.get("vertex_project", None) + vertex_location = llm_provider_config.get("vertex_location", None) + vertex_credentials = llm_provider_config.get("vertex_credentials", None) + request_data_json = await request.json() + response = await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_fine_tuning_job( + request_data=request_data_json, + vertex_project=vertex_project, + vertex_location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + return response + except Exception as e: + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "message", str(e.detail)), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + ) + else: + error_msg = f"{str(e)}" + raise ProxyException( + message=getattr(e, "message", error_msg), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", 500), + ) From 2e596c6c5b0ec8446745339f3c6a7351f727ad2f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:47:43 -0700 Subject: [PATCH 05/35] add support for pass through vertex ai ft jobs --- litellm/llms/fine_tuning_apis/vertex_ai.py | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py 
b/litellm/llms/fine_tuning_apis/vertex_ai.py index f370652d26..618894245d 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -240,3 +240,45 @@ class VertexFineTuningAPI(VertexLLM): vertex_response ) return open_ai_response + + async def pass_through_vertex_ai_fine_tuning_job( + self, + request_data: dict, + vertex_project: str, + vertex_location: str, + vertex_credentials: str, + ): + auth_header, _ = self._get_token_and_url( + model="", + gemini_api_key=None, + vertex_credentials=vertex_credentials, + vertex_project=vertex_project, + vertex_location=vertex_location, + stream=False, + custom_llm_provider="vertex_ai_beta", + api_base="", + ) + + headers = { + "Authorization": f"Bearer {auth_header}", + "Content-Type": "application/json", + } + + fine_tuning_url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + + if self.async_handler is None: + raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") + + response = await self.async_handler.post( + headers=headers, + url=fine_tuning_url, + json=request_data, # type: ignore + ) + + if response.status_code != 200: + raise Exception( + f"Error creating fine tuning job. Status code: {response.status_code}. Response: {response.text}" + ) + + response_json = response.json() + return response_json From af91740fbb0e3c9a9eede61d0303861cd9c811e8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:56:43 -0700 Subject: [PATCH 06/35] docs native vertex ft endpoint --- docs/my-website/docs/fine_tuning.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/fine_tuning.md b/docs/my-website/docs/fine_tuning.md index c69f4c1e66..fd3cbc792d 100644 --- a/docs/my-website/docs/fine_tuning.md +++ b/docs/my-website/docs/fine_tuning.md @@ -124,7 +124,7 @@ ft_job = await client.fine_tuning.jobs.create( ``` - + ```shell curl http://localhost:4000/v1/fine_tuning/jobs \ @@ -136,6 +136,28 @@ curl http://localhost:4000/v1/fine_tuning/jobs \ "training_file": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" }' ``` + + + + +:::info + +Use this to create Fine tuning Jobs in [the Vertex AI API Format](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#create-tuning) + +::: + +```shell +curl http://localhost:4000/v1/projects/tuningJobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "baseModel": "gemini-1.0-pro-002", + "supervisedTuningSpec" : { + "training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" + } +}' +``` + From b7be609d6e3d197e0dec4083bf1ed5d9fa148a42 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Sat, 3 Aug 2024 11:58:46 -0700 Subject: [PATCH 07/35] Use correct key name --- litellm/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 36267aec1d..f6a43023a3 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -5151,8 +5151,8 @@ def stream_chunk_builder( function_call_chunks = [ chunk for chunk in chunks - if "function_calls" in chunk["choices"][0]["delta"] - and chunk["choices"][0]["delta"]["function_calls"] is not None + if "function_call" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["function_call"] is not None ] if len(function_call_chunks) > 0: From 6b8806b45f970cb2446654d2c379f8dcaa93ce3c Mon Sep 17 00:00:00 2001 From: 
Krrish Dholakia Date: Sat, 3 Aug 2024 12:34:11 -0700 Subject: [PATCH 08/35] feat(router.py): add flag for mock testing loadbalancing for rate limit errors --- litellm/proxy/_new_secret_config.yaml | 13 ++++++---- litellm/router.py | 34 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 238fe7136a..47b93ccd2f 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,7 +1,10 @@ model_list: - - model_name: "*" + - model_name: "gpt-4" litellm_params: - model: "*" - -# litellm_settings: -# failure_callback: ["langfuse"] + model: "gpt-4" + - model_name: "gpt-4" + litellm_params: + model: "gpt-4o" + - model_name: "gpt-4o-mini" + litellm_params: + model: "gpt-4o-mini" \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index 108ca706c5..0448139d2c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2468,6 +2468,8 @@ class Router: verbose_router_logger.info( f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}" ) + if hasattr(original_exception, "message"): + original_exception.message += f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}" raise original_exception for mg in fallback_model_group: """ @@ -2492,14 +2494,19 @@ class Router: return response except Exception as e: raise e - except Exception as e: - verbose_router_logger.error(f"An exception occurred - {str(e)}") - verbose_router_logger.debug(traceback.format_exc()) + except Exception as new_exception: + verbose_router_logger.error( + "litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}".format( + str(new_exception), traceback.format_exc() + ) + ) if hasattr(original_exception, "message"): # add the available fallbacks to the exception - original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}".format( - model_group, fallback_model_group + original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}\nCooldown Deployments={}".format( + model_group, + fallback_model_group, + await self._async_get_cooldown_deployments_with_debug_info(), ) raise original_exception @@ -2508,6 +2515,9 @@ class Router: f"Inside async function with retries: args - {args}; kwargs - {kwargs}" ) original_function = kwargs.pop("original_function") + mock_testing_rate_limit_error = kwargs.pop( + "mock_testing_rate_limit_error", None + ) fallbacks = kwargs.pop("fallbacks", self.fallbacks) context_window_fallbacks = kwargs.pop( "context_window_fallbacks", self.context_window_fallbacks @@ -2515,13 +2525,25 @@ class Router: content_policy_fallbacks = kwargs.pop( "content_policy_fallbacks", self.content_policy_fallbacks ) - + model_group = kwargs.get("model") num_retries = kwargs.pop("num_retries") verbose_router_logger.debug( f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" ) try: + if ( + mock_testing_rate_limit_error is not None + and mock_testing_rate_limit_error is True + ): + verbose_router_logger.info( + "litellm.router.py::async_function_with_retries() - mock_testing_rate_limit_error=True. Raising litellm.RateLimitError." 
+ ) + raise litellm.RateLimitError( + model=model_group, + llm_provider="", + message=f"This is a mock exception for model={model_group}, to trigger a rate limit error.", + ) # if the function call is successful, no exception will be raised and we'll break out of the loop response = await original_function(*args, **kwargs) return response From 1d892a41d21540e6f5a8bb7df900dc3bae16b805 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 3 Aug 2024 12:44:04 -0700 Subject: [PATCH 09/35] docs(proxy/reliability.md): add docs on testing if loadbalancing is working as expected --- docs/my-website/docs/proxy/reliability.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md index a3f03b3d76..cb6550a478 100644 --- a/docs/my-website/docs/proxy/reliability.md +++ b/docs/my-website/docs/proxy/reliability.md @@ -50,7 +50,7 @@ Detailed information about [routing strategies can be found here](../routing) $ litellm --config /path/to/config.yaml ``` -### Test - Load Balancing +### Test - Simple Call Here requests with model=gpt-3.5-turbo will be routed across multiple instances of azure/gpt-3.5-turbo @@ -138,6 +138,27 @@ print(response) +### Test - Loadbalancing + +In this request, the following will occur: +1. A rate limit exception will be raised +2. LiteLLM proxy will retry the request on the model group (default is 3). + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "user", "content": "Hi there!"} + ], + "mock_testing_rate_limit_error": true +}' +``` + +[**See Code**](https://github.com/BerriAI/litellm/blob/6b8806b45f970cb2446654d2c379f8dcaa93ce3c/litellm/router.py#L2535) + ### Test - Client Side Fallbacks In this request the following will occur: 1. 
The request to `model="zephyr-beta"` will fail From 7a0792c918615142af0811cdfeb92fa445efe2ff Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 3 Aug 2024 12:49:39 -0700 Subject: [PATCH 10/35] fix(router.py): move deployment cooldown list message to error log, not client-side don't show user all deployments --- litellm/router.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 0448139d2c..e31de5332e 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2496,17 +2496,18 @@ class Router: raise e except Exception as new_exception: verbose_router_logger.error( - "litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}".format( - str(new_exception), traceback.format_exc() + "litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}\n\nDebug Information:\nCooldown Deployments={}".format( + str(new_exception), + traceback.format_exc(), + await self._async_get_cooldown_deployments_with_debug_info(), ) ) if hasattr(original_exception, "message"): # add the available fallbacks to the exception - original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}\nCooldown Deployments={}".format( + original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}".format( model_group, fallback_model_group, - await self._async_get_cooldown_deployments_with_debug_info(), ) raise original_exception From 4a43f9f4110bf9ddbd10c7fada5da840c7386c1b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:24:23 -0700 Subject: [PATCH 11/35] docs supported models / providers --- docs/my-website/sidebars.js | 88 ++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 69fd32cb33..3e39348b97 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -83,50 +83,7 @@ const sidebars = { }, { type: "category", - label: "Completion()", - link: { - type: "generated-index", - title: "Completion()", - description: "Details on the completion() function", - slug: "/completion", - }, - items: [ - "completion/input", - "completion/provider_specific_params", - "completion/json_mode", - "completion/drop_params", - "completion/prompt_formatting", - "completion/output", - "exception_mapping", - "completion/stream", - "completion/message_trimming", - "completion/function_call", - "completion/vision", - "completion/model_alias", - "completion/batching", - "completion/mock_requests", - "completion/reliable_completions", - ], - }, - { - type: "category", - label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", - items: [ - "embedding/supported_embedding", - "embedding/async_embedding", - "embedding/moderation", - "image_generation", - "audio_transcription", - "text_to_speech", - "assistants", - "batches", - "fine_tuning", - "anthropic_completion" - ], - }, - { - type: "category", - label: "Supported Models & Providers", + label: "💯 Supported Models & Providers", link: { type: "generated-index", title: "Providers", @@ -183,6 +140,49 @@ const sidebars = { ], }, + { + type: "category", + label: "litellm.completion()", + link: { + type: "generated-index", + title: "Completion()", + description: "Details on the completion() function", + slug: "/completion", + }, + items: [ + "completion/input", + 
"completion/provider_specific_params", + "completion/json_mode", + "completion/drop_params", + "completion/prompt_formatting", + "completion/output", + "exception_mapping", + "completion/stream", + "completion/message_trimming", + "completion/function_call", + "completion/vision", + "completion/model_alias", + "completion/batching", + "completion/mock_requests", + "completion/reliable_completions", + ], + }, + { + type: "category", + label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", + items: [ + "embedding/supported_embedding", + "embedding/async_embedding", + "embedding/moderation", + "image_generation", + "audio_transcription", + "text_to_speech", + "assistants", + "batches", + "fine_tuning", + "anthropic_completion" + ], + }, "proxy/custom_pricing", "routing", "scheduler", From 1894aefd058a9c460a28f12889fe197f58eb33b0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:34:22 -0700 Subject: [PATCH 12/35] docs clean up organization --- docs/my-website/sidebars.js | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 3e39348b97..afb778373f 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -184,10 +184,6 @@ const sidebars = { ], }, "proxy/custom_pricing", - "routing", - "scheduler", - "set_keys", - "budget_manager", { type: "category", label: "Secret Manager", @@ -196,6 +192,22 @@ const sidebars = { "oidc" ] }, + { + type: "category", + label: "🚅 LiteLLM Python SDK", + items: [ + "routing", + "scheduler", + "set_keys", + "budget_manager", + "caching/all_caches", + { + type: "category", + label: "LangChain, LlamaIndex, Instructor Integration", + items: ["langchain/langchain", "tutorials/instructor"], + }, + ], + }, "completion/token_usage", "load_test", { @@ -227,14 +239,12 @@ const sidebars = { `observability/telemetry`, ], }, - "caching/all_caches", { type: "category", label: "Tutorials", items: [ 'tutorials/azure_openai', 'tutorials/instructor', - 'tutorials/oobabooga', "tutorials/gradio_integration", "tutorials/huggingface_codellama", "tutorials/huggingface_tutorial", @@ -246,11 +256,6 @@ const sidebars = { "tutorials/model_fallbacks", ], }, - { - type: "category", - label: "LangChain, LlamaIndex, Instructor Integration", - items: ["langchain/langchain", "tutorials/instructor"], - }, { type: "category", label: "Extras", From 942e77dfa848e1b2fab69058d9a3522f4aa2921f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:47:22 -0700 Subject: [PATCH 13/35] organize docs --- docs/my-website/docs/proxy/custom_pricing.md | 69 ++------------------ docs/my-website/docs/sdk_custom_pricing.md | 65 ++++++++++++++++++ docs/my-website/sidebars.js | 5 +- 3 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 docs/my-website/docs/sdk_custom_pricing.md diff --git a/docs/my-website/docs/proxy/custom_pricing.md b/docs/my-website/docs/proxy/custom_pricing.md index 0b747f1193..51634021b7 100644 --- a/docs/my-website/docs/proxy/custom_pricing.md +++ b/docs/my-website/docs/proxy/custom_pricing.md @@ -1,6 +1,6 @@ import Image from '@theme/IdealImage'; -# Custom Pricing - Sagemaker, etc. +# Custom LLM Pricing - Sagemaker, Azure, etc Use this to register custom pricing for models. @@ -16,39 +16,9 @@ LiteLLM already has pricing for any model in our [model cost map](https://github ::: -## Quick Start +## Cost Per Second (e.g. 
Sagemaker) -Register custom pricing for sagemaker completion model. - -For cost per second pricing, you **just** need to register `input_cost_per_second`. - -```python -# !pip install boto3 -from litellm import completion, completion_cost - -os.environ["AWS_ACCESS_KEY_ID"] = "" -os.environ["AWS_SECRET_ACCESS_KEY"] = "" -os.environ["AWS_REGION_NAME"] = "" - - -def test_completion_sagemaker(): - try: - print("testing sagemaker") - response = completion( - model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", - messages=[{"role": "user", "content": "Hey, how's it going?"}], - input_cost_per_second=0.000420, - ) - # Add any assertions here to check the response - print(response) - cost = completion_cost(completion_response=response) - print(cost) - except Exception as e: - raise Exception(f"Error occurred: {e}") - -``` - -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server **Step 1: Add pricing to config.yaml** ```yaml @@ -75,38 +45,7 @@ litellm /path/to/config.yaml ## Cost Per Token (e.g. Azure) - -```python -# !pip install boto3 -from litellm import completion, completion_cost - -## set ENV variables -os.environ["AZURE_API_KEY"] = "" -os.environ["AZURE_API_BASE"] = "" -os.environ["AZURE_API_VERSION"] = "" - - -def test_completion_azure_model(): - try: - print("testing azure custom pricing") - # azure call - response = completion( - model = "azure/", - messages = [{ "content": "Hello, how are you?","role": "user"}] - input_cost_per_token=0.005, - output_cost_per_token=1, - ) - # Add any assertions here to check the response - print(response) - cost = completion_cost(completion_response=response) - print(cost) - except Exception as e: - raise Exception(f"Error occurred: {e}") - -test_completion_azure_model() -``` - -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server ```yaml model_list: diff --git a/docs/my-website/docs/sdk_custom_pricing.md b/docs/my-website/docs/sdk_custom_pricing.md new file mode 100644 index 0000000000..c857711510 --- /dev/null +++ b/docs/my-website/docs/sdk_custom_pricing.md @@ -0,0 +1,65 @@ +# Custom Pricing - SageMaker, Azure, etc + +Register custom pricing for sagemaker completion model. + +For cost per second pricing, you **just** need to register `input_cost_per_second`. + +```python +# !pip install boto3 +from litellm import completion, completion_cost + +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + + +def test_completion_sagemaker(): + try: + print("testing sagemaker") + response = completion( + model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + input_cost_per_second=0.000420, + ) + # Add any assertions here to check the response + print(response) + cost = completion_cost(completion_response=response) + print(cost) + except Exception as e: + raise Exception(f"Error occurred: {e}") + +``` + + +## Cost Per Token (e.g. 
Azure) + + +```python +# !pip install boto3 +from litellm import completion, completion_cost + +## set ENV variables +os.environ["AZURE_API_KEY"] = "" +os.environ["AZURE_API_BASE"] = "" +os.environ["AZURE_API_VERSION"] = "" + + +def test_completion_azure_model(): + try: + print("testing azure custom pricing") + # azure call + response = completion( + model = "azure/", + messages = [{ "content": "Hello, how are you?","role": "user"}] + input_cost_per_token=0.005, + output_cost_per_token=1, + ) + # Add any assertions here to check the response + print(response) + cost = completion_cost(completion_response=response) + print(cost) + except Exception as e: + raise Exception(f"Error occurred: {e}") + +test_completion_azure_model() +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index afb778373f..6674d91ac7 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -42,6 +42,7 @@ const sidebars = { "proxy/configs", "proxy/reliability", "proxy/cost_tracking", + "proxy/custom_pricing", "proxy/self_serve", "proxy/virtual_keys", { @@ -183,7 +184,6 @@ const sidebars = { "anthropic_completion" ], }, - "proxy/custom_pricing", { type: "category", label: "Secret Manager", @@ -199,6 +199,8 @@ const sidebars = { "routing", "scheduler", "set_keys", + "completion/token_usage", + "sdk_custom_pricing", "budget_manager", "caching/all_caches", { @@ -208,7 +210,6 @@ const sidebars = { }, ], }, - "completion/token_usage", "load_test", { type: "category", From 203cc35abce2ab3357240bd12a781d8c2e9e383d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:49:35 -0700 Subject: [PATCH 14/35] docs - use consistent name for LiteLLM proxy server --- README.md | 4 +- cookbook/litellm_router/error_log.txt | 152 +++++++++--------- cookbook/litellm_router/request_log.txt | 4 +- .../test_questions/question3.txt | 2 +- docs/my-website/docs/budget_manager.md | 4 +- docs/my-website/docs/index.md | 4 +- docs/my-website/docs/proxy/deploy.md | 10 +- docs/my-website/docs/proxy_server.md | 2 +- docs/my-website/docs/routing.md | 4 +- docs/my-website/docs/secret.md | 4 +- docs/my-website/docs/simple_proxy_old_doc.md | 2 +- docs/my-website/sidebars.js | 4 +- docs/my-website/src/pages/index.md | 2 +- 13 files changed, 99 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index 306f07ec26..2153ae948e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@

Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]

-

OpenAI Proxy Server | Hosted Proxy (Preview) | Enterprise Tier

+

LiteLLM Proxy Server | Hosted Proxy (Preview) | Enterprise Tier

PyPI Version @@ -35,7 +35,7 @@ LiteLLM manages: - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']` - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing) -- Set Budgets & Rate limits per project, api key, model [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy) +- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy) [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs)
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs) diff --git a/cookbook/litellm_router/error_log.txt b/cookbook/litellm_router/error_log.txt index 6853ef4659..983b47cbbb 100644 --- a/cookbook/litellm_router/error_log.txt +++ b/cookbook/litellm_router/error_log.txt @@ -1,10 +1,10 @@ -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -21,13 +21,13 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -49,7 +49,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -61,7 +61,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -70,7 +70,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -79,7 +79,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -109,7 +109,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. 
Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -128,7 +128,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -148,7 +148,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -162,7 +162,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -174,7 +174,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -184,7 +184,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -193,19 +193,19 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -214,7 +214,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. 
Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -234,7 +234,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -244,7 +244,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -253,7 +253,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -267,31 +267,31 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -305,7 +305,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -330,7 +330,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. 
Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -339,7 +339,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -360,7 +360,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -369,7 +369,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -378,7 +378,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -388,7 +388,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -409,7 +409,7 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -422,13 +422,13 @@ Exception: Expecting value: line 1 column 1 (char 0) Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -438,7 +438,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. 
Exception: Expecting value: line 1 column 1 (char 0) -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -462,7 +462,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -482,7 +482,7 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -492,7 +492,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -516,7 +516,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -529,7 +529,7 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -546,13 +546,13 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -580,13 +580,13 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -624,7 +624,7 @@ Question: Given this context, what is litellm? 
LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -638,13 +638,13 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -660,7 +660,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -681,7 +681,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -691,31 +691,31 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -771,7 +771,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -780,7 +780,7 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. 
Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -800,7 +800,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -820,7 +820,7 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -830,7 +830,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -840,7 +840,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -850,7 +850,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -862,13 +862,13 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -877,7 +877,7 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -898,7 +898,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. 
Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -919,7 +919,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -936,19 +936,19 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -961,25 +961,25 @@ Exception: 'Response' object has no attribute 'get' Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. Call all LLM APIs using the Ope Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -993,7 +993,7 @@ Question: Given this context, what is litellm? LiteLLM about: About Call all LLM APIs using the OpenAI format. Exception: 'Response' object has no attribute 'get' -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 diff --git a/cookbook/litellm_router/request_log.txt b/cookbook/litellm_router/request_log.txt index 0aed749049..821d87ab56 100644 --- a/cookbook/litellm_router/request_log.txt +++ b/cookbook/litellm_router/request_log.txt @@ -20,7 +20,7 @@ Call all LLM APIs using the OpenAI format. 
Response ID: 52dbbd49-eedb-4c11-8382-3ca7deb1af35 Url: /queue/response/52dbbd49-eedb-4c11-8382-3ca7deb1af35 Time: 3.50 seconds -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 @@ -35,7 +35,7 @@ Question: Does litellm support ooobagooba llms? how can i call oobagooba llms. C Response ID: ae1e2b71-d711-456d-8df0-13ce0709eb04 Url: /queue/response/ae1e2b71-d711-456d-8df0-13ce0709eb04 Time: 5.60 seconds -Question: What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +Question: What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 10 diff --git a/cookbook/litellm_router/test_questions/question3.txt b/cookbook/litellm_router/test_questions/question3.txt index a122787504..d6006f9c73 100644 --- a/cookbook/litellm_router/test_questions/question3.txt +++ b/cookbook/litellm_router/test_questions/question3.txt @@ -1,4 +1,4 @@ -What endpoints does the litellm proxy have 💥 OpenAI Proxy Server +What endpoints does the litellm proxy have 💥 LiteLLM Proxy Server LiteLLM Server manages: Calling 100+ LLMs Huggingface/Bedrock/TogetherAI/etc. in the OpenAI ChatCompletions & Completions format diff --git a/docs/my-website/docs/budget_manager.md b/docs/my-website/docs/budget_manager.md index 1a2c7e7eec..6bea96ef9c 100644 --- a/docs/my-website/docs/budget_manager.md +++ b/docs/my-website/docs/budget_manager.md @@ -7,14 +7,14 @@ Don't want to get crazy bills because either while you're calling LLM APIs **or* :::info -If you want a server to manage user keys, budgets, etc. use our [OpenAI Proxy Server](./proxy/virtual_keys.md) +If you want a server to manage user keys, budgets, etc. use our [LiteLLM Proxy Server](./proxy/virtual_keys.md) ::: LiteLLM exposes: * `litellm.max_budget`: a global variable you can use to set the max budget (in USD) across all your litellm calls. If this budget is exceeded, it will raise a BudgetExceededError * `BudgetManager`: A class to help set budgets per user. BudgetManager creates a dictionary to manage the user budgets, where the key is user and the object is their current cost + model-specific costs. -* `OpenAI Proxy Server`: A server to call 100+ LLMs with an openai-compatible endpoint. Manages user budgets, spend tracking, load balancing etc. +* `LiteLLM Proxy Server`: A server to call 100+ LLMs with an openai-compatible endpoint. Manages user budgets, spend tracking, load balancing etc. ## quick start diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md index 6b472ee6c6..a560ecf76d 100644 --- a/docs/my-website/docs/index.md +++ b/docs/my-website/docs/index.md @@ -10,11 +10,11 @@ https://github.com/BerriAI/litellm - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']` - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing) -- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy) +- Track spend & set budgets per project [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy) ## How to use LiteLLM You can use litellm through either: -1. [OpenAI proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects +1. 
[LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects 2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking ## LiteLLM Python SDK diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md index 35fc0a5086..c7617196e5 100644 --- a/docs/my-website/docs/proxy/deploy.md +++ b/docs/my-website/docs/proxy/deploy.md @@ -246,7 +246,7 @@ helm install lite-helm ./litellm-helm kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT ``` -Your OpenAI proxy server is now running on `http://127.0.0.1:4000`. +Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`. @@ -301,7 +301,7 @@ docker run \ --config /app/config.yaml --detailed_debug ``` -Your OpenAI proxy server is now running on `http://0.0.0.0:4000`. +Your LiteLLM Proxy Server is now running on `http://0.0.0.0:4000`. @@ -399,7 +399,7 @@ kubectl apply -f /path/to/service.yaml kubectl port-forward service/litellm-service 4000:4000 ``` -Your OpenAI proxy server is now running on `http://0.0.0.0:4000`. +Your LiteLLM Proxy Server is now running on `http://0.0.0.0:4000`. @@ -441,7 +441,7 @@ kubectl \ 4000:4000 ``` -Your OpenAI proxy server is now running on `http://127.0.0.1:4000`. +Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`. If you need to set your litellm proxy config.yaml, you can find this in [values.yaml](https://github.com/BerriAI/litellm/blob/main/deploy/charts/litellm-helm/values.yaml) @@ -486,7 +486,7 @@ helm install lite-helm ./litellm-helm kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT ``` -Your OpenAI proxy server is now running on `http://127.0.0.1:4000`. +Your LiteLLM Proxy Server is now running on `http://127.0.0.1:4000`. diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md index ef9352ab1f..0d08db7444 100644 --- a/docs/my-website/docs/proxy_server.md +++ b/docs/my-website/docs/proxy_server.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# [OLD PROXY 👉 [NEW proxy here](./simple_proxy)] Local OpenAI Proxy Server +# [OLD PROXY 👉 [NEW proxy here](./simple_proxy)] Local LiteLLM Proxy Server A fast, and lightweight OpenAI-compatible server to call 100+ LLM APIs. 
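To make the deploy steps above concrete: once the proxy is listening (e.g. on `http://0.0.0.0:4000`), any OpenAI-compatible client can talk to it. The sketch below uses the official `openai` Python client; the model name and the `sk-1234` key are placeholders that depend on what your proxy `config.yaml` actually exposes.

```python
# Minimal sketch: call a running LiteLLM Proxy with the OpenAI Python client.
# Assumes the proxy is up on http://0.0.0.0:4000 and that "gpt-3.5-turbo"
# (placeholder) is a model name defined in your proxy config.
from openai import OpenAI

client = OpenAI(
    api_key="sk-1234",               # virtual key accepted by the proxy (placeholder)
    base_url="http://0.0.0.0:4000",  # proxy endpoint from the deploy steps above
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)
```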
diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 905954e979..d83755e68d 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -14,7 +14,7 @@ In production, litellm supports using Redis as a way to track cooldown server an :::info -If you want a server to load balance across different LLM APIs, use our [OpenAI Proxy Server](./proxy/load_balancing.md) +If you want a server to load balance across different LLM APIs, use our [LiteLLM Proxy Server](./proxy/load_balancing.md) ::: @@ -1637,7 +1637,7 @@ response = router.completion( ## Deploy Router -If you want a server to load balance across different LLM APIs, use our [OpenAI Proxy Server](./simple_proxy#load-balancing---multiple-instances-of-1-model) +If you want a server to load balance across different LLM APIs, use our [LiteLLM Proxy Server](./simple_proxy#load-balancing---multiple-instances-of-1-model) ## Init Params for the litellm.Router diff --git a/docs/my-website/docs/secret.md b/docs/my-website/docs/secret.md index 91ae383686..c44f2cd10c 100644 --- a/docs/my-website/docs/secret.md +++ b/docs/my-website/docs/secret.md @@ -90,7 +90,7 @@ litellm.secret_manager = client litellm.get_secret("your-test-key") ``` -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server 1. Install Proxy dependencies ```bash @@ -129,7 +129,7 @@ litellm --config /path/to/config.yaml Use encrypted keys from Google KMS on the proxy -### Usage with OpenAI Proxy Server +### Usage with LiteLLM Proxy Server ## Step 1. Add keys to env ``` diff --git a/docs/my-website/docs/simple_proxy_old_doc.md b/docs/my-website/docs/simple_proxy_old_doc.md index 195728d1be..2d68db3296 100644 --- a/docs/my-website/docs/simple_proxy_old_doc.md +++ b/docs/my-website/docs/simple_proxy_old_doc.md @@ -2,7 +2,7 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 💥 OpenAI Proxy Server +# 💥 LiteLLM Proxy Server LiteLLM Server manages: diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 6674d91ac7..e57f340c70 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -20,10 +20,10 @@ const sidebars = { { type: "doc", id: "index" }, // NEW { type: "category", - label: "💥 OpenAI Proxy Server", + label: "💥 LiteLLM Proxy Server", link: { type: "generated-index", - title: "💥 OpenAI Proxy Server", + title: "💥 LiteLLM Proxy Server", description: `Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`, slug: "/simple_proxy", }, diff --git a/docs/my-website/src/pages/index.md b/docs/my-website/src/pages/index.md index 308ed08317..36d47aedf7 100644 --- a/docs/my-website/src/pages/index.md +++ b/docs/my-website/src/pages/index.md @@ -10,7 +10,7 @@ https://github.com/BerriAI/litellm - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']` - Retry/fallback logic across multiple deployments (e.g. 
Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing) -- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy) +- Track spend & set budgets per project [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy) ## Basic usage From 58de3f948650a57b2595ab211e67592630afad30 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 3 Aug 2024 12:53:35 -0700 Subject: [PATCH 15/35] fix(vertex_httpx.py): fix linting error --- litellm/llms/vertex_httpx.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index 9995373f32..954a30b801 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -13,6 +13,7 @@ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union import httpx # type: ignore import requests # type: ignore +from openai.types.image import Image import litellm import litellm.litellm_core_utils @@ -1341,10 +1342,10 @@ class VertexLLM(BaseLLM): _json_response = response.json() _predictions = _json_response["predictions"] - _response_data: List[litellm.ImageObject] = [] + _response_data: List[Image] = [] for _prediction in _predictions: _bytes_base64_encoded = _prediction["bytesBase64Encoded"] - image_object = litellm.ImageObject(b64_json=_bytes_base64_encoded) + image_object = Image(b64_json=_bytes_base64_encoded) _response_data.append(image_object) model_response.data = _response_data @@ -1453,10 +1454,10 @@ class VertexLLM(BaseLLM): _json_response = response.json() _predictions = _json_response["predictions"] - _response_data: List[litellm.ImageObject] = [] + _response_data: List[Image] = [] for _prediction in _predictions: _bytes_base64_encoded = _prediction["bytesBase64Encoded"] - image_object = litellm.ImageObject(b64_json=_bytes_base64_encoded) + image_object = Image(b64_json=_bytes_base64_encoded) _response_data.append(image_object) model_response.data = _response_data From cfdbb3d2374ddc576ed7e3c4f8e8041efee84dca Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 12:53:47 -0700 Subject: [PATCH 16/35] docs secret manager --- docs/my-website/docs/secret.md | 31 ++++--------------------------- docs/my-website/sidebars.js | 16 ++++++++-------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/docs/my-website/docs/secret.md b/docs/my-website/docs/secret.md index c44f2cd10c..c2b6774c0b 100644 --- a/docs/my-website/docs/secret.md +++ b/docs/my-website/docs/secret.md @@ -61,7 +61,7 @@ litellm --config /path/to/config.yaml ``` ## Azure Key Vault - + ### Usage with LiteLLM Proxy Server @@ -160,29 +160,6 @@ $ litellm --test [Quick Test Proxy](./proxy/quick_start#using-litellm-proxy---curl-request-openai-package-langchain-langchain-js) - -## Infisical Secret Manager -Integrates with [Infisical's Secret Manager](https://infisical.com/) for secure storage and retrieval of API keys and sensitive data. 
- -### Usage -liteLLM manages reading in your LLM API secrets/env variables from Infisical for you - -```python -import litellm -from infisical import InfisicalClient - -litellm.secret_manager = InfisicalClient(token="your-token") - -messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What's the weather like today?"}, -] - -response = litellm.completion(model="gpt-3.5-turbo", messages=messages) - -print(response) -``` - - + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index e57f340c70..27084f3b45 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -50,6 +50,14 @@ const sidebars = { label: "🪢 Logging", items: ["proxy/logging", "proxy/bucket", "proxy/streaming_logging"], }, + { + type: "category", + label: "Secret Manager - storing LLM API Keys", + items: [ + "secret", + "oidc" + ] + }, "proxy/team_logging", "proxy/guardrails", "proxy/tag_routing", @@ -184,14 +192,6 @@ const sidebars = { "anthropic_completion" ], }, - { - type: "category", - label: "Secret Manager", - items: [ - "secret", - "oidc" - ] - }, { type: "category", label: "🚅 LiteLLM Python SDK", From af04bbeccaf713a4fe6beca531b1596737456f3c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:52:03 -0700 Subject: [PATCH 17/35] set native vertex endpoints --- litellm/proxy/proxy_server.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 0f57a5fd13..83126b9545 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -213,6 +213,8 @@ from litellm.proxy.utils import ( send_email, update_spend, ) +from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router +from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config from litellm.router import ( AssistantsTypedDict, Deployment, @@ -1818,6 +1820,10 @@ class ProxyConfig: files_config = config.get("files_settings", None) set_files_config(config=files_config) + ## default config for vertex ai routes + default_vertex_config = config.get("default_vertex_config", None) + set_default_vertex_config(config=default_vertex_config) + ## ROUTER SETTINGS (e.g. routing_strategy, ...) 
router_settings = config.get("router_settings", None) if router_settings and isinstance(router_settings, dict): @@ -9631,6 +9637,7 @@ def cleanup_router_config_variables(): app.include_router(router) app.include_router(fine_tuning_router) +app.include_router(vertex_router) app.include_router(health_router) app.include_router(key_management_router) app.include_router(internal_user_router) From 64c008045f2c0bd1812dcd85a7585b06bf0dfdd9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:52:43 -0700 Subject: [PATCH 18/35] use native endpoints --- litellm/llms/fine_tuning_apis/vertex_ai.py | 9 +- .../proxy/fine_tuning_endpoints/endpoints.py | 69 ---------- litellm/proxy/proxy_config.yaml | 5 + .../vertex_ai_endpoints/vertex_endpoints.py | 120 ++++++++++++++++++ 4 files changed, 131 insertions(+), 72 deletions(-) create mode 100644 litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 618894245d..189ace11ad 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -241,12 +241,13 @@ class VertexFineTuningAPI(VertexLLM): ) return open_ai_response - async def pass_through_vertex_ai_fine_tuning_job( + async def pass_through_vertex_ai_POST_request( self, request_data: dict, vertex_project: str, vertex_location: str, vertex_credentials: str, + request_route: str, ): auth_header, _ = self._get_token_and_url( model="", @@ -264,14 +265,16 @@ class VertexFineTuningAPI(VertexLLM): "Content-Type": "application/json", } - fine_tuning_url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + url = None + if request_route == "tuningJobs": + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") response = await self.async_handler.post( headers=headers, - url=fine_tuning_url, + url=url, json=request_data, # type: ignore ) diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm/proxy/fine_tuning_endpoints/endpoints.py index c2d89dd251..cda226b5aa 100644 --- a/litellm/proxy/fine_tuning_endpoints/endpoints.py +++ b/litellm/proxy/fine_tuning_endpoints/endpoints.py @@ -429,72 +429,3 @@ async def retrieve_fine_tuning_job( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) - - -@router.post( - "/v1/projects/tuningJobs", - dependencies=[Depends(user_api_key_auth)], - tags=["fine-tuning"], - summary="✨ (Enterprise) Create Fine-Tuning Jobs", -) -async def vertex_create_fine_tuning_job( - request: Request, - fastapi_response: Response, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - this is a pass through endpoint for the Vertex AI API. 
/tuningJobs endpoint - - it uses the vertex ai credentials on the proxy and forwards to vertex ai api - """ - try: - from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance - from litellm.proxy.proxy_server import ( - add_litellm_data_to_request, - general_settings, - get_custom_headers, - premium_user, - proxy_config, - proxy_logging_obj, - version, - ) - - # get configs for custom_llm_provider - llm_provider_config = get_fine_tuning_provider_config( - custom_llm_provider="vertex_ai" - ) - - vertex_project = llm_provider_config.get("vertex_project", None) - vertex_location = llm_provider_config.get("vertex_location", None) - vertex_credentials = llm_provider_config.get("vertex_credentials", None) - request_data_json = await request.json() - response = await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_fine_tuning_job( - request_data=request_data_json, - vertex_project=vertex_project, - vertex_location=vertex_location, - vertex_credentials=vertex_credentials, - ) - - return response - except Exception as e: - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_msg = f"{str(e)}" - raise ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index aa2bfc5252..0750a39376 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -48,6 +48,11 @@ files_settings: - custom_llm_provider: openai api_key: os.environ/OPENAI_API_KEY +default_vertex_config: + vertex_project: "adroit-crow-413218" + vertex_location: "us-central1" + vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" + general_settings: diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py new file mode 100644 index 0000000000..be09a4932d --- /dev/null +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -0,0 +1,120 @@ +import asyncio +import traceback +from datetime import datetime, timedelta, timezone +from typing import List, Optional + +import fastapi +import httpx +from fastapi import ( + APIRouter, + Depends, + File, + Form, + Header, + HTTPException, + Request, + Response, + UploadFile, + status, +) + +import litellm +from litellm._logging import verbose_proxy_logger +from litellm.batches.main import FileObject +from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import user_api_key_auth + +router = APIRouter() +default_vertex_config = None + + +def set_default_vertex_config(config): + global default_vertex_config + if config is None: + return + + if not isinstance(config, list): + raise ValueError("invalid files config, expected a list is not a list") + + for element in config: + if isinstance(element, dict): + for key, value in element.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + element[key] = litellm.get_secret(value) + + default_vertex_config = config 
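Taken together with the `default_vertex_config` block added to `proxy_config.yaml` above, the intent is that each value may be either a literal or an `os.environ/<VAR>` reference that is swapped for a secret when the config is loaded. Below is a standalone sketch of that resolution step — an illustration only, not litellm's loader, and the environment variable names are assumptions.

```python
# Illustrative sketch (not the proxy's actual loader): resolving a
# default_vertex_config block whose values may use the "os.environ/..." convention.
import os


def resolve_vertex_config(config: dict) -> dict:
    resolved = {}
    for key, value in config.items():
        if isinstance(value, str) and value.startswith("os.environ/"):
            # "os.environ/VERTEX_PROJECT" -> read VERTEX_PROJECT from the environment
            resolved[key] = os.environ.get(value.removeprefix("os.environ/"))
        else:
            resolved[key] = value
    return resolved


# Hypothetical usage; VERTEX_PROJECT / VERTEX_CREDENTIALS are assumed env var names.
example = resolve_vertex_config(
    {
        "vertex_project": "os.environ/VERTEX_PROJECT",
        "vertex_location": "us-central1",
        "vertex_credentials": "os.environ/VERTEX_CREDENTIALS",
    }
)
```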
+ + +def exception_handler(e: Exception): + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) + if isinstance(e, HTTPException): + return ProxyException( + message=getattr(e, "message", str(e.detail)), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + ) + else: + error_msg = f"{str(e)}" + return ProxyException( + message=getattr(e, "message", error_msg), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", 500), + ) + + +async def execute_post_vertex_ai_request( + request: Request, + route: str, +): + from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + + vertex_project = default_vertex_config.get("vertex_project", None) + vertex_location = default_vertex_config.get("vertex_location", None) + vertex_credentials = default_vertex_config.get("vertex_credentials", None) + request_data_json = await request.json() + + response = ( + await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_POST_request( + request_data=request_data_json, + vertex_project=vertex_project, + vertex_location=vertex_location, + vertex_credentials=vertex_credentials, + request_route=route, + ) + ) + + return response + + +@router.post( + "/vertex-ai/tuningJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_create_fine_tuning_job( + request: Request, + fastapi_response: Response, + endpoint_name: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route="/tuningJobs", + ) + return response + except Exception as e: + raise exception_handler(e) from e From 8051781af015bbbfd2c52a80067bd7078cef8358 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:56:33 -0700 Subject: [PATCH 19/35] working code for vertex ai routes --- litellm/llms/fine_tuning_apis/vertex_ai.py | 2 +- .../proxy/vertex_ai_endpoints/vertex_endpoints.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 189ace11ad..cb238b04b7 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -266,7 +266,7 @@ class VertexFineTuningAPI(VertexLLM): } url = None - if request_route == "tuningJobs": + if request_route == "/tuningJobs": url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" if self.async_handler is None: diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index be09a4932d..659459cec3 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -34,14 +34,13 @@ def set_default_vertex_config(config): if config is None: return - if not isinstance(config, list): - raise ValueError("invalid files config, expected a list is not a list") + if not isinstance(config, dict): + raise ValueError("invalid config, vertex default config must be a dictionary") - 
for element in config: - if isinstance(element, dict): - for key, value in element.items(): - if isinstance(value, str) and value.startswith("os.environ/"): - element[key] = litellm.get_secret(value) + if isinstance(config, dict): + for key, value in config.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + config[key] = litellm.get_secret(value) default_vertex_config = config @@ -102,7 +101,6 @@ async def execute_post_vertex_ai_request( async def vertex_create_fine_tuning_job( request: Request, fastapi_response: Response, - endpoint_name: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): """ From c98733863afed9154be4d91ccff6434ec8297e56 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:17:54 -0700 Subject: [PATCH 20/35] add vertex generateContent --- litellm/llms/fine_tuning_apis/vertex_ai.py | 4 + .../vertex_ai_endpoints/vertex_endpoints.py | 83 ++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index cb238b04b7..2d3e8d1c38 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -268,6 +268,10 @@ class VertexFineTuningAPI(VertexLLM): url = None if request_route == "/tuningJobs": url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + elif "/tuningJobs/" in request_route and "cancel" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}" + elif "generateContent" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index 659459cec3..7ef552508b 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -1,3 +1,4 @@ +import ast import asyncio import traceback from datetime import datetime, timedelta, timezone @@ -78,7 +79,21 @@ async def execute_post_vertex_ai_request( vertex_project = default_vertex_config.get("vertex_project", None) vertex_location = default_vertex_config.get("vertex_location", None) vertex_credentials = default_vertex_config.get("vertex_credentials", None) - request_data_json = await request.json() + + request_data_json = {} + body = await request.body() + body_str = body.decode() + if len(body_str) > 0: + try: + request_data_json = ast.literal_eval(body_str) + except: + request_data_json = json.loads(body_str) + + verbose_proxy_logger.debug( + "Request received by LiteLLM:\n{}".format( + json.dumps(request_data_json, indent=4) + ), + ) response = ( await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_POST_request( @@ -93,6 +108,41 @@ async def execute_post_vertex_ai_request( return response +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:generateContent", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_generate_content( + request: Request, + fastapi_response: Response, + model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through 
endpoint for the Vertex AI API. /generateContent endpoint + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' + ``` + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#rest + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:generateContent", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], @@ -106,6 +156,8 @@ async def vertex_create_fine_tuning_job( """ this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning + it uses the vertex ai credentials on the proxy and forwards to vertex ai api """ try: @@ -116,3 +168,32 @@ async def vertex_create_fine_tuning_job( return response except Exception as e: raise exception_handler(e) from e + + +@router.post( + "/vertex-ai/tuningJobs/{job_id:path}:cancel", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_cancel_fine_tuning_job( + request: Request, + job_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. tuningJobs/{job_id:path}:cancel + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#cancel_a_tuning_job + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + + response = await execute_post_vertex_ai_request( + request=request, + route=f"/tuningJobs/{job_id}:cancel", + ) + return response + except Exception as e: + raise exception_handler(e) from e From 2d5c57e5457c09ed902dc18914a65953da87983a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:26:49 -0700 Subject: [PATCH 21/35] add vertex embeddings endpoints --- litellm/llms/fine_tuning_apis/vertex_ai.py | 3 +- .../vertex_ai_endpoints/vertex_endpoints.py | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 2d3e8d1c38..f964e7bba2 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}" elif "generateContent" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" - + elif "predict" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index 7ef552508b..da63aa0250 100644 --- 
a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -143,6 +143,45 @@ async def vertex_generate_content( raise exception_handler(e) from e +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:predict", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_predict_endpoint( + request: Request, + fastapi_response: Response, + model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /predict endpoint + Use this for: + - Embeddings API - Text Embedding, Multi Modal Embedding + - Imagen API + - Code Completion API + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"content": "gm"}]}' + ``` + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:predict", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], From c8438715af437a336573808ee913660d0dbad130 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:34:10 -0700 Subject: [PATCH 22/35] add vertex ai countTokens endpoint --- litellm/llms/fine_tuning_apis/vertex_ai.py | 5 ++ .../vertex_ai_endpoints/vertex_endpoints.py | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index f964e7bba2..c24deca941 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -274,6 +274,11 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" elif "predict" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + elif "/batchPredictionJobs" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + elif "countTokens" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index da63aa0250..c7ce354b25 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -182,6 +182,69 @@ async def vertex_predict_endpoint( raise exception_handler(e) from e +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:countTokens", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_countTokens_endpoint( + request: Request, + fastapi_response: Response, + 
model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /countTokens endpoint + https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/count-tokens#curl + + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' + ``` + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:countTokens", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + +@router.post( + "/vertex-ai/batchPredictionJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_create_batch_prediction_job( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /batchPredictionJobs endpoint + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/batch-prediction-api#syntax + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route="/batchPredictionJobs", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], From 94e5d0f73437086e4b5f8864f9f80873bcd58a20 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:41:45 -0700 Subject: [PATCH 23/35] docs add vertex ai endpoints --- docs/my-website/docs/vertex_ai.md | 40 +++++++++++++++++++++++++++++++ docs/my-website/sidebars.js | 5 ++-- 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 docs/my-website/docs/vertex_ai.md diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md new file mode 100644 index 0000000000..c18044c204 --- /dev/null +++ b/docs/my-website/docs/vertex_ai.md @@ -0,0 +1,40 @@ +# [BETA] Vertex AI Endpoints + +## Supported APIs + +- Gemini API +- Embeddings API +- Imagen API +- Code Completion API +- Batch prediction API +- Tuning API +- CountTokens API + +## Quick Start Usage + +#### 1. Set `default_vertex_config` on your `config.yaml` + + +Add the following credentials to your litellm config.yaml to use the Vertex AI endpoints. + +```yaml +default_vertex_config: + vertex_project: "adroit-crow-413218" + vertex_location: "us-central1" + vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json +``` + +#### 2. Start litellm proxy + +```shell +litellm --config /path/to/config.yaml +``` + +#### 3. 
Test it + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:countTokens \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer sk-1234" \ +-d '{"instances":[{"content": "gm"}]}' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 69fd32cb33..a57a9aecd7 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -110,7 +110,7 @@ const sidebars = { }, { type: "category", - label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", + label: "Supported Endpoints - /images, /audio/speech, /assistants etc", items: [ "embedding/supported_embedding", "embedding/async_embedding", @@ -121,7 +121,8 @@ const sidebars = { "assistants", "batches", "fine_tuning", - "anthropic_completion" + "anthropic_completion", + "vertex_ai" ], }, { From 50c9fa38f902b54e6cd8f98823636fe7203a3cd7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:44:28 -0700 Subject: [PATCH 24/35] docs link to vertex ai endpoints --- docs/my-website/docs/proxy/user_keys.md | 3 +++ docs/my-website/docs/vertex_ai.md | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 75e547d17e..79d019a20c 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -23,6 +23,9 @@ LiteLLM Proxy is **Azure OpenAI-compatible**: LiteLLM Proxy is **Anthropic-compatible**: * /messages +LiteLLM Proxy is **Vertex AI compatible**: +- [Supports ALL Vertex Endpoints](../vertex_ai) + This doc covers: * /chat/completion diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index c18044c204..2c9a6279a1 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -1,6 +1,6 @@ # [BETA] Vertex AI Endpoints -## Supported APIs +## Supported API Endpoints - Gemini API - Embeddings API From 7f95a865fefe925b5d98ab7c46d8e433cb02dec1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:56:38 -0700 Subject: [PATCH 25/35] docs add example curl command --- docs/my-website/docs/vertex_ai.md | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index 2c9a6279a1..7ae06f3af7 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -37,4 +37,41 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-geck -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' +``` + + +### Gemini API (Generate Content) + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' +``` + +### Embeddings API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"content": "gm"}]}' +``` + +### Imagen API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/imagen-3.0-generate-001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"prompt": 
"make an otter"}], "parameters": {"sampleCount": 1}}' +``` + +### Count Tokens API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' ``` \ No newline at end of file From 9d2eab555ba679b0bd41594556d3f8184cc80bc3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:58:20 -0700 Subject: [PATCH 26/35] docs tuning api --- docs/my-website/docs/vertex_ai.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index 7ae06f3af7..d9c8616a0b 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -38,7 +38,7 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-geck -H "Authorization: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' ``` - +## Usage Examples ### Gemini API (Generate Content) @@ -74,4 +74,20 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-0 -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' +``` + +### Tuning API + +Create Fine Tuning Job + +```shell +curl http://localhost:4000/vertex-ai/tuningJobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "baseModel": "gemini-1.0-pro-002", + "supervisedTuningSpec" : { + "training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" + } +}' ``` \ No newline at end of file From 3a94aac34389762b7fb81b17ec2a1b9c9041a726 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:59:46 -0700 Subject: [PATCH 27/35] docs - fix merge conflicts --- docs/my-website/sidebars.js | 49 +++---------------------------------- 1 file changed, 3 insertions(+), 46 deletions(-) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 65483e3925..6f6bcfeeab 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -90,50 +90,6 @@ const sidebars = { "proxy/cli", ] }, - { - type: "category", - label: "Completion()", - link: { - type: "generated-index", - title: "Completion()", - description: "Details on the completion() function", - slug: "/completion", - }, - items: [ - "completion/input", - "completion/provider_specific_params", - "completion/json_mode", - "completion/drop_params", - "completion/prompt_formatting", - "completion/output", - "exception_mapping", - "completion/stream", - "completion/message_trimming", - "completion/function_call", - "completion/vision", - "completion/model_alias", - "completion/batching", - "completion/mock_requests", - "completion/reliable_completions", - ], - }, - { - type: "category", - label: "Supported Endpoints - /images, /audio/speech, /assistants etc", - items: [ - "embedding/supported_embedding", - "embedding/async_embedding", - "embedding/moderation", - "image_generation", - "audio_transcription", - "text_to_speech", - "assistants", - "batches", - "fine_tuning", - "anthropic_completion", - "vertex_ai" - ], - }, { type: "category", label: "💯 Supported Models & Providers", @@ -222,7 +178,7 @@ const sidebars = { }, { type: "category", - label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", + label: "Supported Endpoints - /images, 
/audio/speech, /assistants etc", items: [ "embedding/supported_embedding", "embedding/async_embedding", @@ -233,7 +189,8 @@ const sidebars = { "assistants", "batches", "fine_tuning", - "anthropic_completion" + "anthropic_completion", + "vertex_ai" ], }, { From e73eb19678979e2b9e11ae62cbb9247cffb78c91 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:09:48 -0700 Subject: [PATCH 28/35] docs default vertex --- litellm/llms/fine_tuning_apis/vertex_ai.py | 3 ++- litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index c24deca941..5f96f04831 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -278,7 +278,8 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" elif "countTokens" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" - + else: + raise ValueError(f"Unsupported Vertex AI request route: {request_route}") if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index c7ce354b25..b8c04583c3 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -76,6 +76,10 @@ async def execute_post_vertex_ai_request( ): from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + if default_vertex_config is None: + raise ValueError( + "Vertex credentials not added on litellm proxy, please add `default_vertex_config` on your config.yaml" + ) vertex_project = default_vertex_config.get("vertex_project", None) vertex_location = default_vertex_config.get("vertex_location", None) vertex_credentials = default_vertex_config.get("vertex_credentials", None) From b6cf433ed9ceaf5bd5c9e74161906eae73e2646e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:12:38 -0700 Subject: [PATCH 29/35] =?UTF-8?q?bump:=20version=201.42.11=20=E2=86=92=201?= =?UTF-8?q?.42.12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6293b77fb0..a803c8e0fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.42.11" +version = "1.42.12" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.42.11" +version = "1.42.12" version_files = [ "pyproject.toml:^version" ] From 1c1631222c8428fee37cd992aa7fbee8a161a059 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:32:52 -0700 Subject: [PATCH 30/35] fix test test_aimage_generation_vertex_ai --- litellm/tests/test_image_generation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_image_generation.py b/litellm/tests/test_image_generation.py index e59cf28651..6648b202b9 100644 --- a/litellm/tests/test_image_generation.py +++ 
b/litellm/tests/test_image_generation.py @@ -7,6 +7,7 @@ import sys import traceback from dotenv import load_dotenv +from openai.types.image import Image logging.basicConfig(level=logging.DEBUG) load_dotenv() @@ -218,7 +219,7 @@ async def test_aimage_generation_vertex_ai(sync_mode): assert len(response.data) > 0 for d in response.data: - assert isinstance(d, litellm.ImageObject) + assert isinstance(d, Image) print("data in response.data", d) assert d.b64_json is not None except litellm.ServiceUnavailableError as e: From 009697425d15ad27b513ea3b365a11145170fbe9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:34:26 -0700 Subject: [PATCH 31/35] fix fine tuning tests --- litellm/tests/test_fine_tuning_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py index 20a58c4d00..f02af1b927 100644 --- a/litellm/tests/test_fine_tuning_api.py +++ b/litellm/tests/test_fine_tuning_api.py @@ -80,6 +80,8 @@ def test_create_fine_tune_job(): except openai.RateLimitError: pass except Exception as e: + if "Job has already completed" in str(e): + pass pytest.fail(f"Error occurred: {e}") From 7fae2aa394c403182b2537eb6e25b599a24b1aee Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:48:10 -0700 Subject: [PATCH 32/35] ci/cd run again --- litellm/tests/test_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index c26035ad0a..eec163f26a 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.prompt_templates.factory import anthropic_messages_pt -# litellm.num_retries = 3 +# litellm.num_retries=3 litellm.cache = None litellm.success_callback = [] user_message = "Write a short poem about the sky" From 93b0d239979cc1e653058773c3970dd06ac5f2c6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 19:02:11 -0700 Subject: [PATCH 33/35] fix fine tune api tests --- litellm/tests/test_fine_tuning_api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_fine_tuning_api.py b/litellm/tests/test_fine_tuning_api.py index f02af1b927..412ffb497c 100644 --- a/litellm/tests/test_fine_tuning_api.py +++ b/litellm/tests/test_fine_tuning_api.py @@ -81,8 +81,9 @@ def test_create_fine_tune_job(): pass except Exception as e: if "Job has already completed" in str(e): - pass - pytest.fail(f"Error occurred: {e}") + return + else: + pytest.fail(f"Error occurred: {e}") @pytest.mark.asyncio @@ -137,7 +138,7 @@ async def test_create_fine_tune_jobs_async(): pass except Exception as e: if "Job has already completed" in str(e): - pass + return else: pytest.fail(f"Error occurred: {e}") pass From 22bc70633e5c42f7789d8a42418659b2b39a77eb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 19:10:16 -0700 Subject: [PATCH 34/35] docs add when to use litellm --- docs/my-website/docs/index.md | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md index a560ecf76d..dffc9501a8 100644 --- a/docs/my-website/docs/index.md +++ b/docs/my-website/docs/index.md @@ -10,14 +10,40 @@ https://github.com/BerriAI/litellm - Translate inputs to provider's `completion`, 
`embedding`, and `image_generation` endpoints
 - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Track spend & set budgets per project [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
 
 ## How to use LiteLLM
 You can use litellm through either:
-1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
+1. [OpenAI Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
 2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking
 
-## LiteLLM Python SDK
+### When to use LiteLLM Proxy Server
+
+:::tip
+
+Use LiteLLM Proxy Server if you want a **central service to access multiple LLMs**
+
+Typically used by Gen AI Enablement / ML Platform Teams
+
+:::
+
+  - LiteLLM Proxy gives you a unified interface to access multiple LLMs (100+ LLMs)
+  - Track LLM usage and set up guardrails
+  - Customize logging, guardrails, and caching per project
+
+### When to use LiteLLM Python SDK
+
+:::tip
+
+Use LiteLLM Python SDK if you want to use LiteLLM in your **Python code**
+
+Typically used by developers building LLM projects
+
+:::
+
+  - LiteLLM SDK gives you a unified interface to access multiple LLMs (100+ LLMs)
+  - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
+
 
 ### Basic usage
 
From d4796d6369e09e7eefe7ceaacfb882d21c5818f8 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 3 Aug 2024 20:57:40 -0700
Subject: [PATCH 35/35] docs(sidebar.js): cleanup

---
 docs/my-website/sidebars.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 6f6bcfeeab..0305a7d81b 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -24,7 +24,7 @@ const sidebars = {
     link: {
       type: "generated-index",
       title: "💥 LiteLLM Proxy Server",
-      description: `Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
+      description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
       slug: "/simple_proxy",
     },
     items: [
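
For readers following along with the Vertex AI pass-through docs added in these patches, the same routes can also be exercised from Python instead of curl. The sketch below is illustrative only: it assumes a LiteLLM proxy running at `http://localhost:4000` that accepts the example virtual key `sk-1234` used in the curl commands, and it uses the `requests` library rather than any LiteLLM client helper.

```python
# Minimal sketch of calling the Vertex AI pass-through routes documented above.
# Assumptions: a LiteLLM proxy is running locally on port 4000 and accepts the
# example virtual key "sk-1234"; endpoint paths and payloads mirror the curl examples.
import requests

BASE_URL = "http://localhost:4000/vertex-ai"
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer sk-1234",
}

# Count tokens for a simple user message (countTokens pass-through).
count_resp = requests.post(
    f"{BASE_URL}/publishers/google/models/gemini-1.5-flash-001:countTokens",
    headers=HEADERS,
    json={"contents": [{"role": "user", "parts": [{"text": "hi"}]}]},
)
print(count_resp.status_code, count_resp.json())

# Create a supervised fine-tuning job (tuningJobs pass-through).
tuning_resp = requests.post(
    f"{BASE_URL}/tuningJobs",
    headers=HEADERS,
    json={
        "baseModel": "gemini-1.0-pro-002",
        "supervisedTuningSpec": {
            "training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl"
        },
    },
)
print(tuning_resp.status_code, tuning_resp.json())
```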