From b873b16f36bdced3b8b599b8fc83fb1d203fc871 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Wed, 13 Nov 2024 19:53:19 +0530
Subject: [PATCH] feat(pass_through_endpoints/): add anthropic/ pass-through
 endpoint

adds new `anthropic/` pass-through endpoint + refactors docs
---
 docs/my-website/docs/anthropic_completion.md  |  54 ----
 .../docs/pass_through/anthropic_completion.md | 282 ++++++++++++++++++
 docs/my-website/sidebars.js                   |   4 +-
 litellm/proxy/_new_secret_config.yaml         |   2 +-
 litellm/proxy/proxy_server.py                 |   9 +-
 .../google_ai_studio_endpoints.py             |  45 +++
 6 files changed, 338 insertions(+), 58 deletions(-)
 delete mode 100644 docs/my-website/docs/anthropic_completion.md
 create mode 100644 docs/my-website/docs/pass_through/anthropic_completion.md

diff --git a/docs/my-website/docs/anthropic_completion.md b/docs/my-website/docs/anthropic_completion.md
deleted file mode 100644
index ca65f3f6f..000000000
--- a/docs/my-website/docs/anthropic_completion.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# [BETA] Anthropic `/v1/messages`
-
-Call 100+ LLMs in the Anthropic format.
-
-
-1. Setup config.yaml
-
-```yaml
-model_list:
-  - model_name: my-test-model
-    litellm_params:
-      model: gpt-3.5-turbo
-```
-
-2. Start proxy
-
-```bash
-litellm --config /path/to/config.yaml
-```
-
-3. Test it!
-
-```bash
-curl -X POST 'http://0.0.0.0:4000/v1/messages' \
--H 'x-api-key: sk-1234' \
--H 'content-type: application/json' \
--D '{
-  "model": "my-test-model",
-  "max_tokens": 1024,
-  "messages": [
-    {"role": "user", "content": "Hello, world"}
-  ]
-}'
-```
-
-## Test with Anthropic SDK
-
-```python
-import os
-from anthropic import Anthropic
-
-client = Anthropic(api_key="sk-1234", base_url="http://0.0.0.0:4000") # 👈 CONNECT TO PROXY
-
-message = client.messages.create(
-    messages=[
-        {
-            "role": "user",
-            "content": "Hello, Claude",
-        }
-    ],
-    model="my-test-model", # 👈 set 'model_name'
-)
-print(message.content)
-```
\ No newline at end of file
diff --git a/docs/my-website/docs/pass_through/anthropic_completion.md b/docs/my-website/docs/pass_through/anthropic_completion.md
new file mode 100644
index 000000000..0c6a5f1b6
--- /dev/null
+++ b/docs/my-website/docs/pass_through/anthropic_completion.md
@@ -0,0 +1,282 @@
+# Anthropic `/v1/messages`
+
+Pass-through endpoints for Anthropic - call the provider-specific endpoint in its native format (no translation).
+
+Just replace `https://api.anthropic.com` with `LITELLM_PROXY_BASE_URL/anthropic` 🚀
+
+#### **Example Usage**
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages \
+  --header 'accept: application/json' \
+  --header 'content-type: application/json' \
+  --header "Authorization: bearer sk-anything" \
+  --data '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+Supports **ALL** Anthropic Endpoints (including streaming).
+
+[**See All Anthropic Endpoints**](https://docs.anthropic.com/en/api/messages)
+
+## Quick Start
+
+Let's call the Anthropic [`/messages` endpoint](https://docs.anthropic.com/en/api/messages)
+
+1. Add Anthropic API Key to your environment
+
+```bash
+export ANTHROPIC_API_KEY=""
+```
+
+2. Start LiteLLM Proxy
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+Let's call the Anthropic `/messages` endpoint:
+
+```bash
+curl http://0.0.0.0:4000/anthropic/v1/messages \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
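+
+You can also point the official Anthropic Python SDK at the pass-through. A minimal sketch (assumes the proxy is running locally without key auth enforced, so any `api_key` value is accepted; the real `ANTHROPIC_API_KEY` is attached server-side):
+
+```python
+import anthropic
+
+# point the SDK at the proxy's pass-through base URL instead of api.anthropic.com
+client = anthropic.Anthropic(
+    base_url="http://0.0.0.0:4000/anthropic",  # LITELLM_PROXY_BASE_URL + /anthropic
+    api_key="sk-anything",  # swap in a virtual key if auth is enabled on the proxy
+)
+
+message = client.messages.create(
+    model="claude-3-5-sonnet-20241022",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello, world"}],
+)
+print(message.content)
+```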
+
+## Examples
+
+Anything after `http://0.0.0.0:4000/anthropic` is treated as a provider-specific route and handled accordingly.
+
+Key Changes:
+
+| **Original Endpoint**       | **Replace With**                                                                             |
+|-----------------------------|----------------------------------------------------------------------------------------------|
+| `https://api.anthropic.com` | `http://0.0.0.0:4000/anthropic` (LITELLM_PROXY_BASE_URL="http://0.0.0.0:4000")                |
+| `bearer $ANTHROPIC_API_KEY` | `bearer anything` (use `bearer LITELLM_VIRTUAL_KEY` if Virtual Keys are set up on the proxy)  |
+
+
+### **Example 1: Messages endpoint**
+
+#### LiteLLM Proxy Call
+
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+#### Direct Anthropic API Call
+
+```bash
+curl https://api.anthropic.com/v1/messages \
+  --header "x-api-key: $ANTHROPIC_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+### **Example 2: Token Counting API**
+
+#### LiteLLM Proxy Call
+
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages/count_tokens \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "anthropic-beta: token-counting-2024-11-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+#### Direct Anthropic API Call
+
+```bash
+curl https://api.anthropic.com/v1/messages/count_tokens \
+  --header "x-api-key: $ANTHROPIC_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "anthropic-beta: token-counting-2024-11-01" \
+  --header "content-type: application/json" \
+  --data \
+'{
+  "model": "claude-3-5-sonnet-20241022",
+  "messages": [
+    {"role": "user", "content": "Hello, world"}
+  ]
+}'
+```
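+
+The same token count can be requested from a script. A minimal `requests` sketch that mirrors the proxy curl above (assumes a locally running proxy and no enforced key auth):
+
+```python
+import requests
+
+# count tokens through the proxy pass-through (same headers as the curl example)
+response = requests.post(
+    "http://0.0.0.0:4000/anthropic/v1/messages/count_tokens",
+    headers={
+        "x-api-key": "sk-anything",
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "token-counting-2024-11-01",
+        "content-type": "application/json",
+    },
+    json={
+        "model": "claude-3-5-sonnet-20241022",
+        "messages": [{"role": "user", "content": "Hello, world"}],
+    },
+)
+print(response.json())  # token count response, e.g. {"input_tokens": ...}
+```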
--header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "anthropic-beta: message-batches-2024-09-24" \ + --header "content-type: application/json" \ + --data \ +'{ + "requests": [ + { + "custom_id": "my-first-request", + "params": { + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] + } + }, + { + "custom_id": "my-second-request", + "params": { + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hi again, friend"} + ] + } + } + ] +}' +``` + + +## Advanced - Use with Virtual Keys + +Pre-requisites +- [Setup proxy with DB](../proxy/virtual_keys.md#setup) + +Use this, to avoid giving developers the raw Anthropic API key, but still letting them use Anthropic endpoints. + +### Usage + +1. Setup environment + +```bash +export DATABASE_URL="" +export LITELLM_MASTER_KEY="" +export COHERE_API_KEY="" +``` + +```bash +litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +2. Generate virtual key + +```bash +curl -X POST 'http://0.0.0.0:4000/key/generate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{}' +``` + +Expected Response + +```bash +{ + ... + "key": "sk-1234ewknldferwedojwojw" +} +``` + +3. Test it! + + +```bash +curl --request POST \ + --url http://0.0.0.0:4000/anthropic/v1/messages \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --header "Authorization: bearer sk-1234ewknldferwedojwojw" \ + --data '{ + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] + }' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1dc33f554..dd8443a28 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -65,12 +65,12 @@ const sidebars = { }, { type: "category", - label: "Use with Provider SDKs", + label: "Pass-through Endpoints (Provider-specific)", items: [ "pass_through/vertex_ai", "pass_through/google_ai_studio", "pass_through/cohere", - "anthropic_completion", + "pass_through/anthropic_completion", "pass_through/bedrock", "pass_through/langfuse" ], diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 806b55994..911f15b86 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -68,7 +68,7 @@ model_list: litellm_settings: fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }] - callbacks: ["otel", "prometheus"] + # callbacks: ["otel", "prometheus"] default_redis_batch_cache_expiry: 10 # default_team_settings: # - team_id: "dbe2f686-a686-4896-864a-4c3924458709" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index c9c6af77f..bbf4b0b93 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1072,7 +1072,7 @@ async def update_cache( # noqa: PLR0915 end_user_id: Optional[str], team_id: Optional[str], response_cost: Optional[float], - parent_otel_span: Optional[Span], + parent_otel_span: Optional[Span], # type: ignore ): """ Use this to update the cache with new user spend. @@ -5655,6 +5655,13 @@ async def anthropic_response( # noqa: PLR0915 request: Request, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): + """ + This is a BETA endpoint that calls 100+ LLMs in the anthropic format. 
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 1dc33f554..dd8443a28 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -65,12 +65,12 @@ const sidebars = {
     },
     {
       type: "category",
-      label: "Use with Provider SDKs",
+      label: "Pass-through Endpoints (Provider-specific)",
       items: [
         "pass_through/vertex_ai",
         "pass_through/google_ai_studio",
         "pass_through/cohere",
-        "anthropic_completion",
+        "pass_through/anthropic_completion",
         "pass_through/bedrock",
         "pass_through/langfuse"
       ],
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 806b55994..911f15b86 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -68,7 +68,7 @@ model_list:
 
 litellm_settings:
   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
-  callbacks: ["otel", "prometheus"]
+  # callbacks: ["otel", "prometheus"]
   default_redis_batch_cache_expiry: 10
   # default_team_settings:
   #   - team_id: "dbe2f686-a686-4896-864a-4c3924458709"
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index c9c6af77f..bbf4b0b93 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1072,7 +1072,7 @@ async def update_cache(  # noqa: PLR0915
     end_user_id: Optional[str],
     team_id: Optional[str],
     response_cost: Optional[float],
-    parent_otel_span: Optional[Span],
+    parent_otel_span: Optional[Span],  # type: ignore
 ):
     """
     Use this to update the cache with new user spend.
@@ -5655,6 +5655,13 @@ async def anthropic_response(  # noqa: PLR0915
     request: Request,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
+    """
+    This is a BETA endpoint that calls 100+ LLMs in the Anthropic format.
+
+    For a simple pass-through to Anthropic, use `{PROXY_BASE_URL}/anthropic/v1/messages` instead.
+
+    Docs - https://docs.litellm.ai/docs/anthropic_completion
+    """
     from litellm import adapter_completion
     from litellm.adapters.anthropic_adapter import anthropic_adapter
 
diff --git a/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py b/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
index 667a21a3c..c4a64fa21 100644
--- a/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
@@ -155,6 +155,51 @@ async def cohere_proxy_route(
     return received_value
 
 
+@router.api_route(
+    "/anthropic/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
+)
+async def anthropic_proxy_route(
+    endpoint: str,
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    base_target_url = "https://api.anthropic.com"
+    encoded_endpoint = httpx.URL(endpoint).path
+
+    # Ensure endpoint starts with '/' for proper URL construction
+    if not encoded_endpoint.startswith("/"):
+        encoded_endpoint = "/" + encoded_endpoint
+
+    # Construct the full target URL using httpx
+    base_url = httpx.URL(base_target_url)
+    updated_url = base_url.copy_with(path=encoded_endpoint)
+
+    # Look up the Anthropic API key to attach to the outbound request
+    anthropic_api_key = litellm.utils.get_secret(secret_name="ANTHROPIC_API_KEY")
+
+    ## check for streaming - any target URL containing 'stream' is treated as a streaming request
+    is_streaming_request = False
+    if "stream" in str(updated_url):
+        is_streaming_request = True
+
+    ## CREATE PASS-THROUGH
+    endpoint_func = create_pass_through_route(
+        endpoint=endpoint,
+        target=str(updated_url),
+        custom_headers={"x-api-key": "{}".format(anthropic_api_key)},
+        _forward_headers=True,
+    )  # dynamically construct pass-through endpoint based on incoming path
+    received_value = await endpoint_func(
+        request,
+        fastapi_response,
+        user_api_key_dict,
+        stream=is_streaming_request,  # type: ignore
+    )
+
+    return received_value
+
+
 @router.api_route("/bedrock/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def bedrock_proxy_route(
     endpoint: str,