From b873b16f36bdced3b8b599b8fc83fb1d203fc871 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Wed, 13 Nov 2024 19:53:19 +0530
Subject: [PATCH] feat(pass_through_endpoints/): add anthropic/ pass-through
 endpoint

adds new `anthropic/` pass-through endpoint + refactors docs
---
 docs/my-website/docs/anthropic_completion.md  |  54 ----
 .../docs/pass_through/anthropic_completion.md | 282 ++++++++++++++++++
 docs/my-website/sidebars.js                   |   4 +-
 litellm/proxy/_new_secret_config.yaml         |   2 +-
 litellm/proxy/proxy_server.py                 |   9 +-
 .../google_ai_studio_endpoints.py             |  45 +++
 6 files changed, 338 insertions(+), 58 deletions(-)
 delete mode 100644 docs/my-website/docs/anthropic_completion.md
 create mode 100644 docs/my-website/docs/pass_through/anthropic_completion.md

diff --git a/docs/my-website/docs/anthropic_completion.md b/docs/my-website/docs/anthropic_completion.md
deleted file mode 100644
index ca65f3f6f..000000000
--- a/docs/my-website/docs/anthropic_completion.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# [BETA] Anthropic `/v1/messages`
-
-Call 100+ LLMs in the Anthropic format.
-
-
-1. Setup config.yaml
-
-```yaml
-model_list:
-  - model_name: my-test-model
-    litellm_params:
-      model: gpt-3.5-turbo
-```
-
-2. Start proxy
-
-```bash
-litellm --config /path/to/config.yaml
-```
-
-3. Test it!
-
-```bash
-curl -X POST 'http://0.0.0.0:4000/v1/messages' \
--H 'x-api-key: sk-1234' \
--H 'content-type: application/json' \
--D '{
-  "model": "my-test-model",
-  "max_tokens": 1024,
-  "messages": [
-    {"role": "user", "content": "Hello, world"}
-  ]
-}'
-```
-
-## Test with Anthropic SDK
-
-```python
-import os
-from anthropic import Anthropic
-
-client = Anthropic(api_key="sk-1234", base_url="http://0.0.0.0:4000") # 👈 CONNECT TO PROXY
-
-message = client.messages.create(
-    messages=[
-        {
-            "role": "user",
-            "content": "Hello, Claude",
-        }
-    ],
-    model="my-test-model", # 👈 set 'model_name'
-)
-print(message.content)
-```
\ No newline at end of file
diff --git a/docs/my-website/docs/pass_through/anthropic_completion.md b/docs/my-website/docs/pass_through/anthropic_completion.md
new file mode 100644
index 000000000..0c6a5f1b6
--- /dev/null
+++ b/docs/my-website/docs/pass_through/anthropic_completion.md
@@ -0,0 +1,282 @@
+# Anthropic `/v1/messages`
+
+Pass-through endpoints for Anthropic - call the provider-specific endpoint in its native format (no translation).
+
+Just replace `https://api.anthropic.com` with `LITELLM_PROXY_BASE_URL/anthropic` 🚀
+
+#### **Example Usage**
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages \
+  --header 'accept: application/json' \
+  --header 'content-type: application/json' \
+  --header "Authorization: bearer sk-anything" \
+  --data '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+Supports **ALL** Anthropic Endpoints (including streaming).
+
+[**See All Anthropic Endpoints**](https://docs.anthropic.com/en/api/messages)
+
+## Quick Start
+
+Let's call the Anthropic [`/messages` endpoint](https://docs.anthropic.com/en/api/messages)
+
+1. Add Anthropic API Key to your environment
+
+```bash
+export ANTHROPIC_API_KEY=""
+```
+
+2. Start LiteLLM Proxy
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+Let's call the Anthropic `/messages` endpoint:
+
+```bash
+curl http://0.0.0.0:4000/anthropic/v1/messages \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
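+
+You can also point the official Anthropic Python SDK at the pass-through. A minimal sketch (assumes the proxy is running locally without key auth enforced, so any `api_key` value is accepted; the real `ANTHROPIC_API_KEY` is attached server-side):
+
+```python
+import anthropic
+
+# point the SDK at the proxy's pass-through base URL instead of api.anthropic.com
+client = anthropic.Anthropic(
+    base_url="http://0.0.0.0:4000/anthropic",  # LITELLM_PROXY_BASE_URL + /anthropic
+    api_key="sk-anything",  # swap in a virtual key if auth is enabled on the proxy
+)
+
+message = client.messages.create(
+    model="claude-3-5-sonnet-20241022",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello, world"}],
+)
+print(message.content)
+```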
+
+## Examples
+
+Anything after `http://0.0.0.0:4000/anthropic` is treated as a provider-specific route and handled accordingly.
+
+Key Changes:
+
+| **Original Endpoint**       | **Replace With**                                                                             |
+|-----------------------------|----------------------------------------------------------------------------------------------|
+| `https://api.anthropic.com` | `http://0.0.0.0:4000/anthropic` (LITELLM_PROXY_BASE_URL="http://0.0.0.0:4000")                |
+| `bearer $ANTHROPIC_API_KEY` | `bearer anything` (use `bearer LITELLM_VIRTUAL_KEY` if Virtual Keys are set up on the proxy)  |
+
+
+### **Example 1: Messages endpoint**
+
+#### LiteLLM Proxy Call
+
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+#### Direct Anthropic API Call
+
+```bash
+curl https://api.anthropic.com/v1/messages \
+  --header "x-api-key: $ANTHROPIC_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+### **Example 2: Token Counting API**
+
+#### LiteLLM Proxy Call
+
+```bash
+curl --request POST \
+  --url http://0.0.0.0:4000/anthropic/v1/messages/count_tokens \
+  --header "x-api-key: $LITELLM_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "anthropic-beta: token-counting-2024-11-01" \
+  --header "content-type: application/json" \
+  --data \
+  '{
+    "model": "claude-3-5-sonnet-20241022",
+    "messages": [
+      {"role": "user", "content": "Hello, world"}
+    ]
+  }'
+```
+
+#### Direct Anthropic API Call
+
+```bash
+curl https://api.anthropic.com/v1/messages/count_tokens \
+  --header "x-api-key: $ANTHROPIC_API_KEY" \
+  --header "anthropic-version: 2023-06-01" \
+  --header "anthropic-beta: token-counting-2024-11-01" \
+  --header "content-type: application/json" \
+  --data \
+'{
+  "model": "claude-3-5-sonnet-20241022",
+  "messages": [
+    {"role": "user", "content": "Hello, world"}
+  ]
+}'
+```
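+
+The same token count can be requested from a script. A minimal `requests` sketch that mirrors the proxy curl above (assumes a locally running proxy and no enforced key auth):
+
+```python
+import requests
+
+# count tokens through the proxy pass-through (same headers as the curl example)
+response = requests.post(
+    "http://0.0.0.0:4000/anthropic/v1/messages/count_tokens",
+    headers={
+        "x-api-key": "sk-anything",
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "token-counting-2024-11-01",
+        "content-type": "application/json",
+    },
+    json={
+        "model": "claude-3-5-sonnet-20241022",
+        "messages": [{"role": "user", "content": "Hello, world"}],
+    },
+)
+print(response.json())  # token count response, e.g. {"input_tokens": ...}
+```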
--header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "anthropic-beta: message-batches-2024-09-24" \ + --header "content-type: application/json" \ + --data \ +'{ + "requests": [ + { + "custom_id": "my-first-request", + "params": { + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] + } + }, + { + "custom_id": "my-second-request", + "params": { + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hi again, friend"} + ] + } + } + ] +}' +``` + + +## Advanced - Use with Virtual Keys + +Pre-requisites +- [Setup proxy with DB](../proxy/virtual_keys.md#setup) + +Use this, to avoid giving developers the raw Anthropic API key, but still letting them use Anthropic endpoints. + +### Usage + +1. Setup environment + +```bash +export DATABASE_URL="" +export LITELLM_MASTER_KEY="" +export COHERE_API_KEY="" +``` + +```bash +litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +2. Generate virtual key + +```bash +curl -X POST 'http://0.0.0.0:4000/key/generate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{}' +``` + +Expected Response + +```bash +{ + ... + "key": "sk-1234ewknldferwedojwojw" +} +``` + +3. Test it! + + +```bash +curl --request POST \ + --url http://0.0.0.0:4000/anthropic/v1/messages \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --header "Authorization: bearer sk-1234ewknldferwedojwojw" \ + --data '{ + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] + }' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1dc33f554..dd8443a28 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -65,12 +65,12 @@ const sidebars = { }, { type: "category", - label: "Use with Provider SDKs", + label: "Pass-through Endpoints (Provider-specific)", items: [ "pass_through/vertex_ai", "pass_through/google_ai_studio", "pass_through/cohere", - "anthropic_completion", + "pass_through/anthropic_completion", "pass_through/bedrock", "pass_through/langfuse" ], diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 806b55994..911f15b86 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -68,7 +68,7 @@ model_list: litellm_settings: fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }] - callbacks: ["otel", "prometheus"] + # callbacks: ["otel", "prometheus"] default_redis_batch_cache_expiry: 10 # default_team_settings: # - team_id: "dbe2f686-a686-4896-864a-4c3924458709" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index c9c6af77f..bbf4b0b93 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1072,7 +1072,7 @@ async def update_cache( # noqa: PLR0915 end_user_id: Optional[str], team_id: Optional[str], response_cost: Optional[float], - parent_otel_span: Optional[Span], + parent_otel_span: Optional[Span], # type: ignore ): """ Use this to update the cache with new user spend. @@ -5655,6 +5655,13 @@ async def anthropic_response( # noqa: PLR0915 request: Request, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): + """ + This is a BETA endpoint that calls 100+ LLMs in the anthropic format. 
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 1dc33f554..dd8443a28 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -65,12 +65,12 @@ const sidebars = {
     },
     {
       type: "category",
-      label: "Use with Provider SDKs",
+      label: "Pass-through Endpoints (Provider-specific)",
       items: [
         "pass_through/vertex_ai",
         "pass_through/google_ai_studio",
         "pass_through/cohere",
-        "anthropic_completion",
+        "pass_through/anthropic_completion",
         "pass_through/bedrock",
         "pass_through/langfuse"
       ],
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 806b55994..911f15b86 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -68,7 +68,7 @@ model_list:
 
 litellm_settings:
   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
-  callbacks: ["otel", "prometheus"]
+  # callbacks: ["otel", "prometheus"]
   default_redis_batch_cache_expiry: 10
   # default_team_settings:
   #   - team_id: "dbe2f686-a686-4896-864a-4c3924458709"
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index c9c6af77f..bbf4b0b93 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1072,7 +1072,7 @@ async def update_cache(  # noqa: PLR0915
     end_user_id: Optional[str],
     team_id: Optional[str],
     response_cost: Optional[float],
-    parent_otel_span: Optional[Span],
+    parent_otel_span: Optional[Span],  # type: ignore
 ):
     """
     Use this to update the cache with new user spend.
@@ -5655,6 +5655,13 @@ async def anthropic_response(  # noqa: PLR0915
     request: Request,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
+    """
+    This is a BETA endpoint that calls 100+ LLMs in the Anthropic format.
+
+    For a simple pass-through to Anthropic, use `{PROXY_BASE_URL}/anthropic/v1/messages` instead.
+
+    Docs - https://docs.litellm.ai/docs/anthropic_completion
+    """
     from litellm import adapter_completion
     from litellm.adapters.anthropic_adapter import anthropic_adapter
 
diff --git a/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py b/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
index 667a21a3c..c4a64fa21 100644
--- a/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py
@@ -155,6 +155,51 @@ async def cohere_proxy_route(
     return received_value
 
 
+@router.api_route(
+    "/anthropic/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
+)
+async def anthropic_proxy_route(
+    endpoint: str,
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    base_target_url = "https://api.anthropic.com"
+    encoded_endpoint = httpx.URL(endpoint).path
+
+    # Ensure endpoint starts with '/' for proper URL construction
+    if not encoded_endpoint.startswith("/"):
+        encoded_endpoint = "/" + encoded_endpoint
+
+    # Construct the full target URL using httpx
+    base_url = httpx.URL(base_target_url)
+    updated_url = base_url.copy_with(path=encoded_endpoint)
+
+    # Look up the Anthropic API key to attach to the outbound request
+    anthropic_api_key = litellm.utils.get_secret(secret_name="ANTHROPIC_API_KEY")
+
+    ## check for streaming - any target URL containing 'stream' is treated as a streaming request
+    is_streaming_request = False
+    if "stream" in str(updated_url):
+        is_streaming_request = True
+
+    ## CREATE PASS-THROUGH
+    endpoint_func = create_pass_through_route(
+        endpoint=endpoint,
+        target=str(updated_url),
+        custom_headers={"x-api-key": "{}".format(anthropic_api_key)},
+        _forward_headers=True,
+    )  # dynamically construct pass-through endpoint based on incoming path
+    received_value = await endpoint_func(
+        request,
+        fastapi_response,
+        user_api_key_dict,
+        stream=is_streaming_request,  # type: ignore
+    )
+
+    return received_value
+
+
 @router.api_route("/bedrock/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def bedrock_proxy_route(
     endpoint: str,