From d75f6f74f3d62ff4d312d09dcef05db822f14070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADcio=20Ceolin?= Date: Sat, 10 Aug 2024 12:12:55 -0300 Subject: [PATCH 01/60] Follow redirects --- litellm/llms/ollama_chat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index b0dd5d905a..ea84fa95cf 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -356,6 +356,7 @@ def ollama_completion_stream(url, api_key, data, logging_obj): "json": data, "method": "POST", "timeout": litellm.request_timeout, + "follow_redirects": True } if api_key is not None: _request["headers"] = {"Authorization": "Bearer {}".format(api_key)} From 584542817e7949acc1df3c8a3e6d5a9a032bcca4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:06:10 -0700 Subject: [PATCH 02/60] feat gcs log user api key metadata --- litellm/integrations/gcs_bucket.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index 3fb778e242..a16d952861 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -13,7 +13,7 @@ from litellm.litellm_core_utils.logging_utils import ( convert_litellm_response_object_to_dict, ) from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler -from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload +from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload class RequestKwargs(TypedDict): @@ -27,6 +27,8 @@ class GCSBucketPayload(TypedDict): response_obj: Optional[Dict] start_time: str end_time: str + response_cost: Optional[float] + spend_log_metadata: str class GCSBucketLogger(CustomLogger): @@ -78,11 +80,12 @@ class GCSBucketLogger(CustomLogger): kwargs, response_obj, start_time_str, end_time_str ) + json_logged_payload = json.dumps(logging_payload) object_name = response_obj["id"] response = await self.async_httpx_client.post( headers=headers, url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", - json=logging_payload, + data=json_logged_payload, ) if response.status_code != 200: @@ -121,6 +124,10 @@ class GCSBucketLogger(CustomLogger): async def get_gcs_payload( self, kwargs, response_obj, start_time, end_time ) -> GCSBucketPayload: + from litellm.proxy.spend_tracking.spend_tracking_utils import ( + get_logging_payload, + ) + request_kwargs = RequestKwargs( model=kwargs.get("model", None), messages=kwargs.get("messages", None), @@ -131,11 +138,21 @@ class GCSBucketLogger(CustomLogger): response_obj=response_obj ) + _spend_log_payload: SpendLogsPayload = get_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + end_user_id=kwargs.get("end_user_id", None), + ) + gcs_payload: GCSBucketPayload = GCSBucketPayload( request_kwargs=request_kwargs, response_obj=response_dict, start_time=start_time, end_time=end_time, + spend_log_metadata=_spend_log_payload["metadata"], + response_cost=kwargs.get("response_cost", None), ) return gcs_payload From 98e68ef4dba182d909edff3750926cf20d317a5a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:07:08 -0700 Subject: [PATCH 03/60] test gcs logging payload --- litellm/tests/test_gcs_bucket.py | 59 +++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/litellm/tests/test_gcs_bucket.py 
b/litellm/tests/test_gcs_bucket.py index c5a6fb76ac..754b499342 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -63,7 +63,7 @@ def load_vertex_ai_credentials(): @pytest.mark.asyncio async def test_basic_gcs_logger(): - load_vertex_ai_credentials() + # load_vertex_ai_credentials() gcs_logger = GCSBucketLogger() print("GCSBucketLogger", gcs_logger) @@ -75,6 +75,41 @@ async def test_basic_gcs_logger(): max_tokens=10, user="ishaan-2", mock_response="Hi!", + metadata={ + "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"], + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + "user_api_key_alias": None, + "user_api_end_user_max_budget": None, + "litellm_api_version": "0.0.0", + "global_max_parallel_requests": None, + "user_api_key_user_id": "116544810872468347480", + "user_api_key_org_id": None, + "user_api_key_team_id": None, + "user_api_key_team_alias": None, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "spend_logs_metadata": {"hello": "world"}, + "headers": { + "content-type": "application/json", + "user-agent": "PostmanRuntime/7.32.3", + "accept": "*/*", + "postman-token": "92300061-eeaa-423b-a420-0b44896ecdc4", + "host": "localhost:4000", + "accept-encoding": "gzip, deflate, br", + "connection": "keep-alive", + "content-length": "163", + }, + "endpoint": "http://localhost:4000/chat/completions", + "model_group": "gpt-3.5-turbo", + "deployment": "azure/chatgpt-v-2", + "model_info": { + "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", + "db_model": False, + }, + "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", + "caching_groups": None, + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + }, ) print("response", response) @@ -83,11 +118,14 @@ async def test_basic_gcs_logger(): # Check if object landed on GCS object_from_gcs = await gcs_logger.download_gcs_object(object_name=response.id) + print("object from gcs=", object_from_gcs) # convert object_from_gcs from bytes to DICT - object_from_gcs = json.loads(object_from_gcs) - print("object_from_gcs", object_from_gcs) + parsed_data = json.loads(object_from_gcs) + print("object_from_gcs as dict", parsed_data) - gcs_payload = GCSBucketPayload(**object_from_gcs) + print("type of object_from_gcs", type(parsed_data)) + + gcs_payload = GCSBucketPayload(**parsed_data) print("gcs_payload", gcs_payload) @@ -97,6 +135,19 @@ async def test_basic_gcs_logger(): ] assert gcs_payload["response_obj"]["choices"][0]["message"]["content"] == "Hi!" 
+ assert gcs_payload["response_cost"] > 0.0 + + gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"]) + + assert ( + gcs_payload["spend_log_metadata"]["user_api_key"] + == "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b" + ) + assert ( + gcs_payload["spend_log_metadata"]["user_api_key_user_id"] + == "116544810872468347480" + ) + # Delete Object from GCS print("deleting object from GCS") await gcs_logger.delete_gcs_object(object_name=response.id) From cd0d5f211d1273d0347b263fa888f3235c0c2b17 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:28:12 -0700 Subject: [PATCH 04/60] feat log responses in folders --- litellm/integrations/gcs_bucket.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index a16d952861..46f55f8f01 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -81,7 +81,12 @@ class GCSBucketLogger(CustomLogger): ) json_logged_payload = json.dumps(logging_payload) - object_name = response_obj["id"] + + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = f"{current_date}/{response_obj['id']}" response = await self.async_httpx_client.post( headers=headers, url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", From 8a7571ad7250bbc8a419db6c1b3a4c895087af3f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:33:35 -0700 Subject: [PATCH 05/60] tes logging to gcs buckets --- litellm/tests/test_gcs_bucket.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index 754b499342..607599d903 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -9,6 +9,7 @@ import json import logging import tempfile import uuid +from datetime import datetime import pytest @@ -116,8 +117,17 @@ async def test_basic_gcs_logger(): await asyncio.sleep(5) + # Get the current date + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = f"{current_date}%2F{response.id}" + + print("object_name", object_name) + # Check if object landed on GCS - object_from_gcs = await gcs_logger.download_gcs_object(object_name=response.id) + object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name) print("object from gcs=", object_from_gcs) # convert object_from_gcs from bytes to DICT parsed_data = json.loads(object_from_gcs) @@ -150,4 +160,4 @@ async def test_basic_gcs_logger(): # Delete Object from GCS print("deleting object from GCS") - await gcs_logger.delete_gcs_object(object_name=response.id) + # await gcs_logger.delete_gcs_object(object_name=response.id) From 9a976b3d43ca36d17d2ab64381a14b70ea5ee1ea Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:34:27 -0700 Subject: [PATCH 06/60] fix gcs test --- litellm/tests/test_gcs_bucket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index 607599d903..b30978bad5 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -160,4 +160,4 @@ async def test_basic_gcs_logger(): # Delete Object from GCS print("deleting object from GCS") - 
# await gcs_logger.delete_gcs_object(object_name=response.id) + await gcs_logger.delete_gcs_object(object_name=object_name) From 4fbda3de38baabcff5314cf15c74acd95c6539e9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 12 Aug 2024 16:44:44 -0700 Subject: [PATCH 07/60] fix(cost_calculator.py): fix cost calc --- litellm/cost_calculator.py | 14 +++++++++++--- litellm/tests/test_custom_logger.py | 16 +++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 6eec8d3cd5..a3cb847a4f 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -490,10 +490,18 @@ def completion_cost( isinstance(completion_response, BaseModel) or isinstance(completion_response, dict) ): # tts returns a custom class - if isinstance(completion_response, BaseModel) and not isinstance( - completion_response, litellm.Usage + + usage_obj: Optional[Union[dict, litellm.Usage]] = completion_response.get( + "usage", {} + ) + if isinstance(usage_obj, BaseModel) and not isinstance( + usage_obj, litellm.Usage ): - completion_response = litellm.Usage(**completion_response.model_dump()) + setattr( + completion_response, + "usage", + litellm.Usage(**usage_obj.model_dump()), + ) # get input/output tokens from completion_response prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0) completion_tokens = completion_response.get("usage", {}).get( diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py index e3407c9e11..465012bffb 100644 --- a/litellm/tests/test_custom_logger.py +++ b/litellm/tests/test_custom_logger.py @@ -1,11 +1,17 @@ ### What this tests #### -import sys, os, time, inspect, asyncio, traceback +import asyncio +import inspect +import os +import sys +import time +import traceback + import pytest sys.path.insert(0, os.path.abspath("../..")) -from litellm import completion, embedding import litellm +from litellm import completion, embedding from litellm.integrations.custom_logger import CustomLogger @@ -201,7 +207,7 @@ def test_async_custom_handler_stream(): print("complete_streaming_response: ", complete_streaming_response) assert response_in_success_handler == complete_streaming_response except Exception as e: - pytest.fail(f"Error occurred: {e}") + pytest.fail(f"Error occurred: {e}\n{traceback.format_exc()}") # test_async_custom_handler_stream() @@ -457,11 +463,11 @@ async def test_cost_tracking_with_caching(): def test_redis_cache_completion_stream(): - from litellm import Cache - # Important Test - This tests if we can add to streaming cache, when custom callbacks are set import random + from litellm import Cache + try: print("\nrunning test_redis_cache_completion_stream") litellm.set_verbose = True From 1a70da3ab386b07681dc0dcbb6438b49e845afd9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:06:10 -0700 Subject: [PATCH 08/60] feat gcs log user api key metadata --- litellm/integrations/gcs_bucket.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index 46f55f8f01..3a76c6de23 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -14,6 +14,7 @@ from litellm.litellm_core_utils.logging_utils import ( ) from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload +from litellm.proxy._types import 
CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload class RequestKwargs(TypedDict): @@ -29,6 +30,8 @@ class GCSBucketPayload(TypedDict): end_time: str response_cost: Optional[float] spend_log_metadata: str + response_cost: Optional[float] + spend_log_metadata: str class GCSBucketLogger(CustomLogger): @@ -81,12 +84,7 @@ class GCSBucketLogger(CustomLogger): ) json_logged_payload = json.dumps(logging_payload) - - # Get the current date - current_date = datetime.now().strftime("%Y-%m-%d") - - # Modify the object_name to include the date-based folder - object_name = f"{current_date}/{response_obj['id']}" + object_name = response_obj["id"] response = await self.async_httpx_client.post( headers=headers, url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", @@ -133,6 +131,10 @@ class GCSBucketLogger(CustomLogger): get_logging_payload, ) + from litellm.proxy.spend_tracking.spend_tracking_utils import ( + get_logging_payload, + ) + request_kwargs = RequestKwargs( model=kwargs.get("model", None), messages=kwargs.get("messages", None), @@ -151,6 +153,14 @@ class GCSBucketLogger(CustomLogger): end_user_id=kwargs.get("end_user_id", None), ) + _spend_log_payload: SpendLogsPayload = get_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + end_user_id=kwargs.get("end_user_id", None), + ) + gcs_payload: GCSBucketPayload = GCSBucketPayload( request_kwargs=request_kwargs, response_obj=response_dict, @@ -158,6 +168,8 @@ class GCSBucketLogger(CustomLogger): end_time=end_time, spend_log_metadata=_spend_log_payload["metadata"], response_cost=kwargs.get("response_cost", None), + spend_log_metadata=_spend_log_payload["metadata"], + response_cost=kwargs.get("response_cost", None), ) return gcs_payload From 23c6e9d348bfcd87805198a950f0508eab0c1699 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:07:08 -0700 Subject: [PATCH 09/60] test gcs logging payload --- litellm/tests/test_gcs_bucket.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index b30978bad5..4fa9d8ef43 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -117,17 +117,8 @@ async def test_basic_gcs_logger(): await asyncio.sleep(5) - # Get the current date - # Get the current date - current_date = datetime.now().strftime("%Y-%m-%d") - - # Modify the object_name to include the date-based folder - object_name = f"{current_date}%2F{response.id}" - - print("object_name", object_name) - # Check if object landed on GCS - object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name) + object_from_gcs = await gcs_logger.download_gcs_object(object_name=response.id) print("object from gcs=", object_from_gcs) # convert object_from_gcs from bytes to DICT parsed_data = json.loads(object_from_gcs) From c0ce3c5f140bb6c367eed6095af13f8849983f47 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:28:12 -0700 Subject: [PATCH 10/60] feat log responses in folders --- litellm/integrations/gcs_bucket.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index 3a76c6de23..c948668eb5 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -84,7 +84,12 @@ class GCSBucketLogger(CustomLogger): ) json_logged_payload = json.dumps(logging_payload) - 
object_name = response_obj["id"] + + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = f"{current_date}/{response_obj['id']}" response = await self.async_httpx_client.post( headers=headers, url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", From 5473445437d12a15ce4df49da54a3199acc050ec Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:33:35 -0700 Subject: [PATCH 11/60] tes logging to gcs buckets --- litellm/tests/test_gcs_bucket.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index 4fa9d8ef43..607599d903 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -117,8 +117,17 @@ async def test_basic_gcs_logger(): await asyncio.sleep(5) + # Get the current date + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = f"{current_date}%2F{response.id}" + + print("object_name", object_name) + # Check if object landed on GCS - object_from_gcs = await gcs_logger.download_gcs_object(object_name=response.id) + object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name) print("object from gcs=", object_from_gcs) # convert object_from_gcs from bytes to DICT parsed_data = json.loads(object_from_gcs) @@ -151,4 +160,4 @@ async def test_basic_gcs_logger(): # Delete Object from GCS print("deleting object from GCS") - await gcs_logger.delete_gcs_object(object_name=object_name) + # await gcs_logger.delete_gcs_object(object_name=response.id) From 96582251b616abb5463a88f61947bcbf90277b75 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 16:34:27 -0700 Subject: [PATCH 12/60] fix gcs test --- litellm/tests/test_gcs_bucket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index 607599d903..b30978bad5 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -160,4 +160,4 @@ async def test_basic_gcs_logger(): # Delete Object from GCS print("deleting object from GCS") - # await gcs_logger.delete_gcs_object(object_name=response.id) + await gcs_logger.delete_gcs_object(object_name=object_name) From 3a1e2568dbdc166a59d9ac895c44da49639043ca Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 12 Aug 2024 17:42:04 -0700 Subject: [PATCH 13/60] fix gcs logging test --- litellm/tests/test_gcs_bucket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index b30978bad5..c21988c73d 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -64,7 +64,7 @@ def load_vertex_ai_credentials(): @pytest.mark.asyncio async def test_basic_gcs_logger(): - # load_vertex_ai_credentials() + load_vertex_ai_credentials() gcs_logger = GCSBucketLogger() print("GCSBucketLogger", gcs_logger) From dd10896f32b4755af1ea8e67caddd14f3d53130c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 12 Aug 2024 18:47:25 -0700 Subject: [PATCH 14/60] refactor(test_users.py): refactor test for user info to use mock endpoints --- .../internal_user_endpoints.py | 11 +++++- litellm/tests/test_proxy_server.py | 38 +++++++++++++++++++ tests/test_users.py | 7 ---- 3 files changed, 
47 insertions(+), 9 deletions(-) diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 8e2358c992..a0e020b11f 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -312,7 +312,7 @@ async def user_info( try: if prisma_client is None: raise Exception( - f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" ) ## GET USER ROW ## if user_id is not None: @@ -365,7 +365,14 @@ async def user_info( getattr(caller_user_info, "user_role", None) == LitellmUserRoles.PROXY_ADMIN ): - teams_2 = await prisma_client.db.litellm_teamtable.find_many() + from litellm.proxy.management_endpoints.team_endpoints import list_team + + teams_2 = await list_team( + http_request=Request( + scope={"type": "http", "path": "/user/info"}, + ), + user_api_key_dict=user_api_key_dict, + ) else: teams_2 = await prisma_client.get_data( team_id_list=caller_user_info.teams, diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index dee20a273c..757eef6d62 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -928,3 +928,41 @@ async def test_create_team_member_add(prisma_client, new_member_method): mock_client.call_args.kwargs["data"]["create"]["budget_duration"] == litellm.internal_user_budget_duration ) + + +@pytest.mark.asyncio +async def test_user_info_team_list(prisma_client): + """Assert user_info for admin calls team_list function""" + from litellm.proxy._types import LiteLLM_UserTable + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + from litellm.proxy.management_endpoints.internal_user_endpoints import user_info + + with patch( + "litellm.proxy.management_endpoints.team_endpoints.list_team", + new_callable=AsyncMock, + ) as mock_client: + + prisma_client.get_data = AsyncMock( + return_value=LiteLLM_UserTable( + user_role="proxy_admin", + user_id="default_user_id", + max_budget=None, + user_email="", + ) + ) + + try: + await user_info( + user_id=None, + user_api_key_dict=UserAPIKeyAuth( + api_key="sk-1234", user_id="default_user_id" + ), + ) + except Exception: + pass + + mock_client.assert_called() diff --git a/tests/test_users.py b/tests/test_users.py index 632dd8f36c..8113fd0801 100644 --- a/tests/test_users.py +++ b/tests/test_users.py @@ -99,13 +99,6 @@ async def test_user_info(): ) assert status == 403 - ## check if returned teams as admin == all teams ## - admin_info = await get_user_info( - session=session, get_user="", call_user="sk-1234", view_all=True - ) - all_teams = await list_teams(session=session, i=0) - assert len(admin_info["teams"]) == len(all_teams) - @pytest.mark.asyncio async def test_user_update(): From 9fcb6f8f57ca50fc4b8b859505f65edab9ec68c4 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 12 Aug 2024 21:21:40 -0700 Subject: [PATCH 15/60] fix(litellm_pre_call_utils.py): support routing to logging project by api key --- litellm/integrations/gcs_bucket.py | 17 ----- litellm/integrations/langfuse.py | 2 +- litellm/proxy/litellm_pre_call_utils.py | 68 +++++++++++++++++-- 
litellm/tests/test_proxy_server.py | 89 +++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 25 deletions(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index c948668eb5..46f55f8f01 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -14,7 +14,6 @@ from litellm.litellm_core_utils.logging_utils import ( ) from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload -from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload class RequestKwargs(TypedDict): @@ -30,8 +29,6 @@ class GCSBucketPayload(TypedDict): end_time: str response_cost: Optional[float] spend_log_metadata: str - response_cost: Optional[float] - spend_log_metadata: str class GCSBucketLogger(CustomLogger): @@ -136,10 +133,6 @@ class GCSBucketLogger(CustomLogger): get_logging_payload, ) - from litellm.proxy.spend_tracking.spend_tracking_utils import ( - get_logging_payload, - ) - request_kwargs = RequestKwargs( model=kwargs.get("model", None), messages=kwargs.get("messages", None), @@ -158,14 +151,6 @@ class GCSBucketLogger(CustomLogger): end_user_id=kwargs.get("end_user_id", None), ) - _spend_log_payload: SpendLogsPayload = get_logging_payload( - kwargs=kwargs, - response_obj=response_obj, - start_time=start_time, - end_time=end_time, - end_user_id=kwargs.get("end_user_id", None), - ) - gcs_payload: GCSBucketPayload = GCSBucketPayload( request_kwargs=request_kwargs, response_obj=response_dict, @@ -173,8 +158,6 @@ class GCSBucketLogger(CustomLogger): end_time=end_time, spend_log_metadata=_spend_log_payload["metadata"], response_cost=kwargs.get("response_cost", None), - spend_log_metadata=_spend_log_payload["metadata"], - response_cost=kwargs.get("response_cost", None), ) return gcs_payload diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index df4be3a5bc..7a127f912b 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -48,7 +48,7 @@ class LangFuseLogger: "secret_key": self.secret_key, "host": self.langfuse_host, "release": self.langfuse_release, - "debug": self.langfuse_debug, + "debug": True, "flush_interval": flush_interval, # flush interval in seconds } diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 13f9475c5c..631f476922 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -5,7 +5,12 @@ from fastapi import Request import litellm from litellm._logging import verbose_logger, verbose_proxy_logger -from litellm.proxy._types import CommonProxyErrors, TeamCallbackMetadata, UserAPIKeyAuth +from litellm.proxy._types import ( + AddTeamCallback, + CommonProxyErrors, + TeamCallbackMetadata, + UserAPIKeyAuth, +) from litellm.types.utils import SupportedCacheControls if TYPE_CHECKING: @@ -59,6 +64,42 @@ def safe_add_api_version_from_query_params(data: dict, request: Request): verbose_logger.error("error checking api version in query params: %s", str(e)) +def convert_key_logging_metadata_to_callback( + data: AddTeamCallback, team_callback_settings_obj: Optional[TeamCallbackMetadata] +) -> TeamCallbackMetadata: + if team_callback_settings_obj is None: + team_callback_settings_obj = TeamCallbackMetadata() + if data.callback_type == "success": + if team_callback_settings_obj.success_callback is None: + team_callback_settings_obj.success_callback = [] + + if 
data.callback_name not in team_callback_settings_obj.success_callback: + team_callback_settings_obj.success_callback.append(data.callback_name) + elif data.callback_type == "failure": + if team_callback_settings_obj.failure_callback is None: + team_callback_settings_obj.failure_callback = [] + + if data.callback_name not in team_callback_settings_obj.failure_callback: + team_callback_settings_obj.failure_callback.append(data.callback_name) + elif data.callback_type == "success_and_failure": + if team_callback_settings_obj.success_callback is None: + team_callback_settings_obj.success_callback = [] + if team_callback_settings_obj.failure_callback is None: + team_callback_settings_obj.failure_callback = [] + if data.callback_name not in team_callback_settings_obj.success_callback: + team_callback_settings_obj.success_callback.append(data.callback_name) + + if data.callback_name in team_callback_settings_obj.failure_callback: + team_callback_settings_obj.failure_callback.append(data.callback_name) + + for var, value in data.callback_vars.items(): + if team_callback_settings_obj.callback_vars is None: + team_callback_settings_obj.callback_vars = {} + team_callback_settings_obj.callback_vars[var] = litellm.get_secret(value) + + return team_callback_settings_obj + + async def add_litellm_data_to_request( data: dict, request: Request, @@ -214,6 +255,7 @@ async def add_litellm_data_to_request( } # add the team-specific configs to the completion call # Team Callbacks controls + callback_settings_obj: Optional[TeamCallbackMetadata] = None if user_api_key_dict.team_metadata is not None: team_metadata = user_api_key_dict.team_metadata if "callback_settings" in team_metadata: @@ -231,13 +273,25 @@ async def add_litellm_data_to_request( } } """ - data["success_callback"] = callback_settings_obj.success_callback - data["failure_callback"] = callback_settings_obj.failure_callback + elif ( + user_api_key_dict.metadata is not None + and "logging" in user_api_key_dict.metadata + ): + for item in user_api_key_dict.metadata["logging"]: - if callback_settings_obj.callback_vars is not None: - # unpack callback_vars in data - for k, v in callback_settings_obj.callback_vars.items(): - data[k] = v + callback_settings_obj = convert_key_logging_metadata_to_callback( + data=AddTeamCallback(**item), + team_callback_settings_obj=callback_settings_obj, + ) + + if callback_settings_obj is not None: + data["success_callback"] = callback_settings_obj.success_callback + data["failure_callback"] = callback_settings_obj.failure_callback + + if callback_settings_obj.callback_vars is not None: + # unpack callback_vars in data + for k, v in callback_settings_obj.callback_vars.items(): + data[k] = v return data diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 757eef6d62..890446e566 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -966,3 +966,92 @@ async def test_user_info_team_list(prisma_client): pass mock_client.assert_called() + + +@pytest.mark.asyncio +async def test_add_callback_via_key(prisma_client): + """ + Test if callback specified in key, is used. 
+ """ + global headers + import json + + from fastapi import HTTPException, Request, Response + from starlette.datastructures import URL + + from litellm.proxy.proxy_server import chat_completion + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + litellm.set_verbose = True + + try: + # Your test data + test_data = { + "model": "azure/chatgpt-v-2", + "messages": [ + {"role": "user", "content": "write 1 sentence poem"}, + ], + "max_tokens": 10, + "mock_response": "Hello world", + "api_key": "my-fake-key", + } + + request = Request(scope={"type": "http", "method": "POST", "headers": {}}) + request._url = URL(url="/chat/completions") + + json_bytes = json.dumps(test_data).encode("utf-8") + + request._body = json_bytes + + with patch.object( + litellm.litellm_core_utils.litellm_logging, + "LangFuseLogger", + new=MagicMock(), + ) as mock_client: + resp = await chat_completion( + request=request, + fastapi_response=Response(), + user_api_key_dict=UserAPIKeyAuth( + metadata={ + "logging": [ + { + "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary' + "callback_type": "success", # set, if required by integration - future improvement, have logging tools work for success + failure by default + "callback_vars": { + "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", + "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", + "langfuse_host": "https://us.cloud.langfuse.com", + }, + } + ] + } + ), + ) + print(resp) + mock_client.assert_called() + mock_client.return_value.log_event.assert_called() + args, kwargs = mock_client.return_value.log_event.call_args + print("KWARGS - {}".format(kwargs)) + kwargs = kwargs["kwargs"] + print(kwargs) + assert "user_api_key_metadata" in kwargs["litellm_params"]["metadata"] + assert ( + "logging" + in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"] + ) + checked_keys = False + for item in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"][ + "logging" + ]: + for k, v in item["callback_vars"].items(): + print("k={}, v={}".format(k, v)) + if "key" in k: + assert "os.environ" in v + checked_keys = True + + assert checked_keys + except Exception as e: + pytest.fail(f"LiteLLM Proxy test failed. 
Exception - {str(e)}") From 46d8f694c1a5577411373256cd084a21267cc398 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 12 Aug 2024 23:20:43 -0700 Subject: [PATCH 16/60] fix(langfuse.py'): cleanup --- litellm/integrations/langfuse.py | 2 +- litellm/tests/test_proxy_server.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index 7a127f912b..df4be3a5bc 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -48,7 +48,7 @@ class LangFuseLogger: "secret_key": self.secret_key, "host": self.langfuse_host, "release": self.langfuse_release, - "debug": True, + "debug": self.langfuse_debug, "flush_interval": flush_interval, # flush interval in seconds } diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index b943096396..00c58d1243 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -1033,9 +1033,7 @@ async def test_add_callback_via_key(prisma_client): mock_client.assert_called() mock_client.return_value.log_event.assert_called() args, kwargs = mock_client.return_value.log_event.call_args - print("KWARGS - {}".format(kwargs)) kwargs = kwargs["kwargs"] - print(kwargs) assert "user_api_key_metadata" in kwargs["litellm_params"]["metadata"] assert ( "logging" From 69b9207ec564f9c39324787884db8e1ee310edf8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 13 Aug 2024 16:57:19 -0700 Subject: [PATCH 17/60] fix make prisma readable --- litellm/proxy/utils.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index d1d17d0ef5..4df037fc34 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -14,6 +14,7 @@ from datetime import datetime, timedelta from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from functools import wraps +from pathlib import Path from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union import backoff @@ -815,6 +816,17 @@ class PrismaClient: org_list_transactons: dict = {} spend_log_transactions: List = [] + def ensure_prisma_has_writable_dirs(self, path: str | Path) -> None: + import stat + + for root, dirs, _ in os.walk(path): + for directory in dirs: + dir_path = os.path.join(root, directory) + os.makedirs(dir_path, exist_ok=True) + os.chmod( + dir_path, os.stat(dir_path).st_mode | stat.S_IWRITE | stat.S_IEXEC + ) + def __init__(self, database_url: str, proxy_logging_obj: ProxyLogging): verbose_proxy_logger.debug( "LiteLLM: DATABASE_URL Set in config, trying to 'pip install prisma'" @@ -846,6 +858,22 @@ class PrismaClient: # Now you can import the Prisma Client from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") + import importlib.util + + # Get the location of the 'prisma' package + package_name = "prisma" + spec = importlib.util.find_spec(package_name) + print("spec = ", spec) # noqa + + if spec and spec.origin: + print("spec origin= ", spec.origin) # noqa + _base_prisma_package_dir = os.path.dirname(spec.origin) + print("base prisma package dir = ", _base_prisma_package_dir) # noqa + else: + raise ImportError(f"Package {package_name} not found.") + + # Use the package directory in your method call + self.ensure_prisma_has_writable_dirs(path=_base_prisma_package_dir) self.db = Prisma() # Client to connect to Prisma db verbose_proxy_logger.debug("Success - Connected Prisma Client to DB") From 
0d4dae3f4c58c3ada9a9874a5e0eb0c86e7261b0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 13 Aug 2024 18:38:10 -0700 Subject: [PATCH 18/60] skip prisma gen step --- litellm/proxy/utils.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 4df037fc34..4237a011b4 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -844,17 +844,17 @@ class PrismaClient: dname = os.path.dirname(abspath) os.chdir(dname) - try: - subprocess.run(["prisma", "generate"]) - subprocess.run( - ["prisma", "db", "push", "--accept-data-loss"] - ) # this looks like a weird edge case when prisma just wont start on render. we need to have the --accept-data-loss - except Exception as e: - raise Exception( - f"Unable to run prisma commands. Run `pip install prisma` Got Exception: {(str(e))}" - ) - finally: - os.chdir(original_dir) + # try: + # subprocess.run(["prisma", "generate"]) + # subprocess.run( + # ["prisma", "db", "push", "--accept-data-loss"] + # ) # this looks like a weird edge case when prisma just wont start on render. we need to have the --accept-data-loss + # except Exception as e: + # raise Exception( + # f"Unable to run prisma commands. Run `pip install prisma` Got Exception: {(str(e))}" + # ) + # finally: + # os.chdir(original_dir) # Now you can import the Prisma Client from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") From 8b8f602e98b95ad40d3714117bd342c51e39298d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 13 Aug 2024 18:40:00 -0700 Subject: [PATCH 19/60] temp set prisma pems --- set_prisma_permissions.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 set_prisma_permissions.py diff --git a/set_prisma_permissions.py b/set_prisma_permissions.py new file mode 100644 index 0000000000..0973b90b88 --- /dev/null +++ b/set_prisma_permissions.py @@ -0,0 +1,39 @@ +import os +import importlib +from pathlib import Path + + +# Get the location of the 'prisma' package +package_name = "prisma" +spec = importlib.util.find_spec(package_name) +print("spec = ", spec) # noqa + +if spec and spec.origin: + print("spec origin= ", spec.origin) # noqa + _base_prisma_package_dir = os.path.dirname(spec.origin) + print("base prisma package dir = ", _base_prisma_package_dir) # noqa +else: + raise ImportError(f"Package {package_name} not found.") + + +def ensure_prisma_has_writable_dirs(path: str | Path) -> None: + import stat + + for root, dirs, _ in os.walk(path): + for directory in dirs: + dir_path = os.path.join(root, directory) + os.makedirs(dir_path, exist_ok=True) + print("making dir for prisma = ", dir_path) + os.chmod(dir_path, os.stat(dir_path).st_mode | stat.S_IWRITE | stat.S_IEXEC) + + # make this file writable - prisma/schema.prisma + file_path = os.path.join(path, "schema.prisma") + print("making file for prisma = ", file_path) + # make entire directory writable + os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE | stat.S_IEXEC) + + os.chmod(file_path, os.stat(file_path).st_mode | stat.S_IWRITE | stat.S_IEXEC) + + +# Use the package directory in your method call +ensure_prisma_has_writable_dirs(path=_base_prisma_package_dir) From 1dd39a9b9d2153519a46c461cd62f3b3448f875a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 13 Aug 2024 19:17:01 -0700 Subject: [PATCH 20/60] fix prisma issues --- litellm/proxy/utils.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git 
a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 4237a011b4..f16e604f66 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -14,7 +14,6 @@ from datetime import datetime, timedelta from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from functools import wraps -from pathlib import Path from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union import backoff @@ -816,17 +815,6 @@ class PrismaClient: org_list_transactons: dict = {} spend_log_transactions: List = [] - def ensure_prisma_has_writable_dirs(self, path: str | Path) -> None: - import stat - - for root, dirs, _ in os.walk(path): - for directory in dirs: - dir_path = os.path.join(root, directory) - os.makedirs(dir_path, exist_ok=True) - os.chmod( - dir_path, os.stat(dir_path).st_mode | stat.S_IWRITE | stat.S_IEXEC - ) - def __init__(self, database_url: str, proxy_logging_obj: ProxyLogging): verbose_proxy_logger.debug( "LiteLLM: DATABASE_URL Set in config, trying to 'pip install prisma'" @@ -858,22 +846,6 @@ class PrismaClient: # Now you can import the Prisma Client from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") - import importlib.util - - # Get the location of the 'prisma' package - package_name = "prisma" - spec = importlib.util.find_spec(package_name) - print("spec = ", spec) # noqa - - if spec and spec.origin: - print("spec origin= ", spec.origin) # noqa - _base_prisma_package_dir = os.path.dirname(spec.origin) - print("base prisma package dir = ", _base_prisma_package_dir) # noqa - else: - raise ImportError(f"Package {package_name} not found.") - - # Use the package directory in your method call - self.ensure_prisma_has_writable_dirs(path=_base_prisma_package_dir) self.db = Prisma() # Client to connect to Prisma db verbose_proxy_logger.debug("Success - Connected Prisma Client to DB") From 6dc71d61daa4bba7bb4772837ec8aa3d69486ff9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 13 Aug 2024 19:29:40 -0700 Subject: [PATCH 21/60] fic docker file to run in non root model --- Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Dockerfile b/Dockerfile index c8e9956b29..bd840eaf54 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,6 +62,11 @@ COPY --from=builder /wheels/ /wheels/ RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels # Generate prisma client +ENV PRISMA_BINARY_CACHE_DIR=/app/prisma +RUN mkdir -p /.cache +RUN chmod -R 777 /.cache +RUN pip install nodejs-bin +RUN pip install prisma RUN prisma generate RUN chmod +x entrypoint.sh From 8b4b378562239533b485b8cc4b179b7c6edffaba Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 13 Aug 2024 20:26:24 -0700 Subject: [PATCH 22/60] feat(user_api_key_auth.py): support calling langfuse with litellm user_api_key_auth --- litellm/proxy/_new_secret_config.yaml | 10 +- litellm/proxy/auth/user_api_key_auth.py | 43 ++++++- .../pass_through_endpoints.py | 2 +- litellm/tests/test_pass_through_endpoints.py | 112 +++++++++++++++++- 4 files changed, 160 insertions(+), 7 deletions(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 87a561e318..bc3e0680f8 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -3,5 +3,11 @@ model_list: litellm_params: model: "*" -litellm_settings: - success_callback: ["langsmith"] \ No newline at end of file +general_settings: + master_key: sk-1234 + pass_through_endpoints: + - path: 
"/api/public/ingestion" # route you want to add to LiteLLM Proxy Server + target: "https://us.cloud.langfuse.com/api/public/ingestion" # URL this route should forward + headers: + LANGFUSE_PUBLIC_KEY: "os.environ/LANGFUSE_PUBLIC_KEY" # your langfuse account public key + LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_SECRET_KEY" # your langfuse account secret key \ No newline at end of file diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index 9bbbc1a430..3df90f37fa 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -86,7 +86,7 @@ def _get_bearer_token( if api_key.startswith("Bearer "): # ensure Bearer token passed in api_key = api_key.replace("Bearer ", "") # extract the token else: - api_key = "" + api_key = api_key return api_key @@ -138,7 +138,6 @@ async def user_api_key_auth( pass_through_endpoints: Optional[List[dict]] = general_settings.get( "pass_through_endpoints", None ) - if isinstance(api_key, str): passed_in_key = api_key api_key = _get_bearer_token(api_key=api_key) @@ -367,6 +366,40 @@ async def user_api_key_auth( parent_otel_span=parent_otel_span, ) #### ELSE #### + + ## CHECK PASS-THROUGH ENDPOINTS ## + if pass_through_endpoints is not None: + for endpoint in pass_through_endpoints: + if endpoint.get("path", "") == route: + ## IF AUTH DISABLED + if endpoint.get("auth") is not True: + return UserAPIKeyAuth() + ## IF AUTH ENABLED + ### IF CUSTOM PARSER REQUIRED + if ( + endpoint.get("custom_auth_parser") is not None + and endpoint.get("custom_auth_parser") == "langfuse" + ): + """ + - langfuse returns {'Authorization': 'Basic YW55dGhpbmc6YW55dGhpbmc'} + - check the langfuse public key if it contains the litellm api key + """ + import base64 + + api_key = api_key.replace("Basic ", "").strip() + decoded_bytes = base64.b64decode(api_key) + decoded_str = decoded_bytes.decode("utf-8") + api_key = decoded_str.split(":")[0] + else: + headers = endpoint.get("headers", None) + if headers is not None: + header_key = headers.get("litellm_user_api_key", "") + if ( + isinstance(request.headers, dict) + and request.headers.get(key=header_key) is not None + ): + api_key = request.headers.get(key=header_key) + if master_key is None: if isinstance(api_key, str): return UserAPIKeyAuth( @@ -533,7 +566,11 @@ async def user_api_key_auth( if isinstance( api_key, str ): # if generated token, make sure it starts with sk-. - assert api_key.startswith("sk-") # prevent token hashes from being used + assert api_key.startswith( + "sk-" + ), "LiteLLM Virtual Key expected. 
Received={}, expected to start with 'sk-'.".format( + api_key + ) # prevent token hashes from being used else: verbose_logger.warning( "litellm.proxy.proxy_server.user_api_key_auth(): Warning - Key={} is not a string.".format( diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index d71863497f..15129854a3 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -309,7 +309,7 @@ async def pass_through_request( json=_parsed_body, ) - if response.status_code != 200: + if response.status_code >= 300: raise HTTPException(status_code=response.status_code, detail=response.text) content = await response.aread() diff --git a/litellm/tests/test_pass_through_endpoints.py b/litellm/tests/test_pass_through_endpoints.py index 4f52f3d192..0f57ca68f9 100644 --- a/litellm/tests/test_pass_through_endpoints.py +++ b/litellm/tests/test_pass_through_endpoints.py @@ -1,5 +1,6 @@ import os import sys +from typing import Optional import pytest from fastapi import FastAPI @@ -30,6 +31,7 @@ def client(): async def test_pass_through_endpoint(client, monkeypatch): # Mock the httpx.AsyncClient.request method monkeypatch.setattr("httpx.AsyncClient.request", mock_request) + import litellm # Define a pass-through endpoint pass_through_endpoints = [ @@ -42,6 +44,11 @@ async def test_pass_through_endpoint(client, monkeypatch): # Initialize the pass-through endpoint await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, "general_settings", general_settings) # Make a request to the pass-through endpoint response = client.post("/test-endpoint", json={"prompt": "Hello, world!"}) @@ -54,6 +61,7 @@ async def test_pass_through_endpoint(client, monkeypatch): @pytest.mark.asyncio async def test_pass_through_endpoint_rerank(client): _cohere_api_key = os.environ.get("COHERE_API_KEY") + import litellm # Define a pass-through endpoint pass_through_endpoints = [ @@ -66,6 +74,11 @@ async def test_pass_through_endpoint_rerank(client): # Initialize the pass-through endpoint await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, "general_settings", general_settings) _json_data = { "model": "rerank-english-v3.0", @@ -87,7 +100,7 @@ async def test_pass_through_endpoint_rerank(client): @pytest.mark.parametrize( "auth, rpm_limit, expected_error_code", - [(True, 0, 429), (True, 1, 200), (False, 0, 401)], + [(True, 0, 429), (True, 1, 200), (False, 0, 200)], ) @pytest.mark.asyncio async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_limit): @@ -123,6 +136,11 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li # Initialize the pass-through endpoint await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, 
"general_settings", general_settings) _json_data = { "model": "rerank-english-v3.0", @@ -146,6 +164,93 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li assert response.status_code == expected_error_code +@pytest.mark.parametrize( + "auth, rpm_limit, expected_error_code", + [(True, 0, 429), (True, 1, 207), (False, 0, 207)], +) +@pytest.mark.asyncio +async def test_pass_through_endpoint_pass_through_keys_langfuse( + auth, expected_error_code, rpm_limit +): + client = TestClient(app) + import litellm + from litellm.proxy._types import UserAPIKeyAuth + from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache + + mock_api_key = "sk-my-test-key" + cache_value = UserAPIKeyAuth(token=hash_token(mock_api_key), rpm_limit=rpm_limit) + + _cohere_api_key = os.environ.get("COHERE_API_KEY") + + user_api_key_cache.set_cache(key=hash_token(mock_api_key), value=cache_value) + + proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) + proxy_logging_obj._init_litellm_callbacks() + + setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR") + setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj) + + # Define a pass-through endpoint + pass_through_endpoints = [ + { + "path": "/api/public/ingestion", + "target": "https://us.cloud.langfuse.com/api/public/ingestion", + "auth": auth, + "custom_auth_parser": "langfuse", + "headers": { + "LANGFUSE_PUBLIC_KEY": "os.environ/LANGFUSE_PUBLIC_KEY", + "LANGFUSE_SECRET_KEY": "os.environ/LANGFUSE_SECRET_KEY", + }, + } + ] + + # Initialize the pass-through endpoint + await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, "general_settings", general_settings) + + _json_data = { + "batch": [ + { + "id": "80e2141f-0ca6-47b7-9c06-dde5e97de690", + "type": "trace-create", + "body": { + "id": "0687af7b-4a75-4de8-a4f6-cba1cdc00865", + "timestamp": "2024-08-14T02:38:56.092950Z", + "name": "test-trace-litellm-proxy-passthrough", + }, + "timestamp": "2024-08-14T02:38:56.093352Z", + } + ], + "metadata": { + "batch_size": 1, + "sdk_integration": "default", + "sdk_name": "python", + "sdk_version": "2.27.0", + "public_key": "anything", + }, + } + + # Make a request to the pass-through endpoint + response = client.post( + "/api/public/ingestion", + json=_json_data, + headers={"Authorization": "Basic c2stbXktdGVzdC1rZXk6YW55dGhpbmc="}, + ) + + print("JSON response: ", _json_data) + + print("RESPONSE RECEIVED - {}".format(response.text)) + + # Assert the response + assert response.status_code == expected_error_code + + @pytest.mark.asyncio async def test_pass_through_endpoint_anthropic(client): import litellm @@ -178,6 +283,11 @@ async def test_pass_through_endpoint_anthropic(client): # Initialize the pass-through endpoint await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, "general_settings", general_settings) _json_data = { "model": "gpt-3.5-turbo", From 
72b6d372445af2ac8b3aeaefd05dcf296a2123bf Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 13 Aug 2024 21:27:59 -0700 Subject: [PATCH 23/60] test(test_proxy_server.py): refactor test to work on ci/cd --- litellm/tests/test_proxy_server.py | 116 ++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 00c58d1243..9220256571 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -967,6 +967,8 @@ async def test_user_info_team_list(prisma_client): mock_client.assert_called() + +# @pytest.mark.skip(reason="Local test") @pytest.mark.asyncio async def test_add_callback_via_key(prisma_client): """ @@ -1051,4 +1053,116 @@ async def test_add_callback_via_key(prisma_client): assert checked_keys except Exception as e: - pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}") \ No newline at end of file + pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}") + + +@pytest.mark.asyncio +async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client): + import json + + from fastapi import HTTPException, Request, Response + from starlette.datastructures import URL + + from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config") + + request = Request(scope={"type": "http", "method": "POST", "headers": {}}) + request._url = URL(url="/chat/completions") + + test_data = { + "model": "azure/chatgpt-v-2", + "messages": [ + {"role": "user", "content": "write 1 sentence poem"}, + ], + "max_tokens": 10, + "mock_response": "Hello world", + "api_key": "my-fake-key", + } + + json_bytes = json.dumps(test_data).encode("utf-8") + + request._body = json_bytes + + data = { + "data": { + "model": "azure/chatgpt-v-2", + "messages": [{"role": "user", "content": "write 1 sentence poem"}], + "max_tokens": 10, + "mock_response": "Hello world", + "api_key": "my-fake-key", + }, + "request": request, + "user_api_key_dict": UserAPIKeyAuth( + token=None, + key_name=None, + key_alias=None, + spend=0.0, + max_budget=None, + expires=None, + models=[], + aliases={}, + config={}, + user_id=None, + team_id=None, + max_parallel_requests=None, + metadata={ + "logging": [ + { + "callback_name": "langfuse", + "callback_type": "success", + "callback_vars": { + "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", + "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", + "langfuse_host": "https://us.cloud.langfuse.com", + }, + } + ] + }, + tpm_limit=None, + rpm_limit=None, + budget_duration=None, + budget_reset_at=None, + allowed_cache_controls=[], + permissions={}, + model_spend={}, + model_max_budget={}, + soft_budget_cooldown=False, + litellm_budget_table=None, + org_id=None, + team_spend=None, + team_alias=None, + team_tpm_limit=None, + team_rpm_limit=None, + team_max_budget=None, + team_models=[], + team_blocked=False, + soft_budget=None, + team_model_aliases=None, + team_member_spend=None, + team_metadata=None, + end_user_id=None, + end_user_tpm_limit=None, + end_user_rpm_limit=None, + end_user_max_budget=None, + last_refreshed_at=None, + api_key=None, + user_role=None, + allowed_model_region=None, + parent_otel_span=None, + ), + "proxy_config": proxy_config, + 
"general_settings": {}, + "version": "0.0.0", + } + + new_data = await add_litellm_data_to_request(**data) + + assert "success_callback" in new_data + assert new_data["success_callback"] == ["langfuse"] + assert "langfuse_public_key" in new_data + assert "langfuse_secret_key" in new_data From 691e53c7644582de5b57ac6c0917e0ccbc6578c9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 13 Aug 2024 21:36:16 -0700 Subject: [PATCH 24/60] test(test_proxy_server.py): skip local test --- litellm/tests/test_proxy_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 9220256571..9a1c091267 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -968,7 +968,7 @@ async def test_user_info_team_list(prisma_client): mock_client.assert_called() -# @pytest.mark.skip(reason="Local test") +@pytest.mark.skip(reason="Local test") @pytest.mark.asyncio async def test_add_callback_via_key(prisma_client): """ From 9387662eabb2768057000f770ad4c1944332b148 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 13 Aug 2024 22:00:33 -0700 Subject: [PATCH 25/60] fix(user_api_key_auth.py): more precisely expand scope to handle 'basic' tokens --- litellm/proxy/_experimental/out/404.html | 1 - litellm/proxy/_experimental/out/model_hub.html | 1 - litellm/proxy/_experimental/out/onboarding.html | 1 - litellm/proxy/auth/user_api_key_auth.py | 4 +++- 4 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 litellm/proxy/_experimental/out/404.html delete mode 100644 litellm/proxy/_experimental/out/model_hub.html delete mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 0de1d45fae..0000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard
\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html deleted file mode 100644 index 2476ecba73..0000000000 --- a/litellm/proxy/_experimental/out/model_hub.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index 0ea4969e32..0000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index 3df90f37fa..5ae149f1bd 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -85,8 +85,10 @@ def _get_bearer_token( ): if api_key.startswith("Bearer "): # ensure Bearer token passed in api_key = api_key.replace("Bearer ", "") # extract the token + elif api_key.startswith("Basic "): + api_key = api_key.replace("Basic ", "") # handle langfuse input else: - api_key = api_key + api_key = "" return api_key From da61511a8e68eebd9333a7c947f6d8863b8ee48d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 08:39:16 -0700 Subject: [PATCH 26/60] feat log fail events on gcs --- litellm/integrations/gcs_bucket.py | 67 +++++++++++++++++-- .../spend_tracking/spend_tracking_utils.py | 2 + 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index 46f55f8f01..6525f680a1 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -1,5 +1,6 @@ import json import os +import uuid from datetime import datetime from typing import Any, Dict, List, Optional, TypedDict, Union @@ -29,6 +30,8 @@ class GCSBucketPayload(TypedDict): end_time: str response_cost: Optional[float] spend_log_metadata: str + exception: Optional[str] + log_event_type: Optional[str] class GCSBucketLogger(CustomLogger): @@ -79,6 +82,7 @@ class GCSBucketLogger(CustomLogger): logging_payload: GCSBucketPayload = await self.get_gcs_payload( kwargs, response_obj, start_time_str, end_time_str ) + logging_payload["log_event_type"] = "successful_api_call" json_logged_payload = json.dumps(logging_payload) @@ -103,7 +107,49 @@ class GCSBucketLogger(CustomLogger): verbose_logger.error("GCS Bucket logging error: %s", str(e)) async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): - pass + from litellm.proxy.proxy_server import premium_user + + if premium_user is not True: + raise ValueError( + f"GCS Bucket logging is a premium feature. Please upgrade to use it. 
{CommonProxyErrors.not_premium_user.value}" + ) + try: + verbose_logger.debug( + "GCS Logger: async_log_failure_event logging kwargs: %s, response_obj: %s", + kwargs, + response_obj, + ) + + start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S") + end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S") + headers = await self.construct_request_headers() + + logging_payload: GCSBucketPayload = await self.get_gcs_payload( + kwargs, response_obj, start_time_str, end_time_str + ) + logging_payload["log_event_type"] = "failed_api_call" + + json_logged_payload = json.dumps(logging_payload) + + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = f"{current_date}/{uuid.uuid4().hex}" + response = await self.async_httpx_client.post( + headers=headers, + url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", + data=json_logged_payload, + ) + + if response.status_code != 200: + verbose_logger.error("GCS Bucket logging error: %s", str(response.text)) + + verbose_logger.debug("GCS Bucket response %s", response) + verbose_logger.debug("GCS Bucket status code %s", response.status_code) + verbose_logger.debug("GCS Bucket response.text %s", response.text) + except Exception as e: + verbose_logger.error("GCS Bucket logging error: %s", str(e)) async def construct_request_headers(self) -> Dict[str, str]: from litellm import vertex_chat_completion @@ -139,9 +185,18 @@ class GCSBucketLogger(CustomLogger): optional_params=kwargs.get("optional_params", None), ) response_dict = {} - response_dict = convert_litellm_response_object_to_dict( - response_obj=response_obj - ) + if response_obj: + response_dict = convert_litellm_response_object_to_dict( + response_obj=response_obj + ) + + exception_str = None + + # Handle logging exception attributes + if "exception" in kwargs: + exception_str = kwargs.get("exception", "") + if not isinstance(exception_str, str): + exception_str = str(exception_str) _spend_log_payload: SpendLogsPayload = get_logging_payload( kwargs=kwargs, @@ -156,8 +211,10 @@ class GCSBucketLogger(CustomLogger): response_obj=response_dict, start_time=start_time, end_time=end_time, - spend_log_metadata=_spend_log_payload["metadata"], + spend_log_metadata=_spend_log_payload.get("metadata", ""), response_cost=kwargs.get("response_cost", None), + exception=exception_str, + log_event_type=None, ) return gcs_payload diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py index cd7004e41d..6a28d70b17 100644 --- a/litellm/proxy/spend_tracking/spend_tracking_utils.py +++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py @@ -21,6 +21,8 @@ def get_logging_payload( if kwargs is None: kwargs = {} + if response_obj is None: + response_obj = {} # standardize this function to be used across, s3, dynamoDB, langfuse logging litellm_params = kwargs.get("litellm_params", {}) metadata = ( From 9bd112d97010b27b1a46101bf4ea520f1d1c8947 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 08:40:02 -0700 Subject: [PATCH 27/60] fix test for gcs bucket --- litellm/proxy/proxy_config.yaml | 5 +---- litellm/tests/test_gcs_bucket.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 660c27f249..4a1fc84a80 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -39,7 +39,4 @@ 
general_settings: litellm_settings: fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}] - success_callback: ["langfuse", "prometheus"] - langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] - failure_callback: ["prometheus"] - cache: True + callbacks: ["gcs_bucket"] diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index c21988c73d..b26dfec038 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -147,6 +147,7 @@ async def test_basic_gcs_logger(): assert gcs_payload["response_cost"] > 0.0 + assert gcs_payload["log_event_type"] == "successful_api_call" gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"]) assert ( From e1c70a6954b1446a5a1997f1f52cd4ed3f21bbb8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 08:55:51 -0700 Subject: [PATCH 28/60] log failure calls on gcs + testing --- litellm/integrations/gcs_bucket.py | 9 ++- litellm/tests/test_gcs_bucket.py | 110 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py index 6525f680a1..be7f8e39c2 100644 --- a/litellm/integrations/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket.py @@ -129,13 +129,20 @@ class GCSBucketLogger(CustomLogger): ) logging_payload["log_event_type"] = "failed_api_call" + _litellm_params = kwargs.get("litellm_params") or {} + metadata = _litellm_params.get("metadata") or {} + json_logged_payload = json.dumps(logging_payload) # Get the current date current_date = datetime.now().strftime("%Y-%m-%d") # Modify the object_name to include the date-based folder - object_name = f"{current_date}/{uuid.uuid4().hex}" + object_name = f"{current_date}/failure-{uuid.uuid4().hex}" + + if "gcs_log_id" in metadata: + object_name = metadata["gcs_log_id"] + response = await self.async_httpx_client.post( headers=headers, url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}", diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py index b26dfec038..f0aaf8d8dd 100644 --- a/litellm/tests/test_gcs_bucket.py +++ b/litellm/tests/test_gcs_bucket.py @@ -162,3 +162,113 @@ async def test_basic_gcs_logger(): # Delete Object from GCS print("deleting object from GCS") await gcs_logger.delete_gcs_object(object_name=object_name) + + +@pytest.mark.asyncio +async def test_basic_gcs_logger_failure(): + load_vertex_ai_credentials() + gcs_logger = GCSBucketLogger() + print("GCSBucketLogger", gcs_logger) + + gcs_log_id = f"failure-test-{uuid.uuid4().hex}" + + litellm.callbacks = [gcs_logger] + + try: + response = await litellm.acompletion( + model="gpt-3.5-turbo", + temperature=0.7, + messages=[{"role": "user", "content": "This is a test"}], + max_tokens=10, + user="ishaan-2", + mock_response=litellm.BadRequestError( + model="gpt-3.5-turbo", + message="Error: 400: Bad Request: Invalid API key, please check your API key and try again.", + llm_provider="openai", + ), + metadata={ + "gcs_log_id": gcs_log_id, + "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"], + "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + "user_api_key_alias": None, + "user_api_end_user_max_budget": None, + "litellm_api_version": "0.0.0", + "global_max_parallel_requests": None, + 
"user_api_key_user_id": "116544810872468347480", + "user_api_key_org_id": None, + "user_api_key_team_id": None, + "user_api_key_team_alias": None, + "user_api_key_metadata": {}, + "requester_ip_address": "127.0.0.1", + "spend_logs_metadata": {"hello": "world"}, + "headers": { + "content-type": "application/json", + "user-agent": "PostmanRuntime/7.32.3", + "accept": "*/*", + "postman-token": "92300061-eeaa-423b-a420-0b44896ecdc4", + "host": "localhost:4000", + "accept-encoding": "gzip, deflate, br", + "connection": "keep-alive", + "content-length": "163", + }, + "endpoint": "http://localhost:4000/chat/completions", + "model_group": "gpt-3.5-turbo", + "deployment": "azure/chatgpt-v-2", + "model_info": { + "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4", + "db_model": False, + }, + "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/", + "caching_groups": None, + "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n", + }, + ) + except: + pass + + await asyncio.sleep(5) + + # Get the current date + # Get the current date + current_date = datetime.now().strftime("%Y-%m-%d") + + # Modify the object_name to include the date-based folder + object_name = gcs_log_id + + print("object_name", object_name) + + # Check if object landed on GCS + object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name) + print("object from gcs=", object_from_gcs) + # convert object_from_gcs from bytes to DICT + parsed_data = json.loads(object_from_gcs) + print("object_from_gcs as dict", parsed_data) + + print("type of object_from_gcs", type(parsed_data)) + + gcs_payload = GCSBucketPayload(**parsed_data) + + print("gcs_payload", gcs_payload) + + assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo" + assert gcs_payload["request_kwargs"]["messages"] == [ + {"role": "user", "content": "This is a test"} + ] + + assert gcs_payload["response_cost"] == 0 + assert gcs_payload["log_event_type"] == "failed_api_call" + + gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"]) + + assert ( + gcs_payload["spend_log_metadata"]["user_api_key"] + == "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b" + ) + assert ( + gcs_payload["spend_log_metadata"]["user_api_key_user_id"] + == "116544810872468347480" + ) + + # Delete Object from GCS + print("deleting object from GCS") + await gcs_logger.delete_gcs_object(object_name=object_name) From 38868a0a451b2f389a3c0dfb2b62fdbd8449c583 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 09:08:14 -0700 Subject: [PATCH 29/60] use litellm_ prefix for new deployment metrics --- docs/my-website/docs/proxy/prometheus.md | 14 ++--- litellm/integrations/prometheus.py | 52 +++++++++---------- .../prometheus_helpers/prometheus_api.py | 4 +- litellm/tests/test_prometheus.py | 6 +-- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 6c856f58b3..4b913d2e82 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -72,15 +72,15 @@ http://localhost:4000/metrics | Metric Name | Description | 
|----------------------|--------------------------------------| -| `deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. | +| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. | | `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment | | `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment | - `llm_deployment_success_responses` | Total number of successful LLM API calls for deployment | -| `llm_deployment_failure_responses` | Total number of failed LLM API calls for deployment | -| `llm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure | -| `llm_deployment_latency_per_output_token` | Latency per output token for deployment | -| `llm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model | -| `llm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model | + `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment | +| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for deployment | +| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure | +| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment | +| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model | +| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model | diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 8797807ac6..08431fd7af 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -141,42 +141,42 @@ class PrometheusLogger(CustomLogger): ] # Metric for deployment state - self.deployment_state = Gauge( - "deployment_state", + self.litellm_deployment_state = Gauge( + "litellm_deployment_state", "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage", labelnames=_logged_llm_labels, ) - self.llm_deployment_success_responses = Counter( - name="llm_deployment_success_responses", + self.litellm_deployment_success_responses = Counter( + name="litellm_deployment_success_responses", documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm", labelnames=_logged_llm_labels, ) - self.llm_deployment_failure_responses = Counter( - name="llm_deployment_failure_responses", + self.litellm_deployment_failure_responses = Counter( + name="litellm_deployment_failure_responses", documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm", labelnames=_logged_llm_labels, ) - self.llm_deployment_total_requests = Counter( - name="llm_deployment_total_requests", + self.litellm_deployment_total_requests = Counter( + name="litellm_deployment_total_requests", documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure", labelnames=_logged_llm_labels, ) # Deployment Latency tracking - self.llm_deployment_latency_per_output_token = Histogram( - name="llm_deployment_latency_per_output_token", + self.litellm_deployment_latency_per_output_token = Histogram( + 
name="litellm_deployment_latency_per_output_token", documentation="LLM Deployment Analytics - Latency per output token", labelnames=_logged_llm_labels, ) - self.llm_deployment_successful_fallbacks = Counter( - "llm_deployment_successful_fallbacks", + self.litellm_deployment_successful_fallbacks = Counter( + "litellm_deployment_successful_fallbacks", "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model", ["primary_model", "fallback_model"], ) - self.llm_deployment_failed_fallbacks = Counter( - "llm_deployment_failed_fallbacks", + self.litellm_deployment_failed_fallbacks = Counter( + "litellm_deployment_failed_fallbacks", "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model", ["primary_model", "fallback_model"], ) @@ -358,14 +358,14 @@ class PrometheusLogger(CustomLogger): api_provider=llm_provider, ) - self.llm_deployment_failure_responses.labels( + self.litellm_deployment_failure_responses.labels( litellm_model_name=litellm_model_name, model_id=model_id, api_base=api_base, api_provider=llm_provider, ).inc() - self.llm_deployment_total_requests.labels( + self.litellm_deployment_total_requests.labels( litellm_model_name=litellm_model_name, model_id=model_id, api_base=api_base, @@ -438,14 +438,14 @@ class PrometheusLogger(CustomLogger): api_provider=llm_provider, ) - self.llm_deployment_success_responses.labels( + self.litellm_deployment_success_responses.labels( litellm_model_name=litellm_model_name, model_id=model_id, api_base=api_base, api_provider=llm_provider, ).inc() - self.llm_deployment_total_requests.labels( + self.litellm_deployment_total_requests.labels( litellm_model_name=litellm_model_name, model_id=model_id, api_base=api_base, @@ -475,7 +475,7 @@ class PrometheusLogger(CustomLogger): latency_per_token = None if output_tokens is not None and output_tokens > 0: latency_per_token = _latency_seconds / output_tokens - self.llm_deployment_latency_per_output_token.labels( + self.litellm_deployment_latency_per_output_token.labels( litellm_model_name=litellm_model_name, model_id=model_id, api_base=api_base, @@ -497,7 +497,7 @@ class PrometheusLogger(CustomLogger): kwargs, ) _new_model = kwargs.get("model") - self.llm_deployment_successful_fallbacks.labels( + self.litellm_deployment_successful_fallbacks.labels( primary_model=original_model_group, fallback_model=_new_model ).inc() @@ -508,11 +508,11 @@ class PrometheusLogger(CustomLogger): kwargs, ) _new_model = kwargs.get("model") - self.llm_deployment_failed_fallbacks.labels( + self.litellm_deployment_failed_fallbacks.labels( primary_model=original_model_group, fallback_model=_new_model ).inc() - def set_deployment_state( + def set_litellm_deployment_state( self, state: int, litellm_model_name: str, @@ -520,7 +520,7 @@ class PrometheusLogger(CustomLogger): api_base: str, api_provider: str, ): - self.deployment_state.labels( + self.litellm_deployment_state.labels( litellm_model_name, model_id, api_base, api_provider ).set(state) @@ -531,7 +531,7 @@ class PrometheusLogger(CustomLogger): api_base: str, api_provider: str, ): - self.set_deployment_state( + self.set_litellm_deployment_state( 0, litellm_model_name, model_id, api_base, api_provider ) @@ -542,7 +542,7 @@ class PrometheusLogger(CustomLogger): api_base: str, api_provider: str, ): - self.set_deployment_state( + self.set_litellm_deployment_state( 1, litellm_model_name, model_id, api_base, api_provider ) @@ -553,7 +553,7 @@ class PrometheusLogger(CustomLogger): api_base: str, 
api_provider: str, ): - self.set_deployment_state( + self.set_litellm_deployment_state( 2, litellm_model_name, model_id, api_base, api_provider ) diff --git a/litellm/integrations/prometheus_helpers/prometheus_api.py b/litellm/integrations/prometheus_helpers/prometheus_api.py index 86764df7dd..13ccc15620 100644 --- a/litellm/integrations/prometheus_helpers/prometheus_api.py +++ b/litellm/integrations/prometheus_helpers/prometheus_api.py @@ -41,8 +41,8 @@ async def get_fallback_metric_from_prometheus(): """ response_message = "" relevant_metrics = [ - "llm_deployment_successful_fallbacks_total", - "llm_deployment_failed_fallbacks_total", + "litellm_deployment_successful_fallbacks_total", + "litellm_deployment_failed_fallbacks_total", ] for metric in relevant_metrics: response_json = await get_metric_from_prometheus( diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py index 64e824e6db..7574beb9d9 100644 --- a/litellm/tests/test_prometheus.py +++ b/litellm/tests/test_prometheus.py @@ -76,6 +76,6 @@ async def test_async_prometheus_success_logging(): print("metrics from prometheus", metrics) assert metrics["litellm_requests_metric_total"] == 1.0 assert metrics["litellm_total_tokens_total"] == 30.0 - assert metrics["llm_deployment_success_responses_total"] == 1.0 - assert metrics["llm_deployment_total_requests_total"] == 1.0 - assert metrics["llm_deployment_latency_per_output_token_bucket"] == 1.0 + assert metrics["litellm_deployment_success_responses_total"] == 1.0 + assert metrics["litellm_deployment_total_requests_total"] == 1.0 + assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0 From f096cd3caf252edcde02f50dddc70c3569cfea8d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 09:24:22 -0700 Subject: [PATCH 30/60] fix use normal prisma --- litellm/proxy/utils.py | 22 +++++++++++----------- set_prisma_permissions.py | 39 --------------------------------------- 2 files changed, 11 insertions(+), 50 deletions(-) delete mode 100644 set_prisma_permissions.py diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index f16e604f66..d1d17d0ef5 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -832,17 +832,17 @@ class PrismaClient: dname = os.path.dirname(abspath) os.chdir(dname) - # try: - # subprocess.run(["prisma", "generate"]) - # subprocess.run( - # ["prisma", "db", "push", "--accept-data-loss"] - # ) # this looks like a weird edge case when prisma just wont start on render. we need to have the --accept-data-loss - # except Exception as e: - # raise Exception( - # f"Unable to run prisma commands. Run `pip install prisma` Got Exception: {(str(e))}" - # ) - # finally: - # os.chdir(original_dir) + try: + subprocess.run(["prisma", "generate"]) + subprocess.run( + ["prisma", "db", "push", "--accept-data-loss"] + ) # this looks like a weird edge case when prisma just wont start on render. we need to have the --accept-data-loss + except Exception as e: + raise Exception( + f"Unable to run prisma commands. 
Run `pip install prisma` Got Exception: {(str(e))}" + ) + finally: + os.chdir(original_dir) # Now you can import the Prisma Client from prisma import Prisma # type: ignore verbose_proxy_logger.debug("Connecting Prisma Client to DB..") diff --git a/set_prisma_permissions.py b/set_prisma_permissions.py deleted file mode 100644 index 0973b90b88..0000000000 --- a/set_prisma_permissions.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import importlib -from pathlib import Path - - -# Get the location of the 'prisma' package -package_name = "prisma" -spec = importlib.util.find_spec(package_name) -print("spec = ", spec) # noqa - -if spec and spec.origin: - print("spec origin= ", spec.origin) # noqa - _base_prisma_package_dir = os.path.dirname(spec.origin) - print("base prisma package dir = ", _base_prisma_package_dir) # noqa -else: - raise ImportError(f"Package {package_name} not found.") - - -def ensure_prisma_has_writable_dirs(path: str | Path) -> None: - import stat - - for root, dirs, _ in os.walk(path): - for directory in dirs: - dir_path = os.path.join(root, directory) - os.makedirs(dir_path, exist_ok=True) - print("making dir for prisma = ", dir_path) - os.chmod(dir_path, os.stat(dir_path).st_mode | stat.S_IWRITE | stat.S_IEXEC) - - # make this file writable - prisma/schema.prisma - file_path = os.path.join(path, "schema.prisma") - print("making file for prisma = ", file_path) - # make entire directory writable - os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE | stat.S_IEXEC) - - os.chmod(file_path, os.stat(file_path).st_mode | stat.S_IWRITE | stat.S_IEXEC) - - -# Use the package directory in your method call -ensure_prisma_has_writable_dirs(path=_base_prisma_package_dir) From faf939388773341a6b6588e0ab9d4aa6c4cd9f32 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 09:26:47 -0700 Subject: [PATCH 31/60] allow running as non-root user --- Dockerfile.database | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Dockerfile.database b/Dockerfile.database index 22084bab89..c995939e5b 100644 --- a/Dockerfile.database +++ b/Dockerfile.database @@ -62,6 +62,11 @@ RUN pip install PyJWT --no-cache-dir RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh # Generate prisma client +ENV PRISMA_BINARY_CACHE_DIR=/app/prisma +RUN mkdir -p /.cache +RUN chmod -R 777 /.cache +RUN pip install nodejs-bin +RUN pip install prisma RUN prisma generate RUN chmod +x entrypoint.sh From 74ddd24cbe2a15c792e556ce9c21214997208838 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 09:53:19 -0700 Subject: [PATCH 32/60] test(test_pass_through_endpoints.py): fix test --- litellm/tests/test_pass_through_endpoints.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_pass_through_endpoints.py b/litellm/tests/test_pass_through_endpoints.py index 0f57ca68f9..63e9e01860 100644 --- a/litellm/tests/test_pass_through_endpoints.py +++ b/litellm/tests/test_pass_through_endpoints.py @@ -169,7 +169,7 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li [(True, 0, 429), (True, 1, 207), (False, 0, 207)], ) @pytest.mark.asyncio -async def test_pass_through_endpoint_pass_through_keys_langfuse( +async def test_aaapass_through_endpoint_pass_through_keys_langfuse( auth, expected_error_code, rpm_limit ): client = TestClient(app) @@ -196,7 +196,7 @@ async def test_pass_through_endpoint_pass_through_keys_langfuse( pass_through_endpoints = [ { "path": "/api/public/ingestion", - "target": "https://us.cloud.langfuse.com/api/public/ingestion", 
+ "target": "https://cloud.langfuse.com/api/public/ingestion", "auth": auth, "custom_auth_parser": "langfuse", "headers": { From ce61da6ff3fb3f8e79d77d2f9a5a902ed8976a08 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 09:59:13 -0700 Subject: [PATCH 33/60] test(test_function_call_parsing.py): fix test --- litellm/tests/test_function_call_parsing.py | 108 +++++++++++--------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/litellm/tests/test_function_call_parsing.py b/litellm/tests/test_function_call_parsing.py index d223a7c8f6..fab9cf110c 100644 --- a/litellm/tests/test_function_call_parsing.py +++ b/litellm/tests/test_function_call_parsing.py @@ -1,23 +1,27 @@ # What is this? ## Test to make sure function call response always works with json.loads() -> no extra parsing required. Relevant issue - https://github.com/BerriAI/litellm/issues/2654 -import sys, os +import os +import sys import traceback + from dotenv import load_dotenv load_dotenv() -import os, io +import io +import os sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path -import pytest -import litellm import json import warnings - -from litellm import completion from typing import List +import pytest + +import litellm +from litellm import completion + # Just a stub to keep the sample code simple class Trade: @@ -78,58 +82,60 @@ def trade(model_name: str) -> List[Trade]: }, } - response = completion( - model_name, - [ - { - "role": "system", - "content": """You are an expert asset manager, managing a portfolio. + try: + response = completion( + model_name, + [ + { + "role": "system", + "content": """You are an expert asset manager, managing a portfolio. - Always use the `trade` function. Make sure that you call it correctly. For example, the following is a valid call: + Always use the `trade` function. Make sure that you call it correctly. For example, the following is a valid call: + ``` + trade({ + "orders": [ + {"action": "buy", "asset": "BTC", "amount": 0.1}, + {"action": "sell", "asset": "ETH", "amount": 0.2} + ] + }) + ``` + + If there are no trades to make, call `trade` with an empty array: + ``` + trade({ "orders": [] }) + ``` + """, + }, + { + "role": "user", + "content": """Manage the portfolio. + + Don't jabber. + + This is the current market data: ``` - trade({ - "orders": [ - {"action": "buy", "asset": "BTC", "amount": 0.1}, - {"action": "sell", "asset": "ETH", "amount": 0.2} - ] - }) + {market_data} ``` - If there are no trades to make, call `trade` with an empty array: + Your portfolio is as follows: ``` - trade({ "orders": [] }) + {portfolio} ``` - """, + """.replace( + "{market_data}", "BTC: 64,000 USD\nETH: 3,500 USD" + ).replace( + "{portfolio}", "USD: 1000, BTC: 0.1, ETH: 0.2" + ), + }, + ], + tools=[tool_spec], + tool_choice={ + "type": "function", + "function": {"name": tool_spec["function"]["name"]}, # type: ignore }, - { - "role": "user", - "content": """Manage the portfolio. - - Don't jabber. 
- - This is the current market data: - ``` - {market_data} - ``` - - Your portfolio is as follows: - ``` - {portfolio} - ``` - """.replace( - "{market_data}", "BTC: 64,000 USD\nETH: 3,500 USD" - ).replace( - "{portfolio}", "USD: 1000, BTC: 0.1, ETH: 0.2" - ), - }, - ], - tools=[tool_spec], - tool_choice={ - "type": "function", - "function": {"name": tool_spec["function"]["name"]}, # type: ignore - }, - ) - + ) + except litellm.InternalServerError: + pass calls = response.choices[0].message.tool_calls trades = [trade for call in calls for trade in parse_call(call)] return trades From a081ccdc50b6309c70a415a4b69813a27610250d Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Wed, 14 Aug 2024 10:14:19 -0700 Subject: [PATCH 34/60] vertex_ai/claude-3-5-sonnet@20240620 support prefill --- model_prices_and_context_window.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index e31e6b3f4f..e620c3fad9 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2085,7 +2085,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-haiku@20240307": { "max_tokens": 4096, From 21ed36a8243e3188c8a3c3e37a715c2cec00faf3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 10:42:06 -0700 Subject: [PATCH 35/60] =?UTF-8?q?bump:=20version=201.43.10=20=E2=86=92=201?= =?UTF-8?q?.43.11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5ae04ea924..b6c52157e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.43.10" +version = "1.43.11" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.43.10" +version = "1.43.11" version_files = [ "pyproject.toml:^version" ] From a31d334113709651d9f4f6e770dacc50ebf5bd50 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 10:42:08 -0700 Subject: [PATCH 36/60] =?UTF-8?q?bump:=20version=201.43.11=20=E2=86=92=201?= =?UTF-8?q?.43.12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b6c52157e6..73fa657017 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.43.11" +version = "1.43.12" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.43.11" +version = "1.43.12" version_files = [ "pyproject.toml:^version" ] From b6bc36e4d48fd1cade7867ca87475c8b34573c35 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 10:48:42 -0700 Subject: [PATCH 37/60] test(test_pass_through_endpoints.py): correctly reset test --- litellm/tests/test_pass_through_endpoints.py | 150 +++++++++++-------- 1 file changed, 90 insertions(+), 60 deletions(-) diff --git 
a/litellm/tests/test_pass_through_endpoints.py b/litellm/tests/test_pass_through_endpoints.py index 63e9e01860..951592b711 100644 --- a/litellm/tests/test_pass_through_endpoints.py +++ b/litellm/tests/test_pass_through_endpoints.py @@ -172,83 +172,113 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li async def test_aaapass_through_endpoint_pass_through_keys_langfuse( auth, expected_error_code, rpm_limit ): + client = TestClient(app) import litellm from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache - mock_api_key = "sk-my-test-key" - cache_value = UserAPIKeyAuth(token=hash_token(mock_api_key), rpm_limit=rpm_limit) + # Store original values + original_user_api_key_cache = getattr( + litellm.proxy.proxy_server, "user_api_key_cache", None + ) + original_master_key = getattr(litellm.proxy.proxy_server, "master_key", None) + original_prisma_client = getattr(litellm.proxy.proxy_server, "prisma_client", None) + original_proxy_logging_obj = getattr( + litellm.proxy.proxy_server, "proxy_logging_obj", None + ) - _cohere_api_key = os.environ.get("COHERE_API_KEY") + try: - user_api_key_cache.set_cache(key=hash_token(mock_api_key), value=cache_value) + mock_api_key = "sk-my-test-key" + cache_value = UserAPIKeyAuth( + token=hash_token(mock_api_key), rpm_limit=rpm_limit + ) - proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) - proxy_logging_obj._init_litellm_callbacks() + _cohere_api_key = os.environ.get("COHERE_API_KEY") - setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR") - setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj) + user_api_key_cache.set_cache(key=hash_token(mock_api_key), value=cache_value) - # Define a pass-through endpoint - pass_through_endpoints = [ - { - "path": "/api/public/ingestion", - "target": "https://cloud.langfuse.com/api/public/ingestion", - "auth": auth, - "custom_auth_parser": "langfuse", - "headers": { - "LANGFUSE_PUBLIC_KEY": "os.environ/LANGFUSE_PUBLIC_KEY", - "LANGFUSE_SECRET_KEY": "os.environ/LANGFUSE_SECRET_KEY", + proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) + proxy_logging_obj._init_litellm_callbacks() + + setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR") + setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj) + + # Define a pass-through endpoint + pass_through_endpoints = [ + { + "path": "/api/public/ingestion", + "target": "https://us.cloud.langfuse.com/api/public/ingestion", + "auth": auth, + "custom_auth_parser": "langfuse", + "headers": { + "LANGFUSE_PUBLIC_KEY": "os.environ/LANGFUSE_PUBLIC_KEY", + "LANGFUSE_SECRET_KEY": "os.environ/LANGFUSE_SECRET_KEY", + }, + } + ] + + # Initialize the pass-through endpoint + await initialize_pass_through_endpoints(pass_through_endpoints) + general_settings: Optional[dict] = ( + getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} + ) + old_general_settings = general_settings + general_settings.update({"pass_through_endpoints": pass_through_endpoints}) + setattr(litellm.proxy.proxy_server, "general_settings", general_settings) + + _json_data = { + "batch": [ + { + "id": 
"80e2141f-0ca6-47b7-9c06-dde5e97de690", + "type": "trace-create", + "body": { + "id": "0687af7b-4a75-4de8-a4f6-cba1cdc00865", + "timestamp": "2024-08-14T02:38:56.092950Z", + "name": "test-trace-litellm-proxy-passthrough", + }, + "timestamp": "2024-08-14T02:38:56.093352Z", + } + ], + "metadata": { + "batch_size": 1, + "sdk_integration": "default", + "sdk_name": "python", + "sdk_version": "2.27.0", + "public_key": "anything", }, } - ] - # Initialize the pass-through endpoint - await initialize_pass_through_endpoints(pass_through_endpoints) - general_settings: Optional[dict] = ( - getattr(litellm.proxy.proxy_server, "general_settings", {}) or {} - ) - general_settings.update({"pass_through_endpoints": pass_through_endpoints}) - setattr(litellm.proxy.proxy_server, "general_settings", general_settings) + # Make a request to the pass-through endpoint + response = client.post( + "/api/public/ingestion", + json=_json_data, + headers={"Authorization": "Basic c2stbXktdGVzdC1rZXk6YW55dGhpbmc="}, + ) - _json_data = { - "batch": [ - { - "id": "80e2141f-0ca6-47b7-9c06-dde5e97de690", - "type": "trace-create", - "body": { - "id": "0687af7b-4a75-4de8-a4f6-cba1cdc00865", - "timestamp": "2024-08-14T02:38:56.092950Z", - "name": "test-trace-litellm-proxy-passthrough", - }, - "timestamp": "2024-08-14T02:38:56.093352Z", - } - ], - "metadata": { - "batch_size": 1, - "sdk_integration": "default", - "sdk_name": "python", - "sdk_version": "2.27.0", - "public_key": "anything", - }, - } + print("JSON response: ", _json_data) - # Make a request to the pass-through endpoint - response = client.post( - "/api/public/ingestion", - json=_json_data, - headers={"Authorization": "Basic c2stbXktdGVzdC1rZXk6YW55dGhpbmc="}, - ) + print("RESPONSE RECEIVED - {}".format(response.text)) - print("JSON response: ", _json_data) + # Assert the response + assert response.status_code == expected_error_code - print("RESPONSE RECEIVED - {}".format(response.text)) - - # Assert the response - assert response.status_code == expected_error_code + setattr(litellm.proxy.proxy_server, "general_settings", old_general_settings) + finally: + # Reset to original values + setattr( + litellm.proxy.proxy_server, + "user_api_key_cache", + original_user_api_key_cache, + ) + setattr(litellm.proxy.proxy_server, "master_key", original_master_key) + setattr(litellm.proxy.proxy_server, "prisma_client", original_prisma_client) + setattr( + litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj + ) @pytest.mark.asyncio From 83526bf052a2fb682de79fda8b8b1eadc4ed044f Mon Sep 17 00:00:00 2001 From: Aaron Bach Date: Wed, 14 Aug 2024 13:20:22 -0600 Subject: [PATCH 38/60] Update prices/context windows for Perplexity Llama 3.1 models --- model_prices_and_context_window.json | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index e31e6b3f4f..d19f57593a 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -4531,6 +4531,69 @@ "litellm_provider": "perplexity", "mode": "chat" }, + "perplexity/llama-3.1-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-8b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + 
"output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-huge-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-large-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-large-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, "perplexity/pplx-7b-chat": { "max_tokens": 8192, "max_input_tokens": 8192, From 8fd1bbb7243b44fd8759b0fa0f2d10dc351d9151 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 13:08:03 -0700 Subject: [PATCH 39/60] feat - anthropic api context caching v0 --- litellm/llms/anthropic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 6f05aa226e..fd4009b973 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -901,6 +901,7 @@ class AnthropicChatCompletion(BaseLLM): # Separate system prompt from rest of message system_prompt_indices = [] system_prompt = "" + system_prompt_dict = None for idx, message in enumerate(messages): if message["role"] == "system": valid_content: bool = False @@ -912,6 +913,16 @@ class AnthropicChatCompletion(BaseLLM): system_prompt += content.get("text", "") valid_content = True + # Handle Anthropic API context caching + if "cache_control" in message: + system_prompt_dict = [ + { + "cache_control": message["cache_control"], + "text": system_prompt, + "type": "text", + } + ] + if valid_content: system_prompt_indices.append(idx) if len(system_prompt_indices) > 0: @@ -919,6 +930,10 @@ class AnthropicChatCompletion(BaseLLM): messages.pop(idx) if len(system_prompt) > 0: optional_params["system"] = system_prompt + + # Handling anthropic API Prompt Caching + if system_prompt_dict is not None: + optional_params["system"] = system_prompt_dict # Format rest of message according to anthropic guidelines try: messages = prompt_factory( From bc0669d9a3187bbd8e7db4814941322e65aa17fb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 13:49:07 -0700 Subject: [PATCH 40/60] test passing cache controls through anthropic msg --- litellm/tests/test_prompt_factory.py | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py index f7a715a220..2351e2c121 100644 --- a/litellm/tests/test_prompt_factory.py +++ b/litellm/tests/test_prompt_factory.py @@ -260,3 +260,48 @@ def 
test_anthropic_messages_tool_call(): translated_messages[-1]["content"][0]["tool_use_id"] == "bc8cb4b6-88c4-4138-8993-3a9d9cd51656" ) + + +def test_anthropic_cache_controls_pt(): + messages = [ + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ] + + translated_messages = anthropic_messages_pt( + messages, model="claude-3-5-sonnet-20240620", llm_provider="anthropic" + ) + + for i, msg in enumerate(translated_messages): + if i == 0: + assert msg["content"][0]["cache_control"] == {"type": "ephemeral"} + elif i == 1: + assert "cache_controls" not in msg["content"][0] + elif i == 2: + assert msg["content"][0]["cache_control"] == {"type": "ephemeral"} + + print("translated_messages: ", translated_messages) From 8404475ba3fa4cb984f79c9dcf1e61c5adfb2110 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 14:07:48 -0700 Subject: [PATCH 41/60] add testing for test_anthropic_cache_controls_pt --- litellm/tests/test_prompt_factory.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py index 2351e2c121..93e92a7926 100644 --- a/litellm/tests/test_prompt_factory.py +++ b/litellm/tests/test_prompt_factory.py @@ -263,6 +263,7 @@ def test_anthropic_messages_tool_call(): def test_anthropic_cache_controls_pt(): + "see anthropic docs for this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#continuing-a-multi-turn-conversation" messages = [ # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. { @@ -290,6 +291,11 @@ def test_anthropic_cache_controls_pt(): } ], }, + { + "role": "assistant", + "content": "Certainly! 
the key terms and conditions are the following: the contract is 1 year long for $10/mo", + "cache_control": {"type": "ephemeral"}, + }, ] translated_messages = anthropic_messages_pt( @@ -303,5 +309,7 @@ def test_anthropic_cache_controls_pt(): assert "cache_controls" not in msg["content"][0] elif i == 2: assert msg["content"][0]["cache_control"] == {"type": "ephemeral"} + elif i == 3: + assert msg["content"][0]["cache_control"] == {"type": "ephemeral"} print("translated_messages: ", translated_messages) From 65a3acf5431c03f2131f8793d46e97e9ded41f97 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 14:08:12 -0700 Subject: [PATCH 42/60] build(model_prices_and_context_window.json): add 'supports_assistant_prefill' to all vertex ai anthropic models --- ...odel_prices_and_context_window_backup.json | 75 ++++++++++++++++++- model_prices_and_context_window.json | 9 ++- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index e31e6b3f4f..d30270c5c8 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2074,7 +2074,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-5-sonnet@20240620": { "max_tokens": 4096, @@ -2085,7 +2086,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-haiku@20240307": { "max_tokens": 4096, @@ -2096,7 +2098,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-opus@20240229": { "max_tokens": 4096, @@ -2107,7 +2110,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/meta/llama3-405b-instruct-maas": { "max_tokens": 32000, @@ -4531,6 +4535,69 @@ "litellm_provider": "perplexity", "mode": "chat" }, + "perplexity/llama-3.1-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-8b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-huge-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-large-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-large-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, 
+ "max_output_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-chat": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/llama-3.1-sonar-small-128k-online": { + "max_tokens": 127072, + "max_input_tokens": 127072, + "max_output_tokens": 127072, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "perplexity", + "mode": "chat" + }, "perplexity/pplx-7b-chat": { "max_tokens": 8192, "max_input_tokens": 8192, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 9eaa7c1b13..d30270c5c8 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2074,7 +2074,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-5-sonnet@20240620": { "max_tokens": 4096, @@ -2097,7 +2098,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/claude-3-opus@20240229": { "max_tokens": 4096, @@ -2108,7 +2110,8 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_assistant_prefill": true }, "vertex_ai/meta/llama3-405b-instruct-maas": { "max_tokens": 32000, From d97fb1d04e7677dff29386d2e6136d951790a58e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 14:13:21 -0700 Subject: [PATCH 43/60] test(test_pass_through_endpoints.py): fix langfuse base --- litellm/tests/test_pass_through_endpoints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_pass_through_endpoints.py b/litellm/tests/test_pass_through_endpoints.py index 951592b711..d78a40d378 100644 --- a/litellm/tests/test_pass_through_endpoints.py +++ b/litellm/tests/test_pass_through_endpoints.py @@ -211,7 +211,7 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse( pass_through_endpoints = [ { "path": "/api/public/ingestion", - "target": "https://us.cloud.langfuse.com/api/public/ingestion", + "target": "https://cloud.langfuse.com/api/public/ingestion", "auth": auth, "custom_auth_parser": "langfuse", "headers": { From 2f47348d8e7aaafdb58d59c7610dff806474dadd Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 14:19:05 -0700 Subject: [PATCH 44/60] fix(factory.py): support assistant messages as a list of dictionaries - cohere messages api Fixes https://github.com/BerriAI/litellm/pull/5121 --- litellm/llms/prompt_templates/factory.py | 12 ++++++------ litellm/tests/test_completion.py | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 7c3c7e80fb..f39273c1a2 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1701,12 +1701,12 @@ def cohere_messages_pt_v2( assistant_tool_calls: List[ToolCallObject] = [] ## MERGE CONSECUTIVE ASSISTANT CONTENT ## while msg_i < 
len(messages) and messages[msg_i]["role"] == "assistant": - assistant_text = ( - messages[msg_i].get("content") or "" - ) # either string or none - if assistant_text: - assistant_content += assistant_text - + if isinstance(messages[msg_i]["content"], list): + for m in messages[msg_i]["content"]: + if m.get("type", "") == "text": + assistant_content += m["text"] + else: + assistant_content += messages[msg_i]["content"] if messages[msg_i].get( "tool_calls", [] ): # support assistant tool invoke conversion diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 4ea9ee3b0f..83031aba08 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -3705,19 +3705,21 @@ def test_completion_anyscale_api(): # test_completion_anyscale_api() -@pytest.mark.skip(reason="flaky test, times out frequently") +# @pytest.mark.skip(reason="flaky test, times out frequently") def test_completion_cohere(): try: # litellm.set_verbose=True messages = [ {"role": "system", "content": "You're a good bot"}, + {"role": "assistant", "content": [{"text": "2", "type": "text"}]}, + {"role": "assistant", "content": [{"text": "3", "type": "text"}]}, { "role": "user", "content": "Hey", }, ] response = completion( - model="command-nightly", + model="command-r", messages=messages, ) print(response) From 1ff93ed664cb383e279f9edb77654fc33a5db75b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 14:39:48 -0700 Subject: [PATCH 45/60] docs(model_management.md): add section on adding additional model information to proxy config --- .../my-website/docs/proxy/model_management.md | 116 ++++++++++++++++-- litellm/proxy/_new_secret_config.yaml | 9 +- 2 files changed, 107 insertions(+), 18 deletions(-) diff --git a/docs/my-website/docs/proxy/model_management.md b/docs/my-website/docs/proxy/model_management.md index 02ce4ba23b..a8cc66ae76 100644 --- a/docs/my-website/docs/proxy/model_management.md +++ b/docs/my-website/docs/proxy/model_management.md @@ -17,7 +17,7 @@ model_list: ## Get Model Information - `/model/info` -Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes. +Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled from the model_info you set and the [litellm model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). Sensitive details like API keys are excluded for security purposes. 
- + + ```bash curl -X POST "http://0.0.0.0:4000/model/new" \ - -H "accept: application/json" \ - -H "Content-Type: application/json" \ - -d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }' + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }' ``` - + + + +```yaml +model_list: + - model_name: gpt-3.5-turbo ### RECEIVED MODEL NAME ### `openai.chat.completions.create(model="gpt-3.5-turbo",...)` + litellm_params: # all params accepted by litellm.completion() - https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/types/router.py#L297 + model: azure/gpt-turbo-small-eu ### MODEL NAME sent to `litellm.completion()` ### + api_base: https://my-endpoint-europe-berri-992.openai.azure.com/ + api_key: "os.environ/AZURE_API_KEY_EU" # does os.getenv("AZURE_API_KEY_EU") + rpm: 6 # [OPTIONAL] Rate limit for this deployment: in requests per minute (rpm) + model_info: + my_custom_key: my_custom_value # additional model metadata +``` + + @@ -85,4 +96,83 @@ Keep in mind that as both endpoints are in [BETA], you may need to visit the ass - Get Model Information: [Issue #933](https://github.com/BerriAI/litellm/issues/933) - Add a New Model: [Issue #964](https://github.com/BerriAI/litellm/issues/964) -Feedback on the beta endpoints is valuable and helps improve the API for all users. \ No newline at end of file +Feedback on the beta endpoints is valuable and helps improve the API for all users. + + +## Add Additional Model Information + +If you want the ability to add a display name, description, and labels for models, just use `model_info:` + +```yaml +model_list: + - model_name: "gpt-4" + litellm_params: + model: "gpt-4" + api_key: "os.environ/OPENAI_API_KEY" + model_info: # 👈 KEY CHANGE + my_custom_key: "my_custom_value" +``` + +### Usage + +1. Add additional information to model + +```yaml +model_list: + - model_name: "gpt-4" + litellm_params: + model: "gpt-4" + api_key: "os.environ/OPENAI_API_KEY" + model_info: # 👈 KEY CHANGE + my_custom_key: "my_custom_value" +``` + +2. Call with `/model/info` + +Use a key with access to the model `gpt-4`. + +```bash +curl -L -X GET 'http://0.0.0.0:4000/v1/model/info' \ +-H 'Authorization: Bearer LITELLM_KEY' \ +``` + +3. 
**Expected Response** + +Returned `model_info = Your custom model_info + (if exists) LITELLM MODEL INFO` + + +[**How LiteLLM Model Info is found**](https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/proxy/proxy_server.py#L7460) + +[Tell us how this can be improved!](https://github.com/BerriAI/litellm/issues) + +```bash +{ + "data": [ + { + "model_name": "gpt-4", + "litellm_params": { + "model": "gpt-4" + }, + "model_info": { + "id": "e889baacd17f591cce4c63639275ba5e8dc60765d6c553e6ee5a504b19e50ddc", + "db_model": false, + "my_custom_key": "my_custom_value", # 👈 CUSTOM INFO + "key": "gpt-4", # 👈 KEY in LiteLLM MODEL INFO/COST MAP - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 3e-05, + "input_cost_per_character": null, + "input_cost_per_token_above_128k_tokens": null, + "output_cost_per_token": 6e-05, + "output_cost_per_character": null, + "output_cost_per_token_above_128k_tokens": null, + "output_cost_per_character_above_128k_tokens": null, + "output_vector_size": null, + "litellm_provider": "openai", + "mode": "chat" + } + }, + ] +} +``` diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 87a561e318..dfa5c16520 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,7 +1,6 @@ model_list: - - model_name: "*" + - model_name: "gpt-4" litellm_params: - model: "*" - -litellm_settings: - success_callback: ["langsmith"] \ No newline at end of file + model: "gpt-4" + model_info: + my_custom_key: "my_custom_value" \ No newline at end of file From 41ce2ef904d71fced7314bcdaa4226a0ecfa22ae Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 14:56:49 -0700 Subject: [PATCH 46/60] add anthropic cache controls --- litellm/llms/prompt_templates/factory.py | 62 +++++++++++++++++++----- litellm/types/llms/anthropic.py | 6 ++- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 7c3c7e80fb..66658e23a4 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1224,6 +1224,19 @@ def convert_to_anthropic_tool_invoke( return anthropic_tool_invoke +def add_cache_control_to_content( + anthropic_content_element: Union[ + dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam + ], + orignal_content_element: dict, +): + if "cache_control" in orignal_content_element: + anthropic_content_element["cache_control"] = orignal_content_element[ + "cache_control" + ] + return anthropic_content_element + + def anthropic_messages_pt( messages: list, model: str, @@ -1264,18 +1277,31 @@ def anthropic_messages_pt( image_chunk = convert_to_anthropic_image_obj( m["image_url"]["url"] ) - user_content.append( - AnthropicMessagesImageParam( - type="image", - source=AnthropicImageParamSource( - type="base64", - media_type=image_chunk["media_type"], - data=image_chunk["data"], - ), - ) + + _anthropic_content_element = AnthropicMessagesImageParam( + type="image", + source=AnthropicImageParamSource( + type="base64", + media_type=image_chunk["media_type"], + data=image_chunk["data"], + ), ) + + anthropic_content_element = add_cache_control_to_content( + anthropic_content_element=_anthropic_content_element, + orignal_content_element=m, + ) + user_content.append(anthropic_content_element) elif m.get("type", "") 
== "text": - user_content.append({"type": "text", "text": m["text"]}) + _anthropic_text_content_element = { + "type": "text", + "text": m["text"], + } + anthropic_content_element = add_cache_control_to_content( + anthropic_content_element=_anthropic_text_content_element, + orignal_content_element=m, + ) + user_content.append(anthropic_content_element) elif ( messages[msg_i]["role"] == "tool" or messages[msg_i]["role"] == "function" @@ -1306,6 +1332,10 @@ def anthropic_messages_pt( anthropic_message = AnthropicMessagesTextParam( type="text", text=m.get("text") ) + anthropic_message = add_cache_control_to_content( + anthropic_content_element=anthropic_message, + orignal_content_element=m, + ) assistant_content.append(anthropic_message) elif ( "content" in messages[msg_i] @@ -1313,9 +1343,17 @@ def anthropic_messages_pt( and len(messages[msg_i]["content"]) > 0 # don't pass empty text blocks. anthropic api raises errors. ): - assistant_content.append( - {"type": "text", "text": messages[msg_i]["content"]} + + _anthropic_text_content_element = { + "type": "text", + "text": messages[msg_i]["content"], + } + + anthropic_content_element = add_cache_control_to_content( + anthropic_content_element=_anthropic_text_content_element, + orignal_content_element=messages[msg_i], ) + assistant_content.append(anthropic_content_element) if messages[msg_i].get( "tool_calls", [] diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 36bcb6cc73..2eb2aef549 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -15,9 +15,10 @@ class AnthropicMessagesTool(TypedDict, total=False): input_schema: Required[dict] -class AnthropicMessagesTextParam(TypedDict): +class AnthropicMessagesTextParam(TypedDict, total=False): type: Literal["text"] text: str + cache_control: Optional[dict] class AnthropicMessagesToolUseParam(TypedDict): @@ -54,9 +55,10 @@ class AnthropicImageParamSource(TypedDict): data: str -class AnthropicMessagesImageParam(TypedDict): +class AnthropicMessagesImageParam(TypedDict, total=False): type: Literal["image"] source: AnthropicImageParamSource + cache_control: Optional[dict] class AnthropicMessagesToolResultContent(TypedDict): From c14a432607a640d79def0b8e3a7587a6023237b8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 14:59:46 -0700 Subject: [PATCH 47/60] test amnthropic prompt caching --- docs/my-website/docs/providers/anthropic.md | 46 +++++++++++++++++ litellm/tests/test_completion.py | 57 ++++++++++++++++++++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 2227b7a6b5..503140158c 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -208,6 +208,52 @@ print(response) +## **Prompt Caching** + +Use Anthropic Prompt Caching + + +[Relevant Anthropic API Docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) + + + + +```python +from litellm import completion + +resp = litellm.completion( + model="vertex_ai_beta/gemini-1.0-pro-001", + messages=[{"role": "user", "content": "Who won the world cup?"}], + tools=tools, + ) + +print(resp) +``` + + + +```bash +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gemini-pro", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ], + "tools": [ + { + "googleSearchRetrieval": {} + } + ] + }' + +``` 
+ + + + + ## Supported Models `Model Name` 👉 Human-friendly name. diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 4ea9ee3b0f..969805fb0a 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -3449,7 +3449,62 @@ def response_format_tests(response: litellm.ModelResponse): assert isinstance(response.usage.total_tokens, int) # type: ignore -@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.asyncio() +async def test_anthropic_api_prompt_caching_2(): + litellm.set_verbose = True + response = await litellm.acompletion( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + temperature=0.2, + max_tokens=10, + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, + ) + + print("response=", response) + + @pytest.mark.parametrize( "model", [ From 14bb0c670c82e88bcf31bea7241b83e2e1dad852 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 15:06:10 -0700 Subject: [PATCH 48/60] test test_anthropic_api_prompt_caching_basic --- litellm/llms/anthropic.py | 6 ++++++ litellm/tests/test_completion.py | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index fd4009b973..c9f7856e9b 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -759,6 +759,7 @@ class AnthropicChatCompletion(BaseLLM): ## CALCULATING USAGE prompt_tokens = completion_response["usage"]["input_tokens"] completion_tokens = completion_response["usage"]["output_tokens"] + _usage = completion_response["usage"] total_tokens = prompt_tokens + completion_tokens model_response.created = int(time.time()) @@ -768,6 +769,11 @@ class AnthropicChatCompletion(BaseLLM): completion_tokens=completion_tokens, total_tokens=total_tokens, ) + + if "cache_creation_input_tokens" in _usage: + usage["cache_creation_input_tokens"] = _usage["cache_creation_input_tokens"] + if "cache_read_input_tokens" in _usage: + usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"] setattr(model_response, "usage", usage) # type: ignore return model_response diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 969805fb0a..869339f786 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -3450,7 +3450,7 @@ def response_format_tests(response: litellm.ModelResponse): @pytest.mark.asyncio() -async def test_anthropic_api_prompt_caching_2(): +async def test_anthropic_api_prompt_caching_basic(): litellm.set_verbose = True response = await 
litellm.acompletion( model="anthropic/claude-3-5-sonnet-20240620", @@ -3504,6 +3504,14 @@ async def test_anthropic_api_prompt_caching_2(): print("response=", response) + assert "cache_read_input_tokens" in response.usage + assert "cache_creation_input_tokens" in response.usage + + # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl + assert (response.usage.cache_read_input_tokens > 0) or ( + response.usage.cache_creation_input_tokens > 0 + ) + @pytest.mark.parametrize( "model", From 83ee2e21d8572af9cb1e56f507b764bb53f22f7f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 15:18:11 -0700 Subject: [PATCH 49/60] pass cache_control in tool call --- litellm/llms/anthropic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index c9f7856e9b..19fca056bd 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -975,6 +975,8 @@ class AnthropicChatCompletion(BaseLLM): else: # assume openai tool call new_tool = tool["function"] new_tool["input_schema"] = new_tool.pop("parameters") # rename key + if "cache_control" in tool: + new_tool["cache_control"] = tool["cache_control"] anthropic_tools.append(new_tool) optional_params["tools"] = anthropic_tools From 6f864ca7c7bb844ed0437ed0a41ec8f2caf0f64c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 15:26:25 -0700 Subject: [PATCH 50/60] docs Caching - Continuing Multi-Turn Convo --- docs/my-website/docs/providers/anthropic.md | 120 ++++++++++++-------- 1 file changed, 74 insertions(+), 46 deletions(-) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 503140158c..80581209d0 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -208,52 +208,6 @@ print(response) -## **Prompt Caching** - -Use Anthropic Prompt Caching - - -[Relevant Anthropic API Docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) - - - - -```python -from litellm import completion - -resp = litellm.completion( - model="vertex_ai_beta/gemini-1.0-pro-001", - messages=[{"role": "user", "content": "Who won the world cup?"}], - tools=tools, - ) - -print(resp) -``` - - - -```bash -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "gemini-pro", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ], - "tools": [ - { - "googleSearchRetrieval": {} - } - ] - }' - -``` - - - - - ## Supported Models `Model Name` 👉 Human-friendly name. 
@@ -271,6 +225,80 @@ curl http://localhost:4000/v1/chat/completions \ | claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +## **Prompt Caching** + +Use Anthropic Prompt Caching + + +[Relevant Anthropic API Docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) + +### Caching - Large Context Caching + +### Caching - Tools definitions + +### Caching - Continuing Multi-Turn Convo + + + + + +```python +import litellm + +response = await litellm.acompletion( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) +``` + + + + + + ## Passing Extra Headers to Anthropic API Pass `extra_headers: dict` to `litellm.completion` From 30f24f69405b1badba0f2e898c7639cc67b08d2f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 15:27:20 -0700 Subject: [PATCH 51/60] fix bedrock test --- litellm/tests/test_completion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 869339f786..7dbdd31c0a 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -3526,6 +3526,7 @@ async def test_anthropic_api_prompt_caching_basic(): "cohere.command-text-v14", ], ) +@pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio async def test_completion_bedrock_httpx_models(sync_mode, model): litellm.set_verbose = True From 5591acba88e9490ad0745a5eddd5c9c09077e669 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 14 Aug 2024 15:44:17 -0700 Subject: [PATCH 52/60] fix(factory.py): handle assistant null content --- litellm/llms/prompt_templates/factory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index f39273c1a2..4e552b3b07 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1705,7 +1705,9 @@ def cohere_messages_pt_v2( for m in messages[msg_i]["content"]: if m.get("type", "") == "text": assistant_content += m["text"] - else: + elif messages[msg_i].get("content") is not None and isinstance( + messages[msg_i]["content"], str + ): assistant_content += messages[msg_i]["content"] if messages[msg_i].get( "tool_calls", [] From 4e2e8101c629cb17593008181adb02f969d4c73a Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Wed, 14 Aug 2024 15:47:57 
-0700 Subject: [PATCH 53/60] Use AZURE_API_VERSION as default azure openai version Without this change, the default version of the Azure OpenAI API is hardcoded in the code as an old version, `"2024-02-01"`. This change allows the user to set the default version of the Azure OpenAI API by setting the environment variable `AZURE_API_VERSION` or by using the command-line parameter `--api_version`. --- litellm/router_utils/client_initalization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/router_utils/client_initalization_utils.py b/litellm/router_utils/client_initalization_utils.py index 073a87901a..f396defb51 100644 --- a/litellm/router_utils/client_initalization_utils.py +++ b/litellm/router_utils/client_initalization_utils.py @@ -190,7 +190,7 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): if azure_ad_token.startswith("oidc/"): azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token) if api_version is None: - api_version = litellm.AZURE_DEFAULT_API_VERSION + api_version = os.getenv("AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION) if "gateway.ai.cloudflare.com" in api_base: if not api_base.endswith("/"): From 0290b15333a94cd541758a5ead1cdeae8ee22b72 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 16:19:14 -0700 Subject: [PATCH 54/60] add test for caching tool calls --- docs/my-website/docs/providers/anthropic.md | 44 ++++++++ litellm/tests/test_completion.py | 117 +++++++++++++++++++- 2 files changed, 160 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 80581209d0..a3bca9d567 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -236,6 +236,50 @@ Use Anthropic Prompt Caching ### Caching - Tools definitions + + + + +```python +import litellm + +response = await litellm.acompletion( + model="anthropic/claude-3-5-sonnet-20240620", + messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + "cache_control": {"type": "ephemeral"} + }, + } + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) +``` + + + + + + + ### Caching - Continuing Multi-Turn Convo diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 7dbdd31c0a..7f73d62945 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -14,7 +14,7 @@ sys.path.insert( ) # Adds the parent directory to the system path import os -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -3513,6 +3513,121 @@ async def test_anthropic_api_prompt_caching_basic(): ) +@pytest.mark.asyncio +async def test_litellm_acompletion_httpx_call(): + # Arrange: Set up the MagicMock for the httpx.AsyncClient + mock_response = AsyncMock() + + def return_val(): + return { + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "Hello!"}], + "model": "claude-3-5-sonnet-20240620", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": {"input_tokens": 12, "output_tokens": 6}, + } + + mock_response.json = return_val + + litellm.set_verbose = True + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + # Act: Call the litellm.acompletion function + response = await litellm.acompletion( + api_key="mock_api_key", + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + {"role": "user", "content": "What's the weather like in Boston today?"} + ], + tools=[ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + "cache_control": {"type": "ephemeral"}, + }, + } + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, + ) + + # Print what was called on the mock + print("call args=", mock_post.call_args) + + expected_url = "https://api.anthropic.com/v1/messages" + expected_headers = { + "accept": "application/json", + "content-type": "application/json", + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + "x-api-key": "mock_api_key", + } + + expected_json = { + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's the weather like in Boston today?", + } + ], + } + ], + "tools": [ + { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "cache_control": {"type": "ephemeral"}, + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + } + ], + "max_tokens": 4096, + "model": "claude-3-5-sonnet-20240620", + } + + mock_post.assert_called_once_with( + expected_url, json=expected_json, headers=expected_headers, timeout=600.0 + ) + + @pytest.mark.parametrize( "model", [ From b17437e7b19e2387b75b9caecf698083c84fa85e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 16:28:12 -0700 Subject: [PATCH 55/60] move claude prompt caching to diff file --- .../tests/test_anthropic_prompt_caching.py | 222 ++++++++++++++++++ litellm/tests/test_completion.py | 179 -------------- 2 files changed, 222 insertions(+), 179 deletions(-) create mode 100644 litellm/tests/test_anthropic_prompt_caching.py diff --git a/litellm/tests/test_anthropic_prompt_caching.py b/litellm/tests/test_anthropic_prompt_caching.py new file mode 100644 index 0000000000..8f57e96065 --- /dev/null +++ b/litellm/tests/test_anthropic_prompt_caching.py @@ -0,0 +1,222 @@ +import json +import os +import sys +import traceback + +from dotenv import load_dotenv + +load_dotenv() +import io +import os + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +import litellm +from litellm import RateLimitError, Timeout, completion, completion_cost, embedding +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.llms.prompt_templates.factory import anthropic_messages_pt + +# litellm.num_retries =3 +litellm.cache = None +litellm.success_callback = [] +user_message = "Write a short poem about the sky" +messages = [{"content": user_message, "role": "user"}] + + +def logger_fn(user_model_dict): + print(f"user_model_dict: {user_model_dict}") + + +@pytest.fixture(autouse=True) +def reset_callbacks(): + print("\npytest fixture - resetting callbacks") + litellm.success_callback = [] + litellm._async_success_callback = [] + litellm.failure_callback = [] + litellm.callbacks = [] + + +@pytest.mark.asyncio +async def test_litellm_anthropic_prompt_caching_tools(): + # Arrange: Set up the MagicMock for the httpx.AsyncClient + mock_response = AsyncMock() + + def return_val(): + return { + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "Hello!"}], + "model": "claude-3-5-sonnet-20240620", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": {"input_tokens": 12, "output_tokens": 6}, + } + + mock_response.json = return_val + + litellm.set_verbose = True + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + # Act: Call the litellm.acompletion function + response = await litellm.acompletion( + api_key="mock_api_key", + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + {"role": "user", "content": "What's the weather like in Boston today?"} + ], + tools=[ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + "cache_control": {"type": "ephemeral"}, + }, + } + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, + ) + + # Print what was called on the mock + print("call args=", mock_post.call_args) + + expected_url = "https://api.anthropic.com/v1/messages" + expected_headers = { + "accept": "application/json", + "content-type": "application/json", + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + "x-api-key": "mock_api_key", + } + + expected_json = { + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's the weather like in Boston today?", + } + ], + } + ], + "tools": [ + { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "cache_control": {"type": "ephemeral"}, + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + } + ], + "max_tokens": 4096, + "model": "claude-3-5-sonnet-20240620", + } + + mock_post.assert_called_once_with( + expected_url, json=expected_json, headers=expected_headers, timeout=600.0 + ) + + +@pytest.mark.asyncio() +async def test_anthropic_api_prompt_caching_basic(): + litellm.set_verbose = True + response = await litellm.acompletion( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. 
+ { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + temperature=0.2, + max_tokens=10, + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, + ) + + print("response=", response) + + assert "cache_read_input_tokens" in response.usage + assert "cache_creation_input_tokens" in response.usage + + # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl + assert (response.usage.cache_read_input_tokens > 0) or ( + response.usage.cache_creation_input_tokens > 0 + ) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 7f73d62945..b945d3d1e2 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -3449,185 +3449,6 @@ def response_format_tests(response: litellm.ModelResponse): assert isinstance(response.usage.total_tokens, int) # type: ignore -@pytest.mark.asyncio() -async def test_anthropic_api_prompt_caching_basic(): - litellm.set_verbose = True - response = await litellm.acompletion( - model="anthropic/claude-3-5-sonnet-20240620", - messages=[ - # System Message - { - "role": "system", - "content": [ - { - "type": "text", - "text": "Here is the full text of a complex legal agreement" - * 400, - "cache_control": {"type": "ephemeral"}, - } - ], - }, - # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - { - "role": "assistant", - "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", - }, - # The final turn is marked with cache-control, for continuing in followups. 
- { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - ], - temperature=0.2, - max_tokens=10, - extra_headers={ - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - }, - ) - - print("response=", response) - - assert "cache_read_input_tokens" in response.usage - assert "cache_creation_input_tokens" in response.usage - - # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl - assert (response.usage.cache_read_input_tokens > 0) or ( - response.usage.cache_creation_input_tokens > 0 - ) - - -@pytest.mark.asyncio -async def test_litellm_acompletion_httpx_call(): - # Arrange: Set up the MagicMock for the httpx.AsyncClient - mock_response = AsyncMock() - - def return_val(): - return { - "id": "msg_01XFDUDYJgAACzvnptvVoYEL", - "type": "message", - "role": "assistant", - "content": [{"type": "text", "text": "Hello!"}], - "model": "claude-3-5-sonnet-20240620", - "stop_reason": "end_turn", - "stop_sequence": None, - "usage": {"input_tokens": 12, "output_tokens": 6}, - } - - mock_response.json = return_val - - litellm.set_verbose = True - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - # Act: Call the litellm.acompletion function - response = await litellm.acompletion( - api_key="mock_api_key", - model="anthropic/claude-3-5-sonnet-20240620", - messages=[ - {"role": "user", "content": "What's the weather like in Boston today?"} - ], - tools=[ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["location"], - }, - "cache_control": {"type": "ephemeral"}, - }, - } - ], - extra_headers={ - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - }, - ) - - # Print what was called on the mock - print("call args=", mock_post.call_args) - - expected_url = "https://api.anthropic.com/v1/messages" - expected_headers = { - "accept": "application/json", - "content-type": "application/json", - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - "x-api-key": "mock_api_key", - } - - expected_json = { - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What's the weather like in Boston today?", - } - ], - } - ], - "tools": [ - { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "cache_control": {"type": "ephemeral"}, - "input_schema": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. 
San Francisco, CA", - }, - "unit": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["location"], - }, - } - ], - "max_tokens": 4096, - "model": "claude-3-5-sonnet-20240620", - } - - mock_post.assert_called_once_with( - expected_url, json=expected_json, headers=expected_headers, timeout=600.0 - ) - - @pytest.mark.parametrize( "model", [ From 7fc2657a26500b9a5fcbf3f703c4db8de11fe8ef Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 17:03:10 -0700 Subject: [PATCH 56/60] add test for large context in system message for anthropic --- litellm/llms/anthropic.py | 36 ++++--- .../tests/test_anthropic_prompt_caching.py | 99 +++++++++++++++++++ litellm/types/llms/anthropic.py | 8 +- litellm/types/llms/openai.py | 2 +- 4 files changed, 128 insertions(+), 17 deletions(-) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 19fca056bd..cf58163461 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -35,6 +35,7 @@ from litellm.types.llms.anthropic import ( AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseUsageBlock, + AnthropicSystemMessageContent, ContentBlockDelta, ContentBlockStart, ContentBlockStop, @@ -907,7 +908,7 @@ class AnthropicChatCompletion(BaseLLM): # Separate system prompt from rest of message system_prompt_indices = [] system_prompt = "" - system_prompt_dict = None + anthropic_system_message_list = None for idx, message in enumerate(messages): if message["role"] == "system": valid_content: bool = False @@ -915,19 +916,24 @@ class AnthropicChatCompletion(BaseLLM): system_prompt += message["content"] valid_content = True elif isinstance(message["content"], list): - for content in message["content"]: - system_prompt += content.get("text", "") - valid_content = True + for _content in message["content"]: + anthropic_system_message_content = ( + AnthropicSystemMessageContent( + type=_content.get("type"), + text=_content.get("text"), + ) + ) + if "cache_control" in _content: + anthropic_system_message_content["cache_control"] = ( + _content["cache_control"] + ) - # Handle Anthropic API context caching - if "cache_control" in message: - system_prompt_dict = [ - { - "cache_control": message["cache_control"], - "text": system_prompt, - "type": "text", - } - ] + if anthropic_system_message_list is None: + anthropic_system_message_list = [] + anthropic_system_message_list.append( + anthropic_system_message_content + ) + valid_content = True if valid_content: system_prompt_indices.append(idx) @@ -938,8 +944,8 @@ class AnthropicChatCompletion(BaseLLM): optional_params["system"] = system_prompt # Handling anthropic API Prompt Caching - if system_prompt_dict is not None: - optional_params["system"] = system_prompt_dict + if anthropic_system_message_list is not None: + optional_params["system"] = anthropic_system_message_list # Format rest of message according to anthropic guidelines try: messages = prompt_factory( diff --git a/litellm/tests/test_anthropic_prompt_caching.py b/litellm/tests/test_anthropic_prompt_caching.py index 8f57e96065..87bfc23f84 100644 --- a/litellm/tests/test_anthropic_prompt_caching.py +++ b/litellm/tests/test_anthropic_prompt_caching.py @@ -220,3 +220,102 @@ async def test_anthropic_api_prompt_caching_basic(): assert (response.usage.cache_read_input_tokens > 0) or ( response.usage.cache_creation_input_tokens > 0 ) + + +@pytest.mark.asyncio +async def test_litellm_anthropic_prompt_caching_system(): + # 
https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#prompt-caching-examples + # LArge Context Caching Example + mock_response = AsyncMock() + + def return_val(): + return { + "id": "msg_01XFDUDYJgAACzvnptvVoYEL", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "Hello!"}], + "model": "claude-3-5-sonnet-20240620", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": {"input_tokens": 12, "output_tokens": 6}, + } + + mock_response.json = return_val + + litellm.set_verbose = True + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + # Act: Call the litellm.acompletion function + response = await litellm.acompletion( + api_key="mock_api_key", + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, + ) + + # Print what was called on the mock + print("call args=", mock_post.call_args) + + expected_url = "https://api.anthropic.com/v1/messages" + expected_headers = { + "accept": "application/json", + "content-type": "application/json", + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + "x-api-key": "mock_api_key", + } + + expected_json = { + "system": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "what are the key terms and conditions in this agreement?", + } + ], + } + ], + "max_tokens": 4096, + "model": "claude-3-5-sonnet-20240620", + } + + mock_post.assert_called_once_with( + expected_url, json=expected_json, headers=expected_headers, timeout=600.0 + ) diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 2eb2aef549..f14aa20c73 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -94,6 +94,12 @@ class AnthropicMetadata(TypedDict, total=False): user_id: str +class AnthropicSystemMessageContent(TypedDict, total=False): + type: str + text: str + cache_control: Optional[dict] + + class AnthropicMessagesRequest(TypedDict, total=False): model: Required[str] messages: Required[ @@ -108,7 +114,7 @@ class AnthropicMessagesRequest(TypedDict, total=False): metadata: AnthropicMetadata stop_sequences: List[str] stream: bool - system: str + system: Union[str, List] temperature: float tool_choice: AnthropicMessagesToolChoice tools: List[AnthropicMessagesTool] diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 0d67d5d602..5d2c416f9c 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -361,7 +361,7 @@ class ChatCompletionToolMessage(TypedDict): class ChatCompletionSystemMessage(TypedDict, total=False): role: Required[Literal["system"]] - content: Required[str] + content: Required[Union[str, List]] name: str From f186da28be00fa24d1d0ad1466a72c59768204d8 
Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 17:07:51 -0700 Subject: [PATCH 57/60] docs add examples doing context caching anthropic sdk --- docs/my-website/docs/providers/anthropic.md | 80 ++++++++++++++++----- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index a3bca9d567..0520e4ef80 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -234,8 +234,52 @@ Use Anthropic Prompt Caching ### Caching - Large Context Caching +This example demonstrates basic Prompt Caching usage, caching the full text of the legal agreement as a prefix while keeping the user instruction uncached. + + + + +```python +response = await litellm.acompletion( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) + +``` + + + + + + ### Caching - Tools definitions +In this example, we demonstrate caching tool definitions. + +The cache_control parameter is placed on the final tool @@ -282,6 +326,11 @@ response = await litellm.acompletion( ### Caching - Continuing Multi-Turn Convo +In this example, we demonstrate how to use Prompt Caching in a multi-turn conversation. + +The cache_control parameter is placed on the system message to designate it as part of the static prefix. + +The conversation history (previous messages) is included in the messages array. The final turn is marked with cache-control, for continuing in followups. The second-to-last user message is marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. @@ -343,22 +392,7 @@ response = await litellm.acompletion( -## Passing Extra Headers to Anthropic API - -Pass `extra_headers: dict` to `litellm.completion` - -```python -from litellm import completion -messages = [{"role": "user", "content": "What is Anthropic?"}] -response = completion( - model="claude-3-5-sonnet-20240620", - messages=messages, - extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"} -) -``` -## Advanced - -## Usage - Function Calling +## **Function/Tool Calling** :::info @@ -547,6 +581,20 @@ resp = litellm.completion( print(f"\nResponse: {resp}") ``` +## **Passing Extra Headers to Anthropic API** + +Pass `extra_headers: dict` to `litellm.completion` + +```python +from litellm import completion +messages = [{"role": "user", "content": "What is Anthropic?"}] +response = completion( + model="claude-3-5-sonnet-20240620", + messages=messages, + extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"} +) +``` + ## Usage - "Assistant Pre-fill" You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array. 
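
For illustration, a minimal pre-fill sketch with the LiteLLM SDK — the model name, prompt, and pre-filled fragment are assumptions chosen for the example; Anthropic continues the reply from the partial assistant content, which is why this pattern is handy for constraining output format.

```python
import litellm

# Assumes ANTHROPIC_API_KEY is set in the environment.
messages = [
    {"role": "user", "content": "List three primary colors as a JSON array of strings."},
    # Pre-fill: Claude continues generating from this partial assistant turn.
    {"role": "assistant", "content": "["},
]

response = litellm.completion(
    model="claude-3-5-sonnet-20240620",  # any Anthropic chat model should behave the same
    messages=messages,
    max_tokens=50,
)

# The returned content continues after the pre-filled "[".
print(response.choices[0].message.content)
```
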
From 2019682a97da645d18b4b65f34c89484c8f81063 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 17:13:26 -0700 Subject: [PATCH 58/60] docs add examples with litellm proxy --- docs/my-website/docs/providers/anthropic.md | 139 +++++++++++++++++++- 1 file changed, 136 insertions(+), 3 deletions(-) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 0520e4ef80..85628e8f73 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -270,7 +270,45 @@ response = await litellm.acompletion( ``` - + + +```python +import openai +client = openai.AsyncOpenAI( + api_key="anything", # litellm proxy api key + base_url="http://0.0.0.0:4000" # litellm proxy base url +) + + +response = await client.chat.completions.create( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) + +``` @@ -318,7 +356,45 @@ response = await litellm.acompletion( ) ``` - + + +```python +import openai +client = openai.AsyncOpenAI( + api_key="anything", # litellm proxy api key + base_url="http://0.0.0.0:4000" # litellm proxy base url +) + +response = await client.chat.completions.create( + model="anthropic/claude-3-5-sonnet-20240620", + messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + "cache_control": {"type": "ephemeral"} + }, + } + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) +``` @@ -387,7 +463,64 @@ response = await litellm.acompletion( ) ``` - + + + +```python +import openai +client = openai.AsyncOpenAI( + api_key="anything", # litellm proxy api key + base_url="http://0.0.0.0:4000" # litellm proxy base url +) + +response = await client.chat.completions.create( + model="anthropic/claude-3-5-sonnet-20240620", + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. 
+ { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + extra_headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + }, +) +``` From 43738c7d9ed2f80ba740fc9df8e3259f7d3c95d8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 17:42:48 -0700 Subject: [PATCH 59/60] docs using proxy with context caaching anthropic --- docs/my-website/docs/providers/anthropic.md | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 85628e8f73..2a7804bfda 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -272,6 +272,16 @@ response = await litellm.acompletion( +:::info + +LiteLLM Proxy is OpenAI compatible + +This is an example using the OpenAI Python SDK sending a request to LiteLLM Proxy + +Assuming you have a model=`anthropic/claude-3-5-sonnet-20240620` on the [litellm proxy config.yaml](#usage-with-litellm-proxy) + +::: + ```python import openai client = openai.AsyncOpenAI( @@ -358,6 +368,16 @@ response = await litellm.acompletion( +:::info + +LiteLLM Proxy is OpenAI compatible + +This is an example using the OpenAI Python SDK sending a request to LiteLLM Proxy + +Assuming you have a model=`anthropic/claude-3-5-sonnet-20240620` on the [litellm proxy config.yaml](#usage-with-litellm-proxy) + +::: + ```python import openai client = openai.AsyncOpenAI( @@ -465,6 +485,15 @@ response = await litellm.acompletion( +:::info + +LiteLLM Proxy is OpenAI compatible + +This is an example using the OpenAI Python SDK sending a request to LiteLLM Proxy + +Assuming you have a model=`anthropic/claude-3-5-sonnet-20240620` on the [litellm proxy config.yaml](#usage-with-litellm-proxy) + +::: ```python import openai From 3da4b07140f993f7bcb036c700c21e399db11e76 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 14 Aug 2024 17:47:20 -0700 Subject: [PATCH 60/60] =?UTF-8?q?bump:=20version=201.43.12=20=E2=86=92=201?= =?UTF-8?q?.43.13?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 73fa657017..97703d7088 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.43.12" +version = "1.43.13" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.43.12" +version = "1.43.13" version_files = [ "pyproject.toml:^version" ]
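
The `cache_creation_input_tokens` / `cache_read_input_tokens` keys added to `usage` in the anthropic.py change above can be checked directly on the returned response. A minimal sketch, assuming `ANTHROPIC_API_KEY` is set and reusing the message shape from the prompt-caching tests (the repeated legal-agreement text mirrors the test fixture so the cached prefix is large):

```python
import asyncio

import litellm


async def main():
    response = await litellm.acompletion(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "Here is the full text of a complex legal agreement" * 400,
                        "cache_control": {"type": "ephemeral"},
                    }
                ],
            },
            {
                "role": "user",
                "content": "What are the key terms and conditions in this agreement?",
            },
        ],
        max_tokens=10,
        extra_headers={
            "anthropic-version": "2023-06-01",
            "anthropic-beta": "prompt-caching-2024-07-31",
        },
    )

    usage = response.usage
    # Present only when the Anthropic API reports them; the first call typically
    # creates the cache entry, later calls within the TTL read from it.
    print("cache_creation_input_tokens:", getattr(usage, "cache_creation_input_tokens", None))
    print("cache_read_input_tokens:", getattr(usage, "cache_read_input_tokens", None))


asyncio.run(main())
```
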