From 2be9c185e51a060bd61fd641b962561b050f6b87 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 8 Aug 2024 11:35:41 -0700
Subject: [PATCH] add example using litellm proxy with gemini context caching

---
 .../tests/test_gemini_context_caching.py | 56 +++++++------------
 1 file changed, 20 insertions(+), 36 deletions(-)

diff --git a/litellm/proxy/tests/test_gemini_context_caching.py b/litellm/proxy/tests/test_gemini_context_caching.py
index 6ee143dba1..91ec1723cf 100644
--- a/litellm/proxy/tests/test_gemini_context_caching.py
+++ b/litellm/proxy/tests/test_gemini_context_caching.py
@@ -1,54 +1,38 @@
 import datetime
 
-import httpx
 import openai
+import vertexai
+from vertexai.generative_models import Content, Part
+from vertexai.preview import caching
+from vertexai.preview.generative_models import GenerativeModel
 
-# Set Litellm proxy variables here
-LITELLM_BASE_URL = "http://0.0.0.0:4000"
-LITELLM_PROXY_API_KEY = "sk-1234"
-
-client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
-httpx_client = httpx.Client(timeout=30)
-
-################################
-# First create a cachedContents object
+client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+vertexai.init(project="adroit-crow-413218", location="us-central1")
 print("creating cached content")
-create_cache = httpx_client.post(
-    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
-    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
-    json={
-        "model": "gemini-1.5-pro-001",
-        "contents": [
-            {
-                "role": "user",
-                "parts": [
-                    {
-                        "text": "This is sample text to demonstrate explicit caching."
-                        * 4000
-                    }
-                ],
-            }
-        ],
-    },
+contents_here: list[Content] = [
+    Content(role="user", parts=[Part.from_text("huge string of text here" * 10000)])
+]
+cached_content = caching.CachedContent.create(
+    model_name="gemini-1.5-pro-001",
+    contents=contents_here,
+    expire_time=datetime.datetime(2024, 8, 10),
 )
-print("response from create_cache", create_cache)
-create_cache_response = create_cache.json()
-print("json from create_cache", create_cache_response)
-cached_content_name = create_cache_response["name"]
 
-#################################
-# Use the `cachedContents` object in your /chat/completions
+created_Caches = caching.CachedContent.list()
+
+print("created_Caches contents=", created_Caches)
+
 response = client.chat.completions.create(  # type: ignore
     model="gemini-1.5-pro-001",
     max_tokens=8192,
     messages=[
         {
             "role": "user",
-            "content": "what is the sample text about?",
+            "content": "quote all everything above this message",
         },
     ],
-    temperature="0.7",
-    extra_body={"cached_content": cached_content_name},  # 👈 key change
+    temperature=0.7,
+    extra_body={"cached_content": cached_content.resource_name},
 )
 
 print("response from proxy", response)