From 2be9c185e51a060bd61fd641b962561b050f6b87 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 8 Aug 2024 11:35:41 -0700
Subject: [PATCH] add example using litellm proxy with gemini context caching

---
 .../tests/test_gemini_context_caching.py | 56 +++++++------------
 1 file changed, 20 insertions(+), 36 deletions(-)

diff --git a/litellm/proxy/tests/test_gemini_context_caching.py b/litellm/proxy/tests/test_gemini_context_caching.py
index 6ee143dba1..91ec1723cf 100644
--- a/litellm/proxy/tests/test_gemini_context_caching.py
+++ b/litellm/proxy/tests/test_gemini_context_caching.py
@@ -1,54 +1,38 @@
 import datetime
 
-import httpx
 import openai
+import vertexai
+from vertexai.generative_models import Content, Part
+from vertexai.preview import caching
+from vertexai.preview.generative_models import GenerativeModel
 
-# Set Litellm proxy variables here
-LITELLM_BASE_URL = "http://0.0.0.0:4000"
-LITELLM_PROXY_API_KEY = "sk-1234"
-
-client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
-httpx_client = httpx.Client(timeout=30)
-
-################################
-# First create a cachedContents object
+client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+vertexai.init(project="adroit-crow-413218", location="us-central1")
 print("creating cached content")
-create_cache = httpx_client.post(
-    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
-    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
-    json={
-        "model": "gemini-1.5-pro-001",
-        "contents": [
-            {
-                "role": "user",
-                "parts": [
-                    {
-                        "text": "This is sample text to demonstrate explicit caching."
-                        * 4000
-                    }
-                ],
-            }
-        ],
-    },
+contents_here: list[Content] = [
+    Content(role="user", parts=[Part.from_text("huge string of text here" * 10000)])
+]
+cached_content = caching.CachedContent.create(
+    model_name="gemini-1.5-pro-001",
+    contents=contents_here,
+    expire_time=datetime.datetime(2024, 8, 10),
 )
-print("response from create_cache", create_cache)
-create_cache_response = create_cache.json()
-print("json from create_cache", create_cache_response)
-cached_content_name = create_cache_response["name"]
 
-#################################
-# Use the `cachedContents` object in your /chat/completions
+created_Caches = caching.CachedContent.list()
+
+print("created_Caches contents=", created_Caches)
+
 response = client.chat.completions.create(  # type: ignore
     model="gemini-1.5-pro-001",
     max_tokens=8192,
     messages=[
         {
             "role": "user",
-            "content": "what is the sample text about?",
+            "content": "quote all everything above this message",
         },
     ],
-    temperature="0.7",
-    extra_body={"cached_content": cached_content_name},  # 👈 key change
+    temperature=0.7,
+    extra_body={"cached_content": cached_content.resource_name},
 )
 
 print("response from proxy", response)