add example using litellm proxy with gemini context caching

2025-04-27 11:43:54 +00:00 · 2024-08-08 11:35:41 -07:00 · 2024-08-08 11:35:41 -07:00 · 2be9c185e5
commit 2be9c185e5
parent 7d37ff8070
1 changed file with 20 additions and 36 deletions
--- a/litellm/proxy/tests/test_gemini_context_caching.py
+++ b/litellm/proxy/tests/test_gemini_context_caching.py
@@ -1,54 +1,38 @@
 import datetime

-import httpx
 import openai
+import vertexai
+from vertexai.generative_models import Content, Part
+from vertexai.preview import caching
+from vertexai.preview.generative_models import GenerativeModel

-# Set Litellm proxy variables here
-LITELLM_BASE_URL = "http://0.0.0.0:4000"
-LITELLM_PROXY_API_KEY = "sk-1234"
-
-client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
-httpx_client = httpx.Client(timeout=30)
-
-################################
-# First create a cachedContents object
+client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+vertexai.init(project="adroit-crow-413218", location="us-central1")
 print("creating cached content")
-create_cache = httpx_client.post(
-    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
-    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
-    json={
-        "model": "gemini-1.5-pro-001",
-        "contents": [
-            {
-                "role": "user",
-                "parts": [
-                    {
-                        "text": "This is sample text to demonstrate explicit caching."
-                        * 4000
-                    }
-                ],
-            }
-        ],
-    },
+contents_here: list[Content] = [
+    Content(role="user", parts=[Part.from_text("huge string of text here" * 10000)])
+]
+cached_content = caching.CachedContent.create(
+    model_name="gemini-1.5-pro-001",
+    contents=contents_here,
+    expire_time=datetime.datetime(2024, 8, 10),
 )
-print("response from create_cache", create_cache)
-create_cache_response = create_cache.json()
-print("json from create_cache", create_cache_response)
-cached_content_name = create_cache_response["name"]

-#################################
-# Use the `cachedContents` object in your /chat/completions
+created_Caches = caching.CachedContent.list()
+
+print("created_Caches contents=", created_Caches)
+
 response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
-            "content": "what is the sample text about?",
+            "content": "quote all everything above this message",
        },
    ],
-    temperature="0.7",
-    extra_body={"cached_content": cached_content_name},  # 👈 key change
+    temperature=0.7,
+    extra_body={"cached_content": cached_content.resource_name},
 )

 print("response from proxy", response)