mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
add example using litellm proxy with gemini context caching
This commit is contained in:
parent
7d37ff8070
commit
2be9c185e5
1 changed file with 20 additions and 36 deletions
|
@@ -1,54 +1,38 @@
|
|||
import datetime
|
||||
|
||||
import httpx
|
||||
import openai
|
||||
import vertexai
|
||||
from vertexai.generative_models import Content, Part
|
||||
from vertexai.preview import caching
|
||||
from vertexai.preview.generative_models import GenerativeModel
|
||||
|
||||
# Set Litellm proxy variables here
|
||||
LITELLM_BASE_URL = "http://0.0.0.0:4000"
|
||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||
|
||||
client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
|
||||
httpx_client = httpx.Client(timeout=30)
|
||||
|
||||
################################
|
||||
# First create a cachedContents object
|
||||
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||
vertexai.init(project="adroit-crow-413218", location="us-central1")
|
||||
print("creating cached content")
|
||||
create_cache = httpx_client.post(
|
||||
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
|
||||
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
|
||||
json={
|
||||
"model": "gemini-1.5-pro-001",
|
||||
"contents": [
|
||||
{
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{
|
||||
"text": "This is sample text to demonstrate explicit caching."
|
||||
* 4000
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
},
|
||||
contents_here: list[Content] = [
|
||||
Content(role="user", parts=[Part.from_text("huge string of text here" * 10000)])
|
||||
]
|
||||
cached_content = caching.CachedContent.create(
|
||||
model_name="gemini-1.5-pro-001",
|
||||
contents=contents_here,
|
||||
expire_time=datetime.datetime(2024, 8, 10),
|
||||
)
|
||||
print("response from create_cache", create_cache)
|
||||
create_cache_response = create_cache.json()
|
||||
print("json from create_cache", create_cache_response)
|
||||
cached_content_name = create_cache_response["name"]
|
||||
|
||||
#################################
|
||||
# Use the `cachedContents` object in your /chat/completions
|
||||
created_Caches = caching.CachedContent.list()
|
||||
|
||||
print("created_Caches contents=", created_Caches)
|
||||
|
||||
response = client.chat.completions.create( # type: ignore
|
||||
model="gemini-1.5-pro-001",
|
||||
max_tokens=8192,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "what is the sample text about?",
|
||||
"content": "quote all everything above this message",
|
||||
},
|
||||
],
|
||||
temperature="0.7",
|
||||
extra_body={"cached_content": cached_content_name}, # 👈 key change
|
||||
temperature=0.7,
|
||||
extra_body={"cached_content": cached_content.resource_name},
|
||||
)
|
||||
|
||||
print("response from proxy", response)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue