forked from phoenix/litellm-mirror
* ci(config.yml): add a 'check_code_quality' step Addresses https://github.com/BerriAI/litellm/issues/5991 * ci(config.yml): check why circle ci doesn't pick up this test * ci(config.yml): fix to run 'check_code_quality' tests * fix(__init__.py): fix unprotected import * fix(__init__.py): don't remove unused imports * build(ruff.toml): update ruff.toml to ignore unused imports * fix: fix: ruff + pyright - fix linting + type-checking errors * fix: fix linting errors * fix(lago.py): fix module init error * fix: fix linting errors * ci(config.yml): cd into correct dir for checks * fix(proxy_server.py): fix linting error * fix(utils.py): fix bare except causes ruff linting errors * fix: ruff - fix remaining linting errors * fix(clickhouse.py): use standard logging object * fix(__init__.py): fix unprotected import * fix: ruff - fix linting errors * fix: fix linting errors * ci(config.yml): cleanup code qa step (formatting handled in local_testing) * fix(_health_endpoints.py): fix ruff linting errors * ci(config.yml): just use ruff in check_code_quality pipeline for now * build(custom_guardrail.py): include missing file * style(embedding_handler.py): fix ruff check
54 lines
1.5 KiB
Python
54 lines
1.5 KiB
Python
import datetime
|
|
|
|
import httpx
|
|
import openai
|
|
|
|
# Connection settings for the LiteLLM proxy -- edit these to match your setup
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

# OpenAI SDK client whose requests are sent to the proxy base URL
client = openai.OpenAI(
    base_url=LITELLM_BASE_URL,
    api_key=LITELLM_PROXY_API_KEY,
)
# Plain HTTP client (30 second timeout) used for the raw cachedContents request
httpx_client = httpx.Client(timeout=30)
|
|
|
|
################################
# First create a cachedContents object
# POST the content to be cached to the proxy's /vertex-ai/cachedContents route,
# authenticating with the proxy API key as a Bearer token. The "name" field of
# the JSON reply is kept so the chat completion request can reference it.
print("creating cached content")
create_cache = httpx_client.post(
    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
    json={
        "model": "gemini-1.5-pro-001",
        "contents": [
            {
                "role": "user",
                "parts": [
                    {
                        # The sentence is repeated 4000 times to build a large
                        # payload (presumably so there is enough content to
                        # cache -- confirm against the provider's limits).
                        "text": "This is sample text to demonstrate explicit caching."
                        * 4000
                    }
                ],
            }
        ],
    },
)
print("response from create_cache", create_cache)
# Fail fast on a 4xx/5xx reply; otherwise a failed request only shows up later
# as a JSON decode error or an unexplained KeyError on "name".
create_cache.raise_for_status()
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]
|
|
|
|
#################################
# Use the `cachedContents` object in your /chat/completions
# The cached content is referenced by name through `extra_body`, which the
# OpenAI SDK adds to the request body sent to the proxy.
response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
            "content": "what is the sample text about?",
        },
    ],
    # Sampling temperature is a number in the chat completions API, so pass a
    # float rather than the string "0.7".
    temperature=0.7,
    extra_body={"cached_content": cached_content_name},  # 👈 key change
)

print("response from proxy", response)
|