mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
feat(vertex_ai_context_caching.py): support making context caching calls to vertex ai in a normal chat completion call (anthropic caching format)
Closes https://github.com/BerriAI/litellm/issues/5213
This commit is contained in:
parent c503ff435e
commit b0cc1df2d6

16 changed files with 594 additions and 90 deletions
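
For context, a minimal sketch of the caller-side request shape this feature targets: a normal chat completion call that carries the anthropic cache_control marker on a message destined for a Vertex AI model. The model name, message text, and parameters below are illustrative assumptions, not taken from this commit.

# Hedged sketch, not part of the diff. Assumes litellm.completion and the
# "vertex_ai/" model prefix; the exact model name is an assumption.
import litellm

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro-001",  # assumed Vertex AI model name
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "<large shared context to cache>",
                    # anthropic caching format recognised by is_cached_message()
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        },
        {"role": "user", "content": "Summarize the cached context."},
    ],
)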
@@ -69,6 +69,7 @@ from litellm.litellm_core_utils.redact_messages import (
 from litellm.litellm_core_utils.token_counter import get_modified_max_tokens
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.types.llms.openai import (
+    AllMessageValues,
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
 )
@@ -11549,3 +11550,25 @@ class ModelResponseListIterator:
 class CustomModelResponseIterator(Iterable):
     def __init__(self) -> None:
         super().__init__()
+
+
+def is_cached_message(message: AllMessageValues) -> bool:
+    """
+    Returns true, if message is marked as needing to be cached.
+
+    Used for anthropic/gemini context caching.
+
+    Follows the anthropic format {"cache_control": {"type": "ephemeral"}}
+    """
+    if message["content"] is None or isinstance(message["content"], str):
+        return False
+
+    for content in message["content"]:
+        if (
+            content["type"] == "text"
+            and content.get("cache_control") is not None
+            and content["cache_control"]["type"] == "ephemeral"  # type: ignore
+        ):
+            return True
+
+    return False
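
A small hedged sketch of how the new helper behaves: it returns True only when a message's content is a list containing a text block with an ephemeral cache_control marker, and False for plain string content. The import path and sample messages are assumptions for illustration.

# Hedged usage sketch; assumes is_cached_message is importable from litellm.utils
# (import path is an assumption based on this diff).
from litellm.utils import is_cached_message

cached_message = {
    "role": "system",
    "content": [
        {
            "type": "text",
            "text": "Here is the full contents of a large document...",
            "cache_control": {"type": "ephemeral"},
        }
    ],
}
plain_message = {"role": "user", "content": "What does the document say about pricing?"}

assert is_cached_message(cached_message) is True
assert is_cached_message(plain_message) is False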