Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
Litellm dev readd prompt caching (#7299)
* fix(router.py): re-add saving model id on prompt caching valid successful deployment
* fix(router.py): introduce optional pre_call_checks; isolate prompt caching logic in a separate file
* fix(prompt_caching_deployment_check.py): fix import
* fix(router.py): new 'async_filter_deployments' event hook allows custom logger to filter deployments returned to routing strategy
* feat(prompt_caching_deployment_check.py): initial working commit of prompt caching based routing
* fix(cooldown_callbacks.py): fix linting error
* fix(budget_limiter.py): move budget logger to async_filter_deployment hook
* test: add unit test
* test(test_router_helper_utils.py): add unit testing
* fix(budget_limiter.py): fix linting errors
* docs(config_settings.md): add 'optional_pre_call_checks' to router_settings param docs
This commit is contained in:
parent d214d3cc3f
commit 2f08341a08
12 changed files with 276 additions and 74 deletions
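The last bullet in the commit message points at a new `optional_pre_call_checks` entry under `router_settings`. Below is a minimal sketch of enabling it when constructing the Router in Python; the parameter itself comes from the commit message, but the check name `"prompt_caching"` and the model entries are illustrative assumptions, not taken from this diff.

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-3-5-sonnet",
            "litellm_params": {"model": "anthropic/claude-3-5-sonnet-20241022"},
        }
    ],
    # Assumed check name: route follow-up requests back to the deployment that
    # served the cache-warming request, so the provider-side prompt cache is hit.
    optional_pre_call_checks=["prompt_caching"],
)
```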
@@ -172,22 +172,3 @@ class PromptCachingCache:
        cache_key = PromptCachingCache.get_prompt_caching_cache_key(messages, tools)
        return self.cache.get_cache(cache_key)

    async def async_get_prompt_caching_deployment(
        self,
        router: litellm_router,
        messages: Optional[List[AllMessageValues]],
        tools: Optional[List[ChatCompletionToolParam]],
    ) -> Optional[dict]:
        model_id_dict = await self.async_get_model_id(
            messages=messages,
            tools=tools,
        )

        if model_id_dict is not None:
            healthy_deployment_pydantic_obj = router.get_deployment(
                model_id=model_id_dict["model_id"]
            )
            if healthy_deployment_pydantic_obj is not None:
                return healthy_deployment_pydantic_obj.model_dump(exclude_none=True)
        return None
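The hunk above is the lookup half of prompt-caching-aware routing: it resolves a previously saved model id back into a healthy deployment dict. A rough sketch of how the `async_filter_deployments` hook named in the commit message could consume it follows; the class name, hook signature, and deployment-dict shape are assumptions for illustration, only `async_get_prompt_caching_deployment` is taken from this commit.

```python
from typing import List, Optional


class PromptCachingDeploymentCheck:
    """Hypothetical pre-call check built around PromptCachingCache."""

    def __init__(self, cache):
        self.cache = cache  # a PromptCachingCache instance

    async def async_filter_deployments(
        self,
        router,  # the litellm Router instance
        healthy_deployments: List[dict],
        messages: Optional[list] = None,
        tools: Optional[list] = None,
    ) -> List[dict]:
        # Look up the deployment that served the original (cache-warming) request.
        cached = await self.cache.async_get_prompt_caching_deployment(
            router=router, messages=messages, tools=tools
        )
        if cached is None:
            # No prior request recorded for this prompt prefix; don't filter.
            return healthy_deployments
        cached_id = cached.get("model_info", {}).get("id")
        # Pin routing to that deployment if it is still healthy, so the
        # provider-side prompt cache can be reused; otherwise fall back.
        pinned = [
            d
            for d in healthy_deployments
            if d.get("model_info", {}).get("id") == cached_id
        ]
        return pinned or healthy_deployments
```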