mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
Litellm router max depth (#6501)
* feat(router.py): add check for max fallback depth Prevent infinite loop for fallbacks Closes https://github.com/BerriAI/litellm/issues/6498 * test: update test * (fix) Prometheus - Log Postgres DB latency, status on prometheus (#6484) * fix logging DB fails on prometheus * unit testing log to otel wrapper * unit testing for service logger + prometheus * use LATENCY buckets for service logging * fix service logging * docs clarify vertex vs gemini * (router_strategy/) ensure all async functions use async cache methods (#6489) * fix router strat * use async set / get cache in router_strategy * add coverage for router strategy * fix imports * fix batch_get_cache * use async methods for least busy * fix least busy use async methods * fix test_dual_cache_increment * test async_get_available_deployment when routing_strategy="least-busy" * (fix) proxy - fix when `STORE_MODEL_IN_DB` should be set (#6492) * set store_model_in_db at the top * correctly use store_model_in_db global * (fix) `PrometheusServicesLogger` `_get_metric` should return metric in Registry (#6486) * fix logging DB fails on prometheus * unit testing log to otel wrapper * unit testing for service logger + prometheus * use LATENCY buckets for service logging * fix service logging * fix _get_metric in prom services logger * add clear doc string * unit testing for prom service logger * bump: version 1.51.0 → 1.51.1 * Add `azure/gpt-4o-mini-2024-07-18` to model_prices_and_context_window.json (#6477) * Update utils.py (#6468) Fixed missing keys * (perf) Litellm redis router fix - ~100ms improvement (#6483) * docs(exception_mapping.md): add missing exception types Fixes https://github.com/Aider-AI/aider/issues/2120#issuecomment-2438971183 * fix(main.py): register custom model pricing with specific key Ensure custom model pricing is registered to the specific model+provider key combination * test: make testing more robust for custom pricing * fix(redis_cache.py): instrument otel logging for sync redis calls 
ensures complete coverage for all redis cache calls * refactor: pass parent_otel_span for redis caching calls in router allows for more observability into what calls are causing latency issues * test: update tests with new params * refactor: ensure e2e otel tracing for router * refactor(router.py): add more otel tracing across router catch all latency issues for router requests * fix: fix linting error * fix(router.py): fix linting error * fix: fix test * test: fix tests * fix(dual_cache.py): pass ttl to redis cache * fix: fix param * perf(cooldown_cache.py): improve cooldown cache, to store cache results in memory for 5s, prevents redis call from being made on each request reduces 100ms latency per call with caching enabled on router * fix: fix test * fix(cooldown_cache.py): handle if a result is None * fix(cooldown_cache.py): add debug statements * refactor(dual_cache.py): move to using an in-memory check for batch get cache, to prevent redis from being hit for every call * fix(cooldown_cache.py): fix linting error * build: merge main --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: vibhanshu-ob <115142120+vibhanshu-ob@users.noreply.github.com>
This commit is contained in:
parent
1e403a8447
commit
56e9047818
11 changed files with 165 additions and 235 deletions
104
litellm/main.py
104
litellm/main.py
|
@ -3236,62 +3236,10 @@ def embedding( # noqa: PLR0915
|
|||
"encoding_format",
|
||||
]
|
||||
litellm_params = [
|
||||
"metadata",
|
||||
"aembedding",
|
||||
"caching",
|
||||
"mock_response",
|
||||
"api_key",
|
||||
"api_version",
|
||||
"api_base",
|
||||
"force_timeout",
|
||||
"logger_fn",
|
||||
"verbose",
|
||||
"custom_llm_provider",
|
||||
"litellm_logging_obj",
|
||||
"litellm_call_id",
|
||||
"use_client",
|
||||
"id",
|
||||
"fallbacks",
|
||||
"azure",
|
||||
"headers",
|
||||
"model_list",
|
||||
"num_retries",
|
||||
"context_window_fallback_dict",
|
||||
"retry_policy",
|
||||
"roles",
|
||||
"final_prompt_value",
|
||||
"bos_token",
|
||||
"eos_token",
|
||||
"request_timeout",
|
||||
"complete_response",
|
||||
"self",
|
||||
"client",
|
||||
"rpm",
|
||||
"tpm",
|
||||
"max_parallel_requests",
|
||||
"input_cost_per_token",
|
||||
"output_cost_per_token",
|
||||
"input_cost_per_second",
|
||||
"output_cost_per_second",
|
||||
"hf_model_name",
|
||||
"proxy_server_request",
|
||||
"model_info",
|
||||
"preset_cache_key",
|
||||
"caching_groups",
|
||||
"ttl",
|
||||
"cache",
|
||||
"no-log",
|
||||
"region_name",
|
||||
"allowed_model_region",
|
||||
"model_config",
|
||||
"cooldown_time",
|
||||
"tags",
|
||||
"azure_ad_token_provider",
|
||||
"tenant_id",
|
||||
"client_id",
|
||||
"client_secret",
|
||||
"extra_headers",
|
||||
]
|
||||
] + all_litellm_params
|
||||
|
||||
default_params = openai_params + litellm_params
|
||||
non_default_params = {
|
||||
k: v for k, v in kwargs.items() if k not in default_params
|
||||
|
@ -4489,53 +4437,7 @@ def image_generation( # noqa: PLR0915
|
|||
"size",
|
||||
"style",
|
||||
]
|
||||
litellm_params = [
|
||||
"metadata",
|
||||
"aimg_generation",
|
||||
"caching",
|
||||
"mock_response",
|
||||
"api_key",
|
||||
"api_version",
|
||||
"api_base",
|
||||
"force_timeout",
|
||||
"logger_fn",
|
||||
"verbose",
|
||||
"custom_llm_provider",
|
||||
"litellm_logging_obj",
|
||||
"litellm_call_id",
|
||||
"use_client",
|
||||
"id",
|
||||
"fallbacks",
|
||||
"azure",
|
||||
"headers",
|
||||
"model_list",
|
||||
"num_retries",
|
||||
"context_window_fallback_dict",
|
||||
"retry_policy",
|
||||
"roles",
|
||||
"final_prompt_value",
|
||||
"bos_token",
|
||||
"eos_token",
|
||||
"request_timeout",
|
||||
"complete_response",
|
||||
"self",
|
||||
"client",
|
||||
"rpm",
|
||||
"tpm",
|
||||
"max_parallel_requests",
|
||||
"input_cost_per_token",
|
||||
"output_cost_per_token",
|
||||
"hf_model_name",
|
||||
"proxy_server_request",
|
||||
"model_info",
|
||||
"preset_cache_key",
|
||||
"caching_groups",
|
||||
"ttl",
|
||||
"cache",
|
||||
"region_name",
|
||||
"allowed_model_region",
|
||||
"model_config",
|
||||
]
|
||||
litellm_params = all_litellm_params
|
||||
default_params = openai_params + litellm_params
|
||||
non_default_params = {
|
||||
k: v for k, v in kwargs.items() if k not in default_params
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue