Merge branch 'main' into litellm_add_bedrock_guardrails

commit c23cf18a70
Author: Ishaan Jaff
Date: 2024-08-22 17:28:49 -07:00 (committed by GitHub)

14 changed files with 173 additions and 22 deletions

View file

@@ -0,0 +1,3 @@
+Efficient, consistent and secure library for querying structured data with natural language. Query any database with over 100 LLMs ❤️ 🚅.
+🔗 [GitHub](https://github.com/deepsense-ai/db-ally)

View file

@@ -727,6 +727,7 @@ general_settings:
 "completion_model": "string",
 "disable_spend_logs": "boolean", # turn off writing each transaction to the db
 "disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint)
+"disable_retry_on_max_parallel_request_limit_error": "boolean", # turn off retries when the max parallel request limit is reached
 "disable_reset_budget": "boolean", # turn off the reset-budget scheduled task
 "disable_adding_master_key_hash_to_db": "boolean", # turn off storing the master key hash in the db, for spend tracking
 "enable_jwt_auth": "boolean", # allow the proxy admin to auth via JWT tokens with 'litellm_proxy_admin' in claims
@@ -751,7 +752,8 @@ general_settings:
 },
 "otel": true,
 "custom_auth": "string",
-"max_parallel_requests": 0,
+"max_parallel_requests": 0, # the max parallel requests allowed per deployment
+"global_max_parallel_requests": 0, # the max parallel requests allowed across the whole proxy
 "infer_model_from_keys": true,
 "background_health_checks": true,
 "health_check_interval": 300,

View file

@@ -68,6 +68,15 @@ http://localhost:4000/metrics
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
+### Request Latency Metrics
+| Metric Name | Description |
+|-------------|-------------|
+| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to the LiteLLM Proxy Server - tracked for labels `litellm_call_id`, `model` |
+| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model` |
 ### LLM API / Provider Metrics
 | Metric Name | Description |
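
As a hedged sketch of how these metrics might be collected, a Prometheus scrape config pointing at the `/metrics` endpoint above could look like this (the job name and interval are assumptions, not part of the diff):

```yaml
# prometheus.yml -- sketch, not an official LiteLLM config
scrape_configs:
  - job_name: litellm-proxy          # assumed job name
    scrape_interval: 30s             # assumed interval
    metrics_path: /metrics           # endpoint shown above
    static_configs:
      - targets: ["localhost:4000"]  # proxy address from the docs above
```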

View file

@@ -292,6 +292,7 @@ const sidebars = {
 items: [
 "projects/Docq.AI",
 "projects/OpenInterpreter",
+"projects/dbally",
 "projects/FastREPL",
 "projects/PROMPTMETHEUS",
 "projects/Codium PR Agent",