Merge branch 'main' into litellm_add_bedrock_guardrails

commit c23cf18a70
Author: Ishaan Jaff
Date: 2024-08-22 17:28:49 -07:00 (committed by GitHub)

14 changed files with 173 additions and 22 deletions

View file

@@ -0,0 +1,3 @@
+Efficient, consistent and secure library for querying structured data with natural language. Query any database with over 100 LLMs ❤️ 🚅.
+🔗 [GitHub](https://github.com/deepsense-ai/db-ally)

View file

@@ -727,6 +727,7 @@ general_settings:
 "completion_model": "string",
 "disable_spend_logs": "boolean", # turn off writing each transaction to the db
 "disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint)
+"disable_retry_on_max_parallel_request_limit_error": "boolean", # turn off retries when the max parallel request limit is reached
 "disable_reset_budget": "boolean", # turn off the reset-budget scheduled task
 "disable_adding_master_key_hash_to_db": "boolean", # turn off storing the master key hash in the db, for spend tracking
 "enable_jwt_auth": "boolean", # allow the proxy admin to auth via JWT tokens with 'litellm_proxy_admin' in claims
@@ -751,7 +752,8 @@ general_settings:
 },
 "otel": true,
 "custom_auth": "string",
-"max_parallel_requests": 0,
+"max_parallel_requests": 0, # the max parallel requests allowed per deployment
+"global_max_parallel_requests": 0, # the max parallel requests allowed across the whole proxy
 "infer_model_from_keys": true,
 "background_health_checks": true,
 "health_check_interval": 300,

View file

@@ -68,6 +68,15 @@ http://localhost:4000/metrics
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
+### Request Latency Metrics
+| Metric Name | Description |
+|-------------|-------------|
+| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to the LiteLLM Proxy Server - tracked for labels `litellm_call_id`, `model` |
+| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model` |
 ### LLM API / Provider Metrics
 | Metric Name | Description |
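
As a hedged sketch of how these metrics might be collected, a Prometheus scrape config pointing at the `/metrics` endpoint above could look like this (the job name and interval are assumptions, not part of the diff):

```yaml
# prometheus.yml -- sketch, not an official LiteLLM config
scrape_configs:
  - job_name: litellm-proxy          # assumed job name
    scrape_interval: 30s             # assumed interval
    metrics_path: /metrics           # endpoint shown above
    static_configs:
      - targets: ["localhost:4000"]  # proxy address from the docs above
```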

View file

@@ -292,6 +292,7 @@ const sidebars = {
 items: [
 "projects/Docq.AI",
 "projects/OpenInterpreter",
+"projects/dbally",
 "projects/FastREPL",
 "projects/PROMPTMETHEUS",
 "projects/Codium PR Agent",