Merge branch 'BerriAI:main' into ollama-image-handling

This commit is contained in:
frob 2024-05-01 22:29:37 +02:00 committed by GitHub
commit 465f491e7f
52 changed files with 1148 additions and 426 deletions

View file

@ -11,40 +11,37 @@ You can find the Dockerfile to build litellm proxy [here](https://github.com/Ber
<TabItem value="basic" label="Basic">
**Step 1. Create a file called `litellm_config.yaml`**
### Step 1. CREATE config.yaml
Example `litellm_config.yaml` (the `os.environ/` prefix means litellm will read `AZURE_API_BASE` from the env)
```yaml
model_list:
Example `litellm_config.yaml`
```yaml
model_list:
- model_name: azure-gpt-3.5
litellm_params:
model: azure/<your-azure-model-deployment>
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE # runs os.getenv("AZURE_API_BASE")
api_key: os.environ/AZURE_API_KEY # runs os.getenv("AZURE_API_KEY")
api_version: "2023-07-01-preview"
```
```
**Step 2. Run litellm docker image**
See the latest available ghcr docker image here:
https://github.com/berriai/litellm/pkgs/container/litellm
Your litellm config file should be named `litellm_config.yaml` and live in the directory where you run this command.
The `-v` flag mounts that file into the container
### Step 2. RUN Docker Image
Pass `AZURE_API_KEY` and `AZURE_API_BASE`, since we referenced them in Step 1
```shell
docker run \
```shell
docker run \
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
-e AZURE_API_KEY=d6*********** \
-e AZURE_API_BASE=https://openai-***********/ \
-p 4000:4000 \
ghcr.io/berriai/litellm:main-latest \
--config /app/config.yaml --detailed_debug
```
```
**Step 3. Send a Test Request**
Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)
### Step 3. TEST Request
Pass `model=azure-gpt-3.5`; this was set in Step 1
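For example, a minimal test request against the proxy (a sketch, assuming the container from Step 2 is exposed on `http://0.0.0.0:4000`; any placeholder API key works unless your proxy enforces virtual keys):
```python
# Sketch: send a test chat completion to the running litellm proxy.
# Assumes the proxy from Step 2 is listening on http://0.0.0.0:4000.
import openai

client = openai.OpenAI(
    api_key="anything",      # placeholder; set a real key if the proxy requires one
    base_url="http://0.0.0.0:4000",
)

response = client.chat.completions.create(
    model="azure-gpt-3.5",   # the model_name from Step 1
    messages=[{"role": "user", "content": "Hello, are you working?"}],
)
print(response.choices[0].message.content)
```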

View file

@ -278,6 +278,36 @@ router_settings:
routing_strategy_args: {"ttl": 10}
```
### Set Lowest Latency Buffer
Set a buffer within which deployments are considered candidates for receiving calls.
E.g., if you have 5 deployments:
```
https://litellm-prod-1.openai.azure.com/: 0.07s
https://litellm-prod-2.openai.azure.com/: 0.1s
https://litellm-prod-3.openai.azure.com/: 0.1s
https://litellm-prod-4.openai.azure.com/: 0.1s
https://litellm-prod-5.openai.azure.com/: 4.66s
```
To prevent initially overloading `prod-1` with all requests, we can set a buffer of 50% so that deployments `prod-2`, `prod-3`, and `prod-4` are also considered.
**In Router**
```python
router = Router(..., routing_strategy_args={"lowest_latency_buffer": 0.5})
```
**In Proxy**
```yaml
router_settings:
routing_strategy_args: {"lowest_latency_buffer": 0.5}
```
</TabItem>
<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">

View file

@ -79,7 +79,7 @@ class LangFuseLogger:
print_verbose,
level="DEFAULT",
status_message=None,
):
) -> dict:
# Method definition
try:
@ -111,6 +111,7 @@ class LangFuseLogger:
pass
# end of processing langfuse ########################
print(f"response obj type: {type(response_obj)}")
if (
level == "ERROR"
and status_message is not None
@ -140,8 +141,11 @@ class LangFuseLogger:
input = prompt
output = response_obj["data"]
print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
trace_id = None
generation_id = None
if self._is_langfuse_v2():
self._log_langfuse_v2(
print("INSIDE V2 LANGFUSE")
trace_id, generation_id = self._log_langfuse_v2(
user_id,
metadata,
litellm_params,
@ -171,10 +175,12 @@ class LangFuseLogger:
f"Langfuse Layer Logging - final response object: {response_obj}"
)
verbose_logger.info(f"Langfuse Layer Logging - logging success")
return {"trace_id": trace_id, "generation_id": generation_id}
except:
traceback.print_exc()
verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
pass
return {"trace_id": None, "generation_id": None}
async def _async_log_event(
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
@ -246,7 +252,7 @@ class LangFuseLogger:
response_obj,
level,
print_verbose,
):
) -> tuple:
import langfuse
try:
@ -272,11 +278,14 @@ class LangFuseLogger:
## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces.
trace_name = f"litellm-{kwargs.get('call_type', 'completion')}"
if existing_trace_id is not None:
trace_params = {"id": existing_trace_id}
else: # don't overwrite an existing trace
trace_params = {
"name": trace_name,
"input": input,
"user_id": metadata.get("trace_user_id", user_id),
"id": trace_id or existing_trace_id,
"id": trace_id,
"session_id": metadata.get("session_id", None),
}
@ -341,6 +350,7 @@ class LangFuseLogger:
kwargs["cache_hit"] = False
tags.append(f"cache_hit:{kwargs['cache_hit']}")
clean_metadata["cache_hit"] = kwargs["cache_hit"]
if existing_trace_id is None:
trace_params.update({"tags": tags})
proxy_server_request = litellm_params.get("proxy_server_request", None)
@ -363,6 +373,7 @@ class LangFuseLogger:
print_verbose(f"trace_params: {trace_params}")
print(f"trace_params: {trace_params}")
trace = self.Langfuse.trace(**trace_params)
generation_id = None
@ -414,6 +425,10 @@ class LangFuseLogger:
print_verbose(f"generation_params: {generation_params}")
trace.generation(**generation_params)
generation_client = trace.generation(**generation_params)
print(f"LANGFUSE TRACE ID - {generation_client.trace_id}")
return generation_client.trace_id, generation_id
except Exception as e:
verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
return None, None

View file

@ -238,12 +238,13 @@ def get_ollama_response(
## RESPONSE OBJECT
model_response["choices"][0]["finish_reason"] = "stop"
if optional_params.get("format", "") == "json":
function_call = json.loads(response_json["response"])
message = litellm.Message(
content=None,
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"arguments": response_json["response"], "name": ""},
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"type": "function",
}
],
@ -335,15 +336,13 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
## RESPONSE OBJECT
model_response["choices"][0]["finish_reason"] = "stop"
if data.get("format", "") == "json":
function_call = json.loads(response_json["response"])
message = litellm.Message(
content=None,
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {
"arguments": response_json["response"],
"name": "",
},
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"type": "function",
}
],

View file

@ -285,15 +285,13 @@ def get_ollama_response(
## RESPONSE OBJECT
model_response["choices"][0]["finish_reason"] = "stop"
if data.get("format", "") == "json":
function_call = json.loads(response_json["message"]["content"])
message = litellm.Message(
content=None,
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {
"arguments": response_json["message"]["content"],
"name": "",
},
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"type": "function",
}
],
@ -415,15 +413,13 @@ async def ollama_acompletion(
## RESPONSE OBJECT
model_response["choices"][0]["finish_reason"] = "stop"
if data.get("format", "") == "json":
function_call = json.loads(response_json["message"]["content"])
message = litellm.Message(
content=None,
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {
"arguments": response_json["message"]["content"],
"name": function_name or "",
},
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"type": "function",
}
],
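For context, the hunks above change how Ollama JSON-mode output is turned into an OpenAI-style tool call: the raw response content is now parsed as JSON and split into a function name plus serialized arguments, instead of being passed through verbatim with an empty name. A standalone sketch of that conversion (hypothetical helper, shown only to illustrate the new shape):
```python
# Illustrative helper (not part of the diff): convert Ollama JSON-mode output
# into an OpenAI-style tool_call entry, mirroring what the updated handlers do.
import json
import uuid

def to_tool_call(raw_content: str) -> dict:
    function_call = json.loads(raw_content)
    return {
        "id": f"call_{uuid.uuid4()}",
        "function": {
            "name": function_call["name"],
            "arguments": json.dumps(function_call["arguments"]),
        },
        "type": "function",
    }

print(to_tool_call('{"name": "get_weather", "arguments": {"city": "Boston"}}'))
```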

View file

@ -360,7 +360,7 @@ def mock_completion(
model: str,
messages: List,
stream: Optional[bool] = False,
mock_response: str = "This is a mock request",
mock_response: Union[str, Exception] = "This is a mock request",
logging=None,
**kwargs,
):
@ -387,6 +387,20 @@ def mock_completion(
- If 'stream' is True, it returns a response that mimics the behavior of a streaming completion.
"""
try:
## LOGGING
if logging is not None:
logging.pre_call(
input=messages,
api_key="mock-key",
)
if isinstance(mock_response, Exception):
raise litellm.APIError(
status_code=500, # type: ignore
message=str(mock_response),
llm_provider="openai", # type: ignore
model=model, # type: ignore
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
)
model_response = ModelResponse(stream=stream)
if stream is True:
# don't try to access stream object,
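With this change, `mock_response` accepts an exception as well as a string: passing an `Exception` makes `mock_completion` raise a `litellm.APIError` instead of returning a canned response. A hedged usage sketch (assuming `litellm.completion` forwards the `mock_response` kwarg to the mock path, as the hunk above suggests):
```python
# Sketch: exercise the new exception branch of mock_completion.
# Assumes litellm.completion forwards mock_response to mock_completion.
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=Exception("simulated provider outage"),
    )
except litellm.APIError as e:
    print(f"Got the expected mock error: {e}")
```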

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
[Minified Next.js webpack runtime chunk (dashboard build output), old and new versions; the only substantive change is the referenced stylesheet hash: static/css/5e699db73bf6f8c2.css -> static/css/4ccaa87c9648acfb.css.]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
[Prerendered dashboard HTML (build output), old and new versions; the changes are regenerated asset references: webpack-ccae12a25017afa5.js -> webpack-4acf5608f06a35df.js, stylesheet 5e699db73bf6f8c2.css -> 4ccaa87c9648acfb.css, page chunk page-508c39694bd40fe9.js -> page-e710f07514d9286b.js, and build ID kbGdRQFfI6W3bEwfzmJDI -> 7aR2yOE4Bz0za1EnxRCsv.]

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["7aR2yOE4Bz0za1EnxRCsv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/4ccaa87c9648acfb.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -916,6 +916,7 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
request_id: Optional[str] = str(uuid.uuid4())
api_base: Optional[str] = ""
model_group: Optional[str] = ""
litellm_model_name: Optional[str] = ""
model_id: Optional[str] = ""
request_kwargs: Optional[dict] = {}
exception_type: Optional[str] = ""

View file

@ -1258,6 +1258,7 @@ async def _PROXY_failure_handler(
request_id=str(uuid.uuid4()),
model_group=_model_group,
model_id=_model_id,
litellm_model_name=kwargs.get("model"),
request_kwargs=_optional_params,
api_base=api_base,
exception_type=_exception_type,
@ -7523,9 +7524,9 @@ async def model_info_v2(
)
async def model_metrics(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
_selected_model_group: Optional[str] = None,
startTime: Optional[datetime] = datetime.now() - timedelta(days=30),
endTime: Optional[datetime] = datetime.now(),
_selected_model_group: Optional[str] = "gpt-4-32k",
startTime: Optional[datetime] = None,
endTime: Optional[datetime] = None,
):
global prisma_client, llm_router
if prisma_client is None:
@ -7535,65 +7536,153 @@ async def model_metrics(
param="None",
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
if _selected_model_group and llm_router is not None:
_model_list = llm_router.get_model_list()
_relevant_api_bases = []
for model in _model_list:
if model["model_name"] == _selected_model_group:
_litellm_params = model["litellm_params"]
_api_base = _litellm_params.get("api_base", "")
_relevant_api_bases.append(_api_base)
_relevant_api_bases.append(_api_base + "/openai/")
startTime = startTime or datetime.now() - timedelta(days=30)
endTime = endTime or datetime.now()
sql_query = """
SELECT
CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
COUNT(*) AS num_requests,
AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
FROM "LiteLLM_SpendLogs"
WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
AND api_base = ANY($3)
GROUP BY CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
ORDER BY num_requests DESC
LIMIT 50;
"""
db_response = await prisma_client.db.query_raw(
sql_query, startTime, endTime, _relevant_api_bases
)
else:
sql_query = """
SELECT
CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
COUNT(*) AS num_requests,
AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
api_base,
model,
DATE_TRUNC('day', "startTime")::DATE AS day,
AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) / SUM(total_tokens) AS avg_latency_per_token
FROM
"LiteLLM_SpendLogs"
WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
WHERE
"startTime" >= NOW() - INTERVAL '30 days'
AND "model" = $1
GROUP BY
CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
api_base,
model,
day
HAVING
SUM(total_tokens) > 0
ORDER BY
num_requests DESC
LIMIT 50;
avg_latency_per_token DESC;
"""
_all_api_bases = set()
db_response = await prisma_client.db.query_raw(
sql_query, _selected_model_group, startTime, endTime
)
_daily_entries: dict = {} # {"Jun 23": {"model1": 0.002, "model2": 0.003}}
if db_response is not None:
for model_data in db_response:
_api_base = model_data["api_base"]
_model = model_data["model"]
_day = model_data["day"]
_avg_latency_per_token = model_data["avg_latency_per_token"]
if _day not in _daily_entries:
_daily_entries[_day] = {}
_combined_model_name = str(_model)
if "https://" in _api_base:
_combined_model_name = str(_api_base)
if "/openai/" in _combined_model_name:
_combined_model_name = _combined_model_name.split("/openai/")[0]
_all_api_bases.add(_combined_model_name)
_daily_entries[_day][_combined_model_name] = _avg_latency_per_token
"""
each entry needs to be like this:
{
date: 'Jun 23',
'gpt-4-https://api.openai.com/v1/': 0.002,
'gpt-43-https://api.openai.com-12/v1/': 0.002,
}
"""
# convert daily entries to list of dicts
response: List[dict] = []
# sort daily entries by date
_daily_entries = dict(sorted(_daily_entries.items(), key=lambda item: item[0]))
for day in _daily_entries:
entry = {"date": str(day)}
for model_key, latency in _daily_entries[day].items():
entry[model_key] = round(latency, 8)
response.append(entry)
return {
"data": response,
"all_api_bases": list(_all_api_bases),
}
@router.get(
"/model/metrics/exceptions",
description="View number of failed requests per model on config.yaml",
tags=["model management"],
include_in_schema=False,
dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics_exceptions(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
_selected_model_group: Optional[str] = None,
startTime: Optional[datetime] = None,
endTime: Optional[datetime] = None,
):
global prisma_client, llm_router
if prisma_client is None:
raise ProxyException(
message="Prisma Client is not initialized",
type="internal_error",
param="None",
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
startTime = startTime or datetime.now() - timedelta(days=30)
endTime = endTime or datetime.now()
"""
"""
sql_query = """
WITH cte AS (
SELECT
CASE WHEN api_base = '' THEN litellm_model_name ELSE CONCAT(litellm_model_name, '-', api_base) END AS combined_model_api_base,
exception_type,
COUNT(*) AS num_exceptions
FROM "LiteLLM_ErrorLogs"
WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
GROUP BY combined_model_api_base, exception_type
)
SELECT
combined_model_api_base,
COUNT(*) AS total_exceptions,
json_object_agg(exception_type, num_exceptions) AS exception_counts
FROM cte
GROUP BY combined_model_api_base
ORDER BY total_exceptions DESC
LIMIT 200;
"""
db_response = await prisma_client.db.query_raw(sql_query, startTime, endTime)
response: List[dict] = []
if response is not None:
exception_types = set()
"""
Return Data
{
"combined_model_api_base": "gpt-3.5-turbo-https://api.openai.com/v1/,
"total_exceptions": 5,
"BadRequestException": 5,
"TimeoutException": 2
}
"""
if db_response is not None:
# loop through all models
for model_data in db_response:
model = model_data.get("combined_model_api_base", "")
num_requests = model_data.get("num_requests", 0)
avg_latency_seconds = model_data.get("avg_latency_seconds", 0)
response.append(
{
total_exceptions = model_data.get("total_exceptions", 0)
exception_counts = model_data.get("exception_counts", {})
curr_row = {
"model": model,
"num_requests": num_requests,
"avg_latency_seconds": avg_latency_seconds,
"total_exceptions": total_exceptions,
}
)
return response
curr_row.update(exception_counts)
response.append(curr_row)
for k, v in exception_counts.items():
exception_types.add(k)
return {"data": response, "exception_types": list(exception_types)}
@router.get(

View file

@ -183,6 +183,21 @@ model LiteLLM_SpendLogs {
end_user String?
}
// View spend, model, api_key per request
model LiteLLM_ErrorLogs {
request_id String @id @default(uuid())
startTime DateTime // Assuming start_time is a DateTime field
endTime DateTime // Assuming end_time is a DateTime field
api_base String @default("")
model_group String @default("") // public model_name / model_group
litellm_model_name String @default("") // model passed to litellm
model_id String @default("") // ID of model in ProxyModelTable
request_kwargs Json @default("{}")
exception_type String @default("")
exception_string String @default("")
status_code String @default("")
}
// Beta - allow team members to request access to a model
model LiteLLM_UserNotifications {
request_id String @id

View file

@ -2049,6 +2049,11 @@ async def update_spend(
raise e
### UPDATE KEY TABLE ###
verbose_proxy_logger.debug(
"KEY Spend transactions: {}".format(
len(prisma_client.key_list_transactons.keys())
)
)
if len(prisma_client.key_list_transactons.keys()) > 0:
for i in range(n_retry_times + 1):
start_time = time.time()

View file

@ -290,6 +290,21 @@ class Router:
}
"""
### ROUTING SETUP ###
self.routing_strategy_init(
routing_strategy=routing_strategy,
routing_strategy_args=routing_strategy_args,
)
## COOLDOWNS ##
if isinstance(litellm.failure_callback, list):
litellm.failure_callback.append(self.deployment_callback_on_failure)
else:
litellm.failure_callback = [self.deployment_callback_on_failure]
print( # noqa
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
) # noqa
self.routing_strategy_args = routing_strategy_args
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
self.leastbusy_logger = LeastBusyLoggingHandler(
router_cache=self.cache, model_list=self.model_list
@ -321,15 +336,6 @@ class Router:
)
if isinstance(litellm.callbacks, list):
litellm.callbacks.append(self.lowestlatency_logger) # type: ignore
## COOLDOWNS ##
if isinstance(litellm.failure_callback, list):
litellm.failure_callback.append(self.deployment_callback_on_failure)
else:
litellm.failure_callback = [self.deployment_callback_on_failure]
print( # noqa
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
) # noqa
self.routing_strategy_args = routing_strategy_args
def print_deployment(self, deployment: dict):
"""
@ -1450,40 +1456,47 @@ class Router:
raise original_exception
### RETRY
#### check if it should retry + back-off if required
if "No models available" in str(
e
) or RouterErrors.no_deployments_available.value in str(e):
timeout = litellm._calculate_retry_after(
remaining_retries=num_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
await asyncio.sleep(timeout)
elif RouterErrors.user_defined_ratelimit_error.value in str(e):
raise e # don't wait to retry if deployment hits user-defined rate-limit
# if "No models available" in str(
# e
# ) or RouterErrors.no_deployments_available.value in str(e):
# timeout = litellm._calculate_retry_after(
# remaining_retries=num_retries,
# max_retries=num_retries,
# min_timeout=self.retry_after,
# )
# await asyncio.sleep(timeout)
# elif RouterErrors.user_defined_ratelimit_error.value in str(e):
# raise e # don't wait to retry if deployment hits user-defined rate-limit
elif hasattr(original_exception, "status_code") and litellm._should_retry(
status_code=original_exception.status_code
):
if hasattr(original_exception, "response") and hasattr(
original_exception.response, "headers"
):
timeout = litellm._calculate_retry_after(
remaining_retries=num_retries,
max_retries=num_retries,
response_headers=original_exception.response.headers,
min_timeout=self.retry_after,
)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=num_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
await asyncio.sleep(timeout)
else:
raise original_exception
# elif hasattr(original_exception, "status_code") and litellm._should_retry(
# status_code=original_exception.status_code
# ):
# if hasattr(original_exception, "response") and hasattr(
# original_exception.response, "headers"
# ):
# timeout = litellm._calculate_retry_after(
# remaining_retries=num_retries,
# max_retries=num_retries,
# response_headers=original_exception.response.headers,
# min_timeout=self.retry_after,
# )
# else:
# timeout = litellm._calculate_retry_after(
# remaining_retries=num_retries,
# max_retries=num_retries,
# min_timeout=self.retry_after,
# )
# await asyncio.sleep(timeout)
# else:
# raise original_exception
### RETRY
_timeout = self._router_should_retry(
e=original_exception,
remaining_retries=num_retries,
num_retries=num_retries,
)
await asyncio.sleep(_timeout)
## LOGGING
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
@ -1505,34 +1518,12 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
if "No models available" in str(e):
timeout = litellm._calculate_retry_after(
_timeout = self._router_should_retry(
e=original_exception,
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
num_retries=num_retries,
)
await asyncio.sleep(timeout)
elif (
hasattr(e, "status_code")
and hasattr(e, "response")
and litellm._should_retry(status_code=e.status_code)
):
if hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
await asyncio.sleep(timeout)
else:
raise e
await asyncio.sleep(_timeout)
raise original_exception
def function_with_fallbacks(self, *args, **kwargs):
@ -1625,7 +1616,7 @@ class Router:
def _router_should_retry(
self, e: Exception, remaining_retries: int, num_retries: int
):
) -> Union[int, float]:
"""
Calculate back-off, then retry
"""
@ -1636,14 +1627,13 @@ class Router:
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
time.sleep(timeout)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
return timeout
def function_with_retries(self, *args, **kwargs):
"""
@ -1658,6 +1648,7 @@ class Router:
context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks
)
try:
# if the function call is successful, no exception will be raised and we'll break out of the loop
response = original_function(*args, **kwargs)
@ -1677,11 +1668,12 @@ class Router:
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
### RETRY
self._router_should_retry(
_timeout = self._router_should_retry(
e=original_exception,
remaining_retries=num_retries,
num_retries=num_retries,
)
time.sleep(_timeout)
for current_attempt in range(num_retries):
verbose_router_logger.debug(
f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@ -1695,11 +1687,12 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
self._router_should_retry(
_timeout = self._router_should_retry(
e=e,
remaining_retries=remaining_retries,
num_retries=num_retries,
)
time.sleep(_timeout)
raise original_exception
### HELPER FUNCTIONS
@ -1733,10 +1726,11 @@ class Router:
) # i.e. azure
metadata = kwargs.get("litellm_params", {}).get("metadata", None)
_model_info = kwargs.get("litellm_params", {}).get("model_info", {})
if isinstance(_model_info, dict):
deployment_id = _model_info.get("id", None)
self._set_cooldown_deployments(
deployment_id
exception_status=exception_status, deployment=deployment_id
) # setting deployment_id in cooldown deployments
if custom_llm_provider:
model_name = f"{custom_llm_provider}/{model_name}"
@ -1796,9 +1790,15 @@ class Router:
key=rpm_key, value=request_count, local_only=True
) # don't change existing ttl
def _set_cooldown_deployments(self, deployment: Optional[str] = None):
def _set_cooldown_deployments(
self, exception_status: Union[str, int], deployment: Optional[str] = None
):
"""
Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute
or
the exception is not one that should be immediately retried (e.g. 401)
"""
if deployment is None:
return
@ -1815,7 +1815,20 @@ class Router:
f"Attempting to add {deployment} to cooldown list. updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}"
)
cooldown_time = self.cooldown_time or 1
if updated_fails > self.allowed_fails:
if isinstance(exception_status, str):
try:
exception_status = int(exception_status)
except Exception as e:
verbose_router_logger.debug(
"Unable to cast exception status to int {}. Defaulting to status=500.".format(
exception_status
)
)
exception_status = 500
_should_retry = litellm._should_retry(status_code=exception_status)
if updated_fails > self.allowed_fails or _should_retry == False:
# get the current cooldown list for that minute
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
cached_value = self.cache.get_cache(key=cooldown_key)
@ -2652,6 +2665,13 @@ class Router:
_casted_value = int(kwargs[var])
setattr(self, var, _casted_value)
else:
if var == "routing_strategy":
self.routing_strategy_init(
routing_strategy=kwargs[var],
routing_strategy_args=kwargs.get(
"routing_strategy_args", {}
),
)
setattr(self, var, kwargs[var])
else:
verbose_router_logger.debug("Setting {} is not allowed".format(var))

View file

@ -19,6 +19,7 @@ def setup_and_teardown():
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
import asyncio

View file

@ -348,6 +348,220 @@ def test_langfuse_logging_function_calling():
# test_langfuse_logging_function_calling()
def test_langfuse_existing_trace_id():
"""
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
Pass 1 logging object with a trace
Pass 2nd logging object with the trace id
Assert no changes to the trace
"""
# Test - if the logs were sent to the correct team on langfuse
import litellm, datetime
from litellm.integrations.langfuse import LangFuseLogger
langfuse_Logger = LangFuseLogger(
langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
litellm.success_callback = ["langfuse"]
# langfuse_args = {'kwargs': { 'start_time': 'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
### NEW TRACE ###
message = [{"role": "user", "content": "what's the weather in boston"}]
langfuse_args = {
"response_obj": response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"model_alias_map": {},
"completion_call_id": None,
"metadata": None,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": message,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
import langfuse
langfuse_client = langfuse.Langfuse(
public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
trace_id = langfuse_response_object["trace_id"]
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)
### EXISTING TRACE ###
new_metadata = {"existing_trace_id": trace_id}
new_messages = [{"role": "user", "content": "What do you know?"}]
new_response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="What do I know?",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
langfuse_args = {
"response_obj": new_response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"model_alias_map": {},
"completion_call_id": None,
"metadata": new_metadata,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": new_messages,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
new_trace_id = langfuse_response_object["trace_id"]
assert new_trace_id == trace_id
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
new_langfuse_trace = langfuse_client.get_trace(id=trace_id)
assert dict(initial_langfuse_trace) == dict(new_langfuse_trace)
def test_langfuse_logging_tool_calling():
litellm.set_verbose = True

View file

@ -104,6 +104,42 @@ def test_router_timeout_init(timeout, ssl_verify):
)
@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.asyncio
async def test_router_retries(sync_mode):
"""
- make sure retries work as expected
"""
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
},
]
router = Router(model_list=model_list, num_retries=2)
if sync_mode:
router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
else:
await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
@pytest.mark.parametrize(
"mistral_api_base",
[
@ -1118,6 +1154,7 @@ def test_consistent_model_id():
assert id1 == id2
@pytest.mark.skip(reason="local test")
def test_reading_keys_os_environ():
import openai
@ -1217,6 +1254,7 @@ def test_reading_keys_os_environ():
# test_reading_keys_os_environ()
@pytest.mark.skip(reason="local test")
def test_reading_openai_keys_os_environ():
import openai

View file

@ -46,6 +46,7 @@ def test_async_fallbacks(caplog):
router = Router(
model_list=model_list,
fallbacks=[{"gpt-3.5-turbo": ["azure/gpt-3.5-turbo"]}],
num_retries=1,
)
user_message = "Hello, how are you?"
@ -82,6 +83,7 @@ def test_async_fallbacks(caplog):
# - error request, falling back notice, success notice
expected_logs = [
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"litellm.acompletion(model=None)\x1b[31m Exception No deployments available for selected model, passed model=gpt-3.5-turbo\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
]

View file

@ -22,10 +22,10 @@ class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
print(
f"previous_models: {kwargs['litellm_params']['metadata']['previous_models']}"
f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
)
self.previous_models += len(
kwargs["litellm_params"]["metadata"]["previous_models"]
self.previous_models = len(
kwargs["litellm_params"]["metadata"].get("previous_models", [])
) # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
print(f"self.previous_models: {self.previous_models}")
@ -127,7 +127,7 @@ def test_sync_fallbacks():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4
print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
router.reset()
@ -140,7 +140,7 @@ def test_sync_fallbacks():
@pytest.mark.asyncio
async def test_async_fallbacks():
litellm.set_verbose = False
litellm.set_verbose = True
model_list = [
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
@@ -209,12 +209,13 @@ async def test_async_fallbacks():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
try:
kwargs["model"] = "azure/gpt-3.5-turbo"
response = await router.acompletion(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
model_list=model_list,
fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
set_verbose=False,
num_retries=0,
)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
response = router.embedding(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
},
]
router = Router(model_list=model_list, set_verbose=True, num_retries=0)
router = Router(model_list=model_list, set_verbose=True)
kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -752,7 +752,7 @@ async def test_async_fallbacks_max_retries_per_request():
router.reset()
def test_usage_based_routing_fallbacks():
def test_ausage_based_routing_fallbacks():
try:
# [Prod Test]
# IT tests Usage Based Routing with fallbacks
@@ -766,9 +766,9 @@ def test_usage_based_routing_fallbacks():
load_dotenv()
# Constants for TPM and RPM allocation
AZURE_FAST_RPM = 3
AZURE_BASIC_RPM = 4
OPENAI_RPM = 10
AZURE_FAST_RPM = 1
AZURE_BASIC_RPM = 1
OPENAI_RPM = 2
ANTHROPIC_RPM = 100000
def get_azure_params(deployment_name: str):

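For reference, a minimal sketch of how RPM constants like the ones lowered above are consumed: each deployment in the router's `model_list` advertises an `rpm` budget, and usage-based routing prefers deployments with remaining capacity before falling back. Deployment names, keys, and exact params here are placeholders, not the test's real configuration.

```python
from litellm import Router

model_list = [
    {
        "model_name": "azure-fast",  # placeholder alias
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": "<azure-key>",
            "api_base": "<azure-base>",
            "api_version": "<api-version>",
        },
        "rpm": 1,  # e.g. AZURE_FAST_RPM
    },
    {
        "model_name": "openai-fallback",  # placeholder alias
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "<openai-key>"},
        "rpm": 2,  # e.g. OPENAI_RPM
    },
]

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing",
    fallbacks=[{"azure-fast": ["openai-fallback"]}],
)
```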
View file

@@ -0,0 +1,121 @@
#### What this tests ####
# This tests calling router with fallback models
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger
class MyCustomHandler(CustomLogger):
success: bool = False
failure: bool = False
previous_models: int = 0
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
print(
f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
)
self.previous_models = len(
kwargs["litellm_params"]["metadata"].get("previous_models", [])
) # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
print(f"self.previous_models: {self.previous_models}")
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(
f"Post-API Call - response object: {response_obj}; model: {kwargs['model']}"
)
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure")
"""
Test sync + async
- Authorization Errors
- Random API Error
"""
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("error_type", ["Authorization Error", "API Error"])
@pytest.mark.asyncio
async def test_router_retries_errors(sync_mode, error_type):
"""
- Auth Error -> 0 retries
- API Error -> 2 retries
"""
_api_key = (
"bad-key" if error_type == "Authorization Error" else os.getenv("AZURE_API_KEY")
)
print(f"_api_key: {_api_key}")
model_list = [
{
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": _api_key,
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
"tpm": 240000,
"rpm": 1800,
},
]
router = Router(model_list=model_list, allowed_fails=3)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
kwargs = {
"model": "azure/gpt-3.5-turbo",
"messages": messages,
"mock_response": (
None
if error_type == "Authorization Error"
else Exception("Invalid Request")
),
}
try:
if sync_mode:
response = router.completion(**kwargs)
else:
response = await router.acompletion(**kwargs)
except Exception as e:
pass
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
print(f"customHandler.previous_models: {customHandler.previous_models}")
if error_type == "Authorization Error":
assert customHandler.previous_models == 0 # 0 retries
else:
assert customHandler.previous_models == 2 # 2 retries

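A rough sketch of the call-count arithmetic those assertions rely on, assuming the two-retry behavior the docstring above describes. `previous_models` counts the deployments already attempted before the current call, as captured in `log_pre_api_call` from `metadata["previous_models"]`.

```python
num_retries = 2  # implied by the "API Error -> 2 retries" docstring above

# Auth error: not retried, so the only attempt sees no prior models.
auth_attempts = 1
assert auth_attempts - 1 == 0  # previous_models == 0

# Generic API error: 1 initial call + 2 retries = 3 attempts,
# so the final attempt sees 2 previous models.
api_error_attempts = 1 + num_retries
assert api_error_attempts - 1 == 2  # previous_models == 2
```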
View file

@@ -106,7 +106,7 @@ try:
except Exception as e:
verbose_logger.debug(f"Exception import enterprise features {str(e)}")
from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO
from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO, Iterable
from .caching import Cache
from concurrent.futures import ThreadPoolExecutor
@@ -1236,7 +1236,10 @@ class Logging:
print_verbose=print_verbose,
)
elif callback == "sentry" and add_breadcrumb:
try:
details_to_log = copy.deepcopy(self.model_call_details)
except:
details_to_log = self.model_call_details
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
@@ -1327,8 +1330,10 @@
)
elif callback == "sentry" and add_breadcrumb:
print_verbose("reaches sentry breadcrumbing")
try:
details_to_log = copy.deepcopy(self.model_call_details)
except:
details_to_log = self.model_call_details
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
@@ -2635,7 +2640,11 @@ def function_setup(
dynamic_success_callbacks = kwargs.pop("success_callback")
if add_breadcrumb:
try:
details_to_log = copy.deepcopy(kwargs)
except:
details_to_log = kwargs
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
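The hunks above all apply the same guard: deep-copy the call details before scrubbing them for logging, and fall back to the original reference if the payload contains something that cannot be deep-copied. A standalone sketch of that pattern, with generic names rather than litellm's internals:

```python
import copy

def scrub_for_logging(call_details: dict, turn_off_message_logging: bool) -> dict:
    try:
        details_to_log = copy.deepcopy(call_details)
    except Exception:
        # e.g. client objects or file handles that deepcopy cannot handle
        details_to_log = call_details
    if turn_off_message_logging:
        # drop raw messages from the logged copy
        details_to_log.pop("messages", None)
    return details_to_log
```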
@@ -7171,6 +7180,7 @@ def convert_to_model_response_object(
end_time=None,
hidden_params: Optional[dict] = None,
):
received_args = locals()
try:
if response_type == "completion" and (
model_response_object is None
@@ -7182,6 +7192,11 @@
# for returning cached responses, we need to yield a generator
return convert_to_streaming_response(response_object=response_object)
choice_list = []
assert response_object["choices"] is not None and isinstance(
response_object["choices"], Iterable
)
for idx, choice in enumerate(response_object["choices"]):
message = Message(
content=choice["message"].get("content", None),
@@ -7303,7 +7318,9 @@ def convert_to_model_response_object(
model_response_object._hidden_params = hidden_params
return model_response_object
except Exception as e:
raise Exception(f"Invalid response object {traceback.format_exc()}")
raise Exception(
f"Invalid response object {traceback.format_exc()}\n\nreceived_args={received_args}"
)
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call

View file

@@ -1572,6 +1572,17 @@
"litellm_provider": "openrouter",
"mode": "chat"
},
"openrouter/anthropic/claude-3-opus": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"tool_use_system_prompt_tokens": 395
},
"openrouter/google/palm-2-chat-bison": {
"max_tokens": 8000,
"input_cost_per_token": 0.0000005,

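Using the per-token prices added above, the cost of a call through this entry can be estimated directly. A rough sketch; the token counts are made up for illustration:

```python
# prices from the new openrouter/anthropic/claude-3-opus entry
input_cost_per_token = 0.000015
output_cost_per_token = 0.000075

prompt_tokens, completion_tokens = 1_000, 300  # example counts
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"estimated cost: ${cost:.6f}")  # -> estimated cost: $0.037500
```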
View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.33"
version = "1.35.34"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.33"
version = "1.35.34"
version_files = [
"pyproject.toml:^version"
]

View file

@@ -190,6 +190,7 @@ model LiteLLM_ErrorLogs {
endTime DateTime // Assuming end_time is a DateTime field
api_base String @default("")
model_group String @default("") // public model_name / model_group
litellm_model_name String @default("") // model passed to litellm
model_id String @default("") // ID of model in ProxyModelTable
request_kwargs Json @default("{}")
exception_type String @default("")

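As a hypothetical illustration of the distinction the schema comments draw (all values invented): `model_group` holds the public alias exposed to callers, while the new `litellm_model_name` column stores the underlying model string passed to litellm.

```python
# Hypothetical LiteLLM_ErrorLogs row; field names from the schema above, values invented.
error_log_row = {
    "model_group": "gpt-3.5-turbo",             # public model_name / model_group
    "litellm_model_name": "azure/chatgpt-v-2",  # model actually passed to litellm
    "model_id": "<proxy-model-table-id>",
    "api_base": "https://example-endpoint.openai.azure.com/",
    "exception_type": "AuthenticationError",
    "request_kwargs": {},
}
```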
View file

@@ -488,7 +488,9 @@ async def test_key_info_spend_values():
)
rounded_response_cost = round(response_cost, 8)
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
assert rounded_response_cost == rounded_key_info_spend
assert (
rounded_response_cost == rounded_key_info_spend
), f"Expected cost= {rounded_response_cost} != Tracked Cost={rounded_key_info_spend}"
@pytest.mark.asyncio

View file

@@ -91,7 +91,7 @@ class ExpectNoException(Exception):
@pytest.mark.parametrize(
"num_try_send, num_allowed_send",
[
(2, 2), # sending as many as allowed, ExpectNoException
(2, 3), # sending as many as allowed, ExpectNoException
# (10, 10), # sending as many as allowed, ExpectNoException
(3, 2), # Sending more than allowed, ValueError
# (10, 9), # Sending more than allowed, ValueError

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5e699db73bf6f8c2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/4ccaa87c9648acfb.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[46414,[\"761\",\"static/chunks/761-05f8a8451296476c.js\",\"931\",\"static/chunks/app/page-e710f07514d9286b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"7aR2yOE4Bz0za1EnxRCsv\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["7aR2yOE4Bz0za1EnxRCsv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/4ccaa87c9648acfb.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@@ -18,8 +18,8 @@ import {
} from "@tremor/react";
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall } from "./networking";
import { BarChart } from "@tremor/react";
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall } from "./networking";
import { BarChart, AreaChart } from "@tremor/react";
import {
Button as Button2,
Modal,
@@ -193,13 +193,18 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const providers = Object.values(Providers).filter(key => isNaN(Number(key)));
const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
const [healthCheckResponse, setHealthCheckResponse] = useState<string>('');
const [editModalVisible, setEditModalVisible] = useState<boolean>(false);
const [selectedModel, setSelectedModel] = useState<any>(null);
const [availableModelGroups, setAvailableModelGroups] = useState<Array<string>>([]);
const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);
const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
const [modelMetricsCategories, setModelMetricsCategories] = useState<any[]>([]);
const [modelExceptions, setModelExceptions] = useState<any[]>([]);
const [allExceptions, setAllExceptions] = useState<any[]>([]);
const [failureTableData, setFailureTableData] = useState<any[]>([]);
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
const [form] = Form.useForm();
@@ -443,14 +448,71 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
}
console.log("all_model_groups:", all_model_groups)
let _array_model_groups = Array.from(all_model_groups)
// sort _array_model_groups alphabetically
_array_model_groups = _array_model_groups.sort();
setAvailableModelGroups(_array_model_groups);
// if userRole is Admin, show the pending requests
if (userRole === "Admin" && accessToken) {
const user_requests = await userGetRequesedtModelsCall(accessToken);
console.log("Pending Requests:", pendingRequests);
setPendingRequests(user_requests.requests || []);
const modelMetricsResponse = await modelMetricsCall(
accessToken,
userID,
userRole,
null
);
console.log("Model metrics response:", modelMetricsResponse);
// Sort by latency (avg_latency_per_token)
setModelMetrics(modelMetricsResponse.data);
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
const modelExceptionsResponse = await modelExceptionsCall(
accessToken,
userID,
userRole,
null
)
console.log("Model exceptions response:", modelExceptionsResponse);
setModelExceptions(modelExceptionsResponse.data);
setAllExceptions(modelExceptionsResponse.exception_types);
let modelMetricsData = modelMetricsResponse.data;
let successdeploymentToSuccess: Record<string, number> = {};
for (let i = 0; i < modelMetricsData.length; i++) {
let element = modelMetricsData[i];
let _model_name = element.model;
let _num_requests = element.num_requests;
successdeploymentToSuccess[_model_name] = _num_requests
}
console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
let failureTableData = [];
let _failureData = modelExceptionsResponse.data;
for (let i = 0; i < _failureData.length; i++) {
const model = _failureData[i];
let _model_name = model.model;
let total_exceptions = model.total_exceptions;
let total_Requests = successdeploymentToSuccess[_model_name];
if (total_Requests == null) {
total_Requests = 0
}
let _data = {
model: _model_name,
total_exceptions: total_exceptions,
total_Requests: total_Requests,
failure_rate: total_Requests / total_exceptions
}
failureTableData.push(_data);
// sort failureTableData by failure_rate
failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
setFailureTableData(failureTableData);
console.log("failureTableData:", failureTableData);
}
} catch (error) {
console.error("There was an error fetching the model data", error);
}
@@ -603,6 +665,77 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
};
const updateModelMetrics = async (modelGroup: string | null) => {
console.log("Updating model metrics for group:", modelGroup);
if (!accessToken || !userID || !userRole) {
return
}
setSelectedModelGroup(modelGroup); // If you want to store the selected model group in state
try {
const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
console.log("Model metrics response:", modelMetricsResponse);
// Assuming modelMetricsResponse now contains the metric data for the specified model group
setModelMetrics(modelMetricsResponse.data);
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
const modelExceptionsResponse = await modelExceptionsCall(
accessToken,
userID,
userRole,
modelGroup
)
console.log("Model exceptions response:", modelExceptionsResponse);
setModelExceptions(modelExceptionsResponse.data);
setAllExceptions(modelExceptionsResponse.exception_types);
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
}
const customTooltip = (props: any) => {
const { payload, active } = props;
if (!active || !payload) return null;
// Extract the date from the first item in the payload array
const date = payload[0]?.payload?.date;
// Sort the payload array by category.value in descending order
let sortedPayload = payload.sort((a: any, b: any) => b.value - a.value);
// Only show the top 5, the 6th one should be called "X other categories" depending on how many categories were not shown
if (sortedPayload.length > 5) {
let remainingItems = sortedPayload.length - 5;
sortedPayload = sortedPayload.slice(0, 5);
sortedPayload.push({
dataKey: `${remainingItems} other deployments`,
value: payload.slice(5).reduce((acc: number, curr: any) => acc + curr.value, 0),
color: "gray",
});
}
return (
<div className="w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown">
{date && <p className="text-tremor-content-emphasis mb-2">Date: {date}</p>}
{sortedPayload.map((category: any, idx: number) => (
<div key={idx} className="flex justify-between">
<div className="flex items-center space-x-2">
<div className={`w-2 h-2 mt-1 rounded-full bg-${category.color}-500`} />
<p className="text-tremor-content">{category.dataKey}</p>
</div>
<p className="font-medium text-tremor-content-emphasis text-righ ml-2">
{category.value.toFixed(5)}
</p>
</div>
))}
</div>
);
};
const getPlaceholder = (selectedProvider: string): string => {
if (selectedProvider === Providers.Vertex_AI) {
@@ -640,6 +773,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<Tab>All Models</Tab>
<Tab>Add Model</Tab>
<Tab><pre>/health Models</pre></Tab>
<Tab>Model Analytics</Tab>
</div>
<div className="flex items-center space-x-2">
@@ -955,6 +1089,87 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
</Card>
</TabPanel>
<TabPanel>
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
<Select
className="mb-4 mt-2"
>
{availableModelGroups.map((group, idx) => (
<SelectItem
key={idx}
value={group}
onClick={() => updateModelMetrics(group)}
>
{group}
</SelectItem>
))}
</Select>
<Grid numItems={2}>
<Col>
<Card className="mr-2">
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
{ modelMetrics && modelMetricsCategories && (
<AreaChart
title="Model Latency"
className="h-72"
data={modelMetrics}
showLegend={false}
index="date"
categories={modelMetricsCategories}
connectNulls={true}
customTooltip={customTooltip}
/>
)}
</Card>
</Col>
<Col>
<Card className="ml-2">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Model</TableHeaderCell>
<TableHeaderCell>Success Requests</TableHeaderCell>
<TableHeaderCell>Error Requests</TableHeaderCell>
<TableHeaderCell>Failure %</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{failureTableData.map((metric, idx) => (
<TableRow key={idx}>
<TableCell>{metric.model}</TableCell>
<TableCell>{metric.total_Requests}</TableCell>
<TableCell>{metric.total_exceptions}</TableCell>
<TableCell>{metric.failure_rate}%</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
</Col>
</Grid>
<Card className="mt-4">
<Title>Exceptions per Model</Title>
<BarChart
className="h-72"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
colors={['indigo-300', 'rose-200', '#ffcc33']}
yAxisWidth={30}
/>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>

View file

@@ -474,6 +474,43 @@ export const modelMetricsCall = async (
}
};
export const modelExceptionsCall = async (
accessToken: String,
userID: String,
userRole: String,
modelGroup: String | null,
) => {
/**
* Get all models on proxy
*/
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/exceptions` : `/model/metrics/exceptions`;
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData, 20);
throw new Error("Network response was not ok");
}
const data = await response.json();
// message.info("Received model data");
return data;
// Handle success - you might want to update some state or UI based on the created key
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const modelAvailableCall = async (
accessToken: String,
userID: String,

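The new `/model/metrics/exceptions` endpoint can also be exercised outside the dashboard. A minimal sketch, assuming a proxy running at `http://localhost:4000` and a valid key; note the helper above does not forward the model-group filter, so none is passed here either:

```python
import requests  # third-party HTTP client

PROXY_BASE = "http://localhost:4000"  # assumption: local proxy
LITELLM_KEY = "sk-1234"               # assumption: a valid proxy key

resp = requests.get(
    f"{PROXY_BASE}/model/metrics/exceptions",
    headers={
        "Authorization": f"Bearer {LITELLM_KEY}",
        "Content-Type": "application/json",
    },
)
resp.raise_for_status()
payload = resp.json()
# mirrors what the dashboard consumes: payload["data"] and payload["exception_types"]
print(payload.get("exception_types"), len(payload.get("data", [])))
```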
View file

@@ -146,10 +146,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
const [topTagsData, setTopTagsData] = useState<any[]>([]);
const [uniqueTeamIds, setUniqueTeamIds] = useState<any[]>([]);
const [totalSpendPerTeam, setTotalSpendPerTeam] = useState<any[]>([]);
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
const [modelGroups, setModelGroups] = useState<any[]>([]);
const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);
const firstDay = new Date(
currentDate.getFullYear(),
@@ -231,25 +227,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
const top_tags = await tagsSpendLogsCall(accessToken);
setTopTagsData(top_tags.top_10_tags);
// get model groups
const _model_groups = await modelInfoCall(accessToken, userID, userRole);
let model_groups = _model_groups.data;
console.log("model groups in model dashboard", model_groups);
let available_model_groups = [];
// loop through each model in model_group, access litellm_params and only inlclude the model if model["litellm_params"]["model"] startswith "azure/"
for (let i = 0; i < model_groups.length; i++) {
let model = model_groups[i];
console.log("model check", model);
let model_group = model["litellm_params"]["model"];
console.log("model group", model_group);
if (model_group.startsWith("azure/")) {
available_model_groups.push(model["model_name"]);
}
}
setModelGroups(available_model_groups);
} else if (userRole == "App Owner") {
await userSpendLogsCall(
accessToken,
@@ -286,22 +263,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
}
});
}
const modelMetricsResponse = await modelMetricsCall(
accessToken,
userID,
userRole,
null
);
console.log("Model metrics response:", modelMetricsResponse);
// Sort by latency (avg_latency_seconds)
const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
console.log("Sorted by latency:", sortedByLatency);
setModelMetrics(modelMetricsResponse);
setModelLatencyMetrics(sortedByLatency);
} catch (error) {
console.error("There was an error fetching the data", error);
// Optionally, update your UI to reflect the error state here as well
@@ -312,30 +273,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
}, [accessToken, token, userRole, userID, startTime, endTime]);
const updateModelMetrics = async (modelGroup: string | null) => {
console.log("Updating model metrics for group:", modelGroup);
if (!accessToken || !userID || !userRole) {
return
}
setSelectedModelGroup(modelGroup); // If you want to store the selected model group in state
try {
const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
console.log("Model metrics response:", modelMetricsResponse);
// Assuming modelMetricsResponse now contains the metric data for the specified model group
const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
console.log("Sorted by latency:", sortedByLatency);
setModelMetrics(modelMetricsResponse);
setModelLatencyMetrics(sortedByLatency);
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
}
return (
<div style={{ width: "100%" }} className="p-8">
<ViewUserSpend
@@ -350,7 +287,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Tab>All Up</Tab>
<Tab>Team Based Usage</Tab>
<Tab>Tag Based Usage</Tab>
<Tab>Model Based Usage</Tab>
</TabList>
<TabPanels>
<TabPanel>
@@ -492,60 +428,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
</Grid>
</TabPanel>
<TabPanel>
<Title>Filter By Model Group</Title>
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
<p style={{fontSize: '0.85rem', color: '#808080', fontStyle: 'italic'}}>(Beta feature) only supported for Azure Model Groups</p>
<Select
className="mb-4 mt-2"
defaultValue="all"
>
<SelectItem
value={"all"}
onClick={() => updateModelMetrics(null)}
>
All Model Groups
</SelectItem>
{modelGroups.map((group, idx) => (
<SelectItem
key={idx}
value={group}
onClick={() => updateModelMetrics(group)}
>
{group}
</SelectItem>
))}
</Select>
<Card>
<Title>Number Requests per Model</Title>
<BarChart
data={modelMetrics}
className="h-[50vh]"
index="model"
categories={["num_requests"]}
colors={["blue"]}
yAxisWidth={400}
layout="vertical"
tickGap={5}
/>
</Card>
<Card className="mt-4">
<Title>Latency Per Model</Title>
<BarChart
data={modelLatencyMetrics}
className="h-[50vh]"
index="model"
categories={["avg_latency_seconds"]}
colors={["red"]}
yAxisWidth={400}
layout="vertical"
tickGap={5}
/>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>
</div>