Merge branch 'BerriAI:main' into ollama-image-handling

commit 465f491e7f
frob authored 2024-05-01 22:29:37 +02:00, committed by GitHub
52 changed files with 1148 additions and 426 deletions


@@ -11,40 +11,37 @@ You can find the Dockerfile to build litellm proxy [here](https://github.com/Ber
 <TabItem value="basic" label="Basic">
-**Step 1. Create a file called `litellm_config.yaml`**
+### Step 1. CREATE config.yaml
-Example `litellm_config.yaml` (the `os.environ/` prefix means litellm will read `AZURE_API_BASE` from the env)
+Example `litellm_config.yaml`
-```yaml
-model_list:
-  - model_name: azure-gpt-3.5
-    litellm_params:
-      model: azure/<your-azure-model-deployment>
-      api_base: os.environ/AZURE_API_BASE
-      api_key: os.environ/AZURE_API_KEY
-      api_version: "2023-07-01-preview"
-```
-**Step 2. Run litellm docker image**
+```yaml
+model_list:
+  - model_name: azure-gpt-3.5
+    litellm_params:
+      model: azure/<your-azure-model-deployment>
+      api_base: os.environ/AZURE_API_BASE # runs os.getenv("AZURE_API_BASE")
+      api_key: os.environ/AZURE_API_KEY # runs os.getenv("AZURE_API_KEY")
+      api_version: "2023-07-01-preview"
+```
-See the latest available ghcr docker image here:
-https://github.com/berriai/litellm/pkgs/container/litellm
-Your litellm config.yaml should be called `litellm_config.yaml` in the directory you run this command.
-The `-v` command will mount that file
-Pass `AZURE_API_KEY` and `AZURE_API_BASE` since we set them in step 1
+### Step 2. RUN Docker Image
 ```shell
 docker run \
     -v $(pwd)/litellm_config.yaml:/app/config.yaml \
     -e AZURE_API_KEY=d6*********** \
     -e AZURE_API_BASE=https://openai-***********/ \
     -p 4000:4000 \
     ghcr.io/berriai/litellm:main-latest \
     --config /app/config.yaml --detailed_debug
 ```
-**Step 3. Send a Test Request**
+Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)
+### Step 3. TEST Request
 Pass `model=azure-gpt-3.5` this was set on step 1
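For reference, a minimal sketch of that Step 3 test request in Python. The port follows the `-p 4000:4000` mapping above; the `sk-1234` key and the `Authorization` header are placeholders for whatever auth the proxy is actually configured with:

```python
import requests  # any HTTP client works; the proxy speaks the OpenAI REST format

# Minimal sketch of a test request against the proxy started in Step 2.
response = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={
        "Authorization": "Bearer sk-1234",  # placeholder key
        "Content-Type": "application/json",
    },
    json={
        "model": "azure-gpt-3.5",  # the model_name set in Step 1
        "messages": [{"role": "user", "content": "ping"}],
    },
)
print(response.json())
```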


@@ -278,6 +278,36 @@ router_settings:
   routing_strategy_args: {"ttl": 10}
 ```
+
+### Set Lowest Latency Buffer
+
+Set a buffer within which deployments are considered candidates for receiving calls.
+
+E.g. if you have 5 deployments
+
+```
+https://litellm-prod-1.openai.azure.com/: 0.07s
+https://litellm-prod-2.openai.azure.com/: 0.1s
+https://litellm-prod-3.openai.azure.com/: 0.1s
+https://litellm-prod-4.openai.azure.com/: 0.1s
+https://litellm-prod-5.openai.azure.com/: 4.66s
+```
+
+To prevent initially overloading `prod-1` with all requests, we can set a buffer of 50%, so that deployments `prod-2`, `prod-3` and `prod-4` are also considered.
+
+**In Router**
+```python
+router = Router(..., routing_strategy_args={"lowest_latency_buffer": 0.5})
+```
+
+**In Proxy**
+```yaml
+router_settings:
+    routing_strategy_args: {"lowest_latency_buffer": 0.5}
+```
 </TabItem>
 <TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">


@@ -79,7 +79,7 @@ class LangFuseLogger:
         print_verbose,
         level="DEFAULT",
         status_message=None,
-    ):
+    ) -> dict:
         # Method definition
         try:
@@ -111,6 +111,7 @@ class LangFuseLogger:
                 pass
             # end of processing langfuse ########################
+            print(f"response obj type: {type(response_obj)}")
             if (
                 level == "ERROR"
                 and status_message is not None
@@ -140,8 +141,11 @@ class LangFuseLogger:
                 input = prompt
                 output = response_obj["data"]
             print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+            trace_id = None
+            generation_id = None
             if self._is_langfuse_v2():
-                self._log_langfuse_v2(
+                print("INSIDE V2 LANGFUSE")
+                trace_id, generation_id = self._log_langfuse_v2(
                     user_id,
                     metadata,
                     litellm_params,
@@ -171,10 +175,12 @@ class LangFuseLogger:
                 f"Langfuse Layer Logging - final response object: {response_obj}"
             )
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
+            return {"trace_id": trace_id, "generation_id": generation_id}
         except:
             traceback.print_exc()
             verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
-            pass
+            return {"trace_id": None, "generation_id": None}
 
     async def _async_log_event(
         self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
@@ -246,7 +252,7 @@ class LangFuseLogger:
         response_obj,
         level,
         print_verbose,
-    ):
+    ) -> tuple:
         import langfuse
 
         try:
@@ -272,18 +278,21 @@ class LangFuseLogger:
             ## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces.
             trace_name = f"litellm-{kwargs.get('call_type', 'completion')}"
-            trace_params = {
-                "name": trace_name,
-                "input": input,
-                "user_id": metadata.get("trace_user_id", user_id),
-                "id": trace_id or existing_trace_id,
-                "session_id": metadata.get("session_id", None),
-            }
+            if existing_trace_id is not None:
+                trace_params = {"id": existing_trace_id}
+            else:  # don't overwrite an existing trace
+                trace_params = {
+                    "name": trace_name,
+                    "input": input,
+                    "user_id": metadata.get("trace_user_id", user_id),
+                    "id": trace_id,
+                    "session_id": metadata.get("session_id", None),
+                }
             if level == "ERROR":
                 trace_params["status_message"] = output
             else:
                 trace_params["output"] = output
 
             cost = kwargs.get("response_cost", None)
             print_verbose(f"trace: {cost}")
@@ -341,7 +350,8 @@ class LangFuseLogger:
                     kwargs["cache_hit"] = False
                 tags.append(f"cache_hit:{kwargs['cache_hit']}")
                 clean_metadata["cache_hit"] = kwargs["cache_hit"]
-            trace_params.update({"tags": tags})
+            if existing_trace_id is None:
+                trace_params.update({"tags": tags})
 
             proxy_server_request = litellm_params.get("proxy_server_request", None)
             if proxy_server_request:
@@ -363,6 +373,7 @@ class LangFuseLogger:
             print_verbose(f"trace_params: {trace_params}")
+            print(f"trace_params: {trace_params}")
             trace = self.Langfuse.trace(**trace_params)
 
             generation_id = None
@@ -414,6 +425,10 @@ class LangFuseLogger:
             print_verbose(f"generation_params: {generation_params}")
-            trace.generation(**generation_params)
+            generation_client = trace.generation(**generation_params)
+
+            print(f"LANGFUSE TRACE ID - {generation_client.trace_id}")
+            return generation_client.trace_id, generation_id
         except Exception as e:
             verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
+            return None, None
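In caller terms, the point of the `existing_trace_id` branch above: when a known trace id is supplied via `metadata`, the logger sends only `{"id": <trace_id>}` as trace params (so the original trace's name/input/tags are not overwritten), and `log_event` now reports the ids it used. A hedged sketch; the trace id value is illustrative:

```python
import litellm

litellm.success_callback = ["langfuse"]  # requires LANGFUSE_* env vars to be set

# First call: a fresh trace is created; with this change the Langfuse logger
# returns {"trace_id": ..., "generation_id": ...} instead of nothing.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's the weather in boston"}],
)

# Second call: passing an existing trace id in metadata attaches this
# generation to that trace without resetting its params.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What do you know?"}],
    metadata={"existing_trace_id": "my-known-trace-id"},  # illustrative value
)
```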


@@ -238,12 +238,13 @@ def get_ollama_response(
     ## RESPONSE OBJECT
     model_response["choices"][0]["finish_reason"] = "stop"
     if optional_params.get("format", "") == "json":
+        function_call = json.loads(response_json["response"])
         message = litellm.Message(
             content=None,
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"arguments": response_json["response"], "name": ""},
+                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                     "type": "function",
                 }
             ],
@@ -335,15 +336,13 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         ## RESPONSE OBJECT
         model_response["choices"][0]["finish_reason"] = "stop"
         if data.get("format", "") == "json":
+            function_call = json.loads(response_json["response"])
             message = litellm.Message(
                 content=None,
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {
-                            "arguments": response_json["response"],
-                            "name": "",
-                        },
+                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                         "type": "function",
                     }
                 ],
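Both hunks (and the two in the chat handler below) apply the same fix: instead of forwarding Ollama's JSON-mode string verbatim as the `arguments` blob with an empty function name, the handler parses it and re-emits an OpenAI-style tool call. In isolation, with an illustrative payload that is not taken from the diff:

```python
import json
import uuid

# Illustrative JSON-mode output: a string whose content is itself JSON
# describing the requested function call.
raw_response = '{"name": "get_weather", "arguments": {"city": "Boston"}}'

function_call = json.loads(raw_response)
tool_call = {
    "id": f"call_{str(uuid.uuid4())}",
    "function": {
        "name": function_call["name"],  # previously hardcoded to ""
        # re-serialized, since OpenAI tool calls carry arguments as a JSON string
        "arguments": json.dumps(function_call["arguments"]),
    },
    "type": "function",
}
print(tool_call)
```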


@@ -285,15 +285,13 @@ def get_ollama_response(
     ## RESPONSE OBJECT
     model_response["choices"][0]["finish_reason"] = "stop"
     if data.get("format", "") == "json":
+        function_call = json.loads(response_json["message"]["content"])
         message = litellm.Message(
             content=None,
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {
-                        "arguments": response_json["message"]["content"],
-                        "name": "",
-                    },
+                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                     "type": "function",
                 }
             ],
@@ -415,15 +413,13 @@ async def ollama_acompletion(
         ## RESPONSE OBJECT
         model_response["choices"][0]["finish_reason"] = "stop"
         if data.get("format", "") == "json":
+            function_call = json.loads(response_json["message"]["content"])
             message = litellm.Message(
                 content=None,
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {
-                            "arguments": response_json["message"]["content"],
-                            "name": function_name or "",
-                        },
+                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                         "type": "function",
                     }
                 ],


@@ -360,7 +360,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
-    mock_response: str = "This is a mock request",
+    mock_response: Union[str, Exception] = "This is a mock request",
     logging=None,
     **kwargs,
 ):
@@ -387,6 +387,20 @@ def mock_completion(
     - If 'stream' is True, it returns a response that mimics the behavior of a streaming completion.
     """
     try:
+        ## LOGGING
+        if logging is not None:
+            logging.pre_call(
+                input=messages,
+                api_key="mock-key",
+            )
+        if isinstance(mock_response, Exception):
+            raise litellm.APIError(
+                status_code=500,  # type: ignore
+                message=str(mock_response),
+                llm_provider="openai",  # type: ignore
+                model=model,  # type: ignore
+                request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
+            )
         model_response = ModelResponse(stream=stream)
         if stream is True:
             # don't try to access stream object,
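A sketch of what the widened `mock_response` type enables, using only the public `litellm.completion` entry point (which forwards `mock_response` to `mock_completion`): passing an `Exception` now raises a `litellm.APIError`, convenient for exercising retry/fallback paths without a real provider:

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=Exception("simulated provider outage"),  # str or Exception
    )
except litellm.APIError as err:
    # mock_completion wraps the Exception in an APIError with status_code=500
    print(f"caught mocked failure: {err}")
```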

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
-!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5e699db73bf6f8c2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
+!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/4ccaa87c9648acfb.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
+<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[46414,[\"761\",\"static/chunks/761-05f8a8451296476c.js\",\"931\",\"static/chunks/app/page-e710f07514d9286b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"7aR2yOE4Bz0za1EnxRCsv\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
+3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["7aR2yOE4Bz0za1EnxRCsv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/4ccaa87c9648acfb.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null


@@ -916,6 +916,7 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
     request_id: Optional[str] = str(uuid.uuid4())
     api_base: Optional[str] = ""
     model_group: Optional[str] = ""
+    litellm_model_name: Optional[str] = ""
     model_id: Optional[str] = ""
     request_kwargs: Optional[dict] = {}
     exception_type: Optional[str] = ""


@@ -1258,6 +1258,7 @@ async def _PROXY_failure_handler(
                 request_id=str(uuid.uuid4()),
                 model_group=_model_group,
                 model_id=_model_id,
+                litellm_model_name=kwargs.get("model"),
                 request_kwargs=_optional_params,
                 api_base=api_base,
                 exception_type=_exception_type,
@@ -7523,9 +7524,9 @@
 )
 async def model_metrics(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-    _selected_model_group: Optional[str] = None,
-    startTime: Optional[datetime] = datetime.now() - timedelta(days=30),
-    endTime: Optional[datetime] = datetime.now(),
+    _selected_model_group: Optional[str] = "gpt-4-32k",
+    startTime: Optional[datetime] = None,
+    endTime: Optional[datetime] = None,
 ):
     global prisma_client, llm_router
     if prisma_client is None:
@@ -7535,65 +7536,153 @@ async def model_metrics(
             param="None",
             code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         )
-    if _selected_model_group and llm_router is not None:
-        _model_list = llm_router.get_model_list()
-        _relevant_api_bases = []
-        for model in _model_list:
-            if model["model_name"] == _selected_model_group:
-                _litellm_params = model["litellm_params"]
-                _api_base = _litellm_params.get("api_base", "")
-                _relevant_api_bases.append(_api_base)
-                _relevant_api_bases.append(_api_base + "/openai/")
-        sql_query = """
-            SELECT
-                CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
-                COUNT(*) AS num_requests,
-                AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
-            FROM "LiteLLM_SpendLogs"
-            WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
-            AND api_base = ANY($3)
-            GROUP BY CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
-            ORDER BY num_requests DESC
-            LIMIT 50;
-        """
-        db_response = await prisma_client.db.query_raw(
-            sql_query, startTime, endTime, _relevant_api_bases
-        )
-    else:
-        sql_query = """
-            SELECT
-                CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
-                COUNT(*) AS num_requests,
-                AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
-            FROM
                "LiteLLM_SpendLogs"
-            WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
-            GROUP BY
-                CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
-            ORDER BY
-                num_requests DESC
-            LIMIT 50;
-        """
-        db_response = await prisma_client.db.query_raw(sql_query, startTime, endTime)
-    response: List[dict] = []
-    if response is not None:
-        # loop through all models
-        for model_data in db_response:
-            model = model_data.get("combined_model_api_base", "")
-            num_requests = model_data.get("num_requests", 0)
-            avg_latency_seconds = model_data.get("avg_latency_seconds", 0)
-            response.append(
-                {
-                    "model": model,
-                    "num_requests": num_requests,
-                    "avg_latency_seconds": avg_latency_seconds,
-                }
-            )
-    return response
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    sql_query = """
+        SELECT
+            api_base,
+            model,
+            DATE_TRUNC('day', "startTime")::DATE AS day,
+            AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) / SUM(total_tokens) AS avg_latency_per_token
+        FROM
+            "LiteLLM_SpendLogs"
+        WHERE
+            "startTime" >= NOW() - INTERVAL '30 days'
+            AND "model" = $1
+        GROUP BY
+            api_base,
+            model,
+            day
+        HAVING
+            SUM(total_tokens) > 0
+        ORDER BY
+            avg_latency_per_token DESC;
+    """
+    _all_api_bases = set()
+    db_response = await prisma_client.db.query_raw(
+        sql_query, _selected_model_group, startTime, endTime
+    )
+    _daily_entries: dict = {}  # {"Jun 23": {"model1": 0.002, "model2": 0.003}}
+    if db_response is not None:
+        for model_data in db_response:
+            _api_base = model_data["api_base"]
+            _model = model_data["model"]
+            _day = model_data["day"]
+            _avg_latency_per_token = model_data["avg_latency_per_token"]
+            if _day not in _daily_entries:
+                _daily_entries[_day] = {}
+            _combined_model_name = str(_model)
+            if "https://" in _api_base:
+                _combined_model_name = str(_api_base)
+            if "/openai/" in _combined_model_name:
+                _combined_model_name = _combined_model_name.split("/openai/")[0]
+            _all_api_bases.add(_combined_model_name)
+            _daily_entries[_day][_combined_model_name] = _avg_latency_per_token
+
+    """
+    each entry needs to be like this:
+    {
+        date: 'Jun 23',
+        'gpt-4-https://api.openai.com/v1/': 0.002,
+        'gpt-43-https://api.openai.com-12/v1/': 0.002,
+    }
+    """
+    # convert daily entries to list of dicts
+    response: List[dict] = []
+
+    # sort daily entries by date
+    _daily_entries = dict(sorted(_daily_entries.items(), key=lambda item: item[0]))
+    for day in _daily_entries:
+        entry = {"date": str(day)}
+        for model_key, latency in _daily_entries[day].items():
+            entry[model_key] = round(latency, 8)
+        response.append(entry)
+
+    return {
+        "data": response,
+        "all_api_bases": list(_all_api_bases),
+    }
+
+
+@router.get(
+    "/model/metrics/exceptions",
+    description="View number of failed requests per model on config.yaml",
+    tags=["model management"],
+    include_in_schema=False,
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def model_metrics_exceptions(
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+    _selected_model_group: Optional[str] = None,
+    startTime: Optional[datetime] = None,
+    endTime: Optional[datetime] = None,
+):
+    global prisma_client, llm_router
+    if prisma_client is None:
+        raise ProxyException(
+            message="Prisma Client is not initialized",
+            type="internal_error",
+            param="None",
+            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
+
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    """
+    """
+    sql_query = """
+        WITH cte AS (
+            SELECT
+                CASE WHEN api_base = '' THEN litellm_model_name ELSE CONCAT(litellm_model_name, '-', api_base) END AS combined_model_api_base,
+                exception_type,
+                COUNT(*) AS num_exceptions
+            FROM "LiteLLM_ErrorLogs"
+            WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
+            GROUP BY combined_model_api_base, exception_type
+        )
+        SELECT
+            combined_model_api_base,
+            COUNT(*) AS total_exceptions,
+            json_object_agg(exception_type, num_exceptions) AS exception_counts
+        FROM cte
+        GROUP BY combined_model_api_base
+        ORDER BY total_exceptions DESC
+        LIMIT 200;
+    """
+    db_response = await prisma_client.db.query_raw(sql_query, startTime, endTime)
+    response: List[dict] = []
+    exception_types = set()
+
+    """
+    Return Data
+    {
+        "combined_model_api_base": "gpt-3.5-turbo-https://api.openai.com/v1/,
+        "total_exceptions": 5,
+        "BadRequestException": 5,
+        "TimeoutException": 2
+    }
+    """
+
+    if db_response is not None:
+        # loop through all models
+        for model_data in db_response:
+            model = model_data.get("combined_model_api_base", "")
+            total_exceptions = model_data.get("total_exceptions", 0)
+            exception_counts = model_data.get("exception_counts", {})
+            curr_row = {
+                "model": model,
+                "total_exceptions": total_exceptions,
+            }
+            curr_row.update(exception_counts)
+            response.append(curr_row)
+            for k, v in exception_counts.items():
+                exception_types.add(k)
+
+    return {"data": response, "exception_types": list(exception_types)}
 
 @router.get(
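Once the proxy is running, the new endpoint might be exercised like this; a hedged sketch, assuming the default port and a placeholder master key. The response shape follows the docstring above: one row per model/api-base, a `total_exceptions` count, plus one field per exception type:

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/model/metrics/exceptions",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
)
payload = resp.json()
for row in payload["data"]:
    # e.g. {"model": "gpt-3.5-turbo-https://...", "total_exceptions": 5, "BadRequestException": 5}
    print(row["model"], row["total_exceptions"])
print("exception types seen:", payload["exception_types"])
```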


@@ -183,6 +183,21 @@ model LiteLLM_SpendLogs {
   end_user            String?
 }
 
+// View spend, model, api_key per request
+model LiteLLM_ErrorLogs {
+  request_id          String   @id @default(uuid())
+  startTime           DateTime // Assuming start_time is a DateTime field
+  endTime             DateTime // Assuming end_time is a DateTime field
+  api_base            String   @default("")
+  model_group         String   @default("") // public model_name / model_group
+  litellm_model_name  String   @default("") // model passed to litellm
+  model_id            String   @default("") // ID of model in ProxyModelTable
+  request_kwargs      Json     @default("{}")
+  exception_type      String   @default("")
+  exception_string    String   @default("")
+  status_code         String   @default("")
+}
+
 // Beta - allow team members to request access to a model
 model LiteLLM_UserNotifications {
   request_id          String   @id


@@ -2049,6 +2049,11 @@ async def update_spend(
                 raise e
 
     ### UPDATE KEY TABLE ###
+    verbose_proxy_logger.debug(
+        "KEY Spend transactions: {}".format(
+            len(prisma_client.key_list_transactons.keys())
+        )
+    )
     if len(prisma_client.key_list_transactons.keys()) > 0:
         for i in range(n_retry_times + 1):
             start_time = time.time()


@@ -290,6 +290,21 @@ class Router:
             }
         """
         ### ROUTING SETUP ###
+        self.routing_strategy_init(
+            routing_strategy=routing_strategy,
+            routing_strategy_args=routing_strategy_args,
+        )
+        ## COOLDOWNS ##
+        if isinstance(litellm.failure_callback, list):
+            litellm.failure_callback.append(self.deployment_callback_on_failure)
+        else:
+            litellm.failure_callback = [self.deployment_callback_on_failure]
+        print(  # noqa
+            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
+        )  # noqa
+        self.routing_strategy_args = routing_strategy_args
+
+    def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
             self.leastbusy_logger = LeastBusyLoggingHandler(
                 router_cache=self.cache, model_list=self.model_list
@@ -321,15 +336,6 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
-        ## COOLDOWNS ##
-        if isinstance(litellm.failure_callback, list):
-            litellm.failure_callback.append(self.deployment_callback_on_failure)
-        else:
-            litellm.failure_callback = [self.deployment_callback_on_failure]
-        print(  # noqa
-            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
-        )  # noqa
-        self.routing_strategy_args = routing_strategy_args
 
     def print_deployment(self, deployment: dict):
         """
@@ -1450,40 +1456,47 @@ class Router:
                     raise original_exception
                 ### RETRY
                 #### check if it should retry + back-off if required
-                if "No models available" in str(
-                    e
-                ) or RouterErrors.no_deployments_available.value in str(e):
-                    timeout = litellm._calculate_retry_after(
-                        remaining_retries=num_retries,
-                        max_retries=num_retries,
-                        min_timeout=self.retry_after,
-                    )
-                    await asyncio.sleep(timeout)
-                elif RouterErrors.user_defined_ratelimit_error.value in str(e):
-                    raise e  # don't wait to retry if deployment hits user-defined rate-limit
-                elif hasattr(original_exception, "status_code") and litellm._should_retry(
-                    status_code=original_exception.status_code
-                ):
-                    if hasattr(original_exception, "response") and hasattr(
-                        original_exception.response, "headers"
-                    ):
-                        timeout = litellm._calculate_retry_after(
-                            remaining_retries=num_retries,
-                            max_retries=num_retries,
-                            response_headers=original_exception.response.headers,
-                            min_timeout=self.retry_after,
-                        )
-                    else:
-                        timeout = litellm._calculate_retry_after(
-                            remaining_retries=num_retries,
-                            max_retries=num_retries,
-                            min_timeout=self.retry_after,
-                        )
-                    await asyncio.sleep(timeout)
-                else:
-                    raise original_exception
+                # if "No models available" in str(
+                #     e
+                # ) or RouterErrors.no_deployments_available.value in str(e):
+                #     timeout = litellm._calculate_retry_after(
+                #         remaining_retries=num_retries,
+                #         max_retries=num_retries,
+                #         min_timeout=self.retry_after,
+                #     )
+                #     await asyncio.sleep(timeout)
+                # elif RouterErrors.user_defined_ratelimit_error.value in str(e):
+                #     raise e  # don't wait to retry if deployment hits user-defined rate-limit
+                # elif hasattr(original_exception, "status_code") and litellm._should_retry(
+                #     status_code=original_exception.status_code
+                # ):
+                #     if hasattr(original_exception, "response") and hasattr(
+                #         original_exception.response, "headers"
+                #     ):
+                #         timeout = litellm._calculate_retry_after(
+                #             remaining_retries=num_retries,
+                #             max_retries=num_retries,
+                #             response_headers=original_exception.response.headers,
+                #             min_timeout=self.retry_after,
+                #         )
+                #     else:
+                #         timeout = litellm._calculate_retry_after(
+                #             remaining_retries=num_retries,
+                #             max_retries=num_retries,
+                #             min_timeout=self.retry_after,
+                #         )
+                #     await asyncio.sleep(timeout)
+                # else:
+                #     raise original_exception
+                ### RETRY
+                _timeout = self._router_should_retry(
+                    e=original_exception,
+                    remaining_retries=num_retries,
+                    num_retries=num_retries,
+                )
+                await asyncio.sleep(_timeout)
                 ## LOGGING
                 if num_retries > 0:
                     kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
@@ -1505,34 +1518,12 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    if "No models available" in str(e):
-                        timeout = litellm._calculate_retry_after(
-                            remaining_retries=remaining_retries,
-                            max_retries=num_retries,
-                            min_timeout=self.retry_after,
-                        )
-                        await asyncio.sleep(timeout)
-                    elif (
-                        hasattr(e, "status_code")
-                        and hasattr(e, "response")
-                        and litellm._should_retry(status_code=e.status_code)
-                    ):
-                        if hasattr(e.response, "headers"):
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                response_headers=e.response.headers,
-                                min_timeout=self.retry_after,
-                            )
-                        else:
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                min_timeout=self.retry_after,
-                            )
-                        await asyncio.sleep(timeout)
-                    else:
-                        raise e
+                    _timeout = self._router_should_retry(
+                        e=original_exception,
+                        remaining_retries=remaining_retries,
+                        num_retries=num_retries,
+                    )
+                    await asyncio.sleep(_timeout)
             raise original_exception
 
     def function_with_fallbacks(self, *args, **kwargs):
@@ -1625,7 +1616,7 @@ class Router:
     def _router_should_retry(
         self, e: Exception, remaining_retries: int, num_retries: int
-    ):
+    ) -> Union[int, float]:
         """
        Calculate back-off, then retry
        """
@@ -1636,14 +1627,13 @@ class Router:
                 response_headers=e.response.headers,
                 min_timeout=self.retry_after,
             )
-            time.sleep(timeout)
         else:
             timeout = litellm._calculate_retry_after(
                 remaining_retries=remaining_retries,
                 max_retries=num_retries,
                 min_timeout=self.retry_after,
             )
-            time.sleep(timeout)
+        return timeout
 
     def function_with_retries(self, *args, **kwargs):
         """
@@ -1658,6 +1648,7 @@ class Router:
         context_window_fallbacks = kwargs.pop(
             "context_window_fallbacks", self.context_window_fallbacks
         )
+
         try:
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = original_function(*args, **kwargs)
@@ -1677,11 +1668,12 @@ class Router:
             if num_retries > 0:
                 kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
             ### RETRY
-            self._router_should_retry(
+            _timeout = self._router_should_retry(
                 e=original_exception,
                 remaining_retries=num_retries,
                 num_retries=num_retries,
             )
+            time.sleep(_timeout)
             for current_attempt in range(num_retries):
                 verbose_router_logger.debug(
                     f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@@ -1695,11 +1687,12 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    self._router_should_retry(
+                    _timeout = self._router_should_retry(
                         e=e,
                         remaining_retries=remaining_retries,
                         num_retries=num_retries,
                     )
+                    time.sleep(_timeout)
             raise original_exception
 
     ### HELPER FUNCTIONS
@@ -1733,10 +1726,11 @@ class Router:
             )  # i.e. azure
             metadata = kwargs.get("litellm_params", {}).get("metadata", None)
             _model_info = kwargs.get("litellm_params", {}).get("model_info", {})
+
             if isinstance(_model_info, dict):
                 deployment_id = _model_info.get("id", None)
                 self._set_cooldown_deployments(
-                    deployment_id
+                    exception_status=exception_status, deployment=deployment_id
                 )  # setting deployment_id in cooldown deployments
             if custom_llm_provider:
                 model_name = f"{custom_llm_provider}/{model_name}"
@@ -1796,9 +1790,15 @@ class Router:
                 key=rpm_key, value=request_count, local_only=True
             )  # don't change existing ttl
 
-    def _set_cooldown_deployments(self, deployment: Optional[str] = None):
+    def _set_cooldown_deployments(
+        self, exception_status: Union[str, int], deployment: Optional[str] = None
+    ):
         """
         Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute
+
+        or
+
+        the exception is not one that should be immediately retried (e.g. 401)
         """
         if deployment is None:
             return
@@ -1815,7 +1815,20 @@ class Router:
             f"Attempting to add {deployment} to cooldown list. updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}"
         )
         cooldown_time = self.cooldown_time or 1
-        if updated_fails > self.allowed_fails:
+
+        if isinstance(exception_status, str):
+            try:
+                exception_status = int(exception_status)
+            except Exception as e:
+                verbose_router_logger.debug(
+                    "Unable to cast exception status to int {}. Defaulting to status=500.".format(
+                        exception_status
+                    )
+                )
+                exception_status = 500
+        _should_retry = litellm._should_retry(status_code=exception_status)
+
+        if updated_fails > self.allowed_fails or _should_retry == False:
             # get the current cooldown list for that minute
             cooldown_key = f"{current_minute}:cooldown_models"  # group cooldown models by minute to reduce number of redis calls
             cached_value = self.cache.get_cache(key=cooldown_key)
@@ -2652,6 +2665,13 @@ class Router:
                     _casted_value = int(kwargs[var])
                     setattr(self, var, _casted_value)
                 else:
+                    if var == "routing_strategy":
+                        self.routing_strategy_init(
+                            routing_strategy=kwargs[var],
+                            routing_strategy_args=kwargs.get(
+                                "routing_strategy_args", {}
+                            ),
+                        )
                     setattr(self, var, kwargs[var])
             else:
                 verbose_router_logger.debug("Setting {} is not allowed".format(var))
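The net effect of the cooldown change: the exception's status code (cast to int, defaulting to 500 when unparseable) now matters, and a deployment is cooled down immediately when the status is one litellm would not retry (e.g. 401), even before it exhausts `allowed_fails`. A standalone sketch of that decision, with a simplified `should_retry` standing in for `litellm._should_retry`:

```python
def should_retry(status_code: int) -> bool:
    # Simplified stand-in for litellm._should_retry: treat rate limits and
    # transient server errors as retryable, auth/permission failures as not.
    return status_code in (408, 409, 429) or status_code >= 500


def should_cooldown(exception_status, updated_fails: int, allowed_fails: int) -> bool:
    # Pure-function mirror of the new _set_cooldown_deployments check.
    if isinstance(exception_status, str):
        try:
            exception_status = int(exception_status)
        except Exception:
            exception_status = 500  # unparseable status defaults to 500
    return updated_fails > allowed_fails or not should_retry(exception_status)


assert should_cooldown("401", updated_fails=1, allowed_fails=3)    # non-retryable -> immediate cooldown
assert not should_cooldown(429, updated_fails=1, allowed_fails=3)  # retryable, within fail budget
```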


@@ -19,6 +19,7 @@ def setup_and_teardown():
         0, os.path.abspath("../..")
     )  # Adds the project directory to the system path
     import litellm
+    from litellm import Router
 
     importlib.reload(litellm)
     import asyncio


@@ -348,6 +348,220 @@ def test_langfuse_logging_function_calling():
 # test_langfuse_logging_function_calling()
 
+
+def test_langfuse_existing_trace_id():
+    """
+    When existing trace id is passed, don't set trace params -> prevents overwriting the trace
+
+    Pass 1 logging object with a trace
+
+    Pass 2nd logging object with the trace id
+
+    Assert no changes to the trace
+    """
+    # Test - if the logs were sent to the correct team on langfuse
+    import litellm, datetime
+    from litellm.integrations.langfuse import LangFuseLogger
+
+    langfuse_Logger = LangFuseLogger(
+        langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
+        langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
+    )
+    litellm.success_callback = ["langfuse"]
+
+    # langfuse_args = {'kwargs': { 'start_time':  'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
+    response_obj = litellm.ModelResponse(
+        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
+        choices=[
+            litellm.Choices(
+                finish_reason="stop",
+                index=0,
+                message=litellm.Message(
+                    content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
+                    role="assistant",
+                ),
+            )
+        ],
+        created=1714573888,
+        model="gpt-3.5-turbo-0125",
+        object="chat.completion",
+        system_fingerprint="fp_3b956da36b",
+        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
+    )
+
+    ### NEW TRACE ###
+    message = [{"role": "user", "content": "what's the weather in boston"}]
+    langfuse_args = {
+        "response_obj": response_obj,
+        "kwargs": {
+            "model": "gpt-3.5-turbo",
+            "litellm_params": {
+                "acompletion": False,
+                "api_key": None,
+                "force_timeout": 600,
+                "logger_fn": None,
+                "verbose": False,
+                "custom_llm_provider": "openai",
+                "api_base": "https://api.openai.com/v1/",
+                "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
+                "model_alias_map": {},
+                "completion_call_id": None,
+                "metadata": None,
+                "model_info": None,
+                "proxy_server_request": None,
+                "preset_cache_key": None,
+                "no-log": False,
+                "stream_response": {},
+            },
+            "messages": message,
+            "optional_params": {"temperature": 0.1, "extra_body": {}},
+            "start_time": "2024-05-01 07:31:27.986164",
+            "stream": False,
+            "user": None,
+            "call_type": "completion",
+            "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
+            "completion_start_time": "2024-05-01 07:31:29.903685",
+            "temperature": 0.1,
+            "extra_body": {},
+            "input": [{"role": "user", "content": "what's the weather in boston"}],
+            "api_key": "my-api-key",
+            "additional_args": {
+                "complete_input_dict": {
+                    "model": "gpt-3.5-turbo",
+                    "messages": [
+                        {"role": "user", "content": "what's the weather in boston"}
+                    ],
+                    "temperature": 0.1,
+                    "extra_body": {},
+                }
+            },
+            "log_event_type": "successful_api_call",
+            "end_time": "2024-05-01 07:31:29.903685",
+            "cache_hit": None,
+            "response_cost": 6.25e-05,
+        },
+        "start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
+        "end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
+        "user_id": None,
+        "print_verbose": litellm.print_verbose,
+        "level": "DEFAULT",
+        "status_message": None,
+    }
+
+    langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
+
+    import langfuse
+
+    langfuse_client = langfuse.Langfuse(
+        public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
+        secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
+    )
+
+    trace_id = langfuse_response_object["trace_id"]
+
+    langfuse_client.flush()
+
+    time.sleep(2)
+
+    print(langfuse_client.get_trace(id=trace_id))
+
+    initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)
+
+    ### EXISTING TRACE ###
+
+    new_metadata = {"existing_trace_id": trace_id}
+    new_messages = [{"role": "user", "content": "What do you know?"}]
+    new_response_obj = litellm.ModelResponse(
+        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
+        choices=[
+            litellm.Choices(
+                finish_reason="stop",
+                index=0,
+                message=litellm.Message(
+                    content="What do I know?",
+                    role="assistant",
+                ),
+            )
+        ],
+        created=1714573888,
+        model="gpt-3.5-turbo-0125",
+        object="chat.completion",
+        system_fingerprint="fp_3b956da36b",
+        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
+    )
+    langfuse_args = {
+        "response_obj": new_response_obj,
+        "kwargs": {
+            "model": "gpt-3.5-turbo",
+            "litellm_params": {
+                "acompletion": False,
+                "api_key": None,
+                "force_timeout": 600,
+                "logger_fn": None,
+                "verbose": False,
+                "custom_llm_provider": "openai",
+                "api_base": "https://api.openai.com/v1/",
+                "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
+                "model_alias_map": {},
+                "completion_call_id": None,
+                "metadata": new_metadata,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": new_messages,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
new_trace_id = langfuse_response_object["trace_id"]
assert new_trace_id == trace_id
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
new_langfuse_trace = langfuse_client.get_trace(id=trace_id)
assert dict(initial_langfuse_trace) == dict(new_langfuse_trace)
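The test above drives `LangFuseLogger.log_event` directly, but the same behavior is reachable from user code: litellm forwards `existing_trace_id` through request metadata to the Langfuse callback. A minimal sketch (the trace-id placeholder is illustrative; in practice you would capture it from your first request's callback):

```python
import litellm

litellm.success_callback = ["langfuse"]

# First call creates a new Langfuse trace.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's the weather in boston"}],
)

# Follow-up calls can attach to that trace instead of creating a new one,
# and will not overwrite the existing trace params.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What do you know?"}],
    metadata={"existing_trace_id": "<trace-id-from-first-call>"},
)
```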
def test_langfuse_logging_tool_calling():
litellm.set_verbose = True

View file

@@ -104,6 +104,7 @@ def test_router_timeout_init(timeout, ssl_verify):
)
@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.asyncio
async def test_router_retries(sync_mode):
"""
- make sure retries work as expected
"""
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
},
]
router = Router(model_list=model_list, num_retries=2)
if sync_mode:
router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
else:
await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
@pytest.mark.parametrize(
"mistral_api_base",
[
@@ -1118,6 +1154,7 @@ def test_consistent_model_id():
assert id1 == id2
@pytest.mark.skip(reason="local test")
def test_reading_keys_os_environ():
import openai
@@ -1217,6 +1254,7 @@ def test_reading_keys_os_environ():
# test_reading_keys_os_environ()
@pytest.mark.skip(reason="local test")
def test_reading_openai_keys_os_environ():
import openai

View file

@@ -46,6 +46,7 @@ def test_async_fallbacks(caplog):
router = Router(
model_list=model_list,
fallbacks=[{"gpt-3.5-turbo": ["azure/gpt-3.5-turbo"]}],
num_retries=1,
)
user_message = "Hello, how are you?"
@@ -82,6 +83,7 @@ def test_async_fallbacks(caplog):
# - error request, falling back notice, success notice
expected_logs = [
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"litellm.acompletion(model=None)\x1b[31m Exception No deployments available for selected model, passed model=gpt-3.5-turbo\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
]

View file

@@ -22,10 +22,10 @@ class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
print(
f"previous_models: {kwargs['litellm_params']['metadata']['previous_models']}"
f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
)
self.previous_models += len(
self.previous_models = len(
kwargs["litellm_params"]["metadata"]["previous_models"]
kwargs["litellm_params"]["metadata"].get("previous_models", [])
) # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
print(f"self.previous_models: {self.previous_models}")
@@ -127,7 +127,7 @@ def test_sync_fallbacks():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4
print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
router.reset()
@@ -140,7 +140,7 @@ def test_sync_fallbacks():
@pytest.mark.asyncio
async def test_async_fallbacks():
litellm.set_verbose = False
litellm.set_verbose = True
model_list = [
{ # list of model deployments
"model_name": "azure/gpt-3.5-turbo", # openai model name
@@ -209,12 +209,13 @@ async def test_async_fallbacks():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
try:
kwargs["model"] = "azure/gpt-3.5-turbo"
response = await router.acompletion(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
model_list=model_list,
fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
set_verbose=False,
num_retries=0,
)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
response = router.embedding(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
},
]
router = Router(model_list=model_list, set_verbose=True, num_retries=0)
router = Router(model_list=model_list, set_verbose=True)
kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
pass
@@ -752,7 +752,7 @@ async def test_async_fallbacks_max_retries_per_request():
router.reset()
def test_usage_based_routing_fallbacks():
def test_ausage_based_routing_fallbacks():
try:
# [Prod Test]
# IT tests Usage Based Routing with fallbacks
@@ -766,9 +766,9 @@ def test_usage_based_routing_fallbacks():
load_dotenv()
# Constants for TPM and RPM allocation
AZURE_FAST_RPM = 3
AZURE_FAST_RPM = 1
AZURE_BASIC_RPM = 4
AZURE_BASIC_RPM = 1
OPENAI_RPM = 10
OPENAI_RPM = 2
ANTHROPIC_RPM = 100000
def get_azure_params(deployment_name: str):

View file

@@ -0,0 +1,121 @@
#### What this tests ####
# This tests router retry behavior on auth errors vs. generic API errors
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger
class MyCustomHandler(CustomLogger):
success: bool = False
failure: bool = False
previous_models: int = 0
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
print(
f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
)
self.previous_models = len(
kwargs["litellm_params"]["metadata"].get("previous_models", [])
) # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
print(f"self.previous_models: {self.previous_models}")
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(
f"Post-API Call - response object: {response_obj}; model: {kwargs['model']}"
)
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure")
"""
Test sync + async
- Authorization Errors
- Random API Error
"""
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("error_type", ["Authorization Error", "API Error"])
@pytest.mark.asyncio
async def test_router_retries_errors(sync_mode, error_type):
"""
- Auth Error -> 0 retries
- API Error -> 2 retries
"""
_api_key = (
"bad-key" if error_type == "Authorization Error" else os.getenv("AZURE_API_KEY")
)
print(f"_api_key: {_api_key}")
model_list = [
{
"model_name": "azure/gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": _api_key,
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
"tpm": 240000,
"rpm": 1800,
},
]
router = Router(model_list=model_list, allowed_fails=3)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
kwargs = {
"model": "azure/gpt-3.5-turbo",
"messages": messages,
"mock_response": (
None
if error_type == "Authorization Error"
else Exception("Invalid Request")
),
}
try:
if sync_mode:
response = router.completion(**kwargs)
else:
response = await router.acompletion(**kwargs)
except Exception as e:
pass
await asyncio.sleep(
0.05
) # allow a delay as success_callbacks are on a separate thread
print(f"customHandler.previous_models: {customHandler.previous_models}")
if error_type == "Authorization Error":
assert customHandler.previous_models == 0 # 0 retries
else:
assert customHandler.previous_models == 2 # 2 retries
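Stated directly, the new test's contract is: auth failures fail fast with zero retries, while generic API errors are retried `num_retries` times. A hedged sketch of observing the fail-fast path outside pytest (the exact exception class litellm raises is mapped internally, so treat the catch as illustrative):

```python
import litellm
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "my-model",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
        }
    ],
    num_retries=2,
)

try:
    # bad key -> 401 -> non-retryable, so the router should not burn retries
    router.completion(model="my-model", messages=[{"role": "user", "content": "hi"}])
except Exception as e:
    print(type(e).__name__)  # expected: an AuthenticationError-style exception
```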

View file

@@ -106,7 +106,7 @@ try:
except Exception as e:
verbose_logger.debug(f"Exception import enterprise features {str(e)}")
from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO
from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO, Iterable
from .caching import Cache
from concurrent.futures import ThreadPoolExecutor
@@ -1236,7 +1236,10 @@ class Logging:
print_verbose=print_verbose,
)
elif callback == "sentry" and add_breadcrumb:
details_to_log = copy.deepcopy(self.model_call_details)
try:
details_to_log = copy.deepcopy(self.model_call_details)
except:
details_to_log = self.model_call_details
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
@@ -1327,8 +1330,10 @@ class Logging:
)
elif callback == "sentry" and add_breadcrumb:
print_verbose("reaches sentry breadcrumbing")
try:
details_to_log = copy.deepcopy(self.model_call_details)
except:
details_to_log = self.model_call_details
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
@@ -2635,7 +2640,11 @@ def function_setup(
dynamic_success_callbacks = kwargs.pop("success_callback")
if add_breadcrumb:
details_to_log = copy.deepcopy(kwargs)
try:
details_to_log = copy.deepcopy(kwargs)
except:
details_to_log = kwargs
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
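The three hunks above apply the same defensive pattern: the call details may hold objects that refuse `deepcopy` (locks, open handles), so the copy is attempted and the original dict is used as a fallback. A standalone sketch of the pattern, using a narrower exception clause than the bare `except:` in the patch:

```python
import copy
import threading

def safe_details_copy(details: dict) -> dict:
    """Deepcopy if possible; otherwise fall back to the original object."""
    try:
        return copy.deepcopy(details)
    except (TypeError, copy.Error):  # e.g. thread locks cannot be pickled
        return details

payload = {"messages": [{"role": "user", "content": "hi"}], "lock": threading.Lock()}
logged = safe_details_copy(payload)  # falls back: Lock objects defeat deepcopy
assert logged is payload
```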
@@ -7171,6 +7180,7 @@ def convert_to_model_response_object(
end_time=None,
hidden_params: Optional[dict] = None,
):
received_args = locals()
try:
if response_type == "completion" and (
model_response_object is None
@@ -7182,6 +7192,11 @@ def convert_to_model_response_object(
# for returning cached responses, we need to yield a generator
return convert_to_streaming_response(response_object=response_object)
choice_list = []
assert response_object["choices"] is not None and isinstance(
response_object["choices"], Iterable
)
for idx, choice in enumerate(response_object["choices"]):
message = Message(
content=choice["message"].get("content", None),
@@ -7303,7 +7318,9 @@ def convert_to_model_response_object(
model_response_object._hidden_params = hidden_params
return model_response_object
except Exception as e:
raise Exception(f"Invalid response object {traceback.format_exc()}")
raise Exception(
f"Invalid response object {traceback.format_exc()}\n\nreceived_args={received_args}"
)
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
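Capturing `locals()` on entry is a cheap way to make the final error message reproducible: it snapshots exactly the arguments the failing call received. The same technique in isolation (function and field names are illustrative):

```python
def parse_payload(payload: dict, strict: bool = True) -> str:
    received_args = locals()  # snapshot of the arguments before any mutation
    try:
        return payload["result"]
    except Exception:
        # the re-raised error now carries the full inputs for debugging
        raise Exception(f"Invalid response object\n\nreceived_args={received_args}")

# parse_payload({"wrong_key": 1}) raises with received_args in the message
```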

View file

@@ -1572,6 +1572,17 @@
"litellm_provider": "openrouter",
"mode": "chat"
},
"openrouter/anthropic/claude-3-opus": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"tool_use_system_prompt_tokens": 395
},
"openrouter/google/palm-2-chat-bison": { "openrouter/google/palm-2-chat-bison": {
"max_tokens": 8000, "max_tokens": 8000,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.0000005,

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.33"
version = "1.35.34"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.33"
version = "1.35.34"
version_files = [
"pyproject.toml:^version"
]

View file

@@ -190,6 +190,7 @@ model LiteLLM_ErrorLogs {
endTime DateTime // Assuming end_time is a DateTime field
api_base String @default("")
model_group String @default("") // public model_name / model_group
litellm_model_name String @default("") // model passed to litellm
model_id String @default("") // ID of model in ProxyModelTable
request_kwargs Json @default("{}")
exception_type String @default("")

View file

@@ -488,7 +488,9 @@ async def test_key_info_spend_values():
)
rounded_response_cost = round(response_cost, 8)
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
assert rounded_response_cost == rounded_key_info_spend
assert (
rounded_response_cost == rounded_key_info_spend
), f"Expected cost= {rounded_response_cost} != Tracked Cost={rounded_key_info_spend}"
@pytest.mark.asyncio
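The reworked assertion attaches a message, so a failing run reports both numbers instead of a bare comparison. The same pattern in isolation:

```python
expected_cost = tracked_spend = 6.25e-05  # equal here, so the assert passes

assert (
    expected_cost == tracked_spend
), f"Expected cost= {expected_cost} != Tracked Cost={tracked_spend}"
# on a mismatch, pytest surfaces the formatted message in the failure output
```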

View file

@@ -91,7 +91,7 @@ class ExpectNoException(Exception):
@pytest.mark.parametrize(
"num_try_send, num_allowed_send",
[
(2, 2), # sending as many as allowed, ExpectNoException
(2, 3), # sending as many as allowed, ExpectNoException
# (10, 10), # sending as many as allowed, ExpectNoException
(3, 2), # Sending more than allowed, ValueError
# (10, 9), # Sending more than allowed, ValueError

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5e699db73bf6f8c2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/4ccaa87c9648acfb.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[46414,[\"761\",\"static/chunks/761-05f8a8451296476c.js\",\"931\",\"static/chunks/app/page-e710f07514d9286b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"7aR2yOE4Bz0za1EnxRCsv\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["7aR2yOE4Bz0za1EnxRCsv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/4ccaa87c9648acfb.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@@ -18,8 +18,8 @@ import {
} from "@tremor/react";
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall } from "./networking";
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall } from "./networking";
import { BarChart } from "@tremor/react";
import { BarChart, AreaChart } from "@tremor/react";
import {
Button as Button2,
Modal,
@@ -192,7 +192,6 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const [providerModels, setProviderModels] = useState<Array<string>>([]); // Explicitly typing providerModels as a string array
const providers = Object.values(Providers).filter(key => isNaN(Number(key)));
const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
const [healthCheckResponse, setHealthCheckResponse] = useState<string>('');
@@ -200,6 +199,12 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const [selectedModel, setSelectedModel] = useState<any>(null);
const [availableModelGroups, setAvailableModelGroups] = useState<Array<string>>([]);
const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);
const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
const [modelMetricsCategories, setModelMetricsCategories] = useState<any[]>([]);
const [modelExceptions, setModelExceptions] = useState<any[]>([]);
const [allExceptions, setAllExceptions] = useState<any[]>([]);
const [failureTableData, setFailureTableData] = useState<any[]>([]);
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
const [form] = Form.useForm();
@@ -443,14 +448,71 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
}
console.log("all_model_groups:", all_model_groups)
let _array_model_groups = Array.from(all_model_groups)
// sort _array_model_groups alphabetically
_array_model_groups = _array_model_groups.sort();
setAvailableModelGroups(_array_model_groups);
// if userRole is Admin, show the pending requests
if (userRole === "Admin" && accessToken) {
const user_requests = await userGetRequesedtModelsCall(accessToken);
console.log("Pending Requests:", pendingRequests);
setPendingRequests(user_requests.requests || []);
}
const modelMetricsResponse = await modelMetricsCall(
accessToken,
userID,
userRole,
null
);
console.log("Model metrics response:", modelMetricsResponse);
// Sort by latency (avg_latency_per_token)
setModelMetrics(modelMetricsResponse.data);
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
const modelExceptionsResponse = await modelExceptionsCall(
accessToken,
userID,
userRole,
null
)
console.log("Model exceptions response:", modelExceptionsResponse);
setModelExceptions(modelExceptionsResponse.data);
setAllExceptions(modelExceptionsResponse.exception_types);
let modelMetricsData = modelMetricsResponse.data;
let successdeploymentToSuccess: Record<string, number> = {};
for (let i = 0; i < modelMetricsData.length; i++) {
let element = modelMetricsData[i];
let _model_name = element.model;
let _num_requests = element.num_requests;
successdeploymentToSuccess[_model_name] = _num_requests
}
console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
let failureTableData = [];
let _failureData = modelExceptionsResponse.data;
for (let i = 0; i < _failureData.length; i++) {
const model = _failureData[i];
let _model_name = model.model;
let total_exceptions = model.total_exceptions;
let total_Requests = successdeploymentToSuccess[_model_name];
if (total_Requests == null) {
total_Requests = 0
}
let _data = {
model: _model_name,
total_exceptions: total_exceptions,
total_Requests: total_Requests,
// failure rate = exceptions / (successes + exceptions), shown as a percentage
failure_rate: (total_exceptions / (total_Requests + total_exceptions)) * 100
}
failureTableData.push(_data);
// sort failureTableData by failure_rate
failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
setFailureTableData(failureTableData);
console.log("failureTableData:", failureTableData);
}
} catch (error) {
console.error("There was an error fetching the model data", error);
}
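For clarity on the failure column computed above: a deployment's failure percentage is meant to be exceptions over total attempts (successes plus exceptions), not the inverse ratio. As a quick sanity check in Python:

```python
successes, exceptions = 96, 4

failure_rate = exceptions / (successes + exceptions) * 100
assert failure_rate == 4.0  # 4 failures out of 100 attempts
```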
@@ -603,6 +665,77 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
};
const updateModelMetrics = async (modelGroup: string | null) => {
console.log("Updating model metrics for group:", modelGroup);
if (!accessToken || !userID || !userRole) {
return
}
setSelectedModelGroup(modelGroup); // If you want to store the selected model group in state
try {
const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
console.log("Model metrics response:", modelMetricsResponse);
// Assuming modelMetricsResponse now contains the metric data for the specified model group
setModelMetrics(modelMetricsResponse.data);
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
const modelExceptionsResponse = await modelExceptionsCall(
accessToken,
userID,
userRole,
modelGroup
)
console.log("Model exceptions response:", modelExceptionsResponse);
setModelExceptions(modelExceptionsResponse.data);
setAllExceptions(modelExceptionsResponse.exception_types);
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
}
const customTooltip = (props: any) => {
const { payload, active } = props;
if (!active || !payload) return null;
// Extract the date from the first item in the payload array
const date = payload[0]?.payload?.date;
// Sort the payload array by category.value in descending order
let sortedPayload = payload.sort((a: any, b: any) => b.value - a.value);
// Only show the top 5, the 6th one should be called "X other categories" depending on how many categories were not shown
if (sortedPayload.length > 5) {
let remainingItems = sortedPayload.length - 5;
sortedPayload = sortedPayload.slice(0, 5);
sortedPayload.push({
dataKey: `${remainingItems} other deployments`,
value: payload.slice(5).reduce((acc: number, curr: any) => acc + curr.value, 0),
color: "gray",
});
}
return (
<div className="w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown">
{date && <p className="text-tremor-content-emphasis mb-2">Date: {date}</p>}
{sortedPayload.map((category: any, idx: number) => (
<div key={idx} className="flex justify-between">
<div className="flex items-center space-x-2">
<div className={`w-2 h-2 mt-1 rounded-full bg-${category.color}-500`} />
<p className="text-tremor-content">{category.dataKey}</p>
</div>
<p className="font-medium text-tremor-content-emphasis text-righ ml-2">
{category.value.toFixed(5)}
</p>
</div>
))}
</div>
);
};
const getPlaceholder = (selectedProvider: string): string => {
if (selectedProvider === Providers.Vertex_AI) {
@@ -640,6 +773,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<Tab>All Models</Tab> <Tab>All Models</Tab>
<Tab>Add Model</Tab> <Tab>Add Model</Tab>
<Tab><pre>/health Models</pre></Tab> <Tab><pre>/health Models</pre></Tab>
<Tab>Model Analytics</Tab>
</div>
<div className="flex items-center space-x-2">
@@ -955,6 +1089,87 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
</Card>
</TabPanel>
<TabPanel>
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
<Select
className="mb-4 mt-2"
>
{availableModelGroups.map((group, idx) => (
<SelectItem
key={idx}
value={group}
onClick={() => updateModelMetrics(group)}
>
{group}
</SelectItem>
))}
</Select>
<Grid numItems={2}>
<Col>
<Card className="mr-2">
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
{ modelMetrics && modelMetricsCategories && (
<AreaChart
title="Model Latency"
className="h-72"
data={modelMetrics}
showLegend={false}
index="date"
categories={modelMetricsCategories}
connectNulls={true}
customTooltip={customTooltip}
/>
)}
</Card>
</Col>
<Col>
<Card className="ml-2">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Model</TableHeaderCell>
<TableHeaderCell>Success Requests</TableHeaderCell>
<TableHeaderCell>Error Requests</TableHeaderCell>
<TableHeaderCell>Failure %</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{failureTableData.map((metric, idx) => (
<TableRow key={idx}>
<TableCell>{metric.model}</TableCell>
<TableCell>{metric.total_Requests}</TableCell>
<TableCell>{metric.total_exceptions}</TableCell>
<TableCell>{metric.failure_rate.toFixed(2)}%</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
</Col>
</Grid>
<Card className="mt-4">
<Title>Exceptions per Model</Title>
<BarChart
className="h-72"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
colors={['indigo-300', 'rose-200', '#ffcc33']}
yAxisWidth={30}
/>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>

View file

@@ -474,6 +474,43 @@ export const modelMetricsCall = async (
}
};
export const modelExceptionsCall = async (
  accessToken: String,
  userID: String,
  userRole: String,
  modelGroup: String | null,
) => {
  /**
   * Get exception metrics per model from the proxy, optionally filtered by model group
   */
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/exceptions` : `/model/metrics/exceptions`;
    if (modelGroup) {
      // pass the selected model group as a query param (param name assumed to mirror modelMetricsCall)
      url = `${url}?_selected_model_group=${modelGroup}`;
    }
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 20);
      throw new Error("Network response was not ok");
    }
    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch model exceptions:", error);
    throw error;
  }
};
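For context, a minimal usage sketch of the call above — the model group name is illustrative; the `data` and `exception_types` fields match how the dashboard consumes the response:

```typescript
// Fetch exception metrics for one model group and feed the dashboard state.
const exceptionsResponse = await modelExceptionsCall(accessToken, userID, userRole, "azure-gpt-3.5");
setModelExceptions(exceptionsResponse.data);           // per-model exception counts
setAllExceptions(exceptionsResponse.exception_types);  // exception types charted per model
```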
export const modelAvailableCall = async (
accessToken: String,
userID: String,

View file

@@ -146,10 +146,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
const [topTagsData, setTopTagsData] = useState<any[]>([]);
const [uniqueTeamIds, setUniqueTeamIds] = useState<any[]>([]);
const [totalSpendPerTeam, setTotalSpendPerTeam] = useState<any[]>([]);
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
const [modelGroups, setModelGroups] = useState<any[]>([]);
const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);
const firstDay = new Date(
currentDate.getFullYear(),
@@ -231,25 +227,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
const top_tags = await tagsSpendLogsCall(accessToken);
setTopTagsData(top_tags.top_10_tags);
// get model groups
const _model_groups = await modelInfoCall(accessToken, userID, userRole);
let model_groups = _model_groups.data;
console.log("model groups in model dashboard", model_groups);
let available_model_groups = [];
// loop through each model in model_group, access litellm_params and only include the model if model["litellm_params"]["model"] starts with "azure/"
for (let i = 0; i < model_groups.length; i++) {
let model = model_groups[i];
console.log("model check", model);
let model_group = model["litellm_params"]["model"];
console.log("model group", model_group);
if (model_group.startsWith("azure/")) {
available_model_groups.push(model["model_name"]);
}
}
setModelGroups(available_model_groups);
} else if (userRole == "App Owner") {
await userSpendLogsCall(
accessToken,
@@ -286,22 +263,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
}
});
}
const modelMetricsResponse = await modelMetricsCall(
accessToken,
userID,
userRole,
null
);
console.log("Model metrics response:", modelMetricsResponse);
// Sort by latency (avg_latency_seconds)
const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
console.log("Sorted by latency:", sortedByLatency);
setModelMetrics(modelMetricsResponse);
setModelLatencyMetrics(sortedByLatency);
} catch (error) {
console.error("There was an error fetching the data", error);
// Optionally, update your UI to reflect the error state here as well
@@ -312,30 +273,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
}, [accessToken, token, userRole, userID, startTime, endTime]);
const updateModelMetrics = async (modelGroup: string | null) => {
console.log("Updating model metrics for group:", modelGroup);
if (!accessToken || !userID || !userRole) {
return
}
setSelectedModelGroup(modelGroup); // If you want to store the selected model group in state
try {
const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
console.log("Model metrics response:", modelMetricsResponse);
// Assuming modelMetricsResponse now contains the metric data for the specified model group
const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
console.log("Sorted by latency:", sortedByLatency);
setModelMetrics(modelMetricsResponse);
setModelLatencyMetrics(sortedByLatency);
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
}
return (
<div style={{ width: "100%" }} className="p-8">
<ViewUserSpend
@@ -350,7 +287,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Tab>All Up</Tab>
<Tab>Team Based Usage</Tab>
<Tab>Tag Based Usage</Tab>
<Tab>Model Based Usage</Tab>
</TabList>
<TabPanels>
<TabPanel>
@@ -492,60 +428,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
</Grid>
</TabPanel>
<TabPanel>
<Title>Filter By Model Group</Title>
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
<p style={{fontSize: '0.85rem', color: '#808080', fontStyle: 'italic'}}>(Beta feature) only supported for Azure Model Groups</p>
<Select
className="mb-4 mt-2"
defaultValue="all"
>
<SelectItem
value={"all"}
onClick={() => updateModelMetrics(null)}
>
All Model Groups
</SelectItem>
{modelGroups.map((group, idx) => (
<SelectItem
key={idx}
value={group}
onClick={() => updateModelMetrics(group)}
>
{group}
</SelectItem>
))}
</Select>
<Card>
<Title>Number Requests per Model</Title>
<BarChart
data={modelMetrics}
className="h-[50vh]"
index="model"
categories={["num_requests"]}
colors={["blue"]}
yAxisWidth={400}
layout="vertical"
tickGap={5}
/>
</Card>
<Card className="mt-4">
<Title>Latency Per Model</Title>
<BarChart
data={modelLatencyMetrics}
className="h-[50vh]"
index="model"
categories={["avg_latency_seconds"]}
colors={["red"]}
yAxisWidth={400}
layout="vertical"
tickGap={5}
/>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>
</div>