Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)

Merge branch 'BerriAI:main' into ollama-image-handling

Commit 465f491e7f: 52 changed files with 1148 additions and 426 deletions
@@ -11,40 +11,37 @@ You can find the Dockerfile to build litellm proxy [here](https://github.com/Ber
 <TabItem value="basic" label="Basic">

-**Step 1. Create a file called `litellm_config.yaml`**
+### Step 1. CREATE config.yaml

-Example `litellm_config.yaml` (the `os.environ/` prefix means litellm will read `AZURE_API_BASE` from the env)
-```yaml
-model_list:
-  - model_name: azure-gpt-3.5
-    litellm_params:
-      model: azure/<your-azure-model-deployment>
-      api_base: os.environ/AZURE_API_BASE
-      api_key: os.environ/AZURE_API_KEY
-      api_version: "2023-07-01-preview"
-```
+Example `litellm_config.yaml`

-**Step 2. Run litellm docker image**
+```yaml
+model_list:
+  - model_name: azure-gpt-3.5
+    litellm_params:
+      model: azure/<your-azure-model-deployment>
+      api_base: os.environ/AZURE_API_BASE # runs os.getenv("AZURE_API_BASE")
+      api_key: os.environ/AZURE_API_KEY # runs os.getenv("AZURE_API_KEY")
+      api_version: "2023-07-01-preview"
+```

-See the latest available ghcr docker image here:
-https://github.com/berriai/litellm/pkgs/container/litellm
+### Step 2. RUN Docker Image

-Your litellm config.yaml should be called `litellm_config.yaml` in the directory you run this command.
-The `-v` command will mount that file
-Pass `AZURE_API_KEY` and `AZURE_API_BASE` since we set them in step 1
+Get Latest Image 👉 [here](https://github.com/berriai/litellm/pkgs/container/litellm)

 ```shell
 docker run \
     -v $(pwd)/litellm_config.yaml:/app/config.yaml \
     -e AZURE_API_KEY=d6*********** \
     -e AZURE_API_BASE=https://openai-***********/ \
     -p 4000:4000 \
     ghcr.io/berriai/litellm:main-latest \
     --config /app/config.yaml --detailed_debug
 ```

-**Step 3. Send a Test Request**
+### Step 3. TEST Request

+Pass `model=azure-gpt-3.5` - this was set in Step 1
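For reference, a minimal way to exercise the TEST step above with the OpenAI Python SDK. The port matches the `docker run` command; the API key and prompt are illustrative assumptions (use whatever key setup your proxy has):

```python
import openai

# Points at the proxy started in Step 2; "sk-1234" is a placeholder key.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="azure-gpt-3.5",  # the model_name set in litellm_config.yaml (Step 1)
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```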
@@ -278,6 +278,36 @@ router_settings:
   routing_strategy_args: {"ttl": 10}
 ```

+### Set Lowest Latency Buffer
+
+Set a buffer within which deployments remain candidates for receiving calls.
+
+E.g., if you have 5 deployments
+
+```
+https://litellm-prod-1.openai.azure.com/: 0.07s
+https://litellm-prod-2.openai.azure.com/: 0.1s
+https://litellm-prod-3.openai.azure.com/: 0.1s
+https://litellm-prod-4.openai.azure.com/: 0.1s
+https://litellm-prod-5.openai.azure.com/: 4.66s
+```
+
+To prevent initially overloading `prod-1` with all requests, we can set a buffer of 50%, so deployments `prod-2`, `prod-3`, and `prod-4` are also considered.
+
+**In Router**
+
+```python
+router = Router(..., routing_strategy_args={"lowest_latency_buffer": 0.5})
+```
+
+**In Proxy**
+
+```yaml
+router_settings:
+    routing_strategy_args: {"lowest_latency_buffer": 0.5}
+```
+
 </TabItem>
 <TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">
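The buffer rule is easy to picture with a short sketch. This mirrors the docs' example, not litellm's actual implementation; the assumption is that a deployment stays a candidate when its latency is within `fastest * (1 + lowest_latency_buffer)`:

```python
latencies = {
    "prod-1": 0.07, "prod-2": 0.1, "prod-3": 0.1, "prod-4": 0.1, "prod-5": 4.66,
}
buffer = 0.5  # lowest_latency_buffer
fastest = min(latencies.values())
# prod-1 through prod-4 fall inside 0.07 * 1.5 = 0.105s; prod-5 does not.
candidates = [d for d, lat in latencies.items() if lat <= fastest * (1 + buffer)]
print(candidates)  # ['prod-1', 'prod-2', 'prod-3', 'prod-4']
```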
@@ -79,7 +79,7 @@ class LangFuseLogger:
         print_verbose,
         level="DEFAULT",
         status_message=None,
-    ):
+    ) -> dict:
         # Method definition

         try:
@@ -111,6 +111,7 @@ class LangFuseLogger:
                 pass

             # end of processing langfuse ########################
+            print(f"response obj type: {type(response_obj)}")
             if (
                 level == "ERROR"
                 and status_message is not None
@@ -140,8 +141,11 @@ class LangFuseLogger:
                 input = prompt
                 output = response_obj["data"]
             print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+            trace_id = None
+            generation_id = None
             if self._is_langfuse_v2():
-                self._log_langfuse_v2(
+                print("INSIDE V2 LANGFUSE")
+                trace_id, generation_id = self._log_langfuse_v2(
                     user_id,
                     metadata,
                     litellm_params,
@@ -171,10 +175,12 @@ class LangFuseLogger:
                     f"Langfuse Layer Logging - final response object: {response_obj}"
                 )
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
+
+            return {"trace_id": trace_id, "generation_id": generation_id}
         except:
-            traceback.print_exc()
-            pass
+            verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
+            return {"trace_id": None, "generation_id": None}

     async def _async_log_event(
         self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
@@ -246,7 +252,7 @@ class LangFuseLogger:
         response_obj,
         level,
         print_verbose,
-    ):
+    ) -> tuple:
         import langfuse

         try:
@@ -272,18 +278,21 @@ class LangFuseLogger:
             ## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces.
             trace_name = f"litellm-{kwargs.get('call_type', 'completion')}"

-        trace_params = {
-            "name": trace_name,
-            "input": input,
-            "user_id": metadata.get("trace_user_id", user_id),
-            "id": trace_id or existing_trace_id,
-            "session_id": metadata.get("session_id", None),
-        }
+        if existing_trace_id is not None:
+            trace_params = {"id": existing_trace_id}
+        else:  # don't overwrite an existing trace
+            trace_params = {
+                "name": trace_name,
+                "input": input,
+                "user_id": metadata.get("trace_user_id", user_id),
+                "id": trace_id,
+                "session_id": metadata.get("session_id", None),
+            }

-        if level == "ERROR":
-            trace_params["status_message"] = output
-        else:
-            trace_params["output"] = output
+            if level == "ERROR":
+                trace_params["status_message"] = output
+            else:
+                trace_params["output"] = output

         cost = kwargs.get("response_cost", None)
         print_verbose(f"trace: {cost}")
@@ -341,7 +350,8 @@ class LangFuseLogger:
                 kwargs["cache_hit"] = False
             tags.append(f"cache_hit:{kwargs['cache_hit']}")
             clean_metadata["cache_hit"] = kwargs["cache_hit"]
-        trace_params.update({"tags": tags})
+        if existing_trace_id is None:
+            trace_params.update({"tags": tags})

         proxy_server_request = litellm_params.get("proxy_server_request", None)
         if proxy_server_request:
@@ -363,6 +373,7 @@ class LangFuseLogger:

         print_verbose(f"trace_params: {trace_params}")

+        print(f"trace_params: {trace_params}")
         trace = self.Langfuse.trace(**trace_params)

         generation_id = None
@@ -414,6 +425,10 @@ class LangFuseLogger:

             print_verbose(f"generation_params: {generation_params}")

-            trace.generation(**generation_params)
+            generation_client = trace.generation(**generation_params)
+
+            print(f"LANGFUSE TRACE ID - {generation_client.trace_id}")
+            return generation_client.trace_id, generation_id
         except Exception as e:
             verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
+            return None, None
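Taken together, these Langfuse changes mean `log_event` now surfaces the trace id it created, and passing `existing_trace_id` through metadata appends to a trace instead of overwriting it. A hedged sketch of the caller side (assumes the Langfuse success callback is configured, and `trace-abc123` is a hypothetical id of a trace you already own):

```python
import litellm

litellm.success_callback = ["langfuse"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={"existing_trace_id": "trace-abc123"},  # hypothetical existing trace id
)
```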
@@ -238,12 +238,13 @@ def get_ollama_response(
     ## RESPONSE OBJECT
     model_response["choices"][0]["finish_reason"] = "stop"
     if optional_params.get("format", "") == "json":
+        function_call = json.loads(response_json["response"])
         message = litellm.Message(
             content=None,
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"arguments": response_json["response"], "name": ""},
+                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                     "type": "function",
                 }
             ],
@@ -335,15 +336,13 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             ## RESPONSE OBJECT
             model_response["choices"][0]["finish_reason"] = "stop"
             if data.get("format", "") == "json":
+                function_call = json.loads(response_json["response"])
                 message = litellm.Message(
                     content=None,
                     tool_calls=[
                         {
                             "id": f"call_{str(uuid.uuid4())}",
-                            "function": {
-                                "arguments": response_json["response"],
-                                "name": "",
-                            },
+                            "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                             "type": "function",
                         }
                     ],
@@ -285,15 +285,13 @@ def get_ollama_response(
     ## RESPONSE OBJECT
     model_response["choices"][0]["finish_reason"] = "stop"
     if data.get("format", "") == "json":
+        function_call = json.loads(response_json["message"]["content"])
         message = litellm.Message(
             content=None,
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {
-                        "arguments": response_json["message"]["content"],
-                        "name": "",
-                    },
+                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                     "type": "function",
                 }
             ],
@@ -415,15 +413,13 @@ async def ollama_acompletion(
             ## RESPONSE OBJECT
             model_response["choices"][0]["finish_reason"] = "stop"
             if data.get("format", "") == "json":
+                function_call = json.loads(response_json["message"]["content"])
                 message = litellm.Message(
                     content=None,
                     tool_calls=[
                         {
                             "id": f"call_{str(uuid.uuid4())}",
-                            "function": {
-                                "arguments": response_json["message"]["content"],
-                                "name": function_name or "",
-                            },
+                            "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
                             "type": "function",
                         }
                     ],
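All four Ollama hunks make the same fix: instead of dumping the raw JSON string into `arguments` with an empty `name`, the response is parsed and split into OpenAI-style fields. A standalone sketch of that transformation (the weather payload is an invented example):

```python
import json
import uuid

# What Ollama returns in JSON mode: the whole function call as one JSON string.
raw = '{"name": "get_current_weather", "arguments": {"city": "Boston"}}'

function_call = json.loads(raw)
tool_call = {
    "id": f"call_{str(uuid.uuid4())}",
    "function": {
        "name": function_call["name"],                        # was always "" before
        "arguments": json.dumps(function_call["arguments"]),  # was the raw blob before
    },
    "type": "function",
}
```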
@@ -360,7 +360,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
-    mock_response: str = "This is a mock request",
+    mock_response: Union[str, Exception] = "This is a mock request",
     logging=None,
     **kwargs,
 ):
@@ -387,6 +387,20 @@ def mock_completion(
     - If 'stream' is True, it returns a response that mimics the behavior of a streaming completion.
     """
     try:
+        ## LOGGING
+        if logging is not None:
+            logging.pre_call(
+                input=messages,
+                api_key="mock-key",
+            )
+        if isinstance(mock_response, Exception):
+            raise litellm.APIError(
+                status_code=500,  # type: ignore
+                message=str(mock_response),
+                llm_provider="openai",  # type: ignore
+                model=model,  # type: ignore
+                request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
+            )
         model_response = ModelResponse(stream=stream)
         if stream is True:
             # don't try to access stream object,
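With `mock_response: Union[str, Exception]`, tests can force the failure path without a network call. A sketch of how that might look; this assumes `mock_response` is forwarded from `litellm.completion` down to `mock_completion`, as the public API suggests:

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=Exception("simulated provider outage"),
    )
except litellm.APIError as e:
    print(f"caught mocked failure: {e}")
```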
(5 file diffs suppressed because one or more lines are too long)
@@ -1 +1 @@
(minified Next.js webpack runtime chunk, elided; the only substantive change is the referenced CSS asset: static/css/5e699db73bf6f8c2.css → static/css/4ccaa87c9648acfb.css)

(2 more file diffs suppressed because one or more lines are too long)
@@ -1 +1 @@
(prerendered LiteLLM Dashboard HTML shell, elided; only build-artifact references changed: webpack chunk webpack-ccae12a25017afa5.js → webpack-4acf5608f06a35df.js, CSS static/css/5e699db73bf6f8c2.css → static/css/4ccaa87c9648acfb.css, buildId kbGdRQFfI6W3bEwfzmJDI → 7aR2yOE4Bz0za1EnxRCsv, page chunk page-508c39694bd40fe9.js → page-e710f07514d9286b.js)
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
+3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:[... RSC flight payload for the dashboard 404 shell, elided; references buildId kbGdRQFfI6W3bEwfzmJDI and static/css/5e699db73bf6f8c2.css ...]
+0:[... RSC flight payload for the dashboard 404 shell, elided; references buildId 7aR2yOE4Bz0za1EnxRCsv and static/css/4ccaa87c9648acfb.css ...]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
@@ -916,6 +916,7 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
     request_id: Optional[str] = str(uuid.uuid4())
     api_base: Optional[str] = ""
+    model_group: Optional[str] = ""
     litellm_model_name: Optional[str] = ""
     model_id: Optional[str] = ""
     request_kwargs: Optional[dict] = {}
     exception_type: Optional[str] = ""
@@ -1258,6 +1258,7 @@ async def _PROXY_failure_handler(
                 request_id=str(uuid.uuid4()),
+                model_group=_model_group,
                 model_id=_model_id,
                 litellm_model_name=kwargs.get("model"),
                 request_kwargs=_optional_params,
                 api_base=api_base,
                 exception_type=_exception_type,
@@ -7523,9 +7524,9 @@
 )
 async def model_metrics(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-    _selected_model_group: Optional[str] = None,
-    startTime: Optional[datetime] = datetime.now() - timedelta(days=30),
-    endTime: Optional[datetime] = datetime.now(),
+    _selected_model_group: Optional[str] = "gpt-4-32k",
+    startTime: Optional[datetime] = None,
+    endTime: Optional[datetime] = None,
 ):
     global prisma_client, llm_router
     if prisma_client is None:
@@ -7535,65 +7536,153 @@ async def model_metrics(
             param="None",
             code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         )
-    if _selected_model_group and llm_router is not None:
-        _model_list = llm_router.get_model_list()
-        _relevant_api_bases = []
-        for model in _model_list:
-            if model["model_name"] == _selected_model_group:
-                _litellm_params = model["litellm_params"]
-                _api_base = _litellm_params.get("api_base", "")
-                _relevant_api_bases.append(_api_base)
-                _relevant_api_bases.append(_api_base + "/openai/")
-
-    sql_query = """
-        SELECT
-            CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
-            COUNT(*) AS num_requests,
-            AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
-        FROM "LiteLLM_SpendLogs"
-        WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
-        AND api_base = ANY($3)
-        GROUP BY CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
-        ORDER BY num_requests DESC
-        LIMIT 50;
-    """
-
-    db_response = await prisma_client.db.query_raw(
-        sql_query, startTime, endTime, _relevant_api_bases
-    )
-    response: List[dict] = []
-    if response is not None:
-        # loop through all models
-        for model_data in db_response:
-            model = model_data.get("combined_model_api_base", "")
-            num_requests = model_data.get("num_requests", 0)
-            avg_latency_seconds = model_data.get("avg_latency_seconds", 0)
-            response.append(
-                {
-                    "model": model,
-                    "num_requests": num_requests,
-                    "avg_latency_seconds": avg_latency_seconds,
-                }
-            )
-    return response
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    sql_query = """
+        SELECT
+            api_base,
+            model,
+            DATE_TRUNC('day', "startTime")::DATE AS day,
+            AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) / SUM(total_tokens) AS avg_latency_per_token
+        FROM
+            "LiteLLM_SpendLogs"
+        WHERE
+            "startTime" >= NOW() - INTERVAL '30 days'
+            AND "model" = $1
+        GROUP BY
+            api_base,
+            model,
+            day
+        HAVING
+            SUM(total_tokens) > 0
+        ORDER BY
+            avg_latency_per_token DESC;
+    """
+    _all_api_bases = set()
+    db_response = await prisma_client.db.query_raw(
+        sql_query, _selected_model_group, startTime, endTime
+    )
+    _daily_entries: dict = {}  # {"Jun 23": {"model1": 0.002, "model2": 0.003}}
+    if db_response is not None:
+        for model_data in db_response:
+            _api_base = model_data["api_base"]
+            _model = model_data["model"]
+            _day = model_data["day"]
+            _avg_latency_per_token = model_data["avg_latency_per_token"]
+            if _day not in _daily_entries:
+                _daily_entries[_day] = {}
+            _combined_model_name = str(_model)
+            if "https://" in _api_base:
+                _combined_model_name = str(_api_base)
+            if "/openai/" in _combined_model_name:
+                _combined_model_name = _combined_model_name.split("/openai/")[0]
+
+            _all_api_bases.add(_combined_model_name)
+            _daily_entries[_day][_combined_model_name] = _avg_latency_per_token
+
+    """
+    each entry needs to be like this:
+    {
+        date: 'Jun 23',
+        'gpt-4-https://api.openai.com/v1/': 0.002,
+        'gpt-43-https://api.openai.com-12/v1/': 0.002,
+    }
+    """
+    # convert daily entries to list of dicts
+
+    response: List[dict] = []
+
+    # sort daily entries by date
+    _daily_entries = dict(sorted(_daily_entries.items(), key=lambda item: item[0]))
+    for day in _daily_entries:
+        entry = {"date": str(day)}
+        for model_key, latency in _daily_entries[day].items():
+            entry[model_key] = round(latency, 8)
+        response.append(entry)
+
+    return {
+        "data": response,
+        "all_api_bases": list(_all_api_bases),
+    }


 @router.get(
     "/model/metrics/exceptions",
     description="View number of failed requests per model on config.yaml",
     tags=["model management"],
     include_in_schema=False,
     dependencies=[Depends(user_api_key_auth)],
 )
 async def model_metrics_exceptions(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
     _selected_model_group: Optional[str] = None,
     startTime: Optional[datetime] = None,
     endTime: Optional[datetime] = None,
 ):
     global prisma_client, llm_router
     if prisma_client is None:
         raise ProxyException(
             message="Prisma Client is not initialized",
             type="internal_error",
             param="None",
             code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         )
-    else:
-        sql_query = """
-            SELECT
-                CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
-                COUNT(*) AS num_requests,
-                AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
-            FROM
-                "LiteLLM_SpendLogs"
-            GROUP BY
-                CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
-            ORDER BY
-                num_requests DESC
-            LIMIT 50;
-        """
-
-        db_response = await prisma_client.db.query_raw(sql_query, startTime, endTime)
+
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    """
+    """
+    sql_query = """
+        WITH cte AS (
+            SELECT
+                CASE WHEN api_base = '' THEN litellm_model_name ELSE CONCAT(litellm_model_name, '-', api_base) END AS combined_model_api_base,
+                exception_type,
+                COUNT(*) AS num_exceptions
+            FROM "LiteLLM_ErrorLogs"
+            WHERE "startTime" >= $1::timestamp AND "endTime" <= $2::timestamp
+            GROUP BY combined_model_api_base, exception_type
+        )
+        SELECT
+            combined_model_api_base,
+            COUNT(*) AS total_exceptions,
+            json_object_agg(exception_type, num_exceptions) AS exception_counts
+        FROM cte
+        GROUP BY combined_model_api_base
+        ORDER BY total_exceptions DESC
+        LIMIT 200;
+    """
+    db_response = await prisma_client.db.query_raw(sql_query, startTime, endTime)
+    response: List[dict] = []
+    exception_types = set()
+
+    """
+    Return Data
+    {
+        "combined_model_api_base": "gpt-3.5-turbo-https://api.openai.com/v1/",
+        "total_exceptions": 5,
+        "BadRequestException": 5,
+        "TimeoutException": 2
+    }
+    """
+
+    if db_response is not None:
+        # loop through all models
+        for model_data in db_response:
+            model = model_data.get("combined_model_api_base", "")
+            total_exceptions = model_data.get("total_exceptions", 0)
+            exception_counts = model_data.get("exception_counts", {})
+            curr_row = {
+                "model": model,
+                "total_exceptions": total_exceptions,
+            }
+            curr_row.update(exception_counts)
+            response.append(curr_row)
+            for k, v in exception_counts.items():
+                exception_types.add(k)
+
+    return {"data": response, "exception_types": list(exception_types)}


 @router.get(
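The reshaping step in the new `model_metrics` is easiest to see in isolation: `{day: {series: latency}}` rows become the chart-ready list the inline docstring describes. A self-contained sketch with invented numbers:

```python
_daily_entries = {
    "2024-06-22": {"https://litellm-prod-1.openai.azure.com": 0.003},
    "2024-06-23": {"https://litellm-prod-1.openai.azure.com": 0.002},
}

response = []
for day in dict(sorted(_daily_entries.items(), key=lambda item: item[0])):
    entry = {"date": str(day)}
    for model_key, latency in _daily_entries[day].items():
        entry[model_key] = round(latency, 8)
    response.append(entry)

print(response)
# [{'date': '2024-06-22', 'https://litellm-prod-1.openai.azure.com': 0.003},
#  {'date': '2024-06-23', 'https://litellm-prod-1.openai.azure.com': 0.002}]
```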
@@ -183,6 +183,21 @@ model LiteLLM_SpendLogs {
   end_user String?
 }

+// View spend, model, api_key per request
+model LiteLLM_ErrorLogs {
+  request_id         String   @id @default(uuid())
+  startTime          DateTime // Assuming start_time is a DateTime field
+  endTime            DateTime // Assuming end_time is a DateTime field
+  api_base           String   @default("")
+  model_group        String   @default("") // public model_name / model_group
+  litellm_model_name String   @default("") // model passed to litellm
+  model_id           String   @default("") // ID of model in ProxyModelTable
+  request_kwargs     Json     @default("{}")
+  exception_type     String   @default("")
+  exception_string   String   @default("")
+  status_code        String   @default("")
+}
+
 // Beta - allow team members to request access to a model
 model LiteLLM_UserNotifications {
   request_id String @id
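For context, `_PROXY_failure_handler` (see the hunk above) is what populates this table. A hedged sketch of writing one row with prisma-client-py; the accessor name `litellm_errorlogs` follows Prisma's model-name lowercasing convention and is an assumption here, as are all field values:

```python
import json
import uuid
from datetime import datetime

async def log_error(prisma_client):
    # Hypothetical helper mirroring the shape _PROXY_failure_handler inserts.
    error_log = {
        "request_id": str(uuid.uuid4()),
        "startTime": datetime.now(),
        "endTime": datetime.now(),
        "api_base": "https://openai-prod.example.com",  # illustrative value
        "model_group": "azure-gpt-3.5",                 # public model_name
        "litellm_model_name": "azure/my-deployment",    # model passed to litellm
        "model_id": "1234",
        "request_kwargs": json.dumps({"temperature": 0.1}),
        "exception_type": "RateLimitError",
        "exception_string": "429: rate limited",
        "status_code": "429",
    }
    # Accessor name assumed from prisma-client-py's generated client.
    await prisma_client.db.litellm_errorlogs.create(data=error_log)
```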
@@ -2049,6 +2049,11 @@ async def update_spend(
                 raise e

     ### UPDATE KEY TABLE ###
+    verbose_proxy_logger.debug(
+        "KEY Spend transactions: {}".format(
+            len(prisma_client.key_list_transactons.keys())
+        )
+    )
     if len(prisma_client.key_list_transactons.keys()) > 0:
         for i in range(n_retry_times + 1):
             start_time = time.time()
@@ -290,6 +290,21 @@ class Router:
             }
         """
+        ### ROUTING SETUP ###
+        self.routing_strategy_init(
+            routing_strategy=routing_strategy,
+            routing_strategy_args=routing_strategy_args,
+        )
+        ## COOLDOWNS ##
+        if isinstance(litellm.failure_callback, list):
+            litellm.failure_callback.append(self.deployment_callback_on_failure)
+        else:
+            litellm.failure_callback = [self.deployment_callback_on_failure]
+        print(  # noqa
+            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
+        )  # noqa
+        self.routing_strategy_args = routing_strategy_args

     def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
             self.leastbusy_logger = LeastBusyLoggingHandler(
                 router_cache=self.cache, model_list=self.model_list
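Moving the cooldown wiring into `__init__` alongside `routing_strategy_init` means a single construction path configures routing. A sketch of the constructor call these hunks support; the deployment entry and key are illustrative placeholders:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "azure-gpt-3.5",  # illustrative deployment
            "litellm_params": {
                "model": "azure/my-deployment",
                "api_base": "https://openai-prod.example.com",
                "api_key": "sk-placeholder",
            },
        }
    ],
    routing_strategy="latency-based-routing",
    routing_strategy_args={"ttl": 10, "lowest_latency_buffer": 0.5},
)
```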
@@ -321,15 +336,6 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
-        ## COOLDOWNS ##
-        if isinstance(litellm.failure_callback, list):
-            litellm.failure_callback.append(self.deployment_callback_on_failure)
-        else:
-            litellm.failure_callback = [self.deployment_callback_on_failure]
-        print(  # noqa
-            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
-        )  # noqa
-        self.routing_strategy_args = routing_strategy_args

     def print_deployment(self, deployment: dict):
         """
@@ -1450,40 +1456,47 @@ class Router:
                 raise original_exception
-            ### RETRY
-            #### check if it should retry + back-off if required
-            if "No models available" in str(
-                e
-            ) or RouterErrors.no_deployments_available.value in str(e):
-                timeout = litellm._calculate_retry_after(
-                    remaining_retries=num_retries,
-                    max_retries=num_retries,
-                    min_timeout=self.retry_after,
-                )
-                await asyncio.sleep(timeout)
-            elif RouterErrors.user_defined_ratelimit_error.value in str(e):
-                raise e  # don't wait to retry if deployment hits user-defined rate-limit
+            # if "No models available" in str(
+            #     e
+            # ) or RouterErrors.no_deployments_available.value in str(e):
+            #     timeout = litellm._calculate_retry_after(
+            #         remaining_retries=num_retries,
+            #         max_retries=num_retries,
+            #         min_timeout=self.retry_after,
+            #     )
+            #     await asyncio.sleep(timeout)
+            # elif RouterErrors.user_defined_ratelimit_error.value in str(e):
+            #     raise e  # don't wait to retry if deployment hits user-defined rate-limit

-            elif hasattr(original_exception, "status_code") and litellm._should_retry(
-                status_code=original_exception.status_code
-            ):
-                if hasattr(original_exception, "response") and hasattr(
-                    original_exception.response, "headers"
-                ):
-                    timeout = litellm._calculate_retry_after(
-                        remaining_retries=num_retries,
-                        max_retries=num_retries,
-                        response_headers=original_exception.response.headers,
-                        min_timeout=self.retry_after,
-                    )
-                else:
-                    timeout = litellm._calculate_retry_after(
-                        remaining_retries=num_retries,
-                        max_retries=num_retries,
-                        min_timeout=self.retry_after,
-                    )
-                await asyncio.sleep(timeout)
-            else:
-                raise original_exception
+            # elif hasattr(original_exception, "status_code") and litellm._should_retry(
+            #     status_code=original_exception.status_code
+            # ):
+            #     if hasattr(original_exception, "response") and hasattr(
+            #         original_exception.response, "headers"
+            #     ):
+            #         timeout = litellm._calculate_retry_after(
+            #             remaining_retries=num_retries,
+            #             max_retries=num_retries,
+            #             response_headers=original_exception.response.headers,
+            #             min_timeout=self.retry_after,
+            #         )
+            #     else:
+            #         timeout = litellm._calculate_retry_after(
+            #             remaining_retries=num_retries,
+            #             max_retries=num_retries,
+            #             min_timeout=self.retry_after,
+            #         )
+            #     await asyncio.sleep(timeout)
+            # else:
+            #     raise original_exception
+
+            ### RETRY
+            _timeout = self._router_should_retry(
+                e=original_exception,
+                remaining_retries=num_retries,
+                num_retries=num_retries,
+            )
+            await asyncio.sleep(_timeout)
             ## LOGGING
             if num_retries > 0:
                 kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
@@ -1505,34 +1518,12 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    if "No models available" in str(e):
-                        timeout = litellm._calculate_retry_after(
-                            remaining_retries=remaining_retries,
-                            max_retries=num_retries,
-                            min_timeout=self.retry_after,
-                        )
-                        await asyncio.sleep(timeout)
-                    elif (
-                        hasattr(e, "status_code")
-                        and hasattr(e, "response")
-                        and litellm._should_retry(status_code=e.status_code)
-                    ):
-                        if hasattr(e.response, "headers"):
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                response_headers=e.response.headers,
-                                min_timeout=self.retry_after,
-                            )
-                        else:
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                min_timeout=self.retry_after,
-                            )
-                        await asyncio.sleep(timeout)
-                    else:
-                        raise e
+                    _timeout = self._router_should_retry(
+                        e=original_exception,
+                        remaining_retries=remaining_retries,
+                        num_retries=num_retries,
+                    )
+                    await asyncio.sleep(_timeout)
             raise original_exception

     def function_with_fallbacks(self, *args, **kwargs):
@@ -1625,7 +1616,7 @@ class Router:

     def _router_should_retry(
         self, e: Exception, remaining_retries: int, num_retries: int
-    ):
+    ) -> Union[int, float]:
         """
         Calculate back-off, then retry
         """
@@ -1636,14 +1627,13 @@ class Router:
                 response_headers=e.response.headers,
                 min_timeout=self.retry_after,
             )
-            time.sleep(timeout)
         else:
             timeout = litellm._calculate_retry_after(
                 remaining_retries=remaining_retries,
                 max_retries=num_retries,
                 min_timeout=self.retry_after,
             )
-            time.sleep(timeout)
+        return timeout

     def function_with_retries(self, *args, **kwargs):
         """
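`_router_should_retry` now only computes and returns the back-off; each caller decides whether to `time.sleep` or `await asyncio.sleep`. The delay comes from `litellm._calculate_retry_after`, which is roughly a capped, jittered exponential; a sketch under that assumption (the real function also honors `Retry-After` response headers):

```python
import random

def calculate_retry_after(
    remaining_retries: int, max_retries: int, min_timeout: float = 0.0
) -> float:
    nb_retries = max_retries - remaining_retries
    sleep_seconds = min(0.5 * (2 ** nb_retries), 8.0)  # capped exponential growth
    jitter = 1 - 0.25 * random.random()                # spread out retry storms
    return max(sleep_seconds * jitter, min_timeout)
```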
@@ -1658,6 +1648,7 @@ class Router:
         context_window_fallbacks = kwargs.pop(
             "context_window_fallbacks", self.context_window_fallbacks
         )
+
         try:
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = original_function(*args, **kwargs)
@@ -1677,11 +1668,12 @@ class Router:
             if num_retries > 0:
                 kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
             ### RETRY
-            self._router_should_retry(
+            _timeout = self._router_should_retry(
                 e=original_exception,
                 remaining_retries=num_retries,
                 num_retries=num_retries,
             )
+            time.sleep(_timeout)
             for current_attempt in range(num_retries):
                 verbose_router_logger.debug(
                     f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@@ -1695,11 +1687,12 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    self._router_should_retry(
+                    _timeout = self._router_should_retry(
                         e=e,
                         remaining_retries=remaining_retries,
                         num_retries=num_retries,
                     )
+                    time.sleep(_timeout)
             raise original_exception

     ### HELPER FUNCTIONS
@@ -1733,10 +1726,11 @@ class Router:
             )  # i.e. azure
             metadata = kwargs.get("litellm_params", {}).get("metadata", None)
             _model_info = kwargs.get("litellm_params", {}).get("model_info", {})
+
             if isinstance(_model_info, dict):
                 deployment_id = _model_info.get("id", None)
                 self._set_cooldown_deployments(
-                    deployment_id
+                    exception_status=exception_status, deployment=deployment_id
                 )  # setting deployment_id in cooldown deployments
             if custom_llm_provider:
                 model_name = f"{custom_llm_provider}/{model_name}"
@@ -1796,9 +1790,15 @@ class Router:
             key=rpm_key, value=request_count, local_only=True
         )  # don't change existing ttl

-    def _set_cooldown_deployments(self, deployment: Optional[str] = None):
+    def _set_cooldown_deployments(
+        self, exception_status: Union[str, int], deployment: Optional[str] = None
+    ):
         """
         Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute
+
+        or
+
+        the exception is not one that should be immediately retried (e.g. 401)
         """
         if deployment is None:
             return
@@ -1815,7 +1815,20 @@ class Router:
             f"Attempting to add {deployment} to cooldown list. updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}"
         )
         cooldown_time = self.cooldown_time or 1
-        if updated_fails > self.allowed_fails:
+
+        if isinstance(exception_status, str):
+            try:
+                exception_status = int(exception_status)
+            except Exception as e:
+                verbose_router_logger.debug(
+                    "Unable to cast exception status to int {}. Defaulting to status=500.".format(
+                        exception_status
+                    )
+                )
+                exception_status = 500
+        _should_retry = litellm._should_retry(status_code=exception_status)
+
+        if updated_fails > self.allowed_fails or _should_retry == False:
             # get the current cooldown list for that minute
             cooldown_key = f"{current_minute}:cooldown_models"  # group cooldown models by minute to reduce number of redis calls
             cached_value = self.cache.get_cache(key=cooldown_key)
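The effect of this change: a deployment that fails with a non-retryable status is cooled down immediately, even before it reaches `allowed_fails`. `litellm._should_retry` follows the OpenAI SDK's convention; a sketch of that gate, under the assumption the convention holds here:

```python
def should_retry(status_code: int) -> bool:
    # Retry transient failures: timeouts, conflicts, rate limits, server errors.
    if status_code in (408, 409, 429):
        return True
    if status_code >= 500:
        return True
    # Hard client errors (e.g. 401 auth, 404, 422) are not worth retrying,
    # so the deployment is cooled down right away.
    return False
```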
@@ -2652,6 +2665,13 @@ class Router:
                     _casted_value = int(kwargs[var])
                     setattr(self, var, _casted_value)
                 else:
+                    if var == "routing_strategy":
+                        self.routing_strategy_init(
+                            routing_strategy=kwargs[var],
+                            routing_strategy_args=kwargs.get(
+                                "routing_strategy_args", {}
+                            ),
+                        )
                     setattr(self, var, kwargs[var])
             else:
                 verbose_router_logger.debug("Setting {} is not allowed".format(var))
@@ -19,6 +19,7 @@ def setup_and_teardown():
         0, os.path.abspath("../..")
     )  # Adds the project directory to the system path
     import litellm
+    from litellm import Router

     importlib.reload(litellm)
     import asyncio
@@ -348,6 +348,220 @@ def test_langfuse_logging_function_calling():
# test_langfuse_logging_function_calling()


def test_langfuse_existing_trace_id():
    """
    When existing trace id is passed, don't set trace params -> prevents overwriting the trace

    Pass 1 logging object with a trace

    Pass 2nd logging object with the trace id

    Assert no changes to the trace
    """
    # Test - if the logs were sent to the correct team on langfuse
    import litellm, datetime
    from litellm.integrations.langfuse import LangFuseLogger

    langfuse_Logger = LangFuseLogger(
        langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
        langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
    )
    litellm.success_callback = ["langfuse"]

    # langfuse_args = {'kwargs': { 'start_time':  'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
    response_obj = litellm.ModelResponse(
        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
        choices=[
            litellm.Choices(
                finish_reason="stop",
                index=0,
                message=litellm.Message(
                    content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
                    role="assistant",
                ),
            )
        ],
        created=1714573888,
        model="gpt-3.5-turbo-0125",
        object="chat.completion",
        system_fingerprint="fp_3b956da36b",
        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
    )

    ### NEW TRACE ###
    message = [{"role": "user", "content": "what's the weather in boston"}]
    langfuse_args = {
        "response_obj": response_obj,
        "kwargs": {
            "model": "gpt-3.5-turbo",
            "litellm_params": {
                "acompletion": False,
                "api_key": None,
                "force_timeout": 600,
                "logger_fn": None,
                "verbose": False,
                "custom_llm_provider": "openai",
                "api_base": "https://api.openai.com/v1/",
                "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
                "model_alias_map": {},
                "completion_call_id": None,
                "metadata": None,
                "model_info": None,
                "proxy_server_request": None,
                "preset_cache_key": None,
                "no-log": False,
                "stream_response": {},
            },
            "messages": message,
            "optional_params": {"temperature": 0.1, "extra_body": {}},
            "start_time": "2024-05-01 07:31:27.986164",
            "stream": False,
            "user": None,
            "call_type": "completion",
            "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
            "completion_start_time": "2024-05-01 07:31:29.903685",
            "temperature": 0.1,
            "extra_body": {},
            "input": [{"role": "user", "content": "what's the weather in boston"}],
            "api_key": "my-api-key",
            "additional_args": {
                "complete_input_dict": {
                    "model": "gpt-3.5-turbo",
                    "messages": [
                        {"role": "user", "content": "what's the weather in boston"}
                    ],
                    "temperature": 0.1,
                    "extra_body": {},
                }
            },
            "log_event_type": "successful_api_call",
            "end_time": "2024-05-01 07:31:29.903685",
            "cache_hit": None,
            "response_cost": 6.25e-05,
        },
        "start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
        "end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
        "user_id": None,
        "print_verbose": litellm.print_verbose,
        "level": "DEFAULT",
        "status_message": None,
    }

    langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)

    import langfuse

    langfuse_client = langfuse.Langfuse(
        public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
        secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
    )

    trace_id = langfuse_response_object["trace_id"]

    langfuse_client.flush()

    time.sleep(2)

    print(langfuse_client.get_trace(id=trace_id))

    initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)

    ### EXISTING TRACE ###

    new_metadata = {"existing_trace_id": trace_id}
    new_messages = [{"role": "user", "content": "What do you know?"}]
    new_response_obj = litellm.ModelResponse(
        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
        choices=[
            litellm.Choices(
                finish_reason="stop",
                index=0,
                message=litellm.Message(
                    content="What do I know?",
                    role="assistant",
                ),
            )
        ],
        created=1714573888,
        model="gpt-3.5-turbo-0125",
        object="chat.completion",
        system_fingerprint="fp_3b956da36b",
        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
    )
    langfuse_args = {
        "response_obj": new_response_obj,
        "kwargs": {
            "model": "gpt-3.5-turbo",
            "litellm_params": {
                "acompletion": False,
                "api_key": None,
                "force_timeout": 600,
                "logger_fn": None,
                "verbose": False,
                "custom_llm_provider": "openai",
                "api_base": "https://api.openai.com/v1/",
                "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
                "model_alias_map": {},
                "completion_call_id": None,
                "metadata": new_metadata,
                "model_info": None,
                "proxy_server_request": None,
                "preset_cache_key": None,
                "no-log": False,
                "stream_response": {},
            },
            "messages": new_messages,
            "optional_params": {"temperature": 0.1, "extra_body": {}},
            "start_time": "2024-05-01 07:31:27.986164",
            "stream": False,
            "user": None,
            "call_type": "completion",
            "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
            "completion_start_time": "2024-05-01 07:31:29.903685",
            "temperature": 0.1,
            "extra_body": {},
            "input": [{"role": "user", "content": "what's the weather in boston"}],
            "api_key": "my-api-key",
            "additional_args": {
                "complete_input_dict": {
                    "model": "gpt-3.5-turbo",
                    "messages": [
                        {"role": "user", "content": "what's the weather in boston"}
                    ],
                    "temperature": 0.1,
                    "extra_body": {},
                }
            },
            "log_event_type": "successful_api_call",
            "end_time": "2024-05-01 07:31:29.903685",
            "cache_hit": None,
            "response_cost": 6.25e-05,
        },
        "start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
        "end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
        "user_id": None,
        "print_verbose": litellm.print_verbose,
        "level": "DEFAULT",
        "status_message": None,
    }

    langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)

    new_trace_id = langfuse_response_object["trace_id"]

    assert new_trace_id == trace_id

    langfuse_client.flush()

    time.sleep(2)

    print(langfuse_client.get_trace(id=trace_id))

    new_langfuse_trace = langfuse_client.get_trace(id=trace_id)

    assert dict(initial_langfuse_trace) == dict(new_langfuse_trace)


def test_langfuse_logging_tool_calling():
    litellm.set_verbose = True
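The new test above pins down Langfuse trace reuse. In application code the same behavior is reachable through litellm's Langfuse metadata: passing `existing_trace_id` logs the call into an existing trace instead of creating one and overwriting its params. A minimal sketch, assuming `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY` are set and the trace id comes from an earlier call:

```python
import litellm

litellm.success_callback = ["langfuse"]

# First call: creates a fresh Langfuse trace.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's the weather in boston"}],
)

# Second call: reuses the trace; "my-trace-id" is a placeholder for an id
# captured from the first call's Langfuse logs.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What do you know?"}],
    metadata={"existing_trace_id": "my-trace-id"},
)
```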
@@ -104,6 +104,42 @@ def test_router_timeout_init(timeout, ssl_verify):
    )


@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.asyncio
async def test_router_retries(sync_mode):
    """
    - make sure retries work as expected
    """
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
            },
        },
    ]

    router = Router(model_list=model_list, num_retries=2)

    if sync_mode:
        router.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )
    else:
        await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )


@pytest.mark.parametrize(
    "mistral_api_base",
    [
@@ -1118,6 +1154,7 @@ def test_consistent_model_id():
    assert id1 == id2


@pytest.mark.skip(reason="local test")
def test_reading_keys_os_environ():
    import openai

@@ -1217,6 +1254,7 @@ def test_reading_keys_os_environ():
# test_reading_keys_os_environ()


@pytest.mark.skip(reason="local test")
def test_reading_openai_keys_os_environ():
    import openai
@@ -46,6 +46,7 @@ def test_async_fallbacks(caplog):
    router = Router(
        model_list=model_list,
        fallbacks=[{"gpt-3.5-turbo": ["azure/gpt-3.5-turbo"]}],
        num_retries=1,
    )

    user_message = "Hello, how are you?"

@@ -82,6 +83,7 @@ def test_async_fallbacks(caplog):
    # - error request, falling back notice, success notice
    expected_logs = [
        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
        "litellm.acompletion(model=None)\x1b[31m Exception No deployments available for selected model, passed model=gpt-3.5-turbo\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
    ]
@@ -22,10 +22,10 @@ class MyCustomHandler(CustomLogger):
    def log_pre_api_call(self, model, messages, kwargs):
        print(f"Pre-API Call")
        print(
            f"previous_models: {kwargs['litellm_params']['metadata']['previous_models']}"
            f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
        )
        self.previous_models += len(
            kwargs["litellm_params"]["metadata"]["previous_models"]
        self.previous_models = len(
            kwargs["litellm_params"]["metadata"].get("previous_models", [])
        )  # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
        print(f"self.previous_models: {self.previous_models}")

@@ -127,7 +127,7 @@ def test_sync_fallbacks():
        response = router.completion(**kwargs)
        print(f"response: {response}")
        time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4

        print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
        router.reset()

@@ -140,7 +140,7 @@ def test_sync_fallbacks():

@pytest.mark.asyncio
async def test_async_fallbacks():
    litellm.set_verbose = False
    litellm.set_verbose = True
    model_list = [
        {  # list of model deployments
            "model_name": "azure/gpt-3.5-turbo",  # openai model name

@@ -209,12 +209,13 @@ async def test_async_fallbacks():
    user_message = "Hello, how are you?"
    messages = [{"content": user_message, "role": "user"}]
    try:
        kwargs["model"] = "azure/gpt-3.5-turbo"
        response = await router.acompletion(**kwargs)
        print(f"customHandler.previous_models: {customHandler.previous_models}")
        await asyncio.sleep(
            0.05
        )  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except litellm.Timeout as e:
        pass

@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
        model_list=model_list,
        fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
        set_verbose=False,
        num_retries=0,
    )
    customHandler = MyCustomHandler()
    litellm.callbacks = [customHandler]

@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
        response = router.embedding(**kwargs)
        print(f"customHandler.previous_models: {customHandler.previous_models}")
        time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except litellm.Timeout as e:
        pass

@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
        await asyncio.sleep(
            0.05
        )  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except litellm.Timeout as e:
        pass

@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
        },
    ]

    router = Router(model_list=model_list, set_verbose=True, num_retries=0)
    router = Router(model_list=model_list, set_verbose=True)
    kwargs = {}
    kwargs["model"] = "azure/gpt-3.5-turbo"
    kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]

@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
        response = router.completion(**kwargs)
        print(f"response: {response}")
        time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")

@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
        await asyncio.sleep(
            0.05
        )  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")

@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
        await asyncio.sleep(
            0.05
        )  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except litellm.Timeout as e:
        pass

@@ -752,7 +752,7 @@ async def test_async_fallbacks_max_retries_per_request():
    router.reset()


def test_usage_based_routing_fallbacks():
def test_ausage_based_routing_fallbacks():
    try:
        # [Prod Test]
        # IT tests Usage Based Routing with fallbacks

@@ -766,9 +766,9 @@ def test_usage_based_routing_fallbacks():
    load_dotenv()

    # Constants for TPM and RPM allocation
    AZURE_FAST_RPM = 3
    AZURE_BASIC_RPM = 4
    OPENAI_RPM = 10
    AZURE_FAST_RPM = 1
    AZURE_BASIC_RPM = 1
    OPENAI_RPM = 2
    ANTHROPIC_RPM = 100000

    def get_azure_params(deployment_name: str):
121	litellm/tests/test_router_retries.py	Normal file

@@ -0,0 +1,121 @@
#### What this tests ####
# This tests calling router with fallback models

import sys, os, time
import traceback, asyncio
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    success: bool = False
    failure: bool = False
    previous_models: int = 0

    def log_pre_api_call(self, model, messages, kwargs):
        print(f"Pre-API Call")
        print(
            f"previous_models: {kwargs['litellm_params']['metadata'].get('previous_models', None)}"
        )
        self.previous_models = len(
            kwargs["litellm_params"]["metadata"].get("previous_models", [])
        )  # {"previous_models": [{"model": litellm_model_name, "exception_type": AuthenticationError, "exception_string": <complete_traceback>}]}
        print(f"self.previous_models: {self.previous_models}")

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print(
            f"Post-API Call - response object: {response_obj}; model: {kwargs['model']}"
        )

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Stream")

    def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Success")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Success")

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Failure")


"""
Test sync + async

- Authorization Errors
- Random API Error
"""


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("error_type", ["Authorization Error", "API Error"])
@pytest.mark.asyncio
async def test_router_retries_errors(sync_mode, error_type):
    """
    - Auth Error -> 0 retries
    - API Error -> 2 retries
    """

    _api_key = (
        "bad-key" if error_type == "Authorization Error" else os.getenv("AZURE_API_KEY")
    )
    print(f"_api_key: {_api_key}")
    model_list = [
        {
            "model_name": "azure/gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-functioncalling",
                "api_key": _api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "tpm": 240000,
            "rpm": 1800,
        },
    ]

    router = Router(model_list=model_list, allowed_fails=3)

    customHandler = MyCustomHandler()
    litellm.callbacks = [customHandler]
    user_message = "Hello, how are you?"
    messages = [{"content": user_message, "role": "user"}]

    kwargs = {
        "model": "azure/gpt-3.5-turbo",
        "messages": messages,
        "mock_response": (
            None
            if error_type == "Authorization Error"
            else Exception("Invalid Request")
        ),
    }

    try:
        if sync_mode:
            response = router.completion(**kwargs)
        else:
            response = await router.acompletion(**kwargs)
    except Exception as e:
        pass

    await asyncio.sleep(
        0.05
    )  # allow a delay as success_callbacks are on a separate thread
    print(f"customHandler.previous_models: {customHandler.previous_models}")

    if error_type == "Authorization Error":
        assert customHandler.previous_models == 0  # 0 retries
    else:
        assert customHandler.previous_models == 2  # 2 retries
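Note how the new test drives the failure path: handing `mock_response` an `Exception` makes the call raise instead of hitting the provider, so retry behavior can be exercised without a live deployment. A trimmed sketch of the same idea (model name is a placeholder; behavior hedged to match the test above):

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=Exception("Invalid Request"),  # raised in place of a real API call
    )
except Exception as e:
    print(f"mocked failure surfaced as: {e}")
```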
@@ -106,7 +106,7 @@ try:
except Exception as e:
    verbose_logger.debug(f"Exception import enterprise features {str(e)}")

from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO
from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO, Iterable
from .caching import Cache
from concurrent.futures import ThreadPoolExecutor

@@ -1236,7 +1236,10 @@ class Logging:
                        print_verbose=print_verbose,
                    )
                elif callback == "sentry" and add_breadcrumb:
                    details_to_log = copy.deepcopy(self.model_call_details)
                    try:
                        details_to_log = copy.deepcopy(self.model_call_details)
                    except:
                        details_to_log = self.model_call_details
                    if litellm.turn_off_message_logging:
                        # make a copy of the _model_Call_details and log it
                        details_to_log.pop("messages", None)

@@ -1327,8 +1330,10 @@ class Logging:
                    )
                elif callback == "sentry" and add_breadcrumb:
                    print_verbose("reaches sentry breadcrumbing")

                    details_to_log = copy.deepcopy(self.model_call_details)
                    try:
                        details_to_log = copy.deepcopy(self.model_call_details)
                    except:
                        details_to_log = self.model_call_details
                    if litellm.turn_off_message_logging:
                        # make a copy of the _model_Call_details and log it
                        details_to_log.pop("messages", None)

@@ -2635,7 +2640,11 @@ def function_setup(
            dynamic_success_callbacks = kwargs.pop("success_callback")

        if add_breadcrumb:
            details_to_log = copy.deepcopy(kwargs)
            try:
                details_to_log = copy.deepcopy(kwargs)
            except:
                details_to_log = kwargs

            if litellm.turn_off_message_logging:
                # make a copy of the _model_Call_details and log it
                details_to_log.pop("messages", None)

@@ -7171,6 +7180,7 @@ def convert_to_model_response_object(
    end_time=None,
    hidden_params: Optional[dict] = None,
):
    received_args = locals()
    try:
        if response_type == "completion" and (
            model_response_object is None

@@ -7182,6 +7192,11 @@ def convert_to_model_response_object(
            # for returning cached responses, we need to yield a generator
            return convert_to_streaming_response(response_object=response_object)
        choice_list = []

        assert response_object["choices"] is not None and isinstance(
            response_object["choices"], Iterable
        )

        for idx, choice in enumerate(response_object["choices"]):
            message = Message(
                content=choice["message"].get("content", None),

@@ -7303,7 +7318,9 @@ def convert_to_model_response_object(
        model_response_object._hidden_params = hidden_params
        return model_response_object
    except Exception as e:
        raise Exception(f"Invalid response object {traceback.format_exc()}")
        raise Exception(
            f"Invalid response object {traceback.format_exc()}\n\nreceived_args={received_args}"
        )


def acreate(*args, **kwargs):  ## Thin client to handle the acreate langchain call
@@ -1572,6 +1572,17 @@
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/anthropic/claude-3-opus": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.000015,
        "output_cost_per_token": 0.000075,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "tool_use_system_prompt_tokens": 395
    },
    "openrouter/google/palm-2-chat-bison": {
        "max_tokens": 8000,
        "input_cost_per_token": 0.0000005,
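As a sanity check on the new `openrouter/anthropic/claude-3-opus` entry, per-call cost is simply tokens multiplied by the per-token rates above; e.g. for 1,000 prompt tokens and 500 completion tokens:

```python
# Cost arithmetic for the pricing entry above.
prompt_tokens, completion_tokens = 1000, 500
cost = prompt_tokens * 0.000015 + completion_tokens * 0.000075
print(f"${cost:.4f}")  # $0.0525
```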
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.33"
version = "1.35.34"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.35.33"
version = "1.35.34"
version_files = [
    "pyproject.toml:^version"
]
@@ -190,6 +190,7 @@ model LiteLLM_ErrorLogs {
  endTime             DateTime // Assuming end_time is a DateTime field
  api_base            String   @default("")
  model_group         String   @default("") // public model_name / model_group
  litellm_model_name  String   @default("") // model passed to litellm
  model_id            String   @default("") // ID of model in ProxyModelTable
  request_kwargs      Json     @default("{}")
  exception_type      String   @default("")
@@ -488,7 +488,9 @@ async def test_key_info_spend_values():
    )
    rounded_response_cost = round(response_cost, 8)
    rounded_key_info_spend = round(key_info["info"]["spend"], 8)
    assert rounded_response_cost == rounded_key_info_spend
    assert (
        rounded_response_cost == rounded_key_info_spend
    ), f"Expected cost= {rounded_response_cost} != Tracked Cost={rounded_key_info_spend}"


@pytest.mark.asyncio
@@ -91,7 +91,7 @@ class ExpectNoException(Exception):
@pytest.mark.parametrize(
    "num_try_send, num_allowed_send",
    [
        (2, 2),  # sending as many as allowed, ExpectNoException
        (2, 3),  # sending as many as allowed, ExpectNoException
        # (10, 10), # sending as many as allowed, ExpectNoException
        (3, 2),  # Sending more than allowed, ValueError
        # (10, 9), # Sending more than allowed, ValueError
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5e699db73bf6f8c2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/4ccaa87c9648acfb.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-4acf5608f06a35df.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[46414,[\"761\",\"static/chunks/761-05f8a8451296476c.js\",\"931\",\"static/chunks/app/page-e710f07514d9286b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/4ccaa87c9648acfb.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"7aR2yOE4Bz0za1EnxRCsv\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
3:I[46414,["761","static/chunks/761-05f8a8451296476c.js","931","static/chunks/app/page-e710f07514d9286b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["7aR2yOE4Bz0za1EnxRCsv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/4ccaa87c9648acfb.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -18,8 +18,8 @@ import {
|
|||
} from "@tremor/react";
|
||||
import { TabPanel, TabPanels, TabGroup, TabList, Tab, TextInput, Icon } from "@tremor/react";
|
||||
import { Select, SelectItem, MultiSelect, MultiSelectItem } from "@tremor/react";
|
||||
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall } from "./networking";
|
||||
import { BarChart } from "@tremor/react";
|
||||
import { modelInfoCall, userGetRequesedtModelsCall, modelCreateCall, Model, modelCostMap, modelDeleteCall, healthCheckCall, modelUpdateCall, modelMetricsCall, modelExceptionsCall } from "./networking";
|
||||
import { BarChart, AreaChart } from "@tremor/react";
|
||||
import {
|
||||
Button as Button2,
|
||||
Modal,
|
||||
|
@ -192,7 +192,6 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
const [providerModels, setProviderModels] = useState<Array<string>>([]); // Explicitly typing providerModels as a string array
|
||||
|
||||
const providers = Object.values(Providers).filter(key => isNaN(Number(key)));
|
||||
|
||||
|
||||
const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
|
||||
const [healthCheckResponse, setHealthCheckResponse] = useState<string>('');
|
||||
|
@ -200,6 +199,12 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
const [selectedModel, setSelectedModel] = useState<any>(null);
|
||||
const [availableModelGroups, setAvailableModelGroups] = useState<Array<string>>([]);
|
||||
const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);
|
||||
const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
|
||||
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
|
||||
const [modelMetricsCategories, setModelMetricsCategories] = useState<any[]>([]);
|
||||
const [modelExceptions, setModelExceptions] = useState<any[]>([]);
|
||||
const [allExceptions, setAllExceptions] = useState<any[]>([]);
|
||||
const [failureTableData, setFailureTableData] = useState<any[]>([]);
|
||||
|
||||
const EditModelModal: React.FC<EditModelModalProps> = ({ visible, onCancel, model, onSubmit }) => {
|
||||
const [form] = Form.useForm();
|
||||
|
@ -443,14 +448,71 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
|||
}
|
||||
console.log("all_model_groups:", all_model_groups)
|
||||
let _array_model_groups = Array.from(all_model_groups)
|
||||
// sort _array_model_groups alphabetically
|
||||
_array_model_groups = _array_model_groups.sort();
|
||||
|
||||
setAvailableModelGroups(_array_model_groups);
|
||||
|
||||
// if userRole is Admin, show the pending requests
|
||||
if (userRole === "Admin" && accessToken) {
|
||||
const user_requests = await userGetRequesedtModelsCall(accessToken);
|
||||
console.log("Pending Requests:", pendingRequests);
|
||||
setPendingRequests(user_requests.requests || []);
|
||||
const modelMetricsResponse = await modelMetricsCall(
|
||||
accessToken,
|
||||
userID,
|
||||
userRole,
|
||||
null
|
||||
);
|
||||
|
||||
console.log("Model metrics response:", modelMetricsResponse);
|
||||
// Sort by latency (avg_latency_per_token)
|
||||
|
||||
|
||||
setModelMetrics(modelMetricsResponse.data);
|
||||
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
|
||||
|
||||
|
||||
const modelExceptionsResponse = await modelExceptionsCall(
|
||||
accessToken,
|
||||
userID,
|
||||
userRole,
|
||||
null
|
||||
)
|
||||
console.log("Model exceptions response:", modelExceptionsResponse);
|
||||
setModelExceptions(modelExceptionsResponse.data);
|
||||
setAllExceptions(modelExceptionsResponse.exception_types);
|
||||
|
||||
|
||||
let modelMetricsData = modelMetricsResponse.data;
|
||||
let successdeploymentToSuccess: Record<string, number> = {};
|
||||
for (let i = 0; i < modelMetricsData.length; i++) {
|
||||
let element = modelMetricsData[i];
|
||||
let _model_name = element.model;
|
||||
let _num_requests = element.num_requests;
|
||||
successdeploymentToSuccess[_model_name] = _num_requests
|
||||
}
|
||||
console.log("successdeploymentToSuccess:", successdeploymentToSuccess)
|
||||
|
||||
let failureTableData = [];
|
||||
let _failureData = modelExceptionsResponse.data;
|
||||
for (let i = 0; i < _failureData.length; i++) {
|
||||
const model = _failureData[i];
|
||||
let _model_name = model.model;
|
||||
let total_exceptions = model.total_exceptions;
|
||||
let total_Requests = successdeploymentToSuccess[_model_name];
|
||||
if (total_Requests == null) {
|
||||
total_Requests = 0
|
||||
}
|
||||
let _data = {
|
||||
model: _model_name,
|
||||
total_exceptions: total_exceptions,
|
||||
total_Requests: total_Requests,
|
||||
failure_rate: total_Requests / total_exceptions
|
||||
}
|
||||
failureTableData.push(_data);
|
||||
// sort failureTableData by failure_rate
|
||||
failureTableData.sort((a, b) => b.failure_rate - a.failure_rate);
|
||||
|
||||
setFailureTableData(failureTableData);
|
||||
console.log("failureTableData:", failureTableData);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error("There was an error fetching the model data", error);
|
||||
}
|
||||
|
@ -603,6 +665,77 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
|||
};
|
||||
|
||||
|
||||
const updateModelMetrics = async (modelGroup: string | null) => {
|
||||
console.log("Updating model metrics for group:", modelGroup);
|
||||
if (!accessToken || !userID || !userRole) {
|
||||
return
|
||||
}
|
||||
setSelectedModelGroup(modelGroup); // If you want to store the selected model group in state
|
||||
|
||||
|
||||
try {
|
||||
const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
|
||||
console.log("Model metrics response:", modelMetricsResponse);
|
||||
|
||||
// Assuming modelMetricsResponse now contains the metric data for the specified model group
|
||||
setModelMetrics(modelMetricsResponse.data);
|
||||
setModelMetricsCategories(modelMetricsResponse.all_api_bases);
|
||||
|
||||
const modelExceptionsResponse = await modelExceptionsCall(
|
||||
accessToken,
|
||||
userID,
|
||||
userRole,
|
||||
modelGroup
|
||||
)
|
||||
console.log("Model exceptions response:", modelExceptionsResponse);
|
||||
setModelExceptions(modelExceptionsResponse.data);
|
||||
setAllExceptions(modelExceptionsResponse.exception_types);
|
||||
|
||||
} catch (error) {
|
||||
console.error("Failed to fetch model metrics", error);
|
||||
}
|
||||
}
|
||||
|
||||
const customTooltip = (props: any) => {
|
||||
const { payload, active } = props;
|
||||
if (!active || !payload) return null;
|
||||
|
||||
// Extract the date from the first item in the payload array
|
||||
const date = payload[0]?.payload?.date;
|
||||
|
||||
// Sort the payload array by category.value in descending order
|
||||
let sortedPayload = payload.sort((a: any, b: any) => b.value - a.value);
|
||||
|
||||
// Only show the top 5, the 6th one should be called "X other categories" depending on how many categories were not shown
|
||||
if (sortedPayload.length > 5) {
|
||||
let remainingItems = sortedPayload.length - 5;
|
||||
sortedPayload = sortedPayload.slice(0, 5);
|
||||
sortedPayload.push({
|
||||
dataKey: `${remainingItems} other deployments`,
|
||||
value: payload.slice(5).reduce((acc: number, curr: any) => acc + curr.value, 0),
|
||||
color: "gray",
|
||||
});
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown">
|
||||
{date && <p className="text-tremor-content-emphasis mb-2">Date: {date}</p>}
|
||||
{sortedPayload.map((category: any, idx: number) => (
|
||||
<div key={idx} className="flex justify-between">
|
||||
<div className="flex items-center space-x-2">
|
||||
<div className={`w-2 h-2 mt-1 rounded-full bg-${category.color}-500`} />
|
||||
<p className="text-tremor-content">{category.dataKey}</p>
|
||||
</div>
|
||||
<p className="font-medium text-tremor-content-emphasis text-righ ml-2">
|
||||
{category.value.toFixed(5)}
|
||||
</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
|
||||
const getPlaceholder = (selectedProvider: string): string => {
|
||||
if (selectedProvider === Providers.Vertex_AI) {
|
||||
|
@ -640,6 +773,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
|||
<Tab>All Models</Tab>
|
||||
<Tab>Add Model</Tab>
|
||||
<Tab><pre>/health Models</pre></Tab>
|
||||
<Tab>Model Analytics</Tab>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center space-x-2">
|
||||
|
@ -955,6 +1089,87 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
|||
|
||||
</Card>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
|
||||
<Select
|
||||
className="mb-4 mt-2"
|
||||
>
|
||||
{availableModelGroups.map((group, idx) => (
|
||||
<SelectItem
|
||||
key={idx}
|
||||
value={group}
|
||||
onClick={() => updateModelMetrics(group)}
|
||||
>
|
||||
{group}
|
||||
</SelectItem>
|
||||
))}
|
||||
</Select>
|
||||
|
||||
<Grid numItems={2}>
|
||||
<Col>
|
||||
<Card className="mr-2">
|
||||
<Title>Avg Latency per Token</Title><p className="text-gray-500 italic"> (seconds/token)</p>
|
||||
<Text className="text-gray-500 italic mt-1 mb-1">average Latency for successfull requests divided by the total tokens</Text>
|
||||
{ modelMetrics && modelMetricsCategories && (
|
||||
<AreaChart
|
||||
title="Model Latency"
|
||||
className="h-72"
|
||||
data={modelMetrics}
|
||||
showLegend={false}
|
||||
index="date"
|
||||
categories={modelMetricsCategories}
|
||||
connectNulls={true}
|
||||
customTooltip={customTooltip}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
|
||||
</Card>
|
||||
</Col>
|
||||
<Col>
|
||||
<Card className="ml-2">
|
||||
<Table>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableHeaderCell>Model</TableHeaderCell>
|
||||
<TableHeaderCell>Success Requests</TableHeaderCell>
|
||||
<TableHeaderCell>Error Requests</TableHeaderCell>
|
||||
<TableHeaderCell>Failure %</TableHeaderCell>
|
||||
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
{failureTableData.map((metric, idx) => (
|
||||
<TableRow key={idx}>
|
||||
<TableCell>{metric.model}</TableCell>
|
||||
<TableCell>{metric.total_Requests}</TableCell>
|
||||
<TableCell>{metric.total_exceptions}</TableCell>
|
||||
<TableCell>{metric.failure_rate}%</TableCell>
|
||||
</TableRow>
|
||||
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
|
||||
|
||||
</Card>
|
||||
</Col>
|
||||
</Grid>
|
||||
<Card className="mt-4">
|
||||
<Title>Exceptions per Model</Title>
|
||||
<BarChart
|
||||
className="h-72"
|
||||
data={modelExceptions}
|
||||
index="model"
|
||||
categories={allExceptions}
|
||||
stack={true}
|
||||
colors={['indigo-300', 'rose-200', '#ffcc33']}
|
||||
yAxisWidth={30}
|
||||
/>
|
||||
</Card>
|
||||
</TabPanel>
|
||||
|
||||
</TabPanels>
|
||||
</TabGroup>
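From the table cells and the stacked BarChart above, the data those components consume plausibly has the following shape — inferred from how the JSX indexes the objects, not confirmed by this diff:

```typescript
// Inferred, illustrative types only.
interface FailureTableRow {
  model: string;            // "Model" column
  total_Requests: number;   // "Success Requests" column
  total_exceptions: number; // "Error Requests" column
  failure_rate: number;     // "Failure %" column, already a percentage
}

// Each modelExceptions entry is indexed by "model" and would carry one
// numeric field per exception type listed in allExceptions, e.g.:
// { model: "azure-gpt-3.5", RateLimitError: 12, Timeout: 3 }
```
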
@@ -474,6 +474,43 @@ export const modelMetricsCall = async (
  }
};


export const modelExceptionsCall = async (
  accessToken: String,
  userID: String,
  userRole: String,
  modelGroup: String | null,
) => {
  /**
   * Get exceptions per model on the proxy
   */
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/exceptions` : `/model/metrics/exceptions`;

    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 20);
      throw new Error("Network response was not ok");
    }
    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch model exceptions:", error);
    throw error;
  }
};
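
A minimal usage sketch for `modelExceptionsCall` (the surrounding variables and the model group name are assumptions for illustration; in the dashboard they would come from component state):

```typescript
// Illustrative only: fetch exceptions for one model group and log them.
const loadExceptions = async (accessToken: string, userID: string, userRole: string) => {
  try {
    const exceptions = await modelExceptionsCall(accessToken, userID, userRole, "azure-gpt-3.5");
    console.log("exceptions per model:", exceptions);
    return exceptions;
  } catch (error) {
    // modelExceptionsCall already surfaces the error message to the UI
    console.error("could not load exceptions", error);
  }
};
```
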

export const modelAvailableCall = async (
  accessToken: String,
  userID: String,
@@ -146,10 +146,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
  const [topTagsData, setTopTagsData] = useState<any[]>([]);
  const [uniqueTeamIds, setUniqueTeamIds] = useState<any[]>([]);
  const [totalSpendPerTeam, setTotalSpendPerTeam] = useState<any[]>([]);
  const [modelMetrics, setModelMetrics] = useState<any[]>([]);
  const [modelLatencyMetrics, setModelLatencyMetrics] = useState<any[]>([]);
  const [modelGroups, setModelGroups] = useState<any[]>([]);
  const [selectedModelGroup, setSelectedModelGroup] = useState<string | null>(null);

  const firstDay = new Date(
    currentDate.getFullYear(),
@@ -231,25 +227,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
        const top_tags = await tagsSpendLogsCall(accessToken);
        setTopTagsData(top_tags.top_10_tags);

        // get model groups
        const _model_groups = await modelInfoCall(accessToken, userID, userRole);
        let model_groups = _model_groups.data;
        console.log("model groups in model dashboard", model_groups);

        let available_model_groups = [];
        // loop through each model in model_groups, access litellm_params and only include the model if model["litellm_params"]["model"] starts with "azure/"
        for (let i = 0; i < model_groups.length; i++) {
          let model = model_groups[i];
          console.log("model check", model);
          let model_group = model["litellm_params"]["model"];
          console.log("model group", model_group);
          if (model_group.startsWith("azure/")) {
            available_model_groups.push(model["model_name"]);
          }
        }
        setModelGroups(available_model_groups);

      } else if (userRole == "App Owner") {
        await userSpendLogsCall(
          accessToken,
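
The loop above keeps only Azure-backed deployments; a more idiomatic filter/map form of the same logic (a sketch, assuming the same `model_groups` shape):

```typescript
// Equivalent to the loop above, illustrative only.
const available_model_groups = model_groups
  .filter((m: any) => m["litellm_params"]["model"].startsWith("azure/"))
  .map((m: any) => m["model_name"]);
```
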
@@ -286,22 +263,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
          }
        });
      }

      const modelMetricsResponse = await modelMetricsCall(
        accessToken,
        userID,
        userRole,
        null
      );

      console.log("Model metrics response:", modelMetricsResponse);
      // Sort by latency (avg_latency_seconds)
      const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
      console.log("Sorted by latency:", sortedByLatency);

      setModelMetrics(modelMetricsResponse);
      setModelLatencyMetrics(sortedByLatency);

    } catch (error) {
      console.error("There was an error fetching the data", error);
      // Optionally, update your UI to reflect the error state here as well
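
Based on how the per-model BarCharts further below index this data (`index="model"`, categories `"num_requests"` and `"avg_latency_seconds"`), each entry returned by `modelMetricsCall` plausibly looks like this — inferred, not confirmed by the diff:

```typescript
// Inferred, illustrative shape of one modelMetricsCall entry.
interface ModelMetric {
  model: string;               // deployment / model name used as the chart index
  num_requests: number;        // request count in the selected window
  avg_latency_seconds: number; // mean latency, used for the descending sort above
}
```
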
@@ -312,30 +273,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
  }, [accessToken, token, userRole, userID, startTime, endTime]);


  const updateModelMetrics = async (modelGroup: string | null) => {
    console.log("Updating model metrics for group:", modelGroup);
    if (!accessToken || !userID || !userRole) {
      return;
    }
    setSelectedModelGroup(modelGroup); // store the selected model group in state

    try {
      const modelMetricsResponse = await modelMetricsCall(accessToken, userID, userRole, modelGroup);
      console.log("Model metrics response:", modelMetricsResponse);

      // modelMetricsResponse contains the metric data for the specified model group
      const sortedByLatency = [...modelMetricsResponse].sort((a, b) => b.avg_latency_seconds - a.avg_latency_seconds);
      console.log("Sorted by latency:", sortedByLatency);

      setModelMetrics(modelMetricsResponse);
      setModelLatencyMetrics(sortedByLatency);
    } catch (error) {
      console.error("Failed to fetch model metrics", error);
    }
  };


  return (
    <div style={{ width: "100%" }} className="p-8">
      <ViewUserSpend
@@ -350,7 +287,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
          <Tab>All Up</Tab>
          <Tab>Team Based Usage</Tab>
          <Tab>Tag Based Usage</Tab>
          <Tab>Model Based Usage</Tab>
        </TabList>
        <TabPanels>
          <TabPanel>
@@ -492,60 +428,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
            </Grid>
          </TabPanel>

          <TabPanel>
            <Title>Filter By Model Group</Title>
            <p style={{fontSize: '0.85rem', color: '#808080'}}>View how requests were load balanced within a model group</p>
            <p style={{fontSize: '0.85rem', color: '#808080', fontStyle: 'italic'}}>(Beta feature) only supported for Azure Model Groups</p>

            <Select
              className="mb-4 mt-2"
              defaultValue="all"
            >
              <SelectItem
                value={"all"}
                onClick={() => updateModelMetrics(null)}
              >
                All Model Groups
              </SelectItem>
              {modelGroups.map((group, idx) => (
                <SelectItem
                  key={idx}
                  value={group}
                  onClick={() => updateModelMetrics(group)}
                >
                  {group}
                </SelectItem>
              ))}
            </Select>
            <Card>
              <Title>Number Requests per Model</Title>
              <BarChart
                data={modelMetrics}
                className="h-[50vh]"
                index="model"
                categories={["num_requests"]}
                colors={["blue"]}
                yAxisWidth={400}
                layout="vertical"
                tickGap={5}
              />
            </Card>
            <Card className="mt-4">
              <Title>Latency Per Model</Title>
              <BarChart
                data={modelLatencyMetrics}
                className="h-[50vh]"
                index="model"
                categories={["avg_latency_seconds"]}
                colors={["red"]}
                yAxisWidth={400}
                layout="vertical"
                tickGap={5}
              />
            </Card>
          </TabPanel>
        </TabPanels>
      </TabGroup>
    </div>