diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f157f420c..abc27fa35 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -8565,6 +8565,102 @@ async def model_info_v2(
     return {"data": all_models}
 
 
+@router.get(
+    "/model/streaming_metrics",
+    description="View time to first token for models in spend logs",
+    tags=["model management"],
+    include_in_schema=False,
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def model_streaming_metrics(
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+    _selected_model_group: Optional[str] = None,
+    startTime: Optional[datetime] = None,
+    endTime: Optional[datetime] = None,
+):
+    global prisma_client, llm_router
+    if prisma_client is None:
+        raise ProxyException(
+            message=CommonProxyErrors.db_not_connected_error.value,
+            type="internal_error",
+            param="None",
+            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
+
+    startTime = startTime or datetime.now() - timedelta(days=7)  # show over past week
+    endTime = endTime or datetime.now()
+
+    sql_query = """
+        SELECT
+            api_base,
+            model_group,
+            model,
+            DATE_TRUNC('day', "startTime")::DATE AS day,
+            AVG(EXTRACT(epoch FROM ("completionStartTime" - "startTime"))) AS time_to_first_token
+        FROM
+            "LiteLLM_SpendLogs"
+        WHERE
+            "startTime" BETWEEN $2::timestamp AND $3::timestamp
+            AND "model_group" = $1 AND "cache_hit" != 'True'
+            AND "completionStartTime" IS NOT NULL
+            AND "completionStartTime" != "endTime"
+        GROUP BY
+            api_base,
+            model_group,
+            model,
+            day
+        ORDER BY
+            time_to_first_token DESC;
+    """
+
+    _all_api_bases = set()
+    db_response = await prisma_client.db.query_raw(
+        sql_query, _selected_model_group, startTime, endTime
+    )
+    _daily_entries: dict = {}  # {"Jun 23": {"model1": 0.002, "model2": 0.003}}
+    if db_response is not None:
+        for model_data in db_response:
+            _api_base = model_data["api_base"]
+            _model = model_data["model"]
+            _day = model_data["day"]
+            time_to_first_token = model_data["time_to_first_token"]
+            if _day not in _daily_entries:
+                _daily_entries[_day] = {}
+            _combined_model_name = str(_model)
+            if "https://" in _api_base:
+                _combined_model_name = str(_api_base)
+            if "/openai/" in _combined_model_name:
+                _combined_model_name = _combined_model_name.split("/openai/")[0]
+
+            _all_api_bases.add(_combined_model_name)
+            _daily_entries[_day][_combined_model_name] = time_to_first_token
+
+    """
+    each entry needs to be like this:
+    {
+        date: 'Jun 23',
+        'gpt-4-https://api.openai.com/v1/': 0.002,
+        'gpt-43-https://api.openai.com-12/v1/': 0.002,
+    }
+    """
+    # convert daily entries to list of dicts
+
+    response: List[dict] = []
+
+    # sort daily entries by date
+    _daily_entries = dict(sorted(_daily_entries.items(), key=lambda item: item[0]))
+    for day in _daily_entries:
+        entry = {"date": str(day)}
+        for model_key, latency in _daily_entries[day].items():
+            entry[model_key] = latency
+        response.append(entry)
+
+    return {
+        "data": response,
+        "all_api_bases": list(_all_api_bases),
+    }
+
+
 @router.get(
     "/model/metrics",
     description="View number of requests & avg latency per model on config.yaml",
@@ -8592,6 +8688,7 @@ async def model_metrics(
     sql_query = """
         SELECT
             api_base,
+            model_group,
             model,
             DATE_TRUNC('day', "startTime")::DATE AS day,
             AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) / SUM(total_tokens) AS avg_latency_per_token
         FROM
             "LiteLLM_SpendLogs"
         WHERE
             "startTime" BETWEEN $2::timestamp AND $3::timestamp
-            AND "model" = $1 AND "cache_hit" != 'True'
+            AND "model_group" = $1 AND "cache_hit" != 'True'
         GROUP BY
             api_base,
+            model_group,
             model,
             day
         HAVING
@@ -8614,6 +8712,7 @@ async def model_metrics(
         sql_query, _selected_model_group, startTime, endTime
     )
     _daily_entries: dict = {}  # {"Jun 23": {"model1": 0.002, "model2": 0.003}}
+
     if db_response is not None:
         for model_data in db_response:
            _api_base = model_data["api_base"]
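Note: the following is a minimal TypeScript sketch of the JSON contract returned by the new /model/streaming_metrics endpoint above. The interface name and the sample values are hypothetical and only illustrate the shape the handler produces; they are not part of the change.

    // Shape of the GET /model/streaming_metrics response body
    interface StreamingMetricsResponse {
      // one entry per day: "date" plus one key per combined model/api_base name,
      // whose value is that day's average time to first token in seconds
      data: Array<{ date: string } & Record<string, string | number>>;
      // every combined model/api_base name seen in the window (used as chart categories)
      all_api_bases: string[];
    }

    // Hypothetical example of a response body (values made up for illustration)
    const exampleResponse: StreamingMetricsResponse = {
      data: [
        { date: "2024-06-23", "https://api.openai.com": 0.21 },
        { date: "2024-06-24", "https://api.openai.com": 0.19 },
      ],
      all_api_bases: ["https://api.openai.com"],
    };
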
= $1 AND "cache_hit" != 'True' GROUP BY api_base, + model_group, model, day HAVING @@ -8614,6 +8712,7 @@ async def model_metrics( sql_query, _selected_model_group, startTime, endTime ) _daily_entries: dict = {} # {"Jun 23": {"model1": 0.002, "model2": 0.003}} + if db_response is not None: for model_data in db_response: _api_base = model_data["api_base"] diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx index 8c08da6cb..4160b7cc0 100644 --- a/ui/litellm-dashboard/src/components/model_dashboard.tsx +++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx @@ -43,6 +43,7 @@ import { healthCheckCall, modelUpdateCall, modelMetricsCall, + streamingModelMetricsCall, modelExceptionsCall, modelMetricsSlowResponsesCall, getCallbacksCall, @@ -259,6 +260,9 @@ const ModelDashboard: React.FC = ({ const [modelMetricsCategories, setModelMetricsCategories] = useState( [] ); + const [streamingModelMetrics, setStreamingModelMetrics] = useState([]); + const [streamingModelMetricsCategories, setStreamingModelMetricsCategories] = + useState([]); const [modelExceptions, setModelExceptions] = useState([]); const [allExceptions, setAllExceptions] = useState([]); const [failureTableData, setFailureTableData] = useState([]); @@ -558,6 +562,19 @@ const ModelDashboard: React.FC = ({ setModelMetrics(modelMetricsResponse.data); setModelMetricsCategories(modelMetricsResponse.all_api_bases); + const streamingModelMetricsResponse = await streamingModelMetricsCall( + accessToken, + _initial_model_group, + dateValue.from?.toISOString(), + dateValue.to?.toISOString() + ); + + // Assuming modelMetricsResponse now contains the metric data for the specified model group + setStreamingModelMetrics(streamingModelMetricsResponse.data); + setStreamingModelMetricsCategories( + streamingModelMetricsResponse.all_api_bases + ); + const modelExceptionsResponse = await modelExceptionsCall( accessToken, userID, @@ -804,6 +821,19 @@ const ModelDashboard: React.FC = ({ setModelMetrics(modelMetricsResponse.data); setModelMetricsCategories(modelMetricsResponse.all_api_bases); + const streamingModelMetricsResponse = await streamingModelMetricsCall( + accessToken, + modelGroup, + startTime.toISOString(), + endTime.toISOString() + ); + + // Assuming modelMetricsResponse now contains the metric data for the specified model group + setStreamingModelMetrics(streamingModelMetricsResponse.data); + setStreamingModelMetricsCategories( + streamingModelMetricsResponse.all_api_bases + ); + const modelExceptionsResponse = await modelExceptionsCall( accessToken, userID, @@ -1573,7 +1603,13 @@ const ModelDashboard: React.FC = ({ )} - + diff --git a/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx b/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx index ab3bd1309..10e400397 100644 --- a/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx +++ b/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx @@ -1,82 +1,27 @@ +import React from "react"; import { LineChart } from "@tremor/react"; +interface TimeToFirstTokenProps { + modelMetrics: any[]; + modelMetricsCategories: string[]; + customTooltip: any; +} -const chartdata = [ - { - date: "Jan 22", - SolarPanels: 2890, - Inverters: 2338, - }, - { - date: "Feb 22", - SolarPanels: 2756, - Inverters: 2103, - }, - { - date: "Mar 22", - SolarPanels: 3322, - Inverters: 2194, - }, - { - date: "Apr 22", - SolarPanels: 3470, - Inverters: 2108, - }, - { - 
date: "May 22", - SolarPanels: 3475, - Inverters: 1812, - }, - { - date: "Jun 22", - SolarPanels: 3129, - Inverters: 1726, - }, - { - date: "Jul 22", - SolarPanels: 3490, - Inverters: 1982, - }, - { - date: "Aug 22", - SolarPanels: 2903, - Inverters: 2012, - }, - { - date: "Sep 22", - SolarPanels: 2643, - Inverters: 2342, - }, - { - date: "Oct 22", - SolarPanels: 2837, - Inverters: 2473, - }, - { - date: "Nov 22", - SolarPanels: 2954, - Inverters: 3848, - }, - { - date: "Dec 22", - SolarPanels: 3239, - Inverters: 3736, - }, -]; - -const dataFormatter = (number: number) => - `$${Intl.NumberFormat("us").format(number).toString()}`; - -const TimeToFirstToken = () => { +const TimeToFirstToken: React.FC = ({ + modelMetrics, + modelMetricsCategories, + customTooltip, +}) => { return ( console.log(v)} + connectNulls={true} + customTooltip={customTooltip} /> ); }; diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx index bb7899755..56131f4f8 100644 --- a/ui/litellm-dashboard/src/components/networking.tsx +++ b/ui/litellm-dashboard/src/components/networking.tsx @@ -473,6 +473,45 @@ export const modelMetricsCall = async ( throw error; } }; +export const streamingModelMetricsCall = async ( + accessToken: String, + modelGroup: String | null, + startTime: String | undefined, + endTime: String | undefined +) => { + /** + * Get all models on proxy + */ + try { + let url = proxyBaseUrl + ? `${proxyBaseUrl}/model/streaming_metrics` + : `/model/streaming_metrics`; + if (modelGroup) { + url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`; + } + // message.info("Requesting model data"); + const response = await fetch(url, { + method: "GET", + headers: { + Authorization: `Bearer ${accessToken}`, + "Content-Type": "application/json", + }, + }); + + if (!response.ok) { + const errorData = await response.text(); + message.error(errorData, 10); + throw new Error("Network response was not ok"); + } + const data = await response.json(); + // message.info("Received model data"); + return data; + // Handle success - you might want to update some state or UI based on the created key + } catch (error) { + console.error("Failed to create key:", error); + throw error; + } +}; export const modelMetricsSlowResponsesCall = async ( accessToken: String,