diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index 70ee9eca9..d959498ce 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # VertexAI - Google [Gemini, Model Garden] @@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co ## OpenAI Proxy Usage +Here's how to use Vertex AI with the LiteLLM Proxy Server + 1. Modify the config.yaml + + + + +Use this when you need to set a different location for each vertex model + +```yaml +model_list: + - model_name: gemini-vision + litellm_params: + model: vertex_ai/gemini-1.0-pro-vision-001 + vertex_project: "project-id" + vertex_location: "us-central1" + - model_name: gemini-vision + litellm_params: + model: vertex_ai/gemini-1.0-pro-vision-001 + vertex_project: "project-id2" + vertex_location: "us-east" +``` + + + + + +Use this when you have one vertex location for all models + ```yaml litellm_settings: vertex_project: "hardy-device-38811" # Your Project ID @@ -35,6 +66,10 @@ model_list: model: gemini-pro ``` + + + + 2. Start the proxy ```bash diff --git a/docs/my-website/docs/proxy/metrics.md b/docs/my-website/docs/proxy/metrics.md new file mode 100644 index 000000000..bf5ebe285 --- /dev/null +++ b/docs/my-website/docs/proxy/metrics.md @@ -0,0 +1,44 @@ +# 💸 GET Daily Spend, Usage Metrics + +## Request Format +```shell +curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234" +``` + +## Response format +```json +[ + daily_spend = [ + { + "daily_spend": 7.9261938052047e+16, + "day": "2024-02-01T00:00:00", + "spend_per_model": {"azure/gpt-4": 7.9261938052047e+16}, + "spend_per_api_key": { + "76": 914495704992000.0, + "12": 905726697912000.0, + "71": 866312628003000.0, + "28": 865461799332000.0, + "13": 859151538396000.0 + } + }, + { + "daily_spend": 7.938489251309491e+16, + "day": "2024-02-02T00:00:00", + "spend_per_model": {"gpt-3.5": 7.938489251309491e+16}, + "spend_per_api_key": { + "91": 896805036036000.0, + "78": 889692646082000.0, + "49": 885386687861000.0, + "28": 873869890984000.0, + "56": 867398637692000.0 + } + } + + ], + total_spend = 200, + top_models = {"gpt4": 0.2, "vertexai/gemini-pro":10}, + top_api_keys = {"899922": 0.9, "838hcjd999seerr88": 20} + +] + +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 7a24723af..d69abcbfb 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -40,6 +40,7 @@ const sidebars = { "proxy/virtual_keys", "proxy/users", "proxy/ui", + "proxy/metrics", "proxy/model_management", "proxy/health", "proxy/debugging", diff --git a/enterprise/utils.py b/enterprise/utils.py index f4916b689..3b5a90fc0 100644 --- a/enterprise/utils.py +++ b/enterprise/utils.py @@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse( "log_count": num_rows, } return response_data + + +def _create_clickhouse_material_views(client=None, table_names=[]): + # Create Materialized Views if they don't exist + # Materialized Views send new inserted rows to the aggregate tables + + verbose_logger.debug("Clickhouse: Creating Materialized Views") + if "daily_aggregated_spend_per_model_mv" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv") + client.command( + """ + CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv + TO daily_aggregated_spend_per_model + AS + SELECT + toDate(startTime) as day, + sumState(spend) AS DailySpend, + model as model + FROM spend_logs + GROUP BY + day, model + """ + ) + if "daily_aggregated_spend_per_api_key_mv" not in table_names: + verbose_logger.debug( + "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv" + ) + client.command( + """ + CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv + TO daily_aggregated_spend_per_api_key + AS + SELECT + toDate(startTime) as day, + sumState(spend) AS DailySpend, + api_key as api_key + FROM spend_logs + GROUP BY + day, api_key + """ + ) + if "daily_aggregated_spend_per_user_mv" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv") + client.command( + """ + CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv + TO daily_aggregated_spend_per_user + AS + SELECT + toDate(startTime) as day, + sumState(spend) AS DailySpend, + user as user + FROM spend_logs + GROUP BY + day, user + """ + ) + if "daily_aggregated_spend_mv" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv") + client.command( + """ + CREATE MATERIALIZED VIEW daily_aggregated_spend_mv + TO daily_aggregated_spend + AS + SELECT + toDate(startTime) as day, + sumState(spend) AS DailySpend + FROM spend_logs + GROUP BY + day + """ + ) + + +def _create_clickhouse_aggregate_tables(client=None, table_names=[]): + # Basic Logging works without this - this is only used for low latency reporting apis + verbose_logger.debug("Clickhouse: Creating Aggregate Tables") + + # Create Aggregeate Tables if they don't exist + if "daily_aggregated_spend_per_model" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model") + client.command( + """ + CREATE TABLE daily_aggregated_spend_per_model + ( + `day` Date, + `DailySpend` AggregateFunction(sum, Float64), + `model` String + ) + ENGINE = SummingMergeTree() + ORDER BY (day, model); + """ + ) + if "daily_aggregated_spend_per_api_key" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key") + client.command( + """ + CREATE TABLE daily_aggregated_spend_per_api_key + ( + `day` Date, + `DailySpend` AggregateFunction(sum, Float64), + `api_key` String + ) + ENGINE = SummingMergeTree() + ORDER BY (day, api_key); + """ + ) + if "daily_aggregated_spend_per_user" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user") + client.command( + """ + CREATE TABLE daily_aggregated_spend_per_user + ( + `day` Date, + `DailySpend` AggregateFunction(sum, Float64), + `user` String + ) + ENGINE = SummingMergeTree() + ORDER BY (day, user); + """ + ) + if "daily_aggregated_spend" not in table_names: + verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend") + client.command( + """ + CREATE TABLE daily_aggregated_spend + ( + `day` Date, + `DailySpend` AggregateFunction(sum, Float64), + ) + ENGINE = SummingMergeTree() + ORDER BY (day); + """ + ) + return diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index 280d64c4a..d5000e5c4 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -27,6 +27,151 @@ import litellm, uuid from litellm._logging import print_verbose, verbose_logger +def create_client(): + try: + import clickhouse_connect + + port = os.getenv("CLICKHOUSE_PORT") + clickhouse_host = os.getenv("CLICKHOUSE_HOST") + if clickhouse_host is not None: + verbose_logger.debug("setting up clickhouse") + if port is not None and isinstance(port, str): + port = int(port) + + client = clickhouse_connect.get_client( + host=os.getenv("CLICKHOUSE_HOST"), + port=port, + username=os.getenv("CLICKHOUSE_USERNAME"), + password=os.getenv("CLICKHOUSE_PASSWORD"), + ) + return client + else: + raise Exception("Clickhouse: Clickhouse host not set") + except Exception as e: + raise ValueError(f"Clickhouse: {e}") + + +def build_daily_metrics(): + click_house_client = create_client() + + # get daily spend + daily_spend = click_house_client.query_df( + """ + SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day + """ + ) + + # get daily spend per model + daily_spend_per_model = click_house_client.query_df( + """ + SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model + """ + ) + new_df = daily_spend_per_model.to_dict(orient="records") + import pandas as pd + + df = pd.DataFrame(new_df) + # Group by 'day' and create a dictionary for each group + result_dict = {} + for day, group in df.groupby("day"): + models = group["model"].tolist() + spend = group["daily_spend"].tolist() + spend_per_model = {model: spend for model, spend in zip(models, spend)} + result_dict[day] = spend_per_model + + # Display the resulting dictionary + + # get daily spend per API key + daily_spend_per_api_key = click_house_client.query_df( + """ + SELECT + daily_spend, + day, + api_key + FROM ( + SELECT + sumMerge(DailySpend) as daily_spend, + day, + api_key, + RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank + FROM + daily_aggregated_spend_per_api_key + GROUP BY + day, + api_key + ) AS ranked_api_keys + WHERE + spend_rank <= 5 + AND day IS NOT NULL + ORDER BY + day, + daily_spend DESC + """ + ) + new_df = daily_spend_per_api_key.to_dict(orient="records") + import pandas as pd + + df = pd.DataFrame(new_df) + # Group by 'day' and create a dictionary for each group + api_key_result_dict = {} + for day, group in df.groupby("day"): + api_keys = group["api_key"].tolist() + spend = group["daily_spend"].tolist() + spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)} + api_key_result_dict[day] = spend_per_api_key + + # Display the resulting dictionary + + # Calculate total spend across all days + total_spend = daily_spend["daily_spend"].sum() + + # Identify top models and top API keys with the highest spend across all days + top_models = {} + top_api_keys = {} + + for day, spend_per_model in result_dict.items(): + for model, model_spend in spend_per_model.items(): + if model not in top_models or model_spend > top_models[model]: + top_models[model] = model_spend + + for day, spend_per_api_key in api_key_result_dict.items(): + for api_key, api_key_spend in spend_per_api_key.items(): + if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]: + top_api_keys[api_key] = api_key_spend + + # for each day in daily spend, look up the day in result_dict and api_key_result_dict + # Assuming daily_spend DataFrame has 'day' column + result = [] + for index, row in daily_spend.iterrows(): + day = row["day"] + data_day = row.to_dict() + + # Look up in result_dict + if day in result_dict: + spend_per_model = result_dict[day] + # Assuming there is a column named 'model' in daily_spend + data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found + + # Look up in api_key_result_dict + if day in api_key_result_dict: + spend_per_api_key = api_key_result_dict[day] + # Assuming there is a column named 'api_key' in daily_spend + data_day["spend_per_api_key"] = spend_per_api_key + + result.append(data_day) + + data_to_return = {} + data_to_return["daily_spend"] = result + + data_to_return["total_spend"] = total_spend + data_to_return["top_models"] = top_models + data_to_return["top_api_keys"] = top_api_keys + return data_to_return + + +# build_daily_metrics() + + def _start_clickhouse(): import clickhouse_connect @@ -86,6 +231,14 @@ def _start_clickhouse(): response = client.query("DESCRIBE default.spend_logs") verbose_logger.debug(f"spend logs schema ={response.result_rows}") + # RUN Enterprise Clickhouse Setup + # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs + from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables + from litellm.proxy.enterprise.utils import _create_clickhouse_material_views + + _create_clickhouse_aggregate_tables(client=client, table_names=table_names) + _create_clickhouse_material_views(client=client, table_names=table_names) + class ClickhouseLogger: # Class variables or attributes diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index f4447a9e9..18c06d4d6 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -278,7 +278,11 @@ def completion( import google.auth ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744 + print_verbose( + f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}" + ) creds, _ = google.auth.default(quota_project_id=vertex_project) + print_verbose(f"VERTEX AI: creds={creds}") vertexai.init( project=vertex_project, location=vertex_location, credentials=creds ) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 6c663200d..5bd0bcdff 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -687,6 +687,15 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat" }, + "gemini-1.5-pro-preview-0215": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat" + }, "gemini-pro-vision": { "max_tokens": 16384, "max_output_tokens": 2048, diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 74a780c71..5755293e0 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -43,7 +43,7 @@ model_list: api_key: os.environ/OPENAI_API_KEY litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] - success_callback: ['langfuse'] + success_callback: ['clickhouse', 'langfuse'] # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index ed96479b7..2e9a48460 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -240,6 +240,8 @@ health_check_results = {} queue: List = [] litellm_proxy_budget_name = "litellm-proxy-budget" ui_access_mode: Literal["admin", "all"] = "all" +proxy_budget_rescheduler_min_time = 597 +proxy_budget_rescheduler_max_time = 605 ### INITIALIZE GLOBAL LOGGING OBJECT ### proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) ### REDIS QUEUE ### @@ -1407,7 +1409,7 @@ class ProxyConfig: """ Load config values into proxy global state """ - global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, ui_access_mode + global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode # Load existing config config = await self.get_config(config_file_path=config_file_path) @@ -1718,6 +1720,13 @@ class ProxyConfig: ui_access_mode = general_settings.get( "ui_access_mode", "all" ) # can be either ["admin_only" or "all"] + ## BUDGET RESCHEDULER ## + proxy_budget_rescheduler_min_time = general_settings.get( + "proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time + ) + proxy_budget_rescheduler_max_time = general_settings.get( + "proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time + ) ### BACKGROUND HEALTH CHECKS ### # Enable background health checks use_background_health_checks = general_settings.get( @@ -2120,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict): try: start_time = time.time() async for chunk in response: - verbose_proxy_logger.debug(f"returned chunk: {chunk}") - assert isinstance(chunk, litellm.ModelResponse) + chunk = chunk.model_dump_json(exclude_none=True) try: - yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n" + yield f"data: {chunk}\n\n" except Exception as e: yield f"data: {str(e)}\n\n" @@ -2202,7 +2210,7 @@ def parse_cache_control(cache_control): @router.on_event("startup") async def startup_event(): - global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings + global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time import json ### LOAD MASTER KEY ### @@ -2313,7 +2321,7 @@ async def startup_event(): if prisma_client is not None: scheduler = AsyncIOScheduler() interval = random.randint( - 597, 605 + proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time ) # random interval, so multiple workers avoid resetting budget at the same time scheduler.add_job( reset_budget, "interval", seconds=interval, args=[prisma_client] @@ -3839,13 +3847,55 @@ async def view_spend_logs( # gettting spend logs from clickhouse from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse - return await view_spend_logs_from_clickhouse( - api_key=api_key, - user_id=user_id, - request_id=request_id, + daily_metrics = await view_daily_metrics( start_date=start_date, end_date=end_date, ) + + # get the top api keys across all daily_metrics + top_api_keys = {} # type: ignore + + # make this compatible with the admin UI + for response in daily_metrics.get("daily_spend", {}): + response["startTime"] = response["day"] + response["spend"] = response["daily_spend"] + response["models"] = response["spend_per_model"] + response["users"] = {"ishaan": 0.0} + spend_per_api_key = response["spend_per_api_key"] + + # insert spend_per_api_key key, values in response + for key, value in spend_per_api_key.items(): + response[key] = value + top_api_keys[key] = top_api_keys.get(key, 0.0) + value + + del response["day"] + del response["daily_spend"] + del response["spend_per_model"] + del response["spend_per_api_key"] + + # get top 5 api keys + top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore + top_api_keys = top_api_keys[:5] # type: ignore + top_api_keys = dict(top_api_keys) # type: ignore + """ + set it like this + { + "key" : key, + "spend:" : spend + } + """ + # we need this to show on the Admin UI + response_keys = [] + for key in top_api_keys.items(): + response_keys.append( + { + "key": key[0], + "spend": key[1], + } + ) + daily_metrics["top_api_keys"] = response_keys + + return daily_metrics global prisma_client try: verbose_proxy_logger.debug("inside view_spend_logs") @@ -3998,6 +4048,61 @@ async def view_spend_logs( ) +@router.get( + "/daily_metrics", + summary="Get daily spend metrics", + tags=["budget & spend Tracking"], + dependencies=[Depends(user_api_key_auth)], +) +async def view_daily_metrics( + start_date: Optional[str] = fastapi.Query( + default=None, + description="Time from which to start viewing key spend", + ), + end_date: Optional[str] = fastapi.Query( + default=None, + description="Time till which to view key spend", + ), +): + """ """ + try: + if os.getenv("CLICKHOUSE_HOST") is not None: + # gettting spend logs from clickhouse + from litellm.integrations import clickhouse + + return clickhouse.build_daily_metrics() + + # create a response object + """ + { + "date": "2022-01-01", + "spend": 0.0, + "users": {}, + "models": {}, + } + """ + else: + raise Exception( + "Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics" + ) + except Exception as e: + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "detail", f"/spend/logs Error({str(e)})"), + type="internal_error", + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR), + ) + elif isinstance(e, ProxyException): + raise e + raise ProxyException( + message="/spend/logs Error" + str(e), + type="internal_error", + param=getattr(e, "param", "None"), + code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + #### USER MANAGEMENT #### @router.post( "/user/new", diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 1de26d3b9..f5df00c8d 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -130,6 +130,8 @@ def test_vertex_ai(): f"response.choices[0].finish_reason: {response.choices[0].finish_reason}" ) assert response.choices[0].finish_reason in litellm._openai_finish_reasons + except litellm.RateLimitError as e: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -183,6 +185,8 @@ def test_vertex_ai_stream(): assert type(content) == str # pass assert len(completed_str) > 4 + except litellm.RateLimitError as e: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") diff --git a/litellm/utils.py b/litellm/utils.py index 0ef50f20c..acad61702 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -205,18 +205,18 @@ def map_finish_reason( class FunctionCall(OpenAIObject): arguments: str - name: str + name: Optional[str] = None class Function(OpenAIObject): arguments: str - name: str + name: Optional[str] = None class ChatCompletionDeltaToolCall(OpenAIObject): - id: str + id: Optional[str] = None function: Function - type: str + type: Optional[str] = None index: int @@ -275,13 +275,19 @@ class Delta(OpenAIObject): super(Delta, self).__init__(**params) self.content = content self.role = role - self.function_call = function_call - if tool_calls is not None and isinstance(tool_calls, dict): + if function_call is not None and isinstance(function_call, dict): + self.function_call = FunctionCall(**function_call) + else: + self.function_call = function_call + if tool_calls is not None and isinstance(tool_calls, list): self.tool_calls = [] for tool_call in tool_calls: - if tool_call.get("index", None) is None: - tool_call["index"] = 0 - self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + if isinstance(tool_call, dict): + if tool_call.get("index", None) is None: + tool_call["index"] = 0 + self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + elif isinstance(tool_call, ChatCompletionDeltaToolCall): + self.tool_calls.append(tool_call) else: self.tool_calls = tool_calls @@ -8728,7 +8734,7 @@ class CustomStreamWrapper: or original_chunk.choices[0].delta.tool_calls is not None ): try: - delta = dict(original_chunk.choices[0].delta) + delta = original_chunk.choices[0].delta model_response.system_fingerprint = ( original_chunk.system_fingerprint ) @@ -8763,7 +8769,9 @@ class CustomStreamWrapper: is None ): t.function.arguments = "" - model_response.choices[0].delta = Delta(**delta) + _json_delta = delta.model_dump() + print_verbose(f"_json_delta: {_json_delta}") + model_response.choices[0].delta = Delta(**_json_delta) except Exception as e: traceback.print_exc() model_response.choices[0].delta = Delta() diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 6c663200d..5bd0bcdff 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -687,6 +687,15 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat" }, + "gemini-1.5-pro-preview-0215": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat" + }, "gemini-pro-vision": { "max_tokens": 16384, "max_output_tokens": 2048, diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index d0cb5739e..198d33013 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -40,6 +40,8 @@ litellm_settings: budget_duration: 30d general_settings: master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234) + proxy_budget_rescheduler_min_time: 30 + proxy_budget_rescheduler_max_time: 60 # database_url: "postgresql://:@:/" # [OPTIONAL] use for token-based auth to proxy environment_variables: diff --git a/pyproject.toml b/pyproject.toml index 295d7e902..ce452a2d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.27.12" +version = "1.27.14" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.27.12" +version = "1.27.14" version_files = [ "pyproject.toml:^version" ] diff --git a/requirements.txt b/requirements.txt index 6bd69302f..caede5b67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep boto3==1.34.34 # aws bedrock/sagemaker calls redis==5.0.0 # caching numpy==1.24.3 # semantic caching +pandas==2.1.1 # for viewing clickhouse spend analytics prisma==0.11.0 # for db mangum==0.17.0 # for aws lambda functions google-generativeai==0.3.2 # for vertex ai calls diff --git a/tests/test_keys.py b/tests/test_keys.py index 28ce02511..c2b957180 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -449,7 +449,7 @@ async def test_key_with_budgets(): reset_at_init_value = key_info["info"]["budget_reset_at"] reset_at_new_value = None i = 0 - await asyncio.sleep(610) + await asyncio.sleep(120) while i < 3: key_info = await get_key_info(session=session, get_key=key, call_key=key) reset_at_new_value = key_info["info"]["budget_reset_at"] diff --git a/ui/litellm-dashboard/src/components/usage.tsx b/ui/litellm-dashboard/src/components/usage.tsx index 84c942eec..53493eabc 100644 --- a/ui/litellm-dashboard/src/components/usage.tsx +++ b/ui/litellm-dashboard/src/components/usage.tsx @@ -172,20 +172,32 @@ const UsagePage: React.FC = ({ startTime, endTime ).then(async (response) => { - const topKeysResponse = await keyInfoCall( - accessToken, - getTopKeys(response) - ); - const filtered_keys = topKeysResponse["info"].map((k: any) => ({ - key: (k["key_name"] || k["key_alias"] || k["token"]).substring( - 0, - 7 - ), - spend: k["spend"], - })); - setTopKeys(filtered_keys); - setTopUsers(getTopUsers(response)); - setKeySpendData(response); + console.log("result from spend logs call", response); + if ("daily_spend" in response) { + // this is from clickhouse analytics + // + let daily_spend = response["daily_spend"]; + console.log("daily spend", daily_spend); + setKeySpendData(daily_spend); + let topApiKeys = response.top_api_keys; + setTopKeys(topApiKeys); + } + else { + const topKeysResponse = await keyInfoCall( + accessToken, + getTopKeys(response) + ); + const filtered_keys = topKeysResponse["info"].map((k: any) => ({ + key: (k["key_name"] || k["key_alias"] || k["token"]).substring( + 0, + 7 + ), + spend: k["spend"], + })); + setTopKeys(filtered_keys); + setTopUsers(getTopUsers(response)); + setKeySpendData(response); + } }); } catch (error) { console.error("There was an error fetching the data", error);