diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 6804d677e..2831f1a5c 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2454,6 +2454,17 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, + "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "mode": "chat", + "supports_system_messages": true, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas" + }, "vertex_ai/mistral-large@latest": { "max_tokens": 8191, "max_input_tokens": 128000, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 03ba2e839..4f09518d1 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -901,9 +901,6 @@ async def update_database( - Update litellm-proxy-budget row (global proxy spend) """ ## if an end-user is passed in, do an upsert - we can't guarantee they already exist in db - existing_token_obj = await user_api_key_cache.async_get_cache( - key=hashed_token - ) existing_user_obj = await user_api_key_cache.async_get_cache(key=user_id) if existing_user_obj is not None and isinstance(existing_user_obj, dict): existing_user_obj = LiteLLM_UserTable(**existing_user_obj) @@ -1156,7 +1153,9 @@ async def update_cache( # Update the cost column for the given token existing_spend_obj.spend = new_spend - user_api_key_cache.set_cache(key=hashed_token, value=existing_spend_obj) + await user_api_key_cache.async_set_cache( + key=hashed_token, value=existing_spend_obj + ) ### UPDATE USER SPEND ### async def _update_user_cache(): @@ -1185,10 +1184,12 @@ async def update_cache( # Update the cost column for the given user if isinstance(existing_spend_obj, dict): existing_spend_obj["spend"] = new_spend - user_api_key_cache.set_cache(key=_id, value=existing_spend_obj) + await user_api_key_cache.async_set_cache( + key=_id, value=existing_spend_obj + ) else: existing_spend_obj.spend = new_spend - user_api_key_cache.set_cache( + await user_api_key_cache.async_set_cache( key=_id, value=existing_spend_obj.json() ) ## UPDATE GLOBAL PROXY ## @@ -1237,10 +1238,14 @@ async def update_cache( # Update the cost column for the given user if isinstance(existing_spend_obj, dict): existing_spend_obj["spend"] = new_spend - user_api_key_cache.set_cache(key=_id, value=existing_spend_obj) + await user_api_key_cache.async_set_cache( + key=_id, value=existing_spend_obj + ) else: existing_spend_obj.spend = new_spend - user_api_key_cache.set_cache(key=_id, value=existing_spend_obj.json()) + await user_api_key_cache.async_set_cache( + key=_id, value=existing_spend_obj.json() + ) except Exception as e: verbose_proxy_logger.exception( f"An error occurred updating end user cache: {str(e)}" @@ -1279,10 +1284,14 @@ async def update_cache( # Update the cost column for the given user if isinstance(existing_spend_obj, dict): existing_spend_obj["spend"] = new_spend - user_api_key_cache.set_cache(key=_id, value=existing_spend_obj) + await user_api_key_cache.async_set_cache( + key=_id, value=existing_spend_obj + ) else: existing_spend_obj.spend = new_spend - user_api_key_cache.set_cache(key=_id, value=existing_spend_obj) + await user_api_key_cache.async_set_cache( + key=_id, value=existing_spend_obj + ) except Exception as e: verbose_proxy_logger.exception( f"An error occurred updating end user cache: {str(e)}"