diff --git a/litellm/llms/bedrock/chat/invoke_transformations/amazon_deepseek_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/amazon_deepseek_transformation.py
index 8fbb3e6703..d7ceec1f1c 100644
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_deepseek_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_deepseek_transformation.py
@@ -2,6 +2,7 @@ from typing import Any, List, Optional, cast
 
 from httpx import Response
 
+from litellm import verbose_logger
 from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
     _parse_content_for_reasoning,
 )
@@ -93,7 +94,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
         """
         try:
             typed_chunk = AmazonDeepSeekR1StreamingResponse(**chunk)  # type: ignore
-            if "</think>" in typed_chunk["generation"]:
+            generated_content = typed_chunk["generation"]
+            if generated_content == "</think>" and not self.has_finished_thinking:
+                verbose_logger.debug(
+                    "Deepseek r1: </think> received, setting has_finished_thinking to True"
+                )
+                generated_content = ""
                 self.has_finished_thinking = True
 
             prompt_token_count = typed_chunk.get("prompt_token_count") or 0
@@ -110,12 +116,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                         finish_reason=typed_chunk["stop_reason"],
                         delta=Delta(
                             content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if self.has_finished_thinking
                                 else None
                             ),
                             reasoning_content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if not self.has_finished_thinking
                                 else None
                             ),
@@ -124,5 +130,6 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                 ],
                 usage=usage,
             )
+
         except Exception as e:
             raise e
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index 83bf944096..0352a81cd5 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -1962,6 +1962,10 @@ async def _list_key_helper(
             where=where,  # type: ignore
             skip=skip,  # type: ignore
             take=size,  # type: ignore
+            order=[
+                {"created_at": "desc"},
+                {"token": "desc"},  # fallback sort
+            ],
         )
 
         verbose_proxy_logger.debug(f"Fetched {len(keys)} keys")
diff --git a/ui/litellm-dashboard/src/components/key_edit_view.tsx b/ui/litellm-dashboard/src/components/key_edit_view.tsx
index 770c5c2bb9..e6fcadf3fc 100644
--- a/ui/litellm-dashboard/src/components/key_edit_view.tsx
+++ b/ui/litellm-dashboard/src/components/key_edit_view.tsx
@@ -133,11 +133,23 @@ export function KeyEditView({
 [hunk body lost in extraction: per the hunk counts it reworks two lines of the TPM/RPM limit inputs and adds new form fields, evidently Max Parallel Requests, Model TPM Limit, and Model RPM Limit, matching the fields surfaced in key_info_view.tsx and networking.tsx below]
diff --git a/ui/litellm-dashboard/src/components/key_info_view.tsx b/ui/litellm-dashboard/src/components/key_info_view.tsx
index 7460fc62bd..cadc365ada 100644
--- a/ui/litellm-dashboard/src/components/key_info_view.tsx
+++ b/ui/litellm-dashboard/src/components/key_info_view.tsx
@@ -332,6 +332,9 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
                 <Text>Rate Limits</Text>
                 <Text>TPM: {keyData.tpm_limit !== null ? keyData.tpm_limit : "Unlimited"}</Text>
                 <Text>RPM: {keyData.rpm_limit !== null ? keyData.rpm_limit : "Unlimited"}</Text>
+                <Text>Max Parallel Requests: {keyData.max_parallel_requests !== null ? keyData.max_parallel_requests : "Unlimited"}</Text>
+                <Text>Model TPM Limits: {keyData.metadata?.model_tpm_limit ? JSON.stringify(keyData.metadata.model_tpm_limit) : "Unlimited"}</Text>
+                <Text>Model RPM Limits: {keyData.metadata?.model_rpm_limit ? JSON.stringify(keyData.metadata.model_rpm_limit) : "Unlimited"}</Text>
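A note on the amazon_deepseek_transformation.py hunks above: DeepSeek R1 on Bedrock Invoke streams its chain of thought first and then emits a literal `</think>` token, so the iterator flips `has_finished_thinking` when that sentinel arrives, blanks the sentinel chunk, and routes earlier text to `reasoning_content` and later text to `content`. Below is a minimal, self-contained sketch of that routing; the function name and chunk shape are illustrative, not the actual LiteLLM classes.

```python
from typing import Iterable, Iterator, Optional, Tuple

THINK_END = "</think>"  # sentinel DeepSeek R1 emits when its reasoning phase ends


def route_chunks(
    generations: Iterable[str],
) -> Iterator[Tuple[Optional[str], Optional[str]]]:
    """Yield (reasoning_content, content) for each streamed generation chunk."""
    has_finished_thinking = False
    for generation in generations:
        if generation == THINK_END and not has_finished_thinking:
            # swallow the sentinel itself and switch modes, mirroring the diff
            has_finished_thinking = True
            generation = ""
        if has_finished_thinking:
            yield None, generation    # normal assistant content
        else:
            yield generation, None    # chain of thought -> reasoning_content


if __name__ == "__main__":
    stream = ["Let me think", " this through.", "</think>", "The answer", " is 42."]
    for reasoning, content in route_chunks(stream):
        print({"reasoning_content": reasoning, "content": content})
```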
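On the `_list_key_helper` change: the added `order` clause sorts keys newest-first and uses the token as a tie-breaker, so rows that share a `created_at` timestamp cannot shuffle between pages on repeated skip/take queries. A tiny plain-Python illustration of the tie-breaking idea (the dicts stand in for the Prisma rows):

```python
from datetime import datetime

keys = [
    {"token": "sk-aaa", "created_at": datetime(2024, 5, 1, 12, 0)},
    {"token": "sk-ccc", "created_at": datetime(2024, 5, 1, 12, 0)},  # same timestamp as sk-aaa
    {"token": "sk-bbb", "created_at": datetime(2024, 4, 30, 9, 0)},
]

# (created_at desc, token desc) mirrors order=[{"created_at": "desc"}, {"token": "desc"}]:
# the secondary key resolves ties, so page boundaries stay stable across requests.
ordered = sorted(keys, key=lambda k: (k["created_at"], k["token"]), reverse=True)

page_size = 2
print([k["token"] for k in ordered[:page_size]])               # ['sk-ccc', 'sk-aaa']
print([k["token"] for k in ordered[page_size:2 * page_size]])  # ['sk-bbb']
```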
diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx
index b58a74c196..5cd9c99a24 100644
--- a/ui/litellm-dashboard/src/components/networking.tsx
+++ b/ui/litellm-dashboard/src/components/networking.tsx
@@ -2476,6 +2476,25 @@ export const keyUpdateCall = async (
   try {
     console.log("Form Values in keyUpdateCall:", formValues); // Log the form values before making the API call
+    if (formValues.model_tpm_limit) {
+      console.log("formValues.model_tpm_limit:", formValues.model_tpm_limit);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.model_tpm_limit = JSON.parse(formValues.model_tpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_tpm_limit: " + error);
+      }
+    }
+
+    if (formValues.model_rpm_limit) {
+      console.log("formValues.model_rpm_limit:", formValues.model_rpm_limit);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.model_rpm_limit = JSON.parse(formValues.model_rpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_rpm_limit: " + error);
+      }
+    }
 
     const url = proxyBaseUrl ? `${proxyBaseUrl}/key/update` : `/key/update`;
     const response = await fetch(url, {
       method: "POST",
diff --git a/ui/litellm-dashboard/src/components/teams.tsx b/ui/litellm-dashboard/src/components/teams.tsx
index 44d5ef4557..0cd3525958 100644
--- a/ui/litellm-dashboard/src/components/teams.tsx
+++ b/ui/litellm-dashboard/src/components/teams.tsx
@@ -389,7 +389,6 @@ const Teams: React.FC = ({
                   >
                     {team["team_alias"]}
 [remaining context and the single removed JSX line lost in extraction]
@@ -408,8 +407,6 @@ const Teams: React.FC = ({
 [hunk body lost in extraction; it removes two JSX lines]
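Taken together, the dashboard changes (key_edit_view.tsx, key_info_view.tsx, networking.tsx) expose a key's Max Parallel Requests and per-model TPM/RPM limits, and `keyUpdateCall` JSON.parses the values typed into the form so they reach the proxy as objects rather than strings. The same update can be issued directly against the proxy's `/key/update` endpoint; a rough sketch, assuming a proxy on localhost:4000, an admin key, and placeholder model names (verify field support against your proxy version):

```python
import requests

PROXY_BASE_URL = "http://localhost:4000"  # assumption: local LiteLLM proxy
ADMIN_KEY = "sk-1234"                     # assumption: proxy admin/master key

payload = {
    "key": "sk-existing-virtual-key",  # the virtual key being edited (placeholder)
    "max_parallel_requests": 10,
    # per-model limits go over the wire as objects, which is why keyUpdateCall
    # JSON.parse()s the strings typed into the dashboard form before POSTing
    "model_tpm_limit": {"gpt-4o": 100000, "claude-3-5-sonnet": 50000},
    "model_rpm_limit": {"gpt-4o": 100, "claude-3-5-sonnet": 50},
}

resp = requests.post(
    f"{PROXY_BASE_URL}/key/update",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json=payload,
)
resp.raise_for_status()
print(resp.json())
```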