Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-26 11:14:04 +00:00
fix(amazon_deepseek_transformation.py): remove </think> from stream output (#8717)
* fix(amazon_deepseek_transformation.py): remove </think> from stream output - clean up the user-facing stream
* fix(key_management_endpoints.py): return `/key/list` sorted by created_at - makes it easier to find recently created keys
* style: clean up team table
* feat(key_edit_view.tsx): support setting model-specific tpm/rpm limits on keys
parent c4d5b65e7b
commit d7e4cb3606
6 changed files with 50 additions and 8 deletions
amazon_deepseek_transformation.py

@@ -2,6 +2,7 @@ from typing import Any, List, Optional, cast
 
 from httpx import Response
 
+from litellm import verbose_logger
 from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
     _parse_content_for_reasoning,
 )

@@ -93,7 +94,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
         """
         try:
             typed_chunk = AmazonDeepSeekR1StreamingResponse(**chunk)  # type: ignore
-            if "</think>" in typed_chunk["generation"]:
+            generated_content = typed_chunk["generation"]
+            if generated_content == "</think>" and not self.has_finished_thinking:
+                verbose_logger.debug(
+                    "Deepseek r1: </think> received, setting has_finished_thinking to True"
+                )
+                generated_content = ""
                 self.has_finished_thinking = True
 
             prompt_token_count = typed_chunk.get("prompt_token_count") or 0

@@ -110,12 +116,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                         finish_reason=typed_chunk["stop_reason"],
                         delta=Delta(
                             content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if self.has_finished_thinking
                                 else None
                             ),
                             reasoning_content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if not self.has_finished_thinking
                                 else None
                             ),

@@ -124,5 +130,6 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                 ],
                 usage=usage,
             )
+
         except Exception as e:
             raise e
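To see what this change accomplishes, here is a minimal standalone sketch of the same technique (not the LiteLLM implementation; the helper name and the simplified `{"generation": ...}` chunk shape are illustrative): the bare `</think>` token flips a flag and is swallowed, so text before it streams as reasoning and text after it streams as user-facing content, while the marker itself never reaches the user.

```python
# Sketch of the </think> handling above. The chunk shape mirrors the
# Bedrock DeepSeek-R1 stream; the function is illustrative, not LiteLLM's API.
from typing import Iterable, Iterator, Optional, Tuple


def split_reasoning_stream(
    chunks: Iterable[dict],
) -> Iterator[Tuple[Optional[str], Optional[str]]]:
    """Yield (reasoning_content, content) per chunk, dropping </think>."""
    has_finished_thinking = False
    for chunk in chunks:
        generated_content = chunk["generation"]
        if generated_content == "</think>" and not has_finished_thinking:
            generated_content = ""  # swallow the marker from the output
            has_finished_thinking = True
        if has_finished_thinking:
            yield (None, generated_content or None)
        else:
            yield (generated_content, None)


stream = [{"generation": s} for s in ["reasoning...", "</think>", "Hello!"]]
assert list(split_reasoning_stream(stream)) == [
    ("reasoning...", None),
    (None, None),   # the </think> chunk yields no visible text
    (None, "Hello!"),
]
```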
key_management_endpoints.py

@@ -1962,6 +1962,10 @@ async def _list_key_helper(
             where=where,  # type: ignore
             skip=skip,  # type: ignore
             take=size,  # type: ignore
+            order=[
+                {"created_at": "desc"},
+                {"token": "desc"},  # fallback sort
+            ],
         )
 
         verbose_proxy_logger.debug(f"Fetched {len(keys)} keys")
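The `order` list is a Prisma multi-key sort: newest keys first, with `token` breaking ties between keys created at the same instant. A plain-Python sketch of the same ordering (field names come from the diff; the sample records are made up):

```python
# Sketch of the sort semantics added above: created_at descending,
# token descending as a tiebreaker. Sample data is made up.
from datetime import datetime

keys = [
    {"token": "sk-a", "created_at": datetime(2025, 2, 1)},
    {"token": "sk-c", "created_at": datetime(2025, 2, 2)},
    {"token": "sk-b", "created_at": datetime(2025, 2, 2)},
]

# Equivalent of order=[{"created_at": "desc"}, {"token": "desc"}]:
# sort on the tuple, reversed so both components run descending.
keys.sort(key=lambda k: (k["created_at"], k["token"]), reverse=True)

assert [k["token"] for k in keys] == ["sk-c", "sk-b", "sk-a"]
```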
key_edit_view.tsx

@@ -133,11 +133,23 @@ export function KeyEditView({
       </Form.Item>
 
       <Form.Item label="TPM Limit" name="tpm_limit">
-        <InputNumber style={{ width: "100%" }} />
+        <InputNumber style={{ width: "100%" }} min={0}/>
       </Form.Item>
 
       <Form.Item label="RPM Limit" name="rpm_limit">
-        <InputNumber style={{ width: "100%" }} />
+        <InputNumber style={{ width: "100%" }} min={0}/>
       </Form.Item>
 
+      <Form.Item label="Max Parallel Requests" name="max_parallel_requests">
+        <InputNumber style={{ width: "100%" }} min={0}/>
+      </Form.Item>
+
+      <Form.Item label="Model TPM Limit" name="model_tpm_limit">
+        <Input.TextArea rows={4} placeholder='{"gpt-4": 100, "claude-v1": 200}'/>
+      </Form.Item>
+
+      <Form.Item label="Model RPM Limit" name="model_rpm_limit">
+        <Input.TextArea rows={4} placeholder='{"gpt-4": 100, "claude-v1": 200}'/>
+      </Form.Item>
+
       <Form.Item label="Guardrails" name="guardrails">
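The two new textareas take a JSON object mapping model names to integer limits, as the placeholder shows. A small Python sketch of what a well-formed value looks like and how it could be checked (the validation helper is illustrative, not part of the proxy):

```python
# Sketch: validate the JSON shape the Model TPM/RPM textareas expect --
# an object mapping model names to non-negative integer limits.
import json


def parse_model_limits(raw: str) -> dict[str, int]:
    limits = json.loads(raw)  # raises ValueError on malformed JSON
    if not isinstance(limits, dict):
        raise ValueError("expected a JSON object of model -> limit")
    for model, limit in limits.items():
        if not isinstance(limit, int) or limit < 0:
            raise ValueError(f"limit for {model!r} must be a non-negative int")
    return limits


assert parse_model_limits('{"gpt-4": 100, "claude-v1": 200}') == {
    "gpt-4": 100,
    "claude-v1": 200,
}
```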
key_info_view.tsx

@@ -332,6 +332,9 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
                 <Text className="font-medium">Rate Limits</Text>
                 <Text>TPM: {keyData.tpm_limit !== null ? keyData.tpm_limit : "Unlimited"}</Text>
                 <Text>RPM: {keyData.rpm_limit !== null ? keyData.rpm_limit : "Unlimited"}</Text>
+                <Text>Max Parallel Requests: {keyData.max_parallel_requests !== null ? keyData.max_parallel_requests : "Unlimited"}</Text>
+                <Text>Model TPM Limits: {keyData.metadata?.model_tpm_limit ? JSON.stringify(keyData.metadata.model_tpm_limit) : "Unlimited"}</Text>
+                <Text>Model RPM Limits: {keyData.metadata?.model_rpm_limit ? JSON.stringify(keyData.metadata.model_rpm_limit) : "Unlimited"}</Text>
               </div>
 
               <div>
networking.tsx

@@ -2476,6 +2476,25 @@ export const keyUpdateCall = async (
   try {
     console.log("Form Values in keyUpdateCall:", formValues); // Log the form values before making the API call
 
+    if (formValues.model_tpm_limit) {
+      console.log("formValues.model_tpm_limit:", formValues.model_tpm_limit);
+      // if JSON.parse throws an exception, surface it in the error message
+      try {
+        formValues.model_tpm_limit = JSON.parse(formValues.model_tpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_tpm_limit: " + error);
+      }
+    }
+
+    if (formValues.model_rpm_limit) {
+      console.log("formValues.model_rpm_limit:", formValues.model_rpm_limit);
+      // if JSON.parse throws an exception, surface it in the error message
+      try {
+        formValues.model_rpm_limit = JSON.parse(formValues.model_rpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_rpm_limit: " + error);
+      }
+    }
     const url = proxyBaseUrl ? `${proxyBaseUrl}/key/update` : `/key/update`;
     const response = await fetch(url, {
       method: "POST",
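Once parsed, the limits travel in the `/key/update` body as real JSON objects rather than strings. A hedged Python sketch of the resulting request (the endpoint, method, and field names come from the diff; the base URL, key values, and Bearer auth header are assumptions):

```python
# Sketch of the request keyUpdateCall ends up sending once the
# model_tpm_limit / model_rpm_limit strings are parsed into objects.
# Base URL, key values, and the Authorization header are assumptions.
import httpx

payload = {
    "key": "sk-...",  # the key being edited (placeholder)
    "model_tpm_limit": {"gpt-4": 100, "claude-v1": 200},
    "model_rpm_limit": {"gpt-4": 10, "claude-v1": 20},
}

response = httpx.post(
    "http://localhost:4000/key/update",  # assumed proxy base URL
    headers={"Authorization": "Bearer sk-admin"},  # placeholder admin key
    json=payload,
)
response.raise_for_status()
```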
teams.tsx

@@ -389,7 +389,6 @@ const Teams: React.FC<TeamProps> = ({
           >
             {team["team_alias"]}
           </TableCell>
-          <TableRow>
           <TableCell>
             <div className="overflow-hidden">
               <Tooltip title={team.team_id}>

@@ -408,8 +407,6 @@ const Teams: React.FC<TeamProps> = ({
             </Tooltip>
           </div>
         </TableCell>
-        </TableRow>
-
         <TableCell
           style={{
             maxWidth: "4px",