Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
fix(amazon_deepseek_transformation.py): remove </think> from stream o… (#8717)
* fix(amazon_deepseek_transformation.py): remove </think> from stream output - clean up the user-facing stream
* fix(key_management_endpoints.py): return `/key/list` sorted by created_at - makes it easier to find newly created keys
* style: clean up the team table
* feat(key_edit_view.tsx): support setting model-specific tpm/rpm limits on keys
This commit is contained in: parent c4d5b65e7b, commit d7e4cb3606
6 changed files with 50 additions and 8 deletions
amazon_deepseek_transformation.py

@@ -2,6 +2,7 @@ from typing import Any, List, Optional, cast
 from httpx import Response
 
+from litellm import verbose_logger
 from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
     _parse_content_for_reasoning,
 )
@@ -93,7 +94,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
         """
         try:
             typed_chunk = AmazonDeepSeekR1StreamingResponse(**chunk)  # type: ignore
-            if "</think>" in typed_chunk["generation"]:
+            generated_content = typed_chunk["generation"]
+            if generated_content == "</think>" and not self.has_finished_thinking:
+                verbose_logger.debug(
+                    "Deepseek r1: </think> received, setting has_finished_thinking to True"
+                )
+                generated_content = ""
                 self.has_finished_thinking = True
 
             prompt_token_count = typed_chunk.get("prompt_token_count") or 0
@@ -110,12 +116,12 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                         finish_reason=typed_chunk["stop_reason"],
                         delta=Delta(
                             content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if self.has_finished_thinking
                                 else None
                             ),
                             reasoning_content=(
-                                typed_chunk["generation"]
+                                generated_content
                                 if not self.has_finished_thinking
                                 else None
                             ),
@@ -124,5 +130,6 @@ class AmazonDeepseekR1ResponseIterator(BaseModelResponseIterator):
                 ],
                 usage=usage,
             )
+
         except Exception as e:
             raise e
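What changed in behavior: previously any chunk containing </think> flipped has_finished_thinking, but the chunk, delimiter included, was still emitted as user-facing content; now the delimiter chunk is blanked before the Delta is built, and each chunk's text is routed to reasoning_content before the delimiter and to content after it. A minimal self-contained sketch of that state machine (the helper name and plain-string chunk format are hypothetical, not litellm's actual API):

from typing import Iterable, Iterator, Optional, Tuple

def split_reasoning_stream(
    chunks: Iterable[str],
) -> Iterator[Tuple[Optional[str], Optional[str]]]:
    """Yield (reasoning_content, content) per chunk, swallowing </think>."""
    has_finished_thinking = False
    for generated_content in chunks:
        if generated_content == "</think>" and not has_finished_thinking:
            # drop the delimiter itself from user-facing output
            generated_content = ""
            has_finished_thinking = True
        if has_finished_thinking:
            yield (None, generated_content)
        else:
            yield (generated_content, None)

# list(split_reasoning_stream(["Let me think", "</think>", "Hello!"]))
# -> [("Let me think", None), (None, ""), (None, "Hello!")]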
key_management_endpoints.py

@@ -1962,6 +1962,10 @@ async def _list_key_helper(
         where=where,  # type: ignore
         skip=skip,  # type: ignore
         take=size,  # type: ignore
+        order=[
+            {"created_at": "desc"},
+            {"token": "desc"},  # fallback sort
+        ],
     )
 
     verbose_proxy_logger.debug(f"Fetched {len(keys)} keys")
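The new order argument makes /key/list return the newest keys first; the secondary sort on token keeps the ordering deterministic (and pagination stable) when several keys share the same created_at. The same ordering expressed in plain Python, on illustrative data:

from datetime import datetime, timezone

keys = [
    {"token": "sk-b", "created_at": datetime(2025, 2, 1, tzinfo=timezone.utc)},
    {"token": "sk-a", "created_at": datetime(2025, 2, 1, tzinfo=timezone.utc)},
    {"token": "sk-c", "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc)},
]

# Equivalent of order=[{"created_at": "desc"}, {"token": "desc"}]:
# reverse=True applies "desc" to both fields of the composite key.
keys.sort(key=lambda k: (k["created_at"], k["token"]), reverse=True)
assert [k["token"] for k in keys] == ["sk-b", "sk-a", "sk-c"]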
key_edit_view.tsx

@@ -133,11 +133,23 @@ export function KeyEditView({
       </Form.Item>
 
       <Form.Item label="TPM Limit" name="tpm_limit">
-        <InputNumber style={{ width: "100%" }} />
+        <InputNumber style={{ width: "100%" }} min={0}/>
       </Form.Item>
 
       <Form.Item label="RPM Limit" name="rpm_limit">
-        <InputNumber style={{ width: "100%" }} />
+        <InputNumber style={{ width: "100%" }} min={0}/>
+      </Form.Item>
+
+      <Form.Item label="Max Parallel Requests" name="max_parallel_requests">
+        <InputNumber style={{ width: "100%" }} min={0}/>
+      </Form.Item>
+
+      <Form.Item label="Model TPM Limit" name="model_tpm_limit">
+        <Input.TextArea rows={4} placeholder='{"gpt-4": 100, "claude-v1": 200}'/>
+      </Form.Item>
+
+      <Form.Item label="Model RPM Limit" name="model_rpm_limit">
+        <Input.TextArea rows={4} placeholder='{"gpt-4": 100, "claude-v1": 200}'/>
       </Form.Item>
 
       <Form.Item label="Guardrails" name="guardrails">
key info view component (KeyInfoView)

@@ -332,6 +332,9 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
                 <Text className="font-medium">Rate Limits</Text>
                 <Text>TPM: {keyData.tpm_limit !== null ? keyData.tpm_limit : "Unlimited"}</Text>
                 <Text>RPM: {keyData.rpm_limit !== null ? keyData.rpm_limit : "Unlimited"}</Text>
+                <Text>Max Parallel Requests: {keyData.max_parallel_requests !== null ? keyData.max_parallel_requests : "Unlimited"}</Text>
+                <Text>Model TPM Limits: {keyData.metadata?.model_tpm_limit ? JSON.stringify(keyData.metadata.model_tpm_limit) : "Unlimited"}</Text>
+                <Text>Model RPM Limits: {keyData.metadata?.model_rpm_limit ? JSON.stringify(keyData.metadata.model_rpm_limit) : "Unlimited"}</Text>
               </div>
 
               <div>
networking helper (keyUpdateCall)

@@ -2476,6 +2476,25 @@ export const keyUpdateCall = async (
   try {
     console.log("Form Values in keyUpdateCall:", formValues); // Log the form values before making the API call
 
+    if (formValues.model_tpm_limit) {
+      console.log("formValues.model_tpm_limit:", formValues.model_tpm_limit);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.model_tpm_limit = JSON.parse(formValues.model_tpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_tpm_limit: " + error);
+      }
+    }
+
+    if (formValues.model_rpm_limit) {
+      console.log("formValues.model_rpm_limit:", formValues.model_rpm_limit);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.model_rpm_limit = JSON.parse(formValues.model_rpm_limit);
+      } catch (error) {
+        throw new Error("Failed to parse model_rpm_limit: " + error);
+      }
+    }
     const url = proxyBaseUrl ? `${proxyBaseUrl}/key/update` : `/key/update`;
     const response = await fetch(url, {
       method: "POST",
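Once both fields are JSON.parse'd, keyUpdateCall POSTs formValues to /key/update with the model limits as real objects rather than strings. A hedged sketch of the equivalent request from Python; the base URL, admin key, and key identifier are placeholder assumptions, and the exact accepted fields should be checked against the proxy's docs:

import requests

PROXY_BASE_URL = "http://localhost:4000"  # assumption: locally running proxy
ADMIN_KEY = "sk-1234"                     # assumption: placeholder admin key

payload = {
    "key": "sk-the-key-being-edited",     # placeholder key identifier
    "tpm_limit": 1000,
    "rpm_limit": 60,
    "max_parallel_requests": 5,
    # per-model limits, as parsed from the textarea's JSON
    "model_tpm_limit": {"gpt-4": 100, "claude-v1": 200},
    "model_rpm_limit": {"gpt-4": 10, "claude-v1": 20},
}

response = requests.post(
    f"{PROXY_BASE_URL}/key/update",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json=payload,
)
response.raise_for_status()
print(response.json())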
teams table component (Teams)

@@ -389,7 +389,6 @@ const Teams: React.FC<TeamProps> = ({
                   >
                     {team["team_alias"]}
                   </TableCell>
-                  <TableRow>
                   <TableCell>
                     <div className="overflow-hidden">
                       <Tooltip title={team.team_id}>
@@ -408,8 +407,6 @@ const Teams: React.FC<TeamProps> = ({
                       </Tooltip>
                     </div>
                   </TableCell>
-                  </TableRow>
-
                   <TableCell
                     style={{
                       maxWidth: "4px",