LiteLLM Minor Fixes & Improvements (10/08/2024) (#6119)

* refactor(cost_calculator.py): move error line to debug - https://github.com/BerriAI/litellm/issues/5683#issuecomment-2398599498
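
A minimal sketch of the change in spirit — the wrapper and callable below are hypothetical, but verbose_logger is litellm's internal logger:

from litellm._logging import verbose_logger

def calculate_cost_safely(calc):
    # `calc` is a hypothetical zero-arg callable that computes response cost
    try:
        return calc()
    except Exception as e:
        # previously surfaced at error level; now demoted to debug to reduce noise
        verbose_logger.debug("cost_calculator.py - error calculating cost: %s", str(e))
        return None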

* fix(migrate-hidden-params-to-read-from-standard-logging-payload): Fixes https://github.com/BerriAI/litellm/issues/5546#issuecomment-2399994026
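
A rough sketch of where callers read these values now, assuming the standard_logging_object key litellm passes to success callbacks (the logger class name and printed fields are illustrative):

import litellm
from litellm.integrations.custom_logger import CustomLogger

class HiddenParamsLogger(CustomLogger):  # illustrative name
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # hidden params are now sourced from the standard logging payload
        payload = kwargs.get("standard_logging_object") or {}
        print(payload.get("response_cost"), payload.get("model_id"))

litellm.callbacks = [HiddenParamsLogger()]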

* fix(types/utils.py): mark weight as a litellm param

Fixes https://github.com/BerriAI/litellm/issues/5781
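
For reference, weight appears in router configs like the sketch below (the model entries are placeholders); marking it as a litellm param keeps it from being forwarded to the provider as an unknown argument:

from litellm import Router

router = Router(
    model_list=[
        # `weight` is consumed by litellm's routing logic, not by the provider
        {"model_name": "gpt-4o", "litellm_params": {"model": "openai/gpt-4o", "weight": 2}},
        {"model_name": "gpt-4o", "litellm_params": {"model": "azure/gpt-4o-eu", "weight": 1}},
    ]
)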

* feat(internal_user_endpoints.py): fix /user/info + show user max budget as default max budget

Fixes https://github.com/BerriAI/litellm/issues/6117

* feat: support returning team member budget in `/user/info`

Sets the user's max budget within the team as the max budget shown on the UI

  Closes https://github.com/BerriAI/litellm/issues/6117
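
An illustrative request against the proxy (URL, user id, and key are placeholders):

import requests

resp = requests.get(
    "http://localhost:4000/user/info",
    params={"user_id": "my-user-id"},
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())  # should now include the team member budget and the user's max budget as default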

* bug fix for optional parameter passing to replicate (#6067)

Signed-off-by: Mandana Vaziri <mvaziri@us.ibm.com>

* fix(o1_transformation.py): handle o1 temperature=0

o1 doesn't support temperature=0, so allow the admin to drop this param instead of erroring
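
A minimal sketch of the admin-side behavior, using litellm's existing drop_params setting:

import litellm

litellm.drop_params = True  # drop unsupported params instead of raising

response = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "hi"}],
    temperature=0,  # o1 rejects temperature=0; with drop_params set, litellm strips it
)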

* test: fix test

---------

Signed-off-by: Mandana Vaziri <mvaziri@us.ibm.com>
Co-authored-by: Mandana Vaziri <mvaziri@us.ibm.com>
Krish Dholakia 2024-10-08 21:57:03 -07:00 committed by GitHub
parent ac6fb0cbef
commit 9695c1af10
21 changed files with 260 additions and 86 deletions

@@ -3197,7 +3197,7 @@ def get_optional_params(
         if stream:
             optional_params["stream"] = stream
-        return optional_params
+        #return optional_params
         if max_tokens is not None:
             if "vicuna" in model or "flan" in model:
                 optional_params["max_length"] = max_tokens
@@ -7244,34 +7244,6 @@ class CustomStreamWrapper:
             except Exception as e:
                 raise e

-    def handle_bedrock_stream(self, chunk):
-        return {
-            "text": chunk["text"],
-            "is_finished": chunk["is_finished"],
-            "finish_reason": chunk["finish_reason"],
-        }
-
-    def handle_sagemaker_stream(self, chunk):
-        if "data: [DONE]" in chunk:
-            text = ""
-            is_finished = True
-            finish_reason = "stop"
-            return {
-                "text": text,
-                "is_finished": is_finished,
-                "finish_reason": finish_reason,
-            }
-        elif isinstance(chunk, dict):
-            if chunk["is_finished"] is True:
-                finish_reason = "stop"
-            else:
-                finish_reason = ""
-            return {
-                "text": chunk["text"],
-                "is_finished": chunk["is_finished"],
-                "finish_reason": finish_reason,
-            }
-
     def handle_watsonx_stream(self, chunk):
         try:
             if isinstance(chunk, dict):
@@ -7419,6 +7391,10 @@ class CustomStreamWrapper:
             model_response._hidden_params = hidden_params
             model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
             model_response._hidden_params["created_at"] = time.time()
+            model_response._hidden_params = {
+                **model_response._hidden_params,
+                **self._hidden_params,
+            }
             if (
                 len(model_response.choices) > 0
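
The new merge relies on Python's dict-unpacking order: keys from self._hidden_params win on collision, so instance-level hidden params override the freshly built ones. A standalone illustration:

base = {"custom_llm_provider": "openai", "created_at": 1.0}
override = {"created_at": 2.0}
merged = {**base, **override}
assert merged == {"custom_llm_provider": "openai", "created_at": 2.0}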