LiteLLM Minor Fixes & Improvements (09/17/2024) (#5742)

* fix(proxy_server.py): use default azure credentials to support azure non-client secret kms

* fix(langsmith.py): raise error if credentials missing

* feat(langsmith.py): support error logging for langsmith + standard logging payload

Fixes https://github.com/BerriAI/litellm/issues/5738

* Fix hardcoding of schema in view check (#5749)

* fix - deal with case when check view exists returns None (#5740)

* Revert "fix - deal with case when check view exists returns None (#5740)" (#5741)

This reverts commit 535228159b.

* test(test_router_debug_logs.py): move to mock response

* Fix hardcoding of schema

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>

* fix(proxy_server.py): allow admin to disable ui via `DISABLE_ADMIN_UI` flag

* fix(router.py): fix default model name value

Fixes 55db19a1e4 (r1763712148)

* fix(utils.py): fix unbound variable error

* feat(rerank/main.py): add azure ai rerank endpoints

Closes https://github.com/BerriAI/litellm/issues/5667

* feat(secret_detection.py): Allow configuring secret detection params

Allows admin to control what plugins to run for secret detection. Prevents overzealous secret detection.

* docs(secret_detection.md): add secret detection guardrail docs

* fix: fix linting errors

* fix - deal with case when check view exists returns None (#5740)

* Revert "fix - deal with case when check view exists returns None (#5740)" (#5741)

This reverts commit 535228159b.

* Litellm fix router testing (#5748)

* test: fix testing - azure changed content policy error logic

* test: fix tests to use mock responses

* test(test_image_generation.py): handle api instability

* test(test_image_generation.py): handle azure api instability

* fix(utils.py): fix unbound variable error

* fix(utils.py): fix unbound variable error

* test: refactor test to use mock response

* test: mark flaky azure tests

* Bump next from 14.1.1 to 14.2.10 in /ui/litellm-dashboard (#5753)

Bumps [next](https://github.com/vercel/next.js) from 14.1.1 to 14.2.10.
- [Release notes](https://github.com/vercel/next.js/releases)
- [Changelog](https://github.com/vercel/next.js/blob/canary/release.js)
- [Commits](https://github.com/vercel/next.js/compare/v14.1.1...v14.2.10)

---
updated-dependencies:
- dependency-name: next
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* [Fix] o1-mini causes pydantic warnings on `reasoning_tokens`  (#5754)

* add requester_metadata in standard logging payload

* log requester_metadata in metadata

* use StandardLoggingPayload for logging

* docs StandardLoggingPayload

* fix import

* include standard logging object in failure

* add test for requester metadata

* handle completion_tokens_details

* add test for completion_tokens_details

* [Feat-Proxy-DataDog] Log Redis, Postgres Failure events on DataDog  (#5750)

* dd - start tracking redis status on dd

* add async_service_succes_hook / failure hook in custom logger

* add async_service_failure_hook

* log service failures on dd

* fix import error

* add test for redis errors / warning

* [Fix] Router/ Proxy - Tag Based routing, raise correct error when no deployments found and tag filtering is on  (#5745)

* fix tag routing - raise correct error when no model with tag based routing

* fix error string from tag based routing

* test router tag based routing

* raise 401 error when no tags available for deployment

* linting fix

* [Feat] Log Request metadata on gcs bucket logging (#5743)

* add requester_metadata in standard logging payload

* log requester_metadata in metadata

* use StandardLoggingPayload for logging

* docs StandardLoggingPayload

* fix import

* include standard logging object in failure

* add test for requester metadata

* fix(litellm_logging.py): fix logging message

* fix(rerank_api/main.py): fix linting errors

* fix(custom_guardrails.py): maintain backwards compatibility for older guardrails

* fix(rerank_api/main.py): fix cost tracking for rerank endpoints

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: steffen-sbt <148480574+steffen-sbt@users.noreply.github.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
Krish Dholakia 2024-09-17 23:00:04 -07:00 committed by GitHub
parent c5c64a6c04
commit 98c335acd0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 1261 additions and 257 deletions

View file

@ -43,6 +43,7 @@ from litellm.types.utils import (
StandardLoggingMetadata,
StandardLoggingModelInformation,
StandardLoggingPayload,
StandardLoggingPayloadStatus,
StandardPassThroughResponseObject,
TextCompletionResponse,
TranscriptionResponse,
@ -668,6 +669,7 @@ class Logging:
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
)
)
elif isinstance(result, dict): # pass-through endpoints
@ -679,6 +681,7 @@ class Logging:
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
)
)
else: # streaming chunks + image gen.
@ -762,6 +765,7 @@ class Logging:
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
)
)
if self.dynamic_success_callbacks is not None and isinstance(
@ -1390,6 +1394,7 @@ class Logging:
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
)
)
if self.dynamic_async_success_callbacks is not None and isinstance(
@ -1645,6 +1650,20 @@ class Logging:
self.model_call_details["litellm_params"].get("metadata", {}) or {}
)
metadata.update(exception.headers)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
)
)
return start_time, end_time
async def special_failure_handlers(self, exception: Exception):
@ -2347,10 +2366,12 @@ def is_valid_sha256_hash(value: str) -> bool:
def get_standard_logging_object_payload(
kwargs: Optional[dict],
init_response_obj: Any,
init_response_obj: Union[Any, BaseModel, dict],
start_time: dt_object,
end_time: dt_object,
logging_obj: Logging,
status: StandardLoggingPayloadStatus,
error_str: Optional[str] = None,
) -> Optional[StandardLoggingPayload]:
try:
if kwargs is None:
@ -2467,7 +2488,7 @@ def get_standard_logging_object_payload(
custom_pricing = use_custom_pricing_for_model(litellm_params=litellm_params)
model_cost_name = _select_model_name_for_cost_calc(
model=None,
completion_response=init_response_obj,
completion_response=init_response_obj, # type: ignore
base_model=base_model,
custom_pricing=custom_pricing,
)
@ -2498,6 +2519,7 @@ def get_standard_logging_object_payload(
id=str(id),
call_type=call_type or "",
cache_hit=cache_hit,
status=status,
saved_cache_cost=saved_cache_cost,
startTime=start_time_float,
endTime=end_time_float,
@ -2517,11 +2539,12 @@ def get_standard_logging_object_payload(
requester_ip_address=clean_metadata.get("requester_ip_address", None),
messages=kwargs.get("messages"),
response=( # type: ignore
response_obj if len(response_obj.keys()) > 0 else init_response_obj
response_obj if len(response_obj.keys()) > 0 else init_response_obj # type: ignore
),
model_parameters=kwargs.get("optional_params", None),
hidden_params=clean_hidden_params,
model_map_information=model_cost_information,
error_str=error_str,
)
verbose_logger.debug(