Mirror of https://github.com/BerriAI/litellm.git
feat(prometheus_services.py): emit proxy latency for successful llm api requests
Uses a Prometheus histogram for this.
commit 7f5bcf38b7
parent 1b98503be3
6 changed files with 87 additions and 20 deletions
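Per the commit message, proxy-side latency for successful LLM API requests is recorded with a Prometheus histogram. A minimal sketch of that pattern with prometheus_client follows; the metric name litellm_proxy_latency and the service label are illustrative assumptions, not the names actually defined in prometheus_services.py.

# Sketch only: record per-request latency in a Prometheus histogram.
# Metric name and label are assumed for illustration, not taken from the diff.
from prometheus_client import Histogram

PROXY_LATENCY = Histogram(
    "litellm_proxy_latency",  # assumed metric name
    "Latency of successful LLM API requests through the proxy (seconds)",
    labelnames=["service"],
)

def observe_latency(service: str, start_time: float, end_time: float) -> None:
    # Histogram.observe() records one sample; Prometheus buckets it by latency.
    PROXY_LATENCY.labels(service=service).observe(end_time - start_time)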
@@ -18,6 +18,7 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm._service_logger import ServiceLogging
 from litellm import ModelResponse, EmbeddingResponse, ImageResponse
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
@@ -80,10 +81,12 @@ class ProxyLogging:
     def _init_litellm_callbacks(self):
         print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
+        self.service_logging_obj = ServiceLogging()
         litellm.callbacks.append(self.max_parallel_request_limiter)
         litellm.callbacks.append(self.max_tpm_rpm_limiter)
         litellm.callbacks.append(self.max_budget_limiter)
         litellm.callbacks.append(self.cache_control_check)
+        litellm.callbacks.append(self.service_logging_obj)
         litellm.success_callback.append(self.response_taking_too_long_callback)
         for callback in litellm.callbacks:
             if callback not in litellm.input_callback:
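The second hunk registers a ServiceLogging instance in litellm.callbacks alongside the existing limiter hooks, so it receives success events for each proxied request. The sketch below shows the same registration pattern using litellm's CustomLogger callback interface; LatencyLogger is a hypothetical stand-in for illustration, not the actual ServiceLogging implementation.

# Illustrative only: a callback object registered the same way the diff
# registers service_logging_obj. Assumes litellm's CustomLogger interface;
# the real ServiceLogging class may implement different hooks.
import litellm
from litellm.integrations.custom_logger import CustomLogger

class LatencyLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # start_time/end_time are datetimes supplied by litellm on success.
        latency_s = (end_time - start_time).total_seconds()
        print(f"proxy latency: {latency_s:.3f}s")

# Registration mirrors ProxyLogging._init_litellm_callbacks in the diff above.
litellm.callbacks.append(LatencyLogger())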