(Feat) Add "x-litellm-overhead-duration-ms" and "x-litellm-response-duration-ms" headers to responses from LiteLLM (#7899)

* add track_llm_api_timing

* add track_llm_api_timing

* test_litellm_overhead

* use ResponseMetadata class for setting hidden params and response overhead

* instrument http handler

* fix track_llm_api_timing

* track_llm_api_timing

* emit response overhead on hidden params

* fix resp metadata

* fix make_sync_openai_embedding_request

* test_aaaaatext_completion_endpoint fixes

* _get_value_from_hidden_params

* set_hidden_params

* test_litellm_overhead

* test_litellm_overhead

* test_litellm_overhead

* fix import

* test_litellm_overhead_stream

* add LiteLLMLoggingObject

* use diff folder for testing

* use diff folder for overhead testing

* test litellm overhead

* use typing

* clear typing

* test_litellm_overhead

* fix async_streaming

* update_response_metadata

* move test file

* apply metadata to the response object
This commit is contained in:
Ishaan Jaff 2025-01-21 20:27:55 -08:00 committed by GitHub
parent 63d7d04232
commit b6f2e659b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 464 additions and 73 deletions

View file

@@ -6,12 +6,17 @@ import httpx
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
import litellm
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
from litellm.types.llms.custom_http import *
if TYPE_CHECKING:
from litellm import LlmProviders
from litellm.litellm_core_utils.litellm_logging import (
Logging as LiteLLMLoggingObject,
)
else:
LlmProviders = Any
LiteLLMLoggingObject = Any
try:
from litellm._version import version
@@ -156,6 +161,7 @@ class AsyncHTTPHandler:
)
return response
@track_llm_api_timing()
async def post(
self,
url: str,
@@ -165,6 +171,7 @@ class AsyncHTTPHandler:
headers: Optional[dict] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
stream: bool = False,
logging_obj: Optional[LiteLLMLoggingObject] = None,
):
try:
if timeout is None:
@@ -494,6 +501,7 @@ class HTTPHandler:
timeout: Optional[Union[float, httpx.Timeout]] = None,
files: Optional[dict] = None,
content: Any = None,
logging_obj: Optional[LiteLLMLoggingObject] = None,
):
try:
if timeout is not None: