(Feat) Add "x-litellm-overhead-duration-ms" and "x-litellm-response-duration-ms" to responses from LiteLLM (#7899)

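In practice these headers can be read straight off a proxy response. A minimal sketch, assuming a LiteLLM proxy running at http://localhost:4000 and that both headers are attached to completion responses; the URL, API key, and model below are placeholders:

import requests

resp = requests.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder proxy key
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)

# Total time LiteLLM took to return the response, in milliseconds.
print(resp.headers.get("x-litellm-response-duration-ms"))
# Portion of that time attributed to LiteLLM itself rather than the upstream LLM API.
print(resp.headers.get("x-litellm-overhead-duration-ms"))
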
* add track_llm_api_timing

* add track_llm_api_timing

* test_litellm_overhead

* use ResponseMetadata class for setting hidden params and response overhead

* instrument http handler

* fix track_llm_api_timing

* track_llm_api_timing

* emit response overhead on hidden params

* fix resp metadata

* fix make_sync_openai_embedding_request

* test_aaaaatext_completion_endpoint fixes

* _get_value_from_hidden_params

* set_hidden_params

* test_litellm_overhead

* test_litellm_overhead

* test_litellm_overhead

* fix import

* test_litellm_overhead_stream

* add LiteLLMLoggingObject

* use diff folder for testing

* use diff folder for overhead testing

* test litellm overhead

* use typing

* clear typing

* test_litellm_overhead

* fix async_streaming

* update_response_metadata

* move test file

* apply metadata to the response object
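The bullets above all serve one mechanism: time the actual LLM API call with a decorator, record the measured duration on the logging object, and later report total response time minus API time as LiteLLM overhead. A rough sketch of that pattern, assuming the timed call receives logging_obj as a keyword argument; the attribute name below is illustrative, not the real LiteLLM field:

import functools
import time

def track_llm_api_timing():
    """Sketch of a timing decorator for async API-call helpers (sync variant omitted)."""
    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start = time.monotonic()
            try:
                return await func(*args, **kwargs)
            finally:
                logging_obj = kwargs.get("logging_obj")
                if logging_obj is not None:
                    # Illustrative attribute; the real logging object tracks this internally.
                    logging_obj.llm_api_duration_ms = (time.monotonic() - start) * 1000
        return wrapper
    return decorator
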
Commit b6f2e659b9 (parent 63d7d04232)
Ishaan Jaff, 2025-01-21 20:27:55 -08:00, committed by GitHub
17 changed files with 464 additions and 73 deletions


@@ -28,6 +28,7 @@ from litellm import verbose_logger
 from litellm.caching.caching import InMemoryCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.litellm_core_utils.prompt_templates.factory import (
     cohere_message_pt,
     construct_tool_use_system_prompt,
@@ -171,7 +172,7 @@ async def make_call(
     data: str,
     model: str,
     messages: list,
-    logging_obj,
+    logging_obj: Logging,
     fake_stream: bool = False,
     json_mode: Optional[bool] = False,
 ):
@@ -186,6 +187,7 @@
         headers=headers,
         data=data,
         stream=not fake_stream,
+        logging_obj=logging_obj,
     )
 
     if response.status_code != 200:
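This hunk (and the similar ones below) threads logging_obj through to the shared HTTP client, so the client can attribute the raw network time to the request. A simplified sketch of what an instrumented async post might look like; the real AsyncHTTPHandler signature and its streaming path differ:

import time
import httpx

class InstrumentedAsyncClient:
    """Illustrative stand-in for an HTTP handler that reports API-call timing."""

    def __init__(self):
        self._client = httpx.AsyncClient()

    async def post(self, url, headers=None, data=None, stream=False, logging_obj=None, timeout=None):
        start = time.monotonic()
        # Streaming requests would use self._client.stream(...) instead; omitted for brevity.
        response = await self._client.post(url, headers=headers, content=data, timeout=timeout)
        if logging_obj is not None:
            # Illustrative attribute: how long the raw HTTP round trip took.
            logging_obj.llm_api_duration_ms = (time.monotonic() - start) * 1000
        return response
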
@@ -577,7 +579,7 @@ class BedrockLLM(BaseAWSLLM):
         model_response: ModelResponse,
         print_verbose: Callable,
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         optional_params: dict,
         acompletion: bool,
         timeout: Optional[Union[float, httpx.Timeout]],
@@ -890,6 +892,7 @@
                 headers=prepped.headers, # type: ignore
                 data=data,
                 stream=stream,
+                logging_obj=logging_obj,
             )
 
             if response.status_code != 200:
@@ -917,7 +920,12 @@
             return streaming_response
 
         try:
-            response = self.client.post(url=proxy_endpoint_url, headers=prepped.headers, data=data) # type: ignore
+            response = self.client.post(
+                url=proxy_endpoint_url,
+                headers=dict(prepped.headers),
+                data=data,
+                logging_obj=logging_obj,
+            )
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
@@ -949,7 +957,7 @@
         data: str,
         timeout: Optional[Union[float, httpx.Timeout]],
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         stream,
         optional_params: dict,
         litellm_params=None,
@@ -968,7 +976,13 @@
             client = client # type: ignore
 
         try:
-            response = await client.post(api_base, headers=headers, data=data) # type: ignore
+            response = await client.post(
+                api_base,
+                headers=headers,
+                data=data,
+                timeout=timeout,
+                logging_obj=logging_obj,
+            )
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
@@ -990,6 +1004,7 @@
             encoding=encoding,
         )
 
+    @track_llm_api_timing()  # for streaming, we need to instrument the function calling the wrapper
     async def async_streaming(
         self,
         model: str,
@@ -1000,7 +1015,7 @@
         data: str,
         timeout: Optional[Union[float, httpx.Timeout]],
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         stream,
         optional_params: dict,
         litellm_params=None,
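The ResponseMetadata, set_hidden_params, and update_response_metadata commits cover the other half of the picture: once the API-call duration is known, overhead is derived and exposed through the response's hidden params, which the proxy later surfaces as the two headers named in the title. Roughly, with illustrative field and key names where the real ones are not shown in this diff:

class ResponseMetadata:
    """Sketch only: derive LiteLLM overhead as total time minus LLM API time."""

    def __init__(self, start_time: float, end_time: float, llm_api_duration_ms: float):
        self.total_ms = (end_time - start_time) * 1000
        self.overhead_ms = max(self.total_ms - llm_api_duration_ms, 0)

    def set_hidden_params(self, response) -> None:
        # Hidden params are what eventually back x-litellm-response-duration-ms /
        # x-litellm-overhead-duration-ms on proxy responses; these key names are illustrative.
        hidden = getattr(response, "_hidden_params", {}) or {}
        hidden["response_duration_ms"] = self.total_ms
        hidden["litellm_overhead_ms"] = self.overhead_ms
        response._hidden_params = hidden
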