Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
(Feat) Add x-litellm-overhead-duration-ms and "x-litellm-response-duration-ms" in response from LiteLLM (#7899)
* add track_llm_api_timing
* add track_llm_api_timing
* test_litellm_overhead
* use ResponseMetadata class for setting hidden params and response overhead
* instrument http handler
* fix track_llm_api_timing
* track_llm_api_timing
* emit response overhead on hidden params
* fix resp metadata
* fix make_sync_openai_embedding_request
* test_aaaaatext_completion_endpoint fixes
* _get_value_from_hidden_params
* set_hidden_params
* test_litellm_overhead
* test_litellm_overhead
* test_litellm_overhead
* fix import
* test_litellm_overhead_stream
* add LiteLLMLoggingObject
* use diff folder for testing
* use diff folder for overhead testing
* test litellm overhead
* use typing
* clear typing
* test_litellm_overhead
* fix async_streaming
* update_response_metadata
* move test file
* apply metadata to the response object
parent: 2c7082853c
commit: 359a4ee3a9
17 changed files with 464 additions and 73 deletions
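Per the commit title, the two new headers expose per-request timing: x-litellm-response-duration-ms is the total time LiteLLM took to produce the response, and x-litellm-overhead-duration-ms is the share of that time spent inside LiteLLM itself rather than waiting on the upstream provider. A minimal client-side sketch for reading them from a running proxy follows; the base URL, API key, and model name are placeholder assumptions, not part of this commit:

import httpx

# Hedged sketch: inspect LiteLLM's new timing headers on a proxy response.
# The URL, API key, and model below are placeholders, not from this commit.
resp = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-placeholder"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
overhead_ms = resp.headers.get("x-litellm-overhead-duration-ms")
total_ms = resp.headers.get("x-litellm-response-duration-ms")
print(f"LiteLLM overhead: {overhead_ms} ms of {total_ms} ms total")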
@@ -28,6 +28,7 @@ from litellm import verbose_logger
 from litellm.caching.caching import InMemoryCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.litellm_core_utils.prompt_templates.factory import (
     cohere_message_pt,
     construct_tool_use_system_prompt,
@@ -171,7 +172,7 @@ async def make_call(
     data: str,
     model: str,
     messages: list,
-    logging_obj,
+    logging_obj: Logging,
     fake_stream: bool = False,
     json_mode: Optional[bool] = False,
 ):
@@ -186,6 +187,7 @@ async def make_call(
         headers=headers,
         data=data,
         stream=not fake_stream,
+        logging_obj=logging_obj,
     )

     if response.status_code != 200:
@@ -577,7 +579,7 @@ class BedrockLLM(BaseAWSLLM):
         model_response: ModelResponse,
         print_verbose: Callable,
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         optional_params: dict,
         acompletion: bool,
         timeout: Optional[Union[float, httpx.Timeout]],
@@ -890,6 +892,7 @@ class BedrockLLM(BaseAWSLLM):
                 headers=prepped.headers,  # type: ignore
                 data=data,
                 stream=stream,
+                logging_obj=logging_obj,
             )

             if response.status_code != 200:
@@ -917,7 +920,12 @@ class BedrockLLM(BaseAWSLLM):
             return streaming_response

         try:
-            response = self.client.post(url=proxy_endpoint_url, headers=prepped.headers, data=data)  # type: ignore
+            response = self.client.post(
+                url=proxy_endpoint_url,
+                headers=dict(prepped.headers),
+                data=data,
+                logging_obj=logging_obj,
+            )
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
@@ -949,7 +957,7 @@ class BedrockLLM(BaseAWSLLM):
         data: str,
         timeout: Optional[Union[float, httpx.Timeout]],
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         stream,
         optional_params: dict,
         litellm_params=None,
@@ -968,7 +976,13 @@ class BedrockLLM(BaseAWSLLM):
             client = client  # type: ignore

         try:
-            response = await client.post(api_base, headers=headers, data=data)  # type: ignore
+            response = await client.post(
+                api_base,
+                headers=headers,
+                data=data,
+                timeout=timeout,
+                logging_obj=logging_obj,
+            )
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
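Both the sync and async Bedrock paths above now hand logging_obj down to the shared HTTP handler, which the "instrument http handler" change in this commit uses to time the raw provider call; subtracting that from the total request time yields the overhead header. A sketch of the idea only: the class below and its llm_api_duration_ms attribute are illustrative assumptions, not LiteLLM's actual handler code.

import time
from typing import Any, Optional

import httpx


class InstrumentedAsyncHandler:
    # Hedged sketch of an HTTP handler that times the raw API call.
    def __init__(self) -> None:
        self._client = httpx.AsyncClient()

    async def post(
        self,
        url: str,
        headers: Optional[dict] = None,
        data: Optional[str] = None,
        timeout: Optional[float] = None,
        logging_obj: Optional[Any] = None,
    ) -> httpx.Response:
        start = time.monotonic()
        response = await self._client.post(
            url, headers=headers, content=data, timeout=timeout
        )
        # Stash the raw call duration so the caller can report
        # total_time - api_time as LiteLLM's own overhead.
        if logging_obj is not None:
            logging_obj.llm_api_duration_ms = (time.monotonic() - start) * 1000
        return response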
@@ -990,6 +1004,7 @@ class BedrockLLM(BaseAWSLLM):
             encoding=encoding,
         )

+    @track_llm_api_timing()  # for streaming, we need to instrument the function calling the wrapper
     async def async_streaming(
         self,
         model: str,
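The @track_llm_api_timing() decorator applied above to async_streaming comes from the logging_utils import in the first hunk; its implementation lives in one of the other changed files not shown on this page. A sketch of a decorator with that call shape, async-only for brevity, where the attribute it records to is an assumption:

import functools
import time
from typing import Any, Callable


def track_llm_api_timing() -> Callable:
    # Hedged sketch; the real LiteLLM decorator lives in
    # litellm_core_utils/logging_utils.py and may differ.
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            logging_obj = kwargs.get("logging_obj")
            start = time.monotonic()
            try:
                return await func(*args, **kwargs)
            finally:
                # Attribute name is an assumption for illustration.
                if logging_obj is not None:
                    logging_obj.llm_api_duration_ms = (
                        time.monotonic() - start
                    ) * 1000

        return wrapper

    return decorator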
@@ -1000,7 +1015,7 @@ class BedrockLLM(BaseAWSLLM):
         data: str,
         timeout: Optional[Union[float, httpx.Timeout]],
         encoding,
-        logging_obj,
+        logging_obj: Logging,
         stream,
         optional_params: dict,
         litellm_params=None,