Merge pull request #4650 from msabramo/litellm_call_id_in_response
Proxy: Add `x-litellm-call-id` HTTP response header
Commit 533d2dba0b
2 changed files with 39 additions and 0 deletions
@@ -14,6 +14,39 @@ import Image from '@theme/IdealImage';

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

## Getting the LiteLLM Call ID

LiteLLM generates a unique `call_id` for each request. This `call_id` can be
used to track the request across the system, which makes it very useful for
finding the information about a particular request in a logging system such as
the ones mentioned on this page.

```shell
curl -i -sSL --location 'http://0.0.0.0:4000/chat/completions' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "what llm are you"}]
  }' | grep 'x-litellm'
```

The output of this is:

```output
x-litellm-call-id: b980db26-9512-45cc-b1da-c511a363b83f
x-litellm-model-id: cb41bc03f4c33d310019bae8c5afdb1af0a8f97b36a234405a9807614988457c
x-litellm-model-api-base: https://x-example-1234.openai.azure.com
x-litellm-version: 1.40.21
x-litellm-response-cost: 2.85e-05
x-litellm-key-tpm-limit: None
x-litellm-key-rpm-limit: None
```

A number of these headers can be useful for troubleshooting, but the
`x-litellm-call-id` header is the most useful one for tracking a request across
components in your system, including logging tools.

## Logging Proxy Input/Output - Langfuse

We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.
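To show how the new header can be consumed programmatically, here is a minimal client-side sketch (not part of this PR) that reads `x-litellm-call-id` from a proxy response so it can be attached to your own log entries; it assumes the same local proxy address and demo API key as the curl example above.

```python
# Minimal sketch (not part of this PR): capture the x-litellm-call-id header
# from a proxy response so it can be attached to your own log entries.
# Assumes a proxy at http://0.0.0.0:4000 and the demo key from the docs above.
import requests

response = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    },
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "what llm are you"}],
    },
)

# The call id is returned as a response header, not in the JSON body.
call_id = response.headers.get("x-litellm-call-id")
print(f"litellm call id: {call_id}")
```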
@@ -446,6 +446,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:
def get_custom_headers(
    *,
    user_api_key_dict: UserAPIKeyAuth,
    call_id: Optional[str] = None,
    model_id: Optional[str] = None,
    cache_key: Optional[str] = None,
    api_base: Optional[str] = None,
@@ -457,6 +458,7 @@ def get_custom_headers(
) -> dict:
    exclude_values = {"", None}
    headers = {
        "x-litellm-call-id": call_id,
        "x-litellm-model-id": model_id,
        "x-litellm-cache-key": cache_key,
        "x-litellm-model-api-base": api_base,
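The hunks above only show the added `call_id` parameter and the added header entry. As a hedged sketch of the surrounding pattern (an illustrative reconstruction, not the exact `proxy_server.py` code), a helper in this style builds a dict of candidate headers and drops any entry whose value is in `exclude_values` before returning it:

```python
# Illustrative reconstruction of the filtering pattern shown above; the real
# get_custom_headers in proxy_server.py accepts more parameters than this.
from typing import Optional


def get_custom_headers_sketch(
    *,
    call_id: Optional[str] = None,
    model_id: Optional[str] = None,
    cache_key: Optional[str] = None,
    api_base: Optional[str] = None,
) -> dict:
    exclude_values = {"", None}
    headers = {
        "x-litellm-call-id": call_id,
        "x-litellm-model-id": model_id,
        "x-litellm-cache-key": cache_key,
        "x-litellm-model-api-base": api_base,
    }
    # Only emit headers that actually carry a value.
    return {k: str(v) for k, v in headers.items() if v not in exclude_values}
```

With filtering like this, `x-litellm-call-id` only appears on the response when a call id was actually passed in for the request.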
@@ -2902,6 +2904,7 @@ async def chat_completion(
        ): # use generate_responses to stream responses
            custom_headers = get_custom_headers(
                user_api_key_dict=user_api_key_dict,
                call_id=logging_obj.litellm_call_id,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -2932,6 +2935,7 @@ async def chat_completion(
        fastapi_response.headers.update(
            get_custom_headers(
                user_api_key_dict=user_api_key_dict,
                call_id=logging_obj.litellm_call_id,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -3145,6 +3149,7 @@ async def completion(
        ): # use generate_responses to stream responses
            custom_headers = get_custom_headers(
                user_api_key_dict=user_api_key_dict,
                call_id=logging_obj.litellm_call_id,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -3165,6 +3170,7 @@ async def completion(
        fastapi_response.headers.update(
            get_custom_headers(
                user_api_key_dict=user_api_key_dict,
                call_id=logging_obj.litellm_call_id,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
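For context on how these call sites surface the header (a hedged reading of the diff, not new behavior): the streaming branches build `custom_headers` to hand to the streaming response, while the non-streaming branches merge the headers into `fastapi_response.headers`. Below is a self-contained FastAPI sketch of the non-streaming pattern, with stand-in values in place of the proxy's real `logging_obj` and model data:

```python
# Self-contained sketch of the non-streaming pattern: merge per-request
# headers into the outgoing FastAPI response. Values here are stand-ins.
import uuid

from fastapi import FastAPI, Response

app = FastAPI()


@app.post("/chat/completions")
async def chat_completion(fastapi_response: Response) -> dict:
    custom_headers = {
        # Stand-in for logging_obj.litellm_call_id in the real proxy.
        "x-litellm-call-id": str(uuid.uuid4()),
        "x-litellm-model-id": "example-model-id",
    }
    # Same mechanism as the diff: update the response headers in place.
    fastapi_response.headers.update(custom_headers)
    return {"choices": []}
```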