Litellm dev 03 04 2025 p3 (#8997)

* fix(core_helpers.py): handle litellm_metadata instead of 'metadata'
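
  A minimal sketch of the idea, with a hypothetical helper name (not the actual core_helpers.py code): batch calls carry their metadata under `litellm_metadata`, so prefer that key and fall back to the generic `metadata` key.

  ```python
  from typing import Optional


  def _get_metadata_from_kwargs(litellm_params: dict) -> Optional[dict]:
      # Hypothetical helper: batch/file calls store metadata under
      # 'litellm_metadata', while chat/completion calls use 'metadata'.
      metadata = litellm_params.get("litellm_metadata") or litellm_params.get("metadata")
      return metadata if isinstance(metadata, dict) else None


  print(_get_metadata_from_kwargs({"litellm_metadata": {"user_api_key": "hashed-key"}}))
  ```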

* feat(batches/): ensure batches logs are written to db

makes the batches response dict-compatible
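
  A rough sketch of what "dict-compatible" means here (illustrative class, not the actual litellm response type): the batch response can be read both as attributes and as dict keys, so logging/DB code that expects a dict keeps working.

  ```python
  from typing import Any


  class DictCompatibleBatch:
      """Illustrative wrapper: exposes batch fields as attributes and via
      dict-style access, so DB/logging code can treat it like a plain dict."""

      def __init__(self, **fields: Any) -> None:
          self.__dict__.update(fields)

      def __getitem__(self, key: str) -> Any:
          return self.__dict__[key]

      def get(self, key: str, default: Any = None) -> Any:
          return self.__dict__.get(key, default)


  batch = DictCompatibleBatch(id="batch_abc123", status="completed")
  assert batch.id == batch["id"] == "batch_abc123"
  ```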

* fix(cost_calculator.py): handle batch response being a dictionary
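
  Illustrative only (not the actual cost_calculator.py change): field access has to work whether the batch response arrives as a typed object or a plain dict.

  ```python
  from typing import Any, Union


  def _get_batch_field(batch: Union[dict, Any], field: str, default: Any = None) -> Any:
      # Read a field from either response shape.
      if isinstance(batch, dict):
          return batch.get(field, default)
      return getattr(batch, field, default)


  print(_get_batch_field({"id": "batch_abc123", "status": "completed"}, "status"))
  ```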

* fix(batches/main.py): modify retrieve endpoints to use @client decorator

enables logging to work on the retrieve call
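
  A simplified stand-in (not litellm's real decorator) for what routing retrieve calls through the `@client` decorator buys: pre-call and success hooks run around the call, which is where logging and cost tracking hang.

  ```python
  import functools
  from typing import Any, Callable


  def client(fn: Callable[..., Any]) -> Callable[..., Any]:
      # Simplified stand-in: wrap the call in pre-call / success logging hooks.
      @functools.wraps(fn)
      def wrapper(*args: Any, **kwargs: Any) -> Any:
          print(f"pre-call logging for {fn.__name__}")
          result = fn(*args, **kwargs)
          print(f"success logging for {fn.__name__}")  # cost tracking hooks in here
          return result

      return wrapper


  @client
  def retrieve_batch(batch_id: str) -> dict:
      return {"id": batch_id, "status": "completed"}


  retrieve_batch("batch_abc123")
  ```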

* fix(batches/main.py): fix retrieve batch response type to be 'dict' compatible

* fix(spend_tracking_utils.py): send unique uuid for retrieve batch call type

create batch and retrieve batch share the same id, so the retrieve call needs its own unique uuid
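
  A sketch of that guard (the call-type string and helper name are assumptions, not the real spend_tracking_utils.py code): key the retrieve-batch spend row on a fresh UUID instead of the provider's batch id.

  ```python
  import uuid


  def _spend_log_request_id(call_type: str, batch_id: str) -> str:
      # The provider returns the same batch id for the create and retrieve
      # calls, so a retrieve row keyed on batch_id would collide with the
      # create row. Give retrieve calls their own unique id.
      if call_type == "aretrieve_batch":  # assumed call-type name
          return str(uuid.uuid4())
      return batch_id


  print(_spend_log_request_id("aretrieve_batch", "batch_abc123"))
  ```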

* fix(spend_tracking_utils.py): prevent duplicate retrieve batch calls from being double counted
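
  A minimal in-memory sketch of the dedup idea; in the proxy this check would be backed by existing spend records rather than a set.

  ```python
  _already_counted: set = set()


  def should_count_batch_cost(batch_id: str, status: str) -> bool:
      # Only count a batch's cost once, on the first completed retrieve;
      # later retrieves of the same batch are not double counted.
      if status != "completed":
          return False
      if batch_id in _already_counted:
          return False
      _already_counted.add(batch_id)
      return True


  assert should_count_batch_cost("batch_abc123", "completed") is True
  assert should_count_batch_cost("batch_abc123", "completed") is False
  ```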

* refactor(batches/): refactor cost tracking for batches - do it on retrieve, and within the established litellm_logging pipeline

ensures cost is always logged to the DB
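
  Roughly the shape of the new flow (names and fields are assumptions, not the litellm_logging API): cost is derived once a retrieve shows the batch completed, then handed to the normal success-logging path so it reaches the spend DB.

  ```python
  from dataclasses import dataclass


  @dataclass
  class BatchUsage:
      prompt_tokens: int
      completion_tokens: int


  def batch_cost_on_retrieve(
      status: str,
      usage: BatchUsage,
      input_cost_per_token: float,
      output_cost_per_token: float,
  ) -> float:
      # Nothing is charged until the batch actually finishes; once it does,
      # the retrieve call's success handler logs this cost like any response.
      if status != "completed":
          return 0.0
      return (
          usage.prompt_tokens * input_cost_per_token
          + usage.completion_tokens * output_cost_per_token
      )


  print(batch_cost_on_retrieve("completed", BatchUsage(1000, 200), 1e-6, 2e-6))
  ```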

* fix: fix linting errors

* fix: fix linting error
Author: Krish Dholakia
Date: 2025-03-04 21:58:03 -08:00 (committed by GitHub)
Parent: f2a9d67e05
Commit: b43b8dc21c
17 changed files with 314 additions and 219 deletions


@@ -2,10 +2,10 @@
 # /v1/batches Endpoints
 
-import asyncio
 ######################################################################
-from typing import Dict, Optional
+import asyncio
+from typing import Dict, Optional, cast
 
 from fastapi import APIRouter, Depends, HTTPException, Path, Request, Response
@@ -199,8 +199,11 @@ async def retrieve_batch(
     ```
     """
     from litellm.proxy.proxy_server import (
+        add_litellm_data_to_request,
+        general_settings,
         get_custom_headers,
         llm_router,
+        proxy_config,
         proxy_logging_obj,
         version,
     )
@@ -212,6 +215,23 @@ async def retrieve_batch(
         batch_id=batch_id,
     )
+
+    data = cast(dict, _retrieve_batch_request)
+
+    # setup logging
+    data["litellm_call_id"] = request.headers.get(
+        "x-litellm-call-id", str(uuid.uuid4())
+    )
+
+    # Include original request and headers in the data
+    data = await add_litellm_data_to_request(
+        data=data,
+        request=request,
+        general_settings=general_settings,
+        user_api_key_dict=user_api_key_dict,
+        version=version,
+        proxy_config=proxy_config,
+    )
 
     if litellm.enable_loadbalancing_on_batch_endpoints is True:
         if llm_router is None:
             raise HTTPException(
@@ -221,7 +241,7 @@ async def retrieve_batch(
                 },
             )
 
-        response = await llm_router.aretrieve_batch(**_retrieve_batch_request)  # type: ignore
+        response = await llm_router.aretrieve_batch(**data)  # type: ignore
     else:
         custom_llm_provider = (
             provider
@@ -229,7 +249,7 @@ async def retrieve_batch(
             or "openai"
         )
         response = await litellm.aretrieve_batch(
-            custom_llm_provider=custom_llm_provider, **_retrieve_batch_request  # type: ignore
+            custom_llm_provider=custom_llm_provider, **data  # type: ignore
         )
 
     ### ALERTING ###