Litellm dev 03 04 2025 p3 (#8997)

* fix(core_helpers.py): handle litellm_metadata instead of 'metadata'
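A minimal sketch of the lookup this fix describes; the helper name and precedence here are assumptions, not the actual core_helpers.py code:

```python
# Hypothetical helper illustrating the fix: batch requests carry their
# metadata under "litellm_metadata" rather than "metadata", so check both.
def _get_metadata_from_kwargs(kwargs: dict) -> dict:
    litellm_params = kwargs.get("litellm_params") or {}
    return (
        litellm_params.get("litellm_metadata")
        or litellm_params.get("metadata")
        or {}
    )
```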

* feat(batches/): ensure batch logs are written to the db

makes the batches response dict-compatible
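A rough sketch of what "dict-compatible" means here, assuming LiteLLMBatch subclasses the OpenAI Batch model; the real class lives in litellm.types.utils and may differ:

```python
from openai.types import Batch


class LiteLLMBatch(Batch):
    # Allow dict-style reads (response["status"]) alongside the usual
    # attribute access (response.status), so downstream logging and cost
    # code can treat the batch response like a dictionary.
    def __contains__(self, key: str) -> bool:
        return hasattr(self, key)

    def __getitem__(self, key: str):
        return getattr(self, key)

    def get(self, key: str, default=None):
        return getattr(self, key, default)
```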

* fix(cost_calculator.py): handle batch response being a dictionary
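An illustrative shape for that guard; the actual cost_calculator.py logic is more involved:

```python
from typing import Any


def _normalize_batch_response(response: Any) -> dict:
    # The batch may arrive as a plain dict (e.g. after a JSON round trip
    # through the proxy/db) or as a pydantic Batch object; normalize it
    # before reading fields like "status" or "request_counts".
    if isinstance(response, dict):
        return response
    return response.model_dump()
```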

* fix(batches/main.py): modify retrieve endpoints to use the @client decorator

enables logging to work on the retrieve call
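A sketch of the decorator usage this refers to, assuming litellm's @client decorator (from litellm.utils) wraps the batch entrypoints in batches/main.py the same way it wraps completion():

```python
from litellm.utils import client


@client
async def aretrieve_batch(batch_id: str, **kwargs):
    # Once wrapped by @client, pre- and post-call logging (and therefore
    # cost tracking) fires on every retrieve call.
    ...  # provider dispatch elided
```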

* fix(batches/main.py): fix retrieve batch response type to be dict-compatible

* fix(spend_tracking_utils.py): send a unique uuid for the retrieve batch call type

create batch and retrieve batch share the same batch id, so retrieve logs need a distinct key
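An illustrative version of that disambiguation; the helper name is hypothetical:

```python
import uuid


def _get_spend_log_request_id(batch_id: str, call_type: str) -> str:
    # create_batch and retrieve_batch both return the same batch id, so a
    # raw batch_id would collide across call types; retrieve calls get a
    # unique suffix so each spend row has a distinct key.
    if call_type == "aretrieve_batch":
        return f"{batch_id}_{call_type}_{uuid.uuid4()}"
    return batch_id
```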

* fix(spend_tracking_utils.py): prevent duplicate retrieve batch calls from being double counted

* refactor(batches/): refactor batch cost tracking - compute cost on retrieve, within the established litellm_logging pipeline

ensures cost is always logged to the db

* fix: fix linting errors

* fix: fix linting error
Krish Dholakia 2025-03-04 21:58:03 -08:00 committed by GitHub
parent f2a9d67e05
commit b43b8dc21c
17 changed files with 314 additions and 219 deletions


@@ -37,6 +37,7 @@ from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENT
 from litellm.types.utils import (
     EmbeddingResponse,
     ImageResponse,
+    LiteLLMBatch,
     ModelResponse,
     ModelResponseStream,
 )
@@ -1755,9 +1756,9 @@ class OpenAIBatchesAPI(BaseLLM):
         self,
         create_batch_data: CreateBatchRequest,
         openai_client: AsyncOpenAI,
-    ) -> Batch:
+    ) -> LiteLLMBatch:
         response = await openai_client.batches.create(**create_batch_data)
-        return response
+        return LiteLLMBatch(**response.model_dump())

     def create_batch(
         self,
@@ -1769,7 +1770,7 @@
         max_retries: Optional[int],
         organization: Optional[str],
         client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
-    ) -> Union[Batch, Coroutine[Any, Any, Batch]]:
+    ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
         openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
             api_key=api_key,
             api_base=api_base,
@@ -1792,17 +1793,18 @@
             return self.acreate_batch(  # type: ignore
                 create_batch_data=create_batch_data, openai_client=openai_client
             )
-        response = openai_client.batches.create(**create_batch_data)
-        return response
+        response = cast(OpenAI, openai_client).batches.create(**create_batch_data)
+        return LiteLLMBatch(**response.model_dump())

     async def aretrieve_batch(
         self,
         retrieve_batch_data: RetrieveBatchRequest,
         openai_client: AsyncOpenAI,
-    ) -> Batch:
+    ) -> LiteLLMBatch:
         verbose_logger.debug("retrieving batch, args= %s", retrieve_batch_data)
         response = await openai_client.batches.retrieve(**retrieve_batch_data)
-        return response
+        return LiteLLMBatch(**response.model_dump())

     def retrieve_batch(
         self,
@@ -1837,8 +1839,8 @@
             return self.aretrieve_batch(  # type: ignore
                 retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
             )
-        response = openai_client.batches.retrieve(**retrieve_batch_data)
-        return response
+        response = cast(OpenAI, openai_client).batches.retrieve(**retrieve_batch_data)
+        return LiteLLMBatch(**response.model_dump())

     async def acancel_batch(
         self,
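With these changes, a retrieve call both logs spend and returns a response that supports attribute and dict-style access. Illustrative usage (the batch id is a placeholder):

```python
import litellm

batch = litellm.retrieve_batch(
    batch_id="batch_abc123",  # placeholder id
    custom_llm_provider="openai",
)
print(batch.status)     # attribute access, as with openai.types.Batch
print(batch["status"])  # dict-style access enabled by LiteLLMBatch
```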