working streaming logging

Ishaan Jaff 2025-03-12 00:02:39 -07:00
parent 4ff6e41c15
commit fde75a068a
3 changed files with 49 additions and 36 deletions


@@ -45,6 +45,7 @@ from litellm.types.llms.openai import (
     Batch,
     FineTuningJob,
     HttpxBinaryResponseContent,
+    ResponseCompletedEvent,
     ResponsesAPIResponse,
 )
 from litellm.types.rerank import RerankResponse
@@ -854,6 +855,7 @@ class Logging(LiteLLMLoggingBaseClass):
             Batch,
             FineTuningJob,
             ResponsesAPIResponse,
+            ResponseCompletedEvent,
         ],
         cache_hit: Optional[bool] = None,
     ) -> Optional[float]:
@@ -1000,9 +1002,7 @@ class Logging(LiteLLMLoggingBaseClass):
             ## if model in model cost map - log the response cost
             ## else set cost to None
             if (
-                standard_logging_object is None
-                and result is not None
-                and self.stream is not True
+                standard_logging_object is None and result is not None
             ):  # handle streaming separately
                 if (
                     isinstance(result, ModelResponse)
@@ -1016,6 +1016,7 @@ class Logging(LiteLLMLoggingBaseClass):
                     or isinstance(result, FineTuningJob)
                     or isinstance(result, LiteLLMBatch)
                     or isinstance(result, ResponsesAPIResponse)
+                    or isinstance(result, ResponseCompletedEvent)
                 ):
                     ## HIDDEN PARAMS ##
                     hidden_params = getattr(result, "_hidden_params", {})
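
The cost-tracking path above now treats a completed streaming event like any other terminal result: the condition no longer excludes streaming, and the accepted result types include ResponseCompletedEvent. A minimal sketch of that pattern, using stand-in classes rather than the real LiteLLM types:

from typing import Any, Optional


class ResponsesAPIResponse:  # stand-in, not the real litellm type
    pass


class ResponseCompletedEvent:  # stand-in for the newly accepted event type
    def __init__(self) -> None:
        self._hidden_params = {"response_cost": 0.0021}  # illustrative value


def extract_hidden_params(result: Any, standard_logging_object: Optional[dict]) -> dict:
    # Mirrors the simplified condition above: streaming is no longer excluded,
    # and a completed stream event is accepted alongside the full response object.
    if standard_logging_object is None and result is not None:
        if isinstance(result, (ResponsesAPIResponse, ResponseCompletedEvent)):
            return getattr(result, "_hidden_params", {})
    return {}


print(extract_hidden_params(ResponseCompletedEvent(), None))  # {'response_cost': 0.0021}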


@@ -9,6 +9,7 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
     ResponsesAPIResponse,
+    ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
 from litellm.utils import CustomStreamWrapper
@@ -82,21 +83,20 @@ class ResponsesAPIStreamingIterator:
                 if (
                     openai_responses_api_chunk
                     and openai_responses_api_chunk.type
-                    == COMPLETED_OPENAI_CHUNK_TYPE
+                    == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
                 ):
                     self.completed_response = openai_responses_api_chunk
-                    await self.logging_obj.async_success_handler(
-                        result=self.completed_response,
-                        start_time=self.start_time,
-                        end_time=datetime.now(),
-                        cache_hit=None,
-                    )
+                    asyncio.create_task(
+                        self.logging_obj.async_success_handler(
+                            result=self.completed_response,
+                            start_time=self.start_time,
+                            end_time=datetime.now(),
+                            cache_hit=None,
+                        )
+                    )
                     return openai_responses_api_chunk
-                return ResponsesAPIStreamingResponse(
-                    type="response", response=parsed_chunk
-                )
+                return await self.__anext__()
             except json.JSONDecodeError:
                 # If we can't parse the chunk, continue to the next one
                 return await self.__anext__()
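
The iterator change swaps a blocking await for a fire-and-forget task, so the completed chunk reaches the caller before the logging callbacks run. A minimal sketch of that scheduling pattern, with hypothetical names rather than the actual ResponsesAPIStreamingIterator:

import asyncio
from datetime import datetime


async def success_handler(result: str, start_time: datetime, end_time: datetime) -> None:
    # stands in for logging_obj.async_success_handler; simulate slow callbacks
    await asyncio.sleep(0.1)
    print(f"logged {result!r} after the chunk was already returned")


async def stream() -> None:
    start_time = datetime.now()
    for chunk in ["output_text.delta", "output_text.delta", "response.completed"]:
        if chunk == "response.completed":
            # Fire-and-forget, as in the iterator above: the caller gets the
            # completed chunk without waiting for logging to finish. Keep a
            # reference so the task is not garbage collected mid-flight.
            task = asyncio.create_task(success_handler(chunk, start_time, datetime.now()))
            print("returned completed chunk to the caller")
            await task  # awaited here only so this demo exits cleanly
        else:
            print("returned", chunk)


asyncio.run(stream())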


@@ -852,44 +852,56 @@ class ResponsesAPIStreamEvents(str, Enum):
 # Base streaming response types
-class ResponseCreatedEvent(BaseModel):
+class BaseResponseAPIStreamEvent(BaseModel):
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__
+
+
+class ResponseCreatedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
     response: ResponsesAPIResponse


-class ResponseInProgressEvent(BaseModel):
+class ResponseInProgressEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
     response: ResponsesAPIResponse


-class ResponseCompletedEvent(BaseModel):
+class ResponseCompletedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
     response: ResponsesAPIResponse
+    _hidden_params: dict = PrivateAttr(default_factory=dict)


-class ResponseFailedEvent(BaseModel):
+class ResponseFailedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
     response: ResponsesAPIResponse


-class ResponseIncompleteEvent(BaseModel):
+class ResponseIncompleteEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
     response: ResponsesAPIResponse


-class OutputItemAddedEvent(BaseModel):
+class OutputItemAddedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
     output_index: int
     item: dict


-class OutputItemDoneEvent(BaseModel):
+class OutputItemDoneEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
     output_index: int
     item: dict


-class ContentPartAddedEvent(BaseModel):
+class ContentPartAddedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
     item_id: str
     output_index: int
@@ -897,7 +909,7 @@ class ContentPartAddedEvent(BaseModel):
     part: dict


-class ContentPartDoneEvent(BaseModel):
+class ContentPartDoneEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
     item_id: str
     output_index: int
@@ -905,7 +917,7 @@ class ContentPartDoneEvent(BaseModel):
     part: dict


-class OutputTextDeltaEvent(BaseModel):
+class OutputTextDeltaEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
     item_id: str
     output_index: int
@@ -913,7 +925,7 @@ class OutputTextDeltaEvent(BaseModel):
     delta: str


-class OutputTextAnnotationAddedEvent(BaseModel):
+class OutputTextAnnotationAddedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
     item_id: str
     output_index: int
@@ -922,7 +934,7 @@ class OutputTextAnnotationAddedEvent(BaseModel):
     annotation: dict


-class OutputTextDoneEvent(BaseModel):
+class OutputTextDoneEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
     item_id: str
     output_index: int
@@ -930,7 +942,7 @@ class OutputTextDoneEvent(BaseModel):
     text: str


-class RefusalDeltaEvent(BaseModel):
+class RefusalDeltaEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
     item_id: str
     output_index: int
@@ -938,7 +950,7 @@ class RefusalDeltaEvent(BaseModel):
     delta: str


-class RefusalDoneEvent(BaseModel):
+class RefusalDoneEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
     item_id: str
     output_index: int
@@ -946,57 +958,57 @@ class RefusalDoneEvent(BaseModel):
     refusal: str


-class FunctionCallArgumentsDeltaEvent(BaseModel):
+class FunctionCallArgumentsDeltaEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
     item_id: str
     output_index: int
     delta: str


-class FunctionCallArgumentsDoneEvent(BaseModel):
+class FunctionCallArgumentsDoneEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
     item_id: str
     output_index: int
     arguments: str


-class FileSearchCallInProgressEvent(BaseModel):
+class FileSearchCallInProgressEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
     output_index: int
     item_id: str


-class FileSearchCallSearchingEvent(BaseModel):
+class FileSearchCallSearchingEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
     output_index: int
     item_id: str


-class FileSearchCallCompletedEvent(BaseModel):
+class FileSearchCallCompletedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
     output_index: int
     item_id: str


-class WebSearchCallInProgressEvent(BaseModel):
+class WebSearchCallInProgressEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
     output_index: int
     item_id: str


-class WebSearchCallSearchingEvent(BaseModel):
+class WebSearchCallSearchingEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
     output_index: int
     item_id: str


-class WebSearchCallCompletedEvent(BaseModel):
+class WebSearchCallCompletedEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
     output_index: int
     item_id: str


-class ErrorEvent(BaseModel):
+class ErrorEvent(BaseResponseAPIStreamEvent):
     type: Literal[ResponsesAPIStreamEvents.ERROR]
     code: Optional[str]
     message: str
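
The new base class gives every stream event dict-style access on top of its Pydantic fields, which is what generic logging code tends to expect, and the PrivateAttr lets a completed event carry hidden params for cost tracking without adding a serialized field. A short sketch of that behaviour, assuming pydantic v2 and a simplified response field:

from typing import Literal

from pydantic import BaseModel, PrivateAttr


class BaseResponseAPIStreamEvent(BaseModel):
    # dict-style access on top of normal Pydantic attribute access
    def __getitem__(self, key):
        return self.__dict__[key]

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):
        return key in self.__dict__


class ResponseCompletedEvent(BaseResponseAPIStreamEvent):
    type: Literal["response.completed"]
    response: dict  # simplified stand-in; the real field is a ResponsesAPIResponse
    # private attr: carried for cost tracking, excluded from model_dump()
    _hidden_params: dict = PrivateAttr(default_factory=dict)


event = ResponseCompletedEvent(type="response.completed", response={"id": "resp_123"})
event._hidden_params["response_cost"] = 0.0021  # illustrative value

print(event["type"])             # dict-style indexing -> "response.completed"
print(event.get("missing", {}))  # dict-style .get with a default -> {}
print("response" in event)       # membership check -> True
print(event._hidden_params)      # {'response_cost': 0.0021}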