diff --git a/litellm/constants.py b/litellm/constants.py
index 0288c45e40..b4551a78f5 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -18,6 +18,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
 #### Networking settings ####
 request_timeout: float = 6000  # time in seconds
+STREAM_SSE_DONE_STRING: str = "[DONE]"
 
 LITELLM_CHAT_PROVIDERS = [
     "openai",
diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py
index 13112e3647..7325e4e645 100644
--- a/litellm/responses/streaming_iterator.py
+++ b/litellm/responses/streaming_iterator.py
@@ -5,17 +5,15 @@ from typing import Any, AsyncIterator, Dict, Optional, Union
 
 import httpx
 
+from litellm.constants import STREAM_SSE_DONE_STRING
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
-    ResponsesAPIResponse,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
 from litellm.utils import CustomStreamWrapper
 
-COMPLETED_OPENAI_CHUNK_TYPE = "response.completed"
-
 
 class BaseResponsesAPIStreamingIterator:
     """
@@ -50,7 +48,7 @@ class BaseResponsesAPIStreamingIterator:
             return None
 
         # Handle "[DONE]" marker
-        if chunk == "[DONE]":
+        if chunk == STREAM_SSE_DONE_STRING:
             self.finished = True
             return None
 
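For context, a minimal sketch (not part of this patch) of how an SSE consumer typically treats the `[DONE]` sentinel that `STREAM_SSE_DONE_STRING` now names. The `iter_sse_events` helper and the sample payload below are hypothetical, for illustration only; they are not part of the litellm codebase.

```python
import json
from typing import Iterator

# Mirrors the constant added in litellm/constants.py
STREAM_SSE_DONE_STRING = "[DONE]"


def iter_sse_events(raw_lines: Iterator[str]) -> Iterator[dict]:
    """Yield parsed JSON events from SSE 'data:' lines, stopping at the DONE sentinel."""
    for line in raw_lines:
        line = line.strip()
        if not line or not line.startswith("data:"):
            continue  # skip blank keep-alives and non-data fields
        payload = line[len("data:"):].strip()
        if payload == STREAM_SSE_DONE_STRING:
            return  # stream finished; the sentinel carries no JSON body
        yield json.loads(payload)


if __name__ == "__main__":
    sample = [
        'data: {"type": "response.output_text.delta", "delta": "Hi"}',
        "data: [DONE]",
    ]
    for event in iter_sse_events(iter(sample)):
        print(event["type"])
```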