[Feat] Expose Responses API on LiteLLM UI Test Key Page (#10166)

* add /responses API on UI

* add makeOpenAIResponsesRequest

* add makeOpenAIResponsesRequest

* fix add responses API on UI

* fix endpoint selector

* responses API render chunks on litellm chat ui

* fixes to streaming iterator

* fix render responses completed events

* fixes for MockResponsesAPIStreamingIterator

* transform_responses_api_request_to_chat_completion_request

* fix for responses API

* test_basic_openai_responses_api_streaming

* fix base responses api tests
Ishaan Jaff 2025-04-19 13:18:54 -07:00 committed by GitHub
parent 03b5399f86
commit 0717369ae6
8 changed files with 332 additions and 52 deletions
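
For reviewers trying this out: a minimal sketch of the call path this PR exercises, assuming litellm's public aresponses() entry point and an OPENAI_API_KEY in the environment. The model name and prompt are placeholders; the point is that stream=True on a model without native streaming routes through the mock iterator changed below.

```python
import asyncio

import litellm


async def main():
    # stream=True against a model with no native streaming support is the
    # path that goes through MockResponsesAPIStreamingIterator in the diff
    stream = await litellm.aresponses(
        model="openai/o1-pro",  # o1-pro is the example cited in the docstring below
        input="Say hello in five words.",
        stream=True,
    )
    async for event in stream:
        # delta events carry the sliced text; the final event is response.completed
        print(event.type, getattr(event, "delta", ""))


asyncio.run(main())
```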

litellm/responses/streaming_iterator.py

@@ -11,7 +11,9 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
     ResponseCompletedEvent,
+    ResponsesAPIResponse,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
@@ -212,9 +214,14 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
 
 class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
     """
-    mock iterator - some models like o1-pro do not support streaming, we need to fake a stream
+    Mock iterator - fake a stream by slicing the full response text into
+    5-char deltas, then emit a completed event.
+
+    Models like o1-pro don't support streaming, so we fake it.
     """
 
+    CHUNK_SIZE = 5
+
     def __init__(
         self,
         response: httpx.Response,
@@ -222,49 +229,68 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
     ):
-        self.raw_http_response = response
         super().__init__(
             response=response,
             model=model,
             responses_api_provider_config=responses_api_provider_config,
             logging_obj=logging_obj,
         )
-        self.is_done = False
+
+        # one-time transform
+        transformed = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=response,
+                logging_obj=logging_obj,
+            )
+        )
+        full_text = self._collect_text(transformed)
+
+        # build a list of 5-char delta events
+        deltas = [
+            OutputTextDeltaEvent(
+                type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+                delta=full_text[i : i + self.CHUNK_SIZE],
+                item_id=transformed.id,
+                output_index=0,
+                content_index=0,
+            )
+            for i in range(0, len(full_text), self.CHUNK_SIZE)
+        ]
+
+        # append the completed event
+        self._events = deltas + [
+            ResponseCompletedEvent(
+                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+                response=transformed,
+            )
+        ]
+        self._idx = 0
 
     def __aiter__(self):
         return self
 
     async def __anext__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopAsyncIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
 
     def __iter__(self):
         return self
 
     def __next__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
+
+    def _collect_text(self, resp: ResponsesAPIResponse) -> str:
+        out = ""
+        for out_item in resp.output:
+            if out_item.type == "message":
+                for c in getattr(out_item, "content", []):
+                    out += c.text
+        return out
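
To make the new chunking behavior concrete, here is a standalone sketch of the slicing logic. The Delta and Completed classes are hypothetical stand-ins for OutputTextDeltaEvent and ResponseCompletedEvent; only the chunk arithmetic mirrors the diff.

```python
from dataclasses import dataclass
from typing import List, Union

CHUNK_SIZE = 5  # mirrors MockResponsesAPIStreamingIterator.CHUNK_SIZE


@dataclass
class Delta:  # stand-in for OutputTextDeltaEvent
    delta: str


@dataclass
class Completed:  # stand-in for ResponseCompletedEvent
    full_text: str


def build_events(full_text: str) -> List[Union[Delta, Completed]]:
    # slice the full response text into fixed-size deltas...
    deltas = [
        Delta(delta=full_text[i : i + CHUNK_SIZE])
        for i in range(0, len(full_text), CHUNK_SIZE)
    ]
    # ...then append a single terminal completed event
    return deltas + [Completed(full_text=full_text)]


events = build_events("Hello, world!")
assert [e.delta for e in events[:-1]] == ["Hello", ", wor", "ld!"]
assert isinstance(events[-1], Completed)
```

Replaying a pre-built event list is also why is_done gave way to _idx: the iterator now has to yield N delta events plus one terminal completed event, rather than a single completed event on the first call.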