Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
[Feat] Expose Responses API on LiteLLM UI Test Key Page (#10166)
* add /responses API on UI
* add makeOpenAIResponsesRequest
* add makeOpenAIResponsesRequest
* fix add responses API on UI
* fix endpoint selector
* responses API render chunks on litellm chat ui
* fixes to streaming iterator
* fix render responses completed events
* fixes for MockResponsesAPIStreamingIterator
* transform_responses_api_request_to_chat_completion_request
* fix for responses API
* test_basic_openai_responses_api_streaming
* fix base responses api tests
This commit is contained in:
parent 03b5399f86
commit 0717369ae6

8 changed files with 332 additions and 52 deletions
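For context on what the new Test Key page exercises: it issues a streaming request against the Responses API and renders each chunk as it arrives. Below is a rough Python sketch of that flow, assuming litellm's `responses()` entry point with `stream=True` (the UI itself goes through the new `makeOpenAIResponsesRequest` helper in TypeScript; the model name is illustrative):

```python
import litellm

# Sketch: stream a Responses API call the way the Test Key page does.
# Each yielded event is either an output_text delta or the final
# completed event carrying the full response.
stream = litellm.responses(
    model="openai/gpt-4o",  # illustrative model name
    input="Say hello",
    stream=True,
)
for event in stream:
    print(event.type)
```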
@@ -11,7 +11,9 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
     ResponseCompletedEvent,
+    ResponsesAPIResponse,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
@@ -212,9 +214,14 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
 
 class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
     """
-    mock iterator - some models like o1-pro do not support streaming, we need to fake a stream
+    Mock iterator—fake a stream by slicing the full response text into
+    5 char deltas, then emit a completed event.
+
+    Models like o1-pro don't support streaming, so we fake it.
     """
 
+    CHUNK_SIZE = 5
+
     def __init__(
         self,
         response: httpx.Response,
@@ -222,49 +229,68 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
     ):
-        self.raw_http_response = response
         super().__init__(
             response=response,
             model=model,
             responses_api_provider_config=responses_api_provider_config,
             logging_obj=logging_obj,
         )
-        self.is_done = False
 
+        # one-time transform
+        transformed = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=response,
+                logging_obj=logging_obj,
+            )
+        )
+        full_text = self._collect_text(transformed)
+
+        # build a list of 5-char delta events
+        deltas = [
+            OutputTextDeltaEvent(
+                type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+                delta=full_text[i : i + self.CHUNK_SIZE],
+                item_id=transformed.id,
+                output_index=0,
+                content_index=0,
+            )
+            for i in range(0, len(full_text), self.CHUNK_SIZE)
+        ]
+
+        # append the completed event
+        self._events = deltas + [
+            ResponseCompletedEvent(
+                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+                response=transformed,
+            )
+        ]
+        self._idx = 0
+
     def __aiter__(self):
         return self
 
     async def __anext__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopAsyncIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
 
     def __iter__(self):
         return self
 
     def __next__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
+
+    def _collect_text(self, resp: ResponsesAPIResponse) -> str:
+        out = ""
+        for out_item in resp.output:
+            if out_item.type == "message":
+                for c in getattr(out_item, "content", []):
+                    out += c.text
+        return out
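The chunking the new `__init__` performs is plain fixed-width slicing over the transformed response text. A minimal self-contained sketch of just that step (the `make_deltas` helper is hypothetical; the real iterator wraps each slice in an `OutputTextDeltaEvent`):

```python
from typing import List

CHUNK_SIZE = 5  # same width the mock iterator uses

def make_deltas(full_text: str) -> List[str]:
    # Step through the text CHUNK_SIZE characters at a time;
    # the last slice may be shorter than CHUNK_SIZE.
    return [
        full_text[i : i + CHUNK_SIZE]
        for i in range(0, len(full_text), CHUNK_SIZE)
    ]

assert make_deltas("hello world") == ["hello", " worl", "d"]
```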
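On the consuming side, the mock stream looks identical to a real one: delta events followed by a completed event. A minimal sketch of a consumer reassembling the text, assuming an already-constructed iterator (building one directly needs an `httpx.Response`, a provider config, and a logging object):

```python
from litellm.types.llms.openai import ResponsesAPIStreamEvents

async def collect_text(stream) -> str:
    # Accumulate delta chunks until the completed event arrives; this
    # mirrors what the chat UI does when rendering Responses API chunks.
    text = ""
    async for event in stream:
        if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
            text += event.delta
        elif event.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
            break
    return text
```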