mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
fix(responses): use input, not original_input when storing the Response (#2300)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
Integration Tests / test-matrix (http, datasets) (push) Failing after 9s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 7s
Integration Tests / test-matrix (http, providers) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (http, inspect) (push) Failing after 10s
Integration Tests / test-matrix (http, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 10s
Integration Tests / test-matrix (library, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, inference) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 6s
Integration Tests / test-matrix (library, post_training) (push) Failing after 8s
Integration Tests / test-matrix (library, scoring) (push) Failing after 10s
Integration Tests / test-matrix (library, providers) (push) Failing after 10s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (library, inspect) (push) Failing after 11s
Unit Tests / unit-tests (3.10) (push) Failing after 8s
Unit Tests / unit-tests (3.12) (push) Failing after 9s
Unit Tests / unit-tests (3.11) (push) Failing after 9s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 5s
Pre-commit / pre-commit (push) Failing after 53s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
Integration Tests / test-matrix (http, datasets) (push) Failing after 9s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 7s
Integration Tests / test-matrix (http, providers) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (http, inspect) (push) Failing after 10s
Integration Tests / test-matrix (http, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 10s
Integration Tests / test-matrix (library, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, inference) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 6s
Integration Tests / test-matrix (library, post_training) (push) Failing after 8s
Integration Tests / test-matrix (library, scoring) (push) Failing after 10s
Integration Tests / test-matrix (library, providers) (push) Failing after 10s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (library, inspect) (push) Failing after 11s
Unit Tests / unit-tests (3.10) (push) Failing after 8s
Unit Tests / unit-tests (3.12) (push) Failing after 9s
Unit Tests / unit-tests (3.11) (push) Failing after 9s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 5s
Pre-commit / pre-commit (push) Failing after 53s
We must store the full (re-hydrated) input, not just the original input, in the Response object. Of course, this is not very space-efficient, and we should likely find a better storage scheme so that we store only unique entries in the database and then re-hydrate them efficiently later. But that can be done safely later.

Closes https://github.com/meta-llama/llama-stack/issues/2299

## Test Plan

Unit test
This commit is contained in:
parent
a654467552
commit
bfdd15d1fa
2 changed files with 76 additions and 11 deletions
|
@ -292,12 +292,12 @@ class OpenAIResponsesImpl:
|
|||
async def _store_response(
|
||||
self,
|
||||
response: OpenAIResponseObject,
|
||||
original_input: str | list[OpenAIResponseInput],
|
||||
input: str | list[OpenAIResponseInput],
|
||||
) -> None:
|
||||
new_input_id = f"msg_{uuid.uuid4()}"
|
||||
if isinstance(original_input, str):
|
||||
if isinstance(input, str):
|
||||
# synthesize a message from the input string
|
||||
input_content = OpenAIResponseInputMessageContentText(text=original_input)
|
||||
input_content = OpenAIResponseInputMessageContentText(text=input)
|
||||
input_content_item = OpenAIResponseMessage(
|
||||
role="user",
|
||||
content=[input_content],
|
||||
|
@ -307,7 +307,7 @@ class OpenAIResponsesImpl:
|
|||
else:
|
||||
# we already have a list of messages
|
||||
input_items_data = []
|
||||
for input_item in original_input:
|
||||
for input_item in input:
|
||||
if isinstance(input_item, OpenAIResponseMessage):
|
||||
# These may or may not already have an id, so dump to dict, check for id, and add if missing
|
||||
input_item_dict = input_item.model_dump()
|
||||
|
@ -334,7 +334,6 @@ class OpenAIResponsesImpl:
|
|||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
):
|
||||
stream = False if stream is None else stream
|
||||
original_input = input # Keep reference for storage
|
||||
|
||||
output_messages: list[OpenAIResponseOutput] = []
|
||||
|
||||
|
@ -372,7 +371,7 @@ class OpenAIResponsesImpl:
|
|||
inference_result=inference_result,
|
||||
ctx=ctx,
|
||||
output_messages=output_messages,
|
||||
original_input=original_input,
|
||||
input=input,
|
||||
model=model,
|
||||
store=store,
|
||||
tools=tools,
|
||||
|
@ -382,7 +381,7 @@ class OpenAIResponsesImpl:
|
|||
inference_result=inference_result,
|
||||
ctx=ctx,
|
||||
output_messages=output_messages,
|
||||
original_input=original_input,
|
||||
input=input,
|
||||
model=model,
|
||||
store=store,
|
||||
tools=tools,
|
||||
|
@ -393,7 +392,7 @@ class OpenAIResponsesImpl:
|
|||
inference_result: Any,
|
||||
ctx: ChatCompletionContext,
|
||||
output_messages: list[OpenAIResponseOutput],
|
||||
original_input: str | list[OpenAIResponseInput],
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
store: bool | None,
|
||||
tools: list[OpenAIResponseInputTool] | None,
|
||||
|
@ -423,7 +422,7 @@ class OpenAIResponsesImpl:
|
|||
if store:
|
||||
await self._store_response(
|
||||
response=response,
|
||||
original_input=original_input,
|
||||
input=input,
|
||||
)
|
||||
|
||||
return response
|
||||
|
@ -433,7 +432,7 @@ class OpenAIResponsesImpl:
|
|||
inference_result: Any,
|
||||
ctx: ChatCompletionContext,
|
||||
output_messages: list[OpenAIResponseOutput],
|
||||
original_input: str | list[OpenAIResponseInput],
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
store: bool | None,
|
||||
tools: list[OpenAIResponseInputTool] | None,
|
||||
|
@ -544,7 +543,7 @@ class OpenAIResponsesImpl:
|
|||
if store:
|
||||
await self._store_response(
|
||||
response=final_response,
|
||||
original_input=original_input,
|
||||
input=input,
|
||||
)
|
||||
|
||||
# Emit response.completed
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue