Merge branch 'main' into watsonx_health_check

Sumit Jaiswal, 2025-05-30 00:26:52 +05:30 (committed by GitHub)
commit 6af13bbbf0
4 changed files with 86 additions and 12 deletions


@@ -292,12 +292,12 @@ class OpenAIResponsesImpl:
     async def _store_response(
         self,
         response: OpenAIResponseObject,
-        original_input: str | list[OpenAIResponseInput],
+        input: str | list[OpenAIResponseInput],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
-        if isinstance(original_input, str):
+        if isinstance(input, str):
             # synthesize a message from the input string
-            input_content = OpenAIResponseInputMessageContentText(text=original_input)
+            input_content = OpenAIResponseInputMessageContentText(text=input)
             input_content_item = OpenAIResponseMessage(
                 role="user",
                 content=[input_content],
@@ -307,7 +307,7 @@ class OpenAIResponsesImpl:
         else:
             # we already have a list of messages
             input_items_data = []
-            for input_item in original_input:
+            for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
                     input_item_dict = input_item.model_dump()
@@ -334,7 +334,6 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
     ):
         stream = False if stream is None else stream
-        original_input = input  # Keep reference for storage
         output_messages: list[OpenAIResponseOutput] = []
@@ -372,7 +371,7 @@
                 inference_result=inference_result,
                 ctx=ctx,
                 output_messages=output_messages,
-                original_input=original_input,
+                input=input,
                 model=model,
                 store=store,
                 tools=tools,
@@ -382,7 +381,7 @@
                 inference_result=inference_result,
                 ctx=ctx,
                 output_messages=output_messages,
-                original_input=original_input,
+                input=input,
                 model=model,
                 store=store,
                 tools=tools,
@@ -393,7 +392,7 @@
         inference_result: Any,
         ctx: ChatCompletionContext,
         output_messages: list[OpenAIResponseOutput],
-        original_input: str | list[OpenAIResponseInput],
+        input: str | list[OpenAIResponseInput],
         model: str,
         store: bool | None,
         tools: list[OpenAIResponseInputTool] | None,
@@ -423,7 +422,7 @@
         if store:
             await self._store_response(
                 response=response,
-                original_input=original_input,
+                input=input,
             )
         return response
@@ -433,7 +432,7 @@
         inference_result: Any,
         ctx: ChatCompletionContext,
         output_messages: list[OpenAIResponseOutput],
-        original_input: str | list[OpenAIResponseInput],
+        input: str | list[OpenAIResponseInput],
         model: str,
         store: bool | None,
         tools: list[OpenAIResponseInputTool] | None,
@@ -544,7 +543,7 @@
         if store:
             await self._store_response(
                 response=final_response,
-                original_input=original_input,
+                input=input,
             )
         # Emit response.completed
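
Taken together, the hunks above drop the separate original_input reference so that _store_response now receives whatever create_openai_response actually worked with, i.e. the input after it has been re-hydrated with the previous conversation when previous_response_id is supplied. The sketch below illustrates that re-hydration idea only; rehydrate_input and the dict-based message wrapping are illustrative stand-ins, not the helpers used in this file.

# Illustrative sketch, not the actual implementation: shows why storing the
# post-re-hydration `input` differs from storing the caller's original input.
from typing import Any


async def rehydrate_input(
    responses_store: Any,
    input: str | list[Any],
    previous_response_id: str | None,
) -> str | list[Any]:
    if previous_response_id is None:
        # Nothing to merge: the stored input equals the caller's input.
        return input
    previous = await responses_store.get_response_object(previous_response_id)
    # Prior turn first (its input, then its output), followed by the new user turn.
    merged: list[Any] = list(previous.input) + list(previous.output)
    if isinstance(input, str):
        merged.append({"role": "user", "content": input})  # simplified message shape
    else:
        merged.extend(input)
    return merged  # this merged list is what `input=input` now hands to _store_response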


@@ -218,7 +218,7 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
                 "json_schema": {
                     "name": name,
                     "schema": fmt,
-                    "strict": True,
+                    "strict": False,
                 },
             }
         if request.tools:
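
The only change in this adapter is flipping strict to False in the JSON-schema response format it forwards, presumably so the provider is asked for best-effort schema adherence rather than strict validation, which not every backend accepts for arbitrary schemas. A rough sketch of the resulting payload shape, with placeholder values standing in for what the adapter derives from request.response_format:

# Placeholder values; the real adapter derives `name` and `fmt` from request.response_format.
name = "math_answer"
fmt = {"type": "object", "properties": {"answer": {"type": "string"}}}

response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": name,
        "schema": fmt,
        # strict=False requests best-effort adherence instead of strict schema validation.
        "strict": False,
    },
}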


@@ -107,6 +107,13 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[
         return None, []


+def pre_import_templates(template_dirs: list[Path]) -> None:
+    # Pre-import all template modules to avoid deadlocks.
+    for template_dir in template_dirs:
+        module_name = f"llama_stack.templates.{template_dir.name}"
+        importlib.import_module(module_name)
+
+
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     change_tracker = ChangedPathTracker()
@@ -118,6 +125,8 @@ def main():
         template_dirs = list(find_template_dirs(templates_dir))
         task = progress.add_task("Processing distribution templates...", total=len(template_dirs))

+        pre_import_templates(template_dirs)
+
         # Create a partial function with the progress bar
         process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
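
pre_import_templates runs before the templates are processed concurrently, so every llama_stack.templates.<name> module is already fully imported by the time worker threads touch it. The generic, runnable sketch below shows the same "import before you parallelize" pattern; the module list and ThreadPoolExecutor usage are stand-ins, not code from this script.

# Generic illustration of pre-importing to avoid import-time deadlocks; the module
# names and executor are placeholders, not taken from the llama_stack script.
import importlib
from concurrent.futures import ThreadPoolExecutor

MODULES = ["json", "csv", "configparser"]  # stand-ins for llama_stack.templates.<name>


def work(module_name: str) -> str:
    # If a module is first imported inside worker threads, Python's per-module import
    # locks can contend, and circular imports across threads can deadlock.
    mod = importlib.import_module(module_name)
    return mod.__name__


def main() -> None:
    # Pre-import on the main thread so workers only ever see fully initialized modules.
    for name in MODULES:
        importlib.import_module(name)
    with ThreadPoolExecutor(max_workers=4) as pool:
        print(list(pool.map(work, MODULES)))


if __name__ == "__main__":
    main()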


@@ -628,3 +628,69 @@ async def test_responses_store_list_input_items_logic():
     result = await responses_store.list_response_input_items("resp_123", limit=0, order=Order.asc)
     assert result.object == "list"
     assert len(result.data) == 0  # Should return no items
+
+
+@pytest.mark.asyncio
+async def test_store_response_uses_rehydrated_input_with_previous_response(
+    openai_responses_impl, mock_responses_store, mock_inference_api
+):
+    """Test that _store_response uses the full re-hydrated input (including previous responses)
+    rather than just the original input when previous_response_id is provided."""
+    # Setup - Create a previous response that should be included in the stored input
+    previous_response = OpenAIResponseObjectWithInput(
+        id="resp-previous-123",
+        object="response",
+        created_at=1234567890,
+        model="meta-llama/Llama-3.1-8B-Instruct",
+        status="completed",
+        input=[
+            OpenAIResponseMessage(
+                id="msg-prev-user", role="user", content=[OpenAIResponseInputMessageContentText(text="What is 2+2?")]
+            )
+        ],
+        output=[
+            OpenAIResponseMessage(
+                id="msg-prev-assistant",
+                role="assistant",
+                content=[OpenAIResponseOutputMessageContentOutputText(text="2+2 equals 4.")],
+            )
+        ],
+    )
+    mock_responses_store.get_response_object.return_value = previous_response
+
+    current_input = "Now what is 3+3?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute - Create response with previous_response_id
+    result = await openai_responses_impl.create_openai_response(
+        input=current_input,
+        model=model,
+        previous_response_id="resp-previous-123",
+        store=True,
+    )
+
+    store_call_args = mock_responses_store.store_response_object.call_args
+    stored_input = store_call_args.kwargs["input"]
+
+    # Verify that the stored input contains the full re-hydrated conversation:
+    # 1. Previous user message
+    # 2. Previous assistant response
+    # 3. Current user message
+    assert len(stored_input) == 3
+    assert stored_input[0].role == "user"
+    assert stored_input[0].content[0].text == "What is 2+2?"
+    assert stored_input[1].role == "assistant"
+    assert stored_input[1].content[0].text == "2+2 equals 4."
+    assert stored_input[2].role == "user"
+    assert stored_input[2].content == "Now what is 3+3?"
+
+    # Verify the response itself is correct
+    assert result.model == model
+    assert result.status == "completed"
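
This test pins down the server-side contract; from a caller's perspective the same behavior looks roughly like the hypothetical snippet below (the client object and its responses API are stand-ins for an OpenAI-Responses-style SDK, not an API defined in this repo).

# Hypothetical client-side view of the behavior the test asserts: chaining via
# previous_response_id makes the stored input of the new response contain the
# prior turn's input and output plus the new user message.
async def demo(client) -> None:  # `client` is an assumed OpenAI-Responses-style client
    first = await client.responses.create(
        model="meta-llama/Llama-3.1-8B-Instruct", input="What is 2+2?", store=True
    )
    second = await client.responses.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        input="Now what is 3+3?",
        previous_response_id=first.id,
        store=True,
    )
    items = await client.responses.input_items.list(second.id)
    # Expected, per the assertions above: previous user msg, previous assistant msg, new user msg.
    assert len(items.data) == 3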