mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-14 02:32:35 +00:00
add support for instructions parameter in response object
This commit is contained in:
parent
08cbb69ef7
commit
f176e1a74b
10 changed files with 229 additions and 29 deletions
|
|
@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
|
|||
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
|
||||
"""
|
||||
This represents the output of a function call that gets passed back to the model.
|
||||
"""
|
||||
|
||||
call_id: str
|
||||
output: str
|
||||
type: Literal["function_call_output"] = "function_call_output"
|
||||
id: str | None = None
|
||||
status: str | None = None
|
||||
|
||||
|
||||
OpenAIResponseInput = Annotated[
|
||||
# Responses API allows output messages to be passed in as input
|
||||
OpenAIResponseOutputMessageWebSearchToolCall
|
||||
| OpenAIResponseOutputMessageFileSearchToolCall
|
||||
| OpenAIResponseOutputMessageFunctionToolCall
|
||||
| OpenAIResponseInputFunctionToolCallOutput
|
||||
| OpenAIResponseMCPApprovalRequest
|
||||
| OpenAIResponseMCPApprovalResponse
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools
|
||||
| OpenAIResponseMessage,
|
||||
Field(union_mode="left_to_right"),
|
||||
]
|
||||
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
|
||||
|
||||
|
||||
# This has to be a TypedDict because we need a "schema" field and our strong
|
||||
# typing code in the schema generator doesn't support Pydantic aliases. That also
|
||||
# means we can't use a discriminator field here, because TypedDicts don't support
|
||||
|
|
@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
:param tools: (Optional) An array of tools the model may call while generating a response.
|
||||
:param truncation: (Optional) Truncation strategy applied to the response
|
||||
:param usage: (Optional) Token usage information for the response
|
||||
:param instructions: (Optional) System message inserted into the model's context
|
||||
"""
|
||||
|
||||
created_at: int
|
||||
|
|
@ -564,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
tools: list[OpenAIResponseTool] | None = None
|
||||
truncation: str | None = None
|
||||
usage: OpenAIResponseUsage | None = None
|
||||
instructions: str | list[OpenAIResponseInput] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
|
|||
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
|
||||
"""
|
||||
This represents the output of a function call that gets passed back to the model.
|
||||
"""
|
||||
|
||||
call_id: str
|
||||
output: str
|
||||
type: Literal["function_call_output"] = "function_call_output"
|
||||
id: str | None = None
|
||||
status: str | None = None
|
||||
|
||||
|
||||
OpenAIResponseInput = Annotated[
|
||||
# Responses API allows output messages to be passed in as input
|
||||
OpenAIResponseOutputMessageWebSearchToolCall
|
||||
| OpenAIResponseOutputMessageFileSearchToolCall
|
||||
| OpenAIResponseOutputMessageFunctionToolCall
|
||||
| OpenAIResponseInputFunctionToolCallOutput
|
||||
| OpenAIResponseMCPApprovalRequest
|
||||
| OpenAIResponseMCPApprovalResponse
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools
|
||||
| OpenAIResponseMessage,
|
||||
Field(union_mode="left_to_right"),
|
||||
]
|
||||
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
|
||||
|
||||
|
||||
class ListOpenAIResponseInputItem(BaseModel):
|
||||
"""List container for OpenAI response input items.
|
||||
|
||||
|
|
|
|||
|
|
@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
|
|||
# Use stored messages directly and convert only new input
|
||||
message_adapter = TypeAdapter(list[OpenAIMessageParam])
|
||||
messages = message_adapter.validate_python(previous_response.messages)
|
||||
# When managing conversation state with the previous_response_id parameter,
|
||||
# the instructions used on previous turns will not be carried over in the context
|
||||
previous_instructions = previous_response.instructions
|
||||
if previous_instructions:
|
||||
if (isinstance(previous_instructions, str) and
|
||||
previous_instructions == messages[0].content and
|
||||
messages[0].role == "system"):
|
||||
# Omit instructions from previous response
|
||||
del messages[0]
|
||||
else:
|
||||
raise ValueError("Instructions from the previous response could not be validated")
|
||||
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
|
|
@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
|
|||
tool_executor=self.tool_executor,
|
||||
safety_api=self.safety_api,
|
||||
guardrail_ids=guardrail_ids,
|
||||
instructions=instructions,
|
||||
)
|
||||
|
||||
# Stream the response
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
|
|||
tool_executor, # Will be the tool execution logic from the main class
|
||||
safety_api,
|
||||
guardrail_ids: list[str] | None = None,
|
||||
instructions: str,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.ctx = ctx
|
||||
|
|
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
|
|||
self.accumulated_usage: OpenAIResponseUsage | None = None
|
||||
# Track if we've sent a refusal response
|
||||
self.violation_detected = False
|
||||
# system message that is inserted into the model's context
|
||||
self.instructions = instructions
|
||||
|
||||
async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
|
||||
"""Create a refusal response to replace streaming content."""
|
||||
|
|
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
|
|||
tools=self.ctx.available_tools(),
|
||||
error=error,
|
||||
usage=self.accumulated_usage,
|
||||
instructions=self.instructions,
|
||||
)
|
||||
|
||||
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue