mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 12:06:04 +00:00
add support for instructions parameter in response object
This commit is contained in:
parent
08cbb69ef7
commit
f176e1a74b
10 changed files with 229 additions and 29 deletions
28
docs/static/deprecated-llama-stack-spec.html
vendored
28
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -9024,6 +9024,20 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -9901,6 +9915,20 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
|
|||
16
docs/static/deprecated-llama-stack-spec.yaml
vendored
16
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -6734,6 +6734,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -7403,6 +7411,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
|
|||
28
docs/static/llama-stack-spec.html
vendored
28
docs/static/llama-stack-spec.html
vendored
|
|
@ -7600,6 +7600,20 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -8148,6 +8162,20 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
|
|||
16
docs/static/llama-stack-spec.yaml
vendored
16
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -5815,6 +5815,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -6218,6 +6226,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
|
|||
28
docs/static/stainless-llama-stack-spec.html
vendored
28
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -9272,6 +9272,20 @@
|
|||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -9820,6 +9834,20 @@
|
|||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseUsage",
|
||||
"description": "(Optional) Token usage information for the response"
|
||||
},
|
||||
"instructions": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "(Optional) System message inserted into the model's context"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
|
|||
16
docs/static/stainless-llama-stack-spec.yaml
vendored
16
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -7028,6 +7028,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -7431,6 +7439,14 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIResponseUsage'
|
||||
description: >-
|
||||
(Optional) Token usage information for the response
|
||||
instructions:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
description: >-
|
||||
(Optional) System message inserted into the model's context
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
|
|
|
|||
|
|
@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
|
|||
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
|
||||
|
||||
|
||||
@json_schema_type
class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
    """
    This represents the output of a function call that gets passed back to the model.

    :param call_id: Identifier tying this output to a function call (presumably the call's own id; confirm against the Responses API)
    :param output: The function's result, carried as a string
    :param type: Item type tag, always "function_call_output"
    :param id: (Optional) Identifier of this item
    :param status: (Optional) Status string for this item; accepted values are not constrained here
    """

    call_id: str
    output: str
    type: Literal["function_call_output"] = "function_call_output"
    id: str | None = None
    status: str | None = None
|
||||
|
||||
|
||||
# Union of every item type accepted as response input. It is declared ahead of
# OpenAIResponseObject because that model's `instructions` field refers to it.
OpenAIResponseInput = Annotated[
    # Responses API allows output messages to be passed in as input
    OpenAIResponseOutputMessageWebSearchToolCall
    | OpenAIResponseOutputMessageFileSearchToolCall
    | OpenAIResponseOutputMessageFunctionToolCall
    | OpenAIResponseInputFunctionToolCallOutput
    | OpenAIResponseMCPApprovalRequest
    | OpenAIResponseMCPApprovalResponse
    | OpenAIResponseOutputMessageMCPCall
    | OpenAIResponseOutputMessageMCPListTools
    | OpenAIResponseMessage,
    # left_to_right: members are tried in the order listed and the first one
    # that validates is used, so the ordering above is significant.
    Field(union_mode="left_to_right"),
]
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
|
||||
|
||||
|
||||
# This has to be a TypedDict because we need a "schema" field and our strong
|
||||
# typing code in the schema generator doesn't support Pydantic aliases. That also
|
||||
# means we can't use a discriminator field here, because TypedDicts don't support
|
||||
|
|
@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
:param tools: (Optional) An array of tools the model may call while generating a response.
|
||||
:param truncation: (Optional) Truncation strategy applied to the response
|
||||
:param usage: (Optional) Token usage information for the response
|
||||
:param instructions: (Optional) System message inserted into the model's context
|
||||
"""
|
||||
|
||||
created_at: int
|
||||
|
|
@ -564,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
tools: list[OpenAIResponseTool] | None = None
|
||||
truncation: str | None = None
|
||||
usage: OpenAIResponseUsage | None = None
|
||||
instructions: str | list[OpenAIResponseInput] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
|
|||
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
|
||||
|
||||
|
||||
@json_schema_type
class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
    """
    This represents the output of a function call that gets passed back to the model.

    :param call_id: Identifier tying this output to a function call (presumably the call's own id; confirm against the Responses API)
    :param output: The function's result, carried as a string
    :param type: Item type tag, always "function_call_output"
    :param id: (Optional) Identifier of this item
    :param status: (Optional) Status string for this item; accepted values are not constrained here
    """

    call_id: str
    output: str
    type: Literal["function_call_output"] = "function_call_output"
    id: str | None = None
    status: str | None = None
|
||||
|
||||
|
||||
# Union of every item type accepted as response input.
OpenAIResponseInput = Annotated[
    # Responses API allows output messages to be passed in as input
    OpenAIResponseOutputMessageWebSearchToolCall
    | OpenAIResponseOutputMessageFileSearchToolCall
    | OpenAIResponseOutputMessageFunctionToolCall
    | OpenAIResponseInputFunctionToolCallOutput
    | OpenAIResponseMCPApprovalRequest
    | OpenAIResponseMCPApprovalResponse
    | OpenAIResponseOutputMessageMCPCall
    | OpenAIResponseOutputMessageMCPListTools
    | OpenAIResponseMessage,
    # left_to_right: members are tried in the order listed and the first one
    # that validates is used, so the ordering above is significant.
    Field(union_mode="left_to_right"),
]
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
|
||||
|
||||
|
||||
class ListOpenAIResponseInputItem(BaseModel):
|
||||
"""List container for OpenAI response input items.
|
||||
|
||||
|
|
|
|||
|
|
@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
|
|||
# Use stored messages directly and convert only new input
|
||||
message_adapter = TypeAdapter(list[OpenAIMessageParam])
|
||||
messages = message_adapter.validate_python(previous_response.messages)
|
||||
# When managing conversation state with the previous_response_id parameter,
|
||||
# the instructions used on previous turns will not be carried over in the context
|
||||
previous_instructions = previous_response.instructions
|
||||
if previous_instructions:
|
||||
if (isinstance(previous_instructions, str) and
|
||||
previous_instructions == messages[0].content and
|
||||
messages[0].role == "system"):
|
||||
# Omit instructions from previous response
|
||||
del messages[0]
|
||||
else:
|
||||
raise ValueError("Instructions from the previous response could not be validated")
|
||||
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
|
|
@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
|
|||
tool_executor=self.tool_executor,
|
||||
safety_api=self.safety_api,
|
||||
guardrail_ids=guardrail_ids,
|
||||
instructions=instructions,
|
||||
)
|
||||
|
||||
# Stream the response
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
|
|||
tool_executor, # Will be the tool execution logic from the main class
|
||||
safety_api,
|
||||
guardrail_ids: list[str] | None = None,
|
||||
instructions: str,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.ctx = ctx
|
||||
|
|
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
|
|||
self.accumulated_usage: OpenAIResponseUsage | None = None
|
||||
# Track if we've sent a refusal response
|
||||
self.violation_detected = False
|
||||
# system message that is inserted into the model's context
|
||||
self.instructions = instructions
|
||||
|
||||
async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
|
||||
"""Create a refusal response to replace streaming content."""
|
||||
|
|
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
|
|||
tools=self.ctx.available_tools(),
|
||||
error=error,
|
||||
usage=self.accumulated_usage,
|
||||
instructions=self.instructions,
|
||||
)
|
||||
|
||||
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
|
|
|
|||
|
|
@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
|
|||
# Response should be either a function call or a message
|
||||
output_type = response.output[0].type
|
||||
assert output_type in ["function_call", "message"]
|
||||
|
||||
|
||||
def test_response_with_instructions(openai_client, client_with_models, text_model_id):
    """Test instructions parameter in the responses object.

    Sends the same user message three times, all non-streaming: once without
    ``instructions``, once with ``instructions``, and once chained to the second
    response through ``previous_response_id`` while supplying different
    ``instructions``. Each response's ``instructions`` field must equal what
    that particular request supplied (``None`` when it was omitted).
    """
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    client = openai_client

    messages = [
        {
            "role": "user",
            "content": "What is the capital of France?",
        }
    ]

    # First create a response without instructions parameter
    response_w_o_instructions = client.responses.create(
        model=text_model_id,
        input=messages,
        stream=False,
    )

    # Verify we have None in the instructions field
    assert response_w_o_instructions.instructions is None

    # Next create a response and pass instructions parameter
    instructions = "You are a helpful assistant."
    response_with_instructions = client.responses.create(
        model=text_model_id,
        instructions=instructions,
        input=messages,
        stream=False,
    )

    # Verify we have a valid instructions field
    assert response_with_instructions.instructions == instructions

    # Finally test instructions parameter with a previous response id
    instructions2 = "You are a helpful assistant and speak in pirate language."
    response_with_instructions2 = client.responses.create(
        model=text_model_id,
        instructions=instructions2,
        input=messages,
        previous_response_id=response_with_instructions.id,
        stream=False,
    )

    # Verify instructions from the previous response were not carried over to the next response
    assert response_with_instructions2.instructions == instructions2
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue