llama-stack (mirror of https://github.com/meta-llama/llama-stack.git)

Commit f176e1a74b (parent: 08cbb69ef7)

add support for instructions parameter in response object

10 changed files with 229 additions and 29 deletions
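For orientation before the diff: a minimal sketch of the new parameter from the client side, mirroring the integration test added at the bottom of this commit. The base URL, API key, and model id are placeholders, not part of the change.

from openai import OpenAI

# Placeholder connection details; assumes an OpenAI-compatible Llama Stack endpoint.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model id
    instructions="You are a helpful assistant.",  # inserted into the model's context as a system message
    input=[{"role": "user", "content": "What is the capital of France?"}],
    stream=False,
)

# The response object now echoes back the instructions it was created with.
assert response.instructions == "You are a helpful assistant."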
docs/static/deprecated-llama-stack-spec.html (vendored): 28 changes

@@ -9024,6 +9024,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -9901,6 +9915,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
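All six spec files below encode the same shape: "instructions" is a oneOf accepting either a plain string or an array of OpenAIResponseInput items. A small illustration of the two accepted payload shapes; the message fields in the array form are an assumed example of one input item, not taken from this diff.

# Shape 1: a plain system-message string.
instructions = "You are a helpful assistant."

# Shape 2: an array of OpenAIResponseInput items (field values assumed for illustration).
instructions = [
    {"type": "message", "role": "system", "content": "You are a helpful assistant."},
]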
docs/static/deprecated-llama-stack-spec.yaml (vendored): 16 changes

@@ -6734,6 +6734,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -7403,6 +7411,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
docs/static/llama-stack-spec.html (vendored): 28 changes

@@ -7600,6 +7600,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -8148,6 +8162,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
docs/static/llama-stack-spec.yaml (vendored): 16 changes

@@ -5815,6 +5815,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -6218,6 +6226,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
docs/static/stainless-llama-stack-spec.html (vendored): 28 changes

@@ -9272,6 +9272,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -9820,6 +9834,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
docs/static/stainless-llama-stack-spec.yaml (vendored): 16 changes

@@ -7028,6 +7028,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -7431,6 +7439,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
@@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
+OpenAIResponseInput = Annotated[
+    # Responses API allows output messages to be passed in as input
+    OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
+    Field(union_mode="left_to_right"),
+]
+register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+
+
 # This has to be a TypedDict because we need a "schema" field and our strong
 # typing code in the schema generator doesn't support Pydantic aliases. That also
 # means we can't use a discriminator field here, because TypedDicts don't support

@@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """
 
     created_at: int

@@ -564,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | list[OpenAIResponseInput] | None = None
 
 
 @json_schema_type

@@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
-@json_schema_type
-class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
-    """
-    This represents the output of a function call that gets passed back to the model.
-    """
-
-    call_id: str
-    output: str
-    type: Literal["function_call_output"] = "function_call_output"
-    id: str | None = None
-    status: str | None = None
-
-
-OpenAIResponseInput = Annotated[
-    # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
-    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
-    | OpenAIResponseMessage,
-    Field(union_mode="left_to_right"),
-]
-register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
-
-
 class ListOpenAIResponseInputItem(BaseModel):
     """List container for OpenAI response input items.
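The Pydantic field `instructions: str | list[OpenAIResponseInput] | None = None` is what renders as the oneOf seen in the generated specs above. A standalone sketch, not the repo's model (which has many more fields), showing how such a union field validates:

from pydantic import BaseModel

class ResponseSketch(BaseModel):
    # str | list[...] | None renders as oneOf [string, array] plus optionality
    instructions: str | list[dict] | None = None

assert ResponseSketch(instructions="Be brief.").instructions == "Be brief."
assert ResponseSketch(instructions=[{"type": "message"}]).instructions == [{"type": "message"}]
assert ResponseSketch().instructions is None  # omitted: stays None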
@@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
             # Use stored messages directly and convert only new input
             message_adapter = TypeAdapter(list[OpenAIMessageParam])
             messages = message_adapter.validate_python(previous_response.messages)
+            # When managing conversation state with the previous_response_id parameter,
+            # the instructions used on previous turns will not be carried over in the context
+            previous_instructions = previous_response.instructions
+            if previous_instructions:
+                if (isinstance(previous_instructions, str) and
+                        previous_instructions == messages[0].content and
+                        messages[0].role == "system"):
+                    # Omit instructions from previous response
+                    del messages[0]
+                else:
+                    raise ValueError("Instructions from the previous response could not be validated")
             new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
             messages.extend(new_messages)
         else:

@@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )
 
         # Stream the response
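The check above makes instructions per-turn: when chaining with previous_response_id, the system message that carried the previous turn's instructions is stripped from the stored history before the new turn's instructions apply. A standalone sketch of that rule, using plain dicts instead of the repo's message types:

def drop_previous_instructions(messages: list[dict], previous_instructions) -> list[dict]:
    # If the first stored message is the system message carrying the previous
    # turn's instructions, drop it so old instructions are not re-applied.
    if (
        isinstance(previous_instructions, str)
        and messages
        and messages[0].get("role") == "system"
        and messages[0].get("content") == previous_instructions
    ):
        return messages[1:]
    return messages

stored = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
assert drop_previous_instructions(stored, "You are a helpful assistant.")[0]["role"] == "user"
assert drop_previous_instructions(stored, None) == stored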
@@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
         tool_executor,  # Will be the tool execution logic from the main class
         safety_api,
         guardrail_ids: list[str] | None = None,
+        instructions: str,
     ):
         self.inference_api = inference_api
         self.ctx = ctx

@@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions
 
     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""

@@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
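The orchestrator change is a pure pass-through: the value is stored at construction time and echoed on the final response object alongside usage and error. A reduced sketch of that pattern with assumed names (a keyword-only argument is chosen here for clarity; the real signature is shown in the diff above):

class OrchestratorSketch:
    def __init__(self, *, instructions: str | None = None):
        # system message that is inserted into the model's context
        self.instructions = instructions

    def snapshot_response(self) -> dict:
        # Echo the stored instructions on the response object.
        return {"object": "response", "instructions": self.instructions}

assert OrchestratorSketch(instructions="Be brief.").snapshot_response()["instructions"] == "Be brief."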
@@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
     # Response should be either a function call or a message
     output_type = response.output[0].type
     assert output_type in ["function_call", "message"]
+
+
+def test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Test instructions parameter in the responses object."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First create a response without instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have None in the instructions field
+    assert response_w_o_instructions.instructions is None
+
+    # Next create a response and pass instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have a valid instructions field
+    assert response_with_instructions.instructions == instructions
+
+    # Finally test instructions parameter with a previous response id
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify instructions from previous response was not carried over to the next response
+    assert response_with_instructions2.instructions == instructions2