diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 202d3732b..05c609867 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -177,20 +177,37 @@ class ContentBuilder: ) -> Dict[str, MediaType]: "Creates the content subtree for a request or response." - def has_iterator_type(t): - if typing.get_origin(t) is typing.Union: - return any(has_iterator_type(a) for a in typing.get_args(t)) + def is_iterator_type(t): + return "StreamChunk" in str(t) + + def get_media_type(t): + if is_generic_list(t): + return "application/jsonl" + elif is_iterator_type(t): + return "text/event-stream" else: - # TODO: needs a proper fix where we let all types correctly flow upwards - # and then test against AsyncIterator - return "StreamChunk" in str(t) + return "application/json" + + if typing.get_origin(payload_type) is typing.Union: + media_types = [] + item_types = [] + for x in typing.get_args(payload_type): + media_types.append(get_media_type(x)) + item_types.append(x) + + if len(set(media_types)) == 1: + # all types have the same media type + return {media_types[0]: self.build_media_type(payload_type, examples)} + else: + # different types have different media types + return { + media_type: self.build_media_type(item_type, examples) + for media_type, item_type in zip(media_types, item_types) + } if is_generic_list(payload_type): media_type = "application/jsonl" item_type = unwrap_generic_list(payload_type) - elif has_iterator_type(payload_type): - item_type = payload_type - media_type = "text/event-stream" else: media_type = "application/json" item_type = payload_type diff --git a/docs/openapi_generator/strong_typing/schema.py b/docs/openapi_generator/strong_typing/schema.py index f4393041f..577428035 100644 --- a/docs/openapi_generator/strong_typing/schema.py +++ b/docs/openapi_generator/strong_typing/schema.py @@ -248,7 +248,9 @@ class JsonSchemaGenerator: type_schema.update(self._metadata_to_schema(m)) return type_schema - def _simple_type_to_schema(self, typ: TypeLike) -> Optional[Schema]: + def _simple_type_to_schema( + self, typ: TypeLike, json_schema_extra: Optional[dict] = None + ) -> Optional[Schema]: """ Returns the JSON schema associated with a simple, unrestricted type. @@ -264,6 +266,11 @@ class JsonSchemaGenerator: elif typ is float: return {"type": "number"} elif typ is str: + if json_schema_extra and "contentEncoding" in json_schema_extra: + return { + "type": "string", + "contentEncoding": json_schema_extra["contentEncoding"], + } return {"type": "string"} elif typ is bytes: return {"type": "string", "contentEncoding": "base64"} @@ -303,7 +310,12 @@ class JsonSchemaGenerator: # not a simple type return None - def type_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Schema: + def type_to_schema( + self, + data_type: TypeLike, + force_expand: bool = False, + json_schema_extra: Optional[dict] = None, + ) -> Schema: """ Returns the JSON schema associated with a type. @@ -313,7 +325,7 @@ class JsonSchemaGenerator: """ # short-circuit for common simple types - schema = self._simple_type_to_schema(data_type) + schema = self._simple_type_to_schema(data_type, json_schema_extra) if schema is not None: return schema @@ -486,15 +498,9 @@ class JsonSchemaGenerator: property_docstrings = get_class_property_docstrings( typ, self.options.property_description_fun ) - properties: Dict[str, Schema] = {} required: List[str] = [] for property_name, property_type in get_class_properties(typ): - defaults = {} - if "model_fields" in members: - f = members["model_fields"] - defaults = {k: finfo.default for k, finfo in f.items()} - # rename property if an alias name is specified alias = get_annotation(property_type, Alias) if alias: @@ -502,11 +508,22 @@ class JsonSchemaGenerator: else: output_name = property_name + defaults = {} + json_schema_extra = None + if "model_fields" in members: + f = members["model_fields"] + defaults = {k: finfo.default for k, finfo in f.items()} + json_schema_extra = f.get(output_name, None).json_schema_extra + if is_type_optional(property_type): optional_type: type = unwrap_optional_type(property_type) - property_def = self.type_to_schema(optional_type) + property_def = self.type_to_schema( + optional_type, json_schema_extra=json_schema_extra + ) else: - property_def = self.type_to_schema(property_type) + property_def = self.type_to_schema( + property_type, json_schema_extra=json_schema_extra + ) required.append(output_name) # check if attribute has a default value initializer diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 0454e22ec..85ae516f5 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -192,16 +192,14 @@ "200": { "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk", "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatCompletionResponse" + } + }, "text/event-stream": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/ChatCompletionResponse" - }, - { - "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" - } - ] + "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" } } } @@ -230,16 +228,14 @@ "200": { "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk", "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CompletionResponse" + } + }, "text/event-stream": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/CompletionResponse" - }, - { - "$ref": "#/components/schemas/CompletionResponseStreamChunk" - } - ] + "$ref": "#/components/schemas/CompletionResponseStreamChunk" } } } @@ -337,16 +333,14 @@ "200": { "description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.", "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Turn" + } + }, "text/event-stream": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Turn" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseStreamChunk" - } - ] + "$ref": "#/components/schemas/AgentTurnResponseStreamChunk" } } } @@ -2439,27 +2433,32 @@ "type": { "type": "string", "const": "image", - "default": "image" + "default": "image", + "description": "Discriminator type of the content item. Always \"image\"" }, "image": { "type": "object", "properties": { "url": { - "$ref": "#/components/schemas/URL" + "$ref": "#/components/schemas/URL", + "description": "A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits." }, "data": { "type": "string", - "contentEncoding": "base64" + "contentEncoding": "base64", + "description": "base64 encoded image data as string" } }, - "additionalProperties": false + "additionalProperties": false, + "description": "Image as a base64 encoded string or an URL" } }, "additionalProperties": false, "required": [ "type", "image" - ] + ], + "title": "A image content item" }, "InterleavedContent": { "oneOf": [ @@ -2647,17 +2646,20 @@ "type": { "type": "string", "const": "text", - "default": "text" + "default": "text", + "description": "Discriminator type of the content item. Always \"text\"" }, "text": { - "type": "string" + "type": "string", + "description": "Text content" } }, "additionalProperties": false, "required": [ "type", "text" - ] + ], + "title": "A text content item" }, "ToolCall": { "type": "object", @@ -3772,235 +3774,6 @@ "messages" ] }, - "AgentTurnResponseEvent": { - "type": "object", - "properties": { - "payload": { - "$ref": "#/components/schemas/AgentTurnResponseEventPayload" - } - }, - "additionalProperties": false, - "required": [ - "payload" - ] - }, - "AgentTurnResponseEventPayload": { - "oneOf": [ - { - "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload" - }, - { - "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" - } - ], - "discriminator": { - "propertyName": "event_type", - "mapping": { - "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload", - "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload", - "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", - "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", - "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload" - } - } - }, - "AgentTurnResponseStepCompletePayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "step_complete", - "default": "step_complete" - }, - "step_type": { - "type": "string", - "enum": [ - "inference", - "tool_execution", - "shield_call", - "memory_retrieval" - ] - }, - "step_id": { - "type": "string" - }, - "step_details": { - "oneOf": [ - { - "$ref": "#/components/schemas/InferenceStep" - }, - { - "$ref": "#/components/schemas/ToolExecutionStep" - }, - { - "$ref": "#/components/schemas/ShieldCallStep" - }, - { - "$ref": "#/components/schemas/MemoryRetrievalStep" - } - ], - "discriminator": { - "propertyName": "step_type", - "mapping": { - "inference": "#/components/schemas/InferenceStep", - "tool_execution": "#/components/schemas/ToolExecutionStep", - "shield_call": "#/components/schemas/ShieldCallStep", - "memory_retrieval": "#/components/schemas/MemoryRetrievalStep" - } - } - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "step_type", - "step_id", - "step_details" - ] - }, - "AgentTurnResponseStepProgressPayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "step_progress", - "default": "step_progress" - }, - "step_type": { - "type": "string", - "enum": [ - "inference", - "tool_execution", - "shield_call", - "memory_retrieval" - ] - }, - "step_id": { - "type": "string" - }, - "delta": { - "$ref": "#/components/schemas/ContentDelta" - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "step_type", - "step_id", - "delta" - ] - }, - "AgentTurnResponseStepStartPayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "step_start", - "default": "step_start" - }, - "step_type": { - "type": "string", - "enum": [ - "inference", - "tool_execution", - "shield_call", - "memory_retrieval" - ] - }, - "step_id": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "step_type", - "step_id" - ] - }, - "AgentTurnResponseStreamChunk": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/AgentTurnResponseEvent" - } - }, - "additionalProperties": false, - "required": [ - "event" - ], - "title": "streamed agent turn completion response." - }, - "AgentTurnResponseTurnCompletePayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "turn_complete", - "default": "turn_complete" - }, - "turn": { - "$ref": "#/components/schemas/Turn" - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "turn" - ] - }, - "AgentTurnResponseTurnStartPayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "turn_start", - "default": "turn_start" - }, - "turn_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "turn_id" - ] - }, "InferenceStep": { "type": "object", "properties": { @@ -4341,6 +4114,235 @@ "error" ] }, + "AgentTurnResponseEvent": { + "type": "object", + "properties": { + "payload": { + "$ref": "#/components/schemas/AgentTurnResponseEventPayload" + } + }, + "additionalProperties": false, + "required": [ + "payload" + ] + }, + "AgentTurnResponseEventPayload": { + "oneOf": [ + { + "$ref": "#/components/schemas/AgentTurnResponseStepStartPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseStepProgressPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseStepCompletePayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseTurnStartPayload" + }, + { + "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" + } + ], + "discriminator": { + "propertyName": "event_type", + "mapping": { + "step_start": "#/components/schemas/AgentTurnResponseStepStartPayload", + "step_progress": "#/components/schemas/AgentTurnResponseStepProgressPayload", + "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", + "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", + "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload" + } + } + }, + "AgentTurnResponseStepCompletePayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "step_complete", + "default": "step_complete" + }, + "step_type": { + "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ] + }, + "step_id": { + "type": "string" + }, + "step_details": { + "oneOf": [ + { + "$ref": "#/components/schemas/InferenceStep" + }, + { + "$ref": "#/components/schemas/ToolExecutionStep" + }, + { + "$ref": "#/components/schemas/ShieldCallStep" + }, + { + "$ref": "#/components/schemas/MemoryRetrievalStep" + } + ], + "discriminator": { + "propertyName": "step_type", + "mapping": { + "inference": "#/components/schemas/InferenceStep", + "tool_execution": "#/components/schemas/ToolExecutionStep", + "shield_call": "#/components/schemas/ShieldCallStep", + "memory_retrieval": "#/components/schemas/MemoryRetrievalStep" + } + } + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "step_type", + "step_id", + "step_details" + ] + }, + "AgentTurnResponseStepProgressPayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "step_progress", + "default": "step_progress" + }, + "step_type": { + "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ] + }, + "step_id": { + "type": "string" + }, + "delta": { + "$ref": "#/components/schemas/ContentDelta" + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "step_type", + "step_id", + "delta" + ] + }, + "AgentTurnResponseStepStartPayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "step_start", + "default": "step_start" + }, + "step_type": { + "type": "string", + "enum": [ + "inference", + "tool_execution", + "shield_call", + "memory_retrieval" + ] + }, + "step_id": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "step_type", + "step_id" + ] + }, + "AgentTurnResponseStreamChunk": { + "type": "object", + "properties": { + "event": { + "$ref": "#/components/schemas/AgentTurnResponseEvent" + } + }, + "additionalProperties": false, + "required": [ + "event" + ], + "title": "streamed agent turn completion response." + }, + "AgentTurnResponseTurnCompletePayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "turn_complete", + "default": "turn_complete" + }, + "turn": { + "$ref": "#/components/schemas/Turn" + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "turn" + ] + }, + "AgentTurnResponseTurnStartPayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "turn_start", + "default": "turn_start" + }, + "turn_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "turn_id" + ] + }, "EmbeddingsRequest": { "type": "object", "properties": { diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 0734ef236..2a95acf38 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -113,11 +113,12 @@ paths: If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionResponse' text/event-stream: schema: - oneOf: - - $ref: '#/components/schemas/ChatCompletionResponse' - - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' tags: - Inference summary: >- @@ -137,11 +138,12 @@ paths: If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk content: + application/json: + schema: + $ref: '#/components/schemas/CompletionResponse' text/event-stream: schema: - oneOf: - - $ref: '#/components/schemas/CompletionResponse' - - $ref: '#/components/schemas/CompletionResponseStreamChunk' + $ref: '#/components/schemas/CompletionResponseStreamChunk' tags: - Inference summary: >- @@ -202,11 +204,12 @@ paths: A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response. content: + application/json: + schema: + $ref: '#/components/schemas/Turn' text/event-stream: schema: - oneOf: - - $ref: '#/components/schemas/Turn' - - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' tags: - Agents parameters: @@ -1466,19 +1469,28 @@ components: type: string const: image default: image + description: >- + Discriminator type of the content item. Always "image" image: type: object properties: url: $ref: '#/components/schemas/URL' + description: >- + A URL of the image or data URL in the format of data:image/{type};base64,{data}. + Note that URL could have length limits. data: type: string contentEncoding: base64 + description: base64 encoded image data as string additionalProperties: false + description: >- + Image as a base64 encoded string or an URL additionalProperties: false required: - type - image + title: A image content item InterleavedContent: oneOf: - type: string @@ -1598,12 +1610,16 @@ components: type: string const: text default: text + description: >- + Discriminator type of the content item. Always "text" text: type: string + description: Text content additionalProperties: false required: - type - text + title: A text content item ToolCall: type: object properties: @@ -2381,154 +2397,6 @@ components: additionalProperties: false required: - messages - AgentTurnResponseEvent: - type: object - properties: - payload: - $ref: '#/components/schemas/AgentTurnResponseEventPayload' - additionalProperties: false - required: - - payload - AgentTurnResponseEventPayload: - oneOf: - - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - discriminator: - propertyName: event_type - mapping: - step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' - step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' - step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' - turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' - turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - AgentTurnResponseStepCompletePayload: - type: object - properties: - event_type: - type: string - const: step_complete - default: step_complete - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - step_id: - type: string - step_details: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - additionalProperties: false - required: - - event_type - - step_type - - step_id - - step_details - AgentTurnResponseStepProgressPayload: - type: object - properties: - event_type: - type: string - const: step_progress - default: step_progress - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - step_id: - type: string - delta: - $ref: '#/components/schemas/ContentDelta' - additionalProperties: false - required: - - event_type - - step_type - - step_id - - delta - AgentTurnResponseStepStartPayload: - type: object - properties: - event_type: - type: string - const: step_start - default: step_start - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - step_id: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - event_type - - step_type - - step_id - AgentTurnResponseStreamChunk: - type: object - properties: - event: - $ref: '#/components/schemas/AgentTurnResponseEvent' - additionalProperties: false - required: - - event - title: streamed agent turn completion response. - AgentTurnResponseTurnCompletePayload: - type: object - properties: - event_type: - type: string - const: turn_complete - default: turn_complete - turn: - $ref: '#/components/schemas/Turn' - additionalProperties: false - required: - - event_type - - turn - AgentTurnResponseTurnStartPayload: - type: object - properties: - event_type: - type: string - const: turn_start - default: turn_start - turn_id: - type: string - additionalProperties: false - required: - - event_type - - turn_id InferenceStep: type: object properties: @@ -2752,6 +2620,154 @@ components: - info - warn - error + AgentTurnResponseEvent: + type: object + properties: + payload: + $ref: '#/components/schemas/AgentTurnResponseEventPayload' + additionalProperties: false + required: + - payload + AgentTurnResponseEventPayload: + oneOf: + - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' + - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + discriminator: + propertyName: event_type + mapping: + step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' + step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' + step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' + turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' + turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' + AgentTurnResponseStepCompletePayload: + type: object + properties: + event_type: + type: string + const: step_complete + default: step_complete + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + step_id: + type: string + step_details: + oneOf: + - $ref: '#/components/schemas/InferenceStep' + - $ref: '#/components/schemas/ToolExecutionStep' + - $ref: '#/components/schemas/ShieldCallStep' + - $ref: '#/components/schemas/MemoryRetrievalStep' + discriminator: + propertyName: step_type + mapping: + inference: '#/components/schemas/InferenceStep' + tool_execution: '#/components/schemas/ToolExecutionStep' + shield_call: '#/components/schemas/ShieldCallStep' + memory_retrieval: '#/components/schemas/MemoryRetrievalStep' + additionalProperties: false + required: + - event_type + - step_type + - step_id + - step_details + AgentTurnResponseStepProgressPayload: + type: object + properties: + event_type: + type: string + const: step_progress + default: step_progress + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + step_id: + type: string + delta: + $ref: '#/components/schemas/ContentDelta' + additionalProperties: false + required: + - event_type + - step_type + - step_id + - delta + AgentTurnResponseStepStartPayload: + type: object + properties: + event_type: + type: string + const: step_start + default: step_start + step_type: + type: string + enum: + - inference + - tool_execution + - shield_call + - memory_retrieval + step_id: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - event_type + - step_type + - step_id + AgentTurnResponseStreamChunk: + type: object + properties: + event: + $ref: '#/components/schemas/AgentTurnResponseEvent' + additionalProperties: false + required: + - event + title: streamed agent turn completion response. + AgentTurnResponseTurnCompletePayload: + type: object + properties: + event_type: + type: string + const: turn_complete + default: turn_complete + turn: + $ref: '#/components/schemas/Turn' + additionalProperties: false + required: + - event_type + - turn + AgentTurnResponseTurnStartPayload: + type: object + properties: + event_type: + type: string + const: turn_start + default: turn_start + turn_id: + type: string + additionalProperties: false + required: + - event_type + - turn_id EmbeddingsRequest: type: object properties: diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py index 0b27a0196..8e56f59b1 100644 --- a/llama_stack/apis/common/content_types.py +++ b/llama_stack/apis/common/content_types.py @@ -4,14 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import base64 from enum import Enum from typing import Annotated, List, Literal, Optional, Union from llama_models.llama3.api.datatypes import ToolCall from llama_models.schema_utils import json_schema_type, register_schema -from pydantic import BaseModel, Field, field_serializer, model_validator +from pydantic import BaseModel, Field, model_validator @json_schema_type @@ -20,8 +19,16 @@ class URL(BaseModel): class _URLOrData(BaseModel): + """ + A URL or a base64 encoded string + + :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. + :param data: base64 encoded image data as string + """ + url: Optional[URL] = None - data: Optional[bytes] = None + # data is a base64 encoded string, hint with contentEncoding=base64 + data: Optional[str] = Field(contentEncoding="base64", default=None) @model_validator(mode="before") @classmethod @@ -30,21 +37,27 @@ class _URLOrData(BaseModel): return values return {"url": values} - @field_serializer("data") - def serialize_data(self, data: Optional[bytes], _info): - if data is None: - return None - return base64.b64encode(data).decode("utf-8") - @json_schema_type class ImageContentItem(BaseModel): + """A image content item + + :param type: Discriminator type of the content item. Always "image" + :param image: Image as a base64 encoded string or an URL + """ + type: Literal["image"] = "image" image: _URLOrData @json_schema_type class TextContentItem(BaseModel): + """A text content item + + :param type: Discriminator type of the content item. Always "text" + :param text: Text content + """ + type: Literal["text"] = "text" text: str diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index eca7364d7..706dd74f1 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -66,6 +66,7 @@ from llama_stack.apis.vector_io import VectorIO from llama_stack.providers.utils.kvstore import KVStore from llama_stack.providers.utils.memory.vector_store import concat_interleaved_content from llama_stack.providers.utils.telemetry import tracing + from .persistence import AgentPersistence from .safety import SafetyException, ShieldRunnerMixin @@ -476,9 +477,12 @@ class ChatAgent(ShieldRunnerMixin): ) span.set_attribute("output", retrieved_context) span.set_attribute("tool_name", MEMORY_QUERY_TOOL) - if retrieved_context: - last_message = input_messages[-1] - last_message.context = retrieved_context + + # append retrieved_context to the last user message + for message in input_messages[::-1]: + if isinstance(message, UserMessage): + message.context = retrieved_context + break output_attachments = [] diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index f5298d844..e49771980 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -135,7 +135,8 @@ async def interleaved_content_convert_to_raw( else: raise ValueError("Unsupported URL type") elif image.data: - data = image.data + # data is a base64 encoded string, decode it to bytes for RawMediaItem + data = base64.b64decode(image.data) else: raise ValueError("No data or URL provided") @@ -184,8 +185,10 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]: return content, format else: - pil_image = PIL_Image.open(io.BytesIO(image.data)) - return image.data, pil_image.format + # data is a base64 encoded string, decode it to bytes first + data_bytes = base64.b64decode(image.data) + pil_image = PIL_Image.open(io.BytesIO(data_bytes)) + return data_bytes, pil_image.format async def convert_image_content_to_url( diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 4a8fdd36a..e0f86e3d7 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -211,7 +211,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): } codex_agent = Agent(llama_stack_client, agent_config) - session_id = codex_agent.create_session("test-session") + session_id = codex_agent.create_session(f"test-session-{uuid4()}") inflation_doc = AgentDocument( content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", mime_type="text/csv", @@ -285,7 +285,8 @@ def test_rag_agent(llama_stack_client, agent_config): llama_stack_client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, - chunk_size_in_tokens=512, + # small chunks help to get specific info out of the docs + chunk_size_in_tokens=128, ) agent_config = { **agent_config, @@ -299,11 +300,15 @@ def test_rag_agent(llama_stack_client, agent_config): ], } rag_agent = Agent(llama_stack_client, agent_config) - session_id = rag_agent.create_session("test-session") + session_id = rag_agent.create_session(f"test-session-{uuid4()}") user_prompts = [ - "What are the top 5 topics that were explained? Only list succinct bullet points.", + ( + "Instead of the standard multi-head attention, what attention type does Llama3-8B use?", + "grouped-query", + ), + ("What command to use to get access to Llama3-8B-Instruct ?", "tune download"), ] - for prompt in user_prompts: + for prompt, expected_kw in user_prompts: print(f"User> {prompt}") response = rag_agent.create_turn( messages=[{"role": "user", "content": prompt}], @@ -312,3 +317,69 @@ def test_rag_agent(llama_stack_client, agent_config): logs = [str(log) for log in EventLogger().log(response) if log is not None] logs_str = "".join(logs) assert "Tool:query_from_memory" in logs_str + assert expected_kw in logs_str.lower() + + +def test_rag_and_code_agent(llama_stack_client, agent_config): + urls = ["chat.rst"] + documents = [ + Document( + document_id=f"num-{i}", + content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", + mime_type="text/plain", + metadata={}, + ) + for i, url in enumerate(urls) + ] + vector_db_id = "test-vector-db" + llama_stack_client.vector_dbs.register( + vector_db_id=vector_db_id, + embedding_model="all-MiniLM-L6-v2", + embedding_dimension=384, + ) + llama_stack_client.tool_runtime.rag_tool.insert( + documents=documents, + vector_db_id=vector_db_id, + chunk_size_in_tokens=128, + ) + agent_config = { + **agent_config, + "toolgroups": [ + dict( + name="builtin::rag", + args={"vector_db_ids": [vector_db_id]}, + ), + "builtin::code_interpreter", + ], + } + agent = Agent(llama_stack_client, agent_config) + inflation_doc = Document( + document_id="test_csv", + content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv", + mime_type="text/csv", + metadata={}, + ) + user_prompts = [ + ( + "Here is a csv file, can you describe it?", + [inflation_doc], + "code_interpreter", + ), + ( + "What are the top 5 topics that were explained? Only list succinct bullet points.", + [], + "query_from_memory", + ), + ] + + for prompt, docs, tool_name in user_prompts: + print(f"User> {prompt}") + session_id = agent.create_session(f"test-session-{uuid4()}") + response = agent.create_turn( + messages=[{"role": "user", "content": prompt}], + session_id=session_id, + documents=docs, + ) + logs = [str(log) for log in EventLogger().log(response) if log is not None] + logs_str = "".join(logs) + assert f"Tool:{tool_name}" in logs_str diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py index 6dff1be24..b10ede357 100644 --- a/tests/client-sdk/inference/test_inference.py +++ b/tests/client-sdk/inference/test_inference.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -import os +import pathlib import pytest from pydantic import BaseModel @@ -57,13 +57,20 @@ def get_weather_tool_definition(): @pytest.fixture -def base64_image_url(): - image_path = os.path.join(os.path.dirname(__file__), "dog.png") - with open(image_path, "rb") as image_file: - # Convert the image to base64 - base64_string = base64.b64encode(image_file.read()).decode("utf-8") - base64_url = f"data:image/png;base64,{base64_string}" - return base64_url +def image_path(): + return pathlib.Path(__file__).parent / "dog.png" + + +@pytest.fixture +def base64_image_data(image_path): + # Convert the image to base64 + return base64.b64encode(image_path.read_bytes()).decode("utf-8") + + +@pytest.fixture +def base64_image_url(base64_image_data, image_path): + # suffix includes the ., so we remove it + return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}" def test_text_completion_non_streaming(llama_stack_client, text_model_id): @@ -371,20 +378,31 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id): assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"}) -def test_image_chat_completion_base64_url( - llama_stack_client, vision_model_id, base64_image_url +@pytest.mark.parametrize("type_", ["url", "data"]) +def test_image_chat_completion_base64( + llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_ ): + image_spec = { + "url": { + "type": "image", + "image": { + "url": { + "uri": base64_image_url, + }, + }, + }, + "data": { + "type": "image", + "image": { + "data": base64_image_data, + }, + }, + }[type_] + message = { "role": "user", "content": [ - { - "type": "image", - "image": { - "url": { - "uri": base64_image_url, - }, - }, - }, + image_spec, { "type": "text", "text": "Describe what is in this image.",