Merge branch 'main' into responses_object

This commit is contained in:
Emilio Garcia 2025-08-20 11:33:41 -04:00 committed by Emilio Garcia
commit 8fb17ba18e
67 changed files with 794 additions and 218 deletions

View file

@ -36,6 +36,21 @@ jobs:
**/requirements*.txt
.pre-commit-config.yaml
# npm ci may fail -
# npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
# npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
# - name: Set up Node.js
# uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
# with:
# node-version: '20'
# cache: 'npm'
# cache-dependency-path: 'llama_stack/ui/'
# - name: Install npm dependencies
# run: npm ci
# working-directory: llama_stack/ui
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:

View file

@ -146,20 +146,50 @@ repos:
pass_filenames: false
require_serial: true
files: ^.github/workflows/.*$
-      - id: ui-prettier
-        name: Format UI code with Prettier
-        entry: bash -c 'cd llama_stack/ui && npm run format'
-        language: system
-        files: ^llama_stack/ui/.*\.(ts|tsx)$
-        pass_filenames: false
-        require_serial: true
-      - id: ui-eslint
-        name: Lint UI code with ESLint
-        entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
-        language: system
-        files: ^llama_stack/ui/.*\.(ts|tsx)$
-        pass_filenames: false
-        require_serial: true
+      # ui-prettier and ui-eslint are disabled until we can avoid `npm ci`, which is slow and may fail -
+      # npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
+      # npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
+      # and until we have infra for installing prettier and next via npm -
+      # Lint UI code with ESLint.....................................................Failed
+      # - hook id: ui-eslint
+      # - exit code: 127
+      # > ui@0.1.0 lint
+      # > next lint --fix --quiet
+      # sh: line 1: next: command not found
+      #
+      # - id: ui-prettier
+      #   name: Format UI code with Prettier
+      #   entry: bash -c 'cd llama_stack/ui && npm ci && npm run format'
+      #   language: system
+      #   files: ^llama_stack/ui/.*\.(ts|tsx)$
+      #   pass_filenames: false
+      #   require_serial: true
+      # - id: ui-eslint
+      #   name: Lint UI code with ESLint
+      #   entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
+      #   language: system
+      #   files: ^llama_stack/ui/.*\.(ts|tsx)$
+      #   pass_filenames: false
+      #   require_serial: true
+      - id: check-log-usage
+        name: Ensure 'llama_stack.log' usage for logging
+        entry: bash
+        language: system
+        types: [python]
+        pass_filenames: true
+        args:
+          - -c
+          - |
+            matches=$(grep -EnH '^[^#]*\b(import\s+logging|from\s+logging\b)' "$@" | grep -v -e '#\s*allow-direct-logging' || true)
+            if [ -n "$matches" ]; then
+              # GitHub Actions annotation format
+              while IFS=: read -r file line_num rest; do
+                echo "::error file=$file,line=$line_num::Do not use 'import logging' or 'from logging import' in $file. Use the custom log instead: from llama_stack.log import get_logger; logger = get_logger(). If direct logging is truly needed, add: # allow-direct-logging"
+              done <<< "$matches"
+              exit 1
+            fi
+            exit 0
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
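For reference, a minimal sketch of the logging pattern the new check-log-usage hook expects, plus the opt-out marker its grep looks for (the category value here is illustrative, not prescribed):

# Preferred: route module logging through the project helper.
from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="core")  # category chosen per module; "core" is an example
logger.info("provider initialized")

# Only when stdlib logging is genuinely required (e.g. handler configuration),
# mark the import so the hook skips that line:
# import logging  # allow-direct-logging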

View file

@ -8585,6 +8585,21 @@
"title": "OpenAIResponseError", "title": "OpenAIResponseError",
"description": "Error details for failed OpenAI response requests." "description": "Error details for failed OpenAI response requests."
}, },
"OpenAIResponseIncompleteDetails": {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "Reason for the response being incomplete"
}
},
"additionalProperties": false,
"required": [
"reason"
],
"title": "OpenAIResponseIncompleteDetails",
"description": "Incomplete details for OpenAI responses."
},
"OpenAIResponseObject": { "OpenAIResponseObject": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -8600,6 +8615,39 @@
"type": "string", "type": "string",
"description": "Unique identifier for this response" "description": "Unique identifier for this response"
}, },
"incomplete_details": {
"$ref": "#/components/schemas/OpenAIResponseIncompleteDetails",
"description": "(Optional) Incomplete details if the response is incomplete"
},
"instructions": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) A system (or developer) message inserted into the model's context."
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens."
},
"max_tool_calls": {
"type": "integer",
"description": "(Optional) The maximum number of total calls to built-in tools that can be processed in a response."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "(Optional) Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard."
},
"model": { "model": {
"type": "string", "type": "string",
"description": "Model identifier used for generation" "description": "Model identifier used for generation"
@ -8626,6 +8674,26 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"prompt_cache_key": {
"type": "string",
"description": "(Optional)Used to cache responses for similar requests to optimize your cache hit rates. Replaces the user field."
},
"reasoning": {
"$ref": "#/components/schemas/OpenAIResponseReasoning",
"description": "(Optional) Configuration options for reasoning models."
},
"safety_identifier": {
"type": "string",
"description": "(Optional) A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies."
},
"service_tier": {
"type": "string",
"description": "(Optional) Specifies the processing type used for serving the request."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -8638,17 +8706,29 @@
"$ref": "#/components/schemas/OpenAIResponseText", "$ref": "#/components/schemas/OpenAIResponseText",
"description": "Text formatting configuration for the response" "description": "Text formatting configuration for the response"
}, },
"tool_choice": {
"$ref": "#/components/schemas/OpenAIResponsesToolChoice"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponsesTool"
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": { "top_p": {
"type": "number", "type": "number",
"description": "(Optional) Nucleus sampling parameter used for generation" "description": "(Optional) Nucleus sampling parameter used for generation"
}, },
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"user": { "user": {
"type": "string", "type": "string",
"description": "(Optional) User identifier associated with the request" "description": "(Optional) User identifier associated with the request"
},
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -8659,11 +8739,10 @@
"object", "object",
"output", "output",
"parallel_tool_calls", "parallel_tool_calls",
"status", "status"
"text"
], ],
"title": "OpenAIResponseObject", "title": "OpenAIResponseObject",
"description": "Complete OpenAI response object containing generation results and metadata." "description": "Complete OpenAI response object containing generation results and metadata.\nBased on OpenAI Responses API schema: https://github.com/openai/openai-python/blob/34014aedbb8946c03e97e5c8d72e03ad2259cd7c/src/openai/types/responses/response.py#L38"
}, },
"OpenAIResponseOutput": { "OpenAIResponseOutput": {
"oneOf": [ "oneOf": [
@ -8821,6 +8900,103 @@
"title": "OpenAIResponseOutputMessageMCPListTools", "title": "OpenAIResponseOutputMessageMCPListTools",
"description": "MCP list tools output message containing available tools from an MCP server." "description": "MCP list tools output message containing available tools from an MCP server."
}, },
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the prompt template to use."
},
"variables": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Map of values to substitute in for variables in your prompt. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "(Optional) Version of the prompt template."
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "Reference to a prompt template and its variables."
},
"OpenAIResponseReasoning": {
"type": "object",
"properties": {
"effort": {
"type": "string",
"enum": [
"low",
"medium",
"high",
"minimal"
],
"description": "(Optional) The effort level to use for reasoning."
},
"generate_summary": {
"type": "string",
"description": "Deprecated. Use the generate_summary_text field instead. (Optional) Whether to generate a summary of the reasoning process."
},
"summary": {
"type": "string"
}
},
"additionalProperties": false,
"title": "OpenAIResponseReasoning",
"description": "Configuration options for reasoning models."
},
"OpenAIResponsesTool": {
"type": "object",
"properties": {
"description": {
"type": "string"
},
"name": {
"type": "string"
},
"parameters": {
"type": "object",
"title": "object",
"description": "The base class of the class hierarchy.\nWhen called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any."
},
"type": {
"type": "string",
"const": "function"
}
},
"additionalProperties": false,
"title": "OpenAIResponsesTool"
},
"OpenAIResponsesToolChoice": {
"type": "object",
"title": "OpenAIResponsesToolChoice",
"description": "Type alias.\nType aliases are created through the type statement::\n\n type Alias = int\n\nIn this example, Alias and int will be treated equivalently by static\ntype checkers.\n\nAt runtime, Alias is an instance of TypeAliasType. The __name__\nattribute holds the name of the type alias. The value of the type alias\nis stored in the __value__ attribute. It is evaluated lazily, so the\nvalue is computed only if the attribute is accessed.\n\nType aliases can also be generic::\n\n type ListOrSet[T] = list[T] | set[T]\n\nIn this case, the type parameters of the alias are stored in the\n__type_params__ attribute.\n\nSee PEP 695 for more information."
},
"OpenAIResponseContentPart": { "OpenAIResponseContentPart": {
"oneOf": [ "oneOf": [
{ {
@ -12744,6 +12920,39 @@
"type": "string", "type": "string",
"description": "Unique identifier for this response" "description": "Unique identifier for this response"
}, },
"incomplete_details": {
"$ref": "#/components/schemas/OpenAIResponseIncompleteDetails",
"description": "(Optional) Incomplete details if the response is incomplete"
},
"instructions": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) A system (or developer) message inserted into the model's context."
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens."
},
"max_tool_calls": {
"type": "integer",
"description": "(Optional) The maximum number of total calls to built-in tools that can be processed in a response."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "(Optional) Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard."
},
"model": { "model": {
"type": "string", "type": "string",
"description": "Model identifier used for generation" "description": "Model identifier used for generation"
@ -12770,6 +12979,26 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"prompt_cache_key": {
"type": "string",
"description": "(Optional)Used to cache responses for similar requests to optimize your cache hit rates. Replaces the user field."
},
"reasoning": {
"$ref": "#/components/schemas/OpenAIResponseReasoning",
"description": "(Optional) Configuration options for reasoning models."
},
"safety_identifier": {
"type": "string",
"description": "(Optional) A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies."
},
"service_tier": {
"type": "string",
"description": "(Optional) Specifies the processing type used for serving the request."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -12782,18 +13011,30 @@
"$ref": "#/components/schemas/OpenAIResponseText", "$ref": "#/components/schemas/OpenAIResponseText",
"description": "Text formatting configuration for the response" "description": "Text formatting configuration for the response"
}, },
"tool_choice": {
"$ref": "#/components/schemas/OpenAIResponsesToolChoice"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponsesTool"
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": { "top_p": {
"type": "number", "type": "number",
"description": "(Optional) Nucleus sampling parameter used for generation" "description": "(Optional) Nucleus sampling parameter used for generation"
}, },
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"user": { "user": {
"type": "string", "type": "string",
"description": "(Optional) User identifier associated with the request" "description": "(Optional) User identifier associated with the request"
}, },
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -12811,7 +13052,6 @@
"output", "output",
"parallel_tool_calls", "parallel_tool_calls",
"status", "status",
"text",
"input" "input"
], ],
"title": "OpenAIResponseObjectWithInput", "title": "OpenAIResponseObjectWithInput",

View file

@ -6249,6 +6249,17 @@ components:
title: OpenAIResponseError
description: >-
Error details for failed OpenAI response requests.
OpenAIResponseIncompleteDetails:
type: object
properties:
reason:
type: string
description: Reason for the response being incomplete
additionalProperties: false
required:
- reason
title: OpenAIResponseIncompleteDetails
description: Incomplete details for OpenAI responses.
OpenAIResponseObject:
type: object
properties:
@ -6263,6 +6274,36 @@ components:
id:
type: string
description: Unique identifier for this response
incomplete_details:
$ref: '#/components/schemas/OpenAIResponseIncompleteDetails'
description: >-
(Optional) Incomplete details if the response is incomplete
instructions:
oneOf:
- type: string
- type: array
items:
type: string
description: >-
(Optional) A system (or developer) message inserted into the model's context.
max_output_tokens:
type: integer
description: >-
(Optional) An upper bound for the number of tokens that can be generated
for a response, including visible output tokens and reasoning tokens.
max_tool_calls:
type: integer
description: >-
(Optional) The maximum number of total calls to built-in tools that can
be processed in a response.
metadata:
type: object
additionalProperties:
type: string
description: >-
(Optional) Set of 16 key-value pairs that can be attached to an object.
This can be useful for storing additional information about the object
in a structured format, and querying for objects via API or the dashboard.
model:
type: string
description: Model identifier used for generation
@ -6287,6 +6328,28 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
prompt_cache_key:
type: string
description: >-
(Optional) Used to cache responses for similar requests to optimize your
cache hit rates. Replaces the user field.
reasoning:
$ref: '#/components/schemas/OpenAIResponseReasoning'
description: >-
(Optional) Configuration options for reasoning models.
safety_identifier:
type: string
description: >-
(Optional) A stable identifier used to help detect users of your application
that may be violating OpenAI's usage policies.
service_tier:
type: string
description: >-
(Optional) Specifies the processing type used for serving the request.
status:
type: string
description: >-
@ -6299,18 +6362,26 @@ components:
$ref: '#/components/schemas/OpenAIResponseText'
description: >-
Text formatting configuration for the response
tool_choice:
$ref: '#/components/schemas/OpenAIResponsesToolChoice'
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponsesTool'
top_logprobs:
type: integer
top_p:
type: number
description: >-
(Optional) Nucleus sampling parameter used for generation
+ truncation:
+ type: string
+ description: >-
+ (Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
- truncation:
- type: string
- description: >-
- (Optional) Truncation strategy applied to the response
additionalProperties: false
required:
- created_at
@ -6320,10 +6391,11 @@ components:
- output
- parallel_tool_calls
- status
-   - text
title: OpenAIResponseObject
description: >-
Complete OpenAI response object containing generation results and metadata.
+ Based on OpenAI Responses API schema: https://github.com/openai/openai-python/blob/34014aedbb8946c03e97e5c8d72e03ad2259cd7c/src/openai/types/responses/response.py#L38
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseMessage'
@ -6441,6 +6513,115 @@ components:
title: OpenAIResponseOutputMessageMCPListTools
description: >-
MCP list tools output message containing available tools from an MCP server.
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: >-
The unique identifier of the prompt template to use.
variables:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Map of values to substitute in for variables in your prompt.
The substitution values can either be strings, or other Response input
types like images or files.
version:
type: string
description: >-
(Optional) Version of the prompt template.
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
Reference to a prompt template and its variables.
OpenAIResponseReasoning:
type: object
properties:
effort:
type: string
enum:
- low
- medium
- high
- minimal
description: >-
(Optional) The effort level to use for reasoning.
generate_summary:
type: string
description: >-
Deprecated. Use the generate_summary_text field instead. (Optional) Whether
to generate a summary of the reasoning process.
summary:
type: string
additionalProperties: false
title: OpenAIResponseReasoning
description: >-
Configuration options for reasoning models.
OpenAIResponsesTool:
type: object
properties:
description:
type: string
name:
type: string
parameters:
type: object
title: object
description: >-
The base class of the class hierarchy.
When called, it accepts no arguments and returns a new featureless
instance that has no instance attributes and cannot be given any.
type:
type: string
const: function
additionalProperties: false
title: OpenAIResponsesTool
OpenAIResponsesToolChoice:
type: object
title: OpenAIResponsesToolChoice
description: >-
Type alias.
Type aliases are created through the type statement::
type Alias = int
In this example, Alias and int will be treated equivalently by static
type checkers.
At runtime, Alias is an instance of TypeAliasType. The __name__
attribute holds the name of the type alias. The value of the type alias
is stored in the __value__ attribute. It is evaluated lazily, so the
value is computed only if the attribute is accessed.
Type aliases can also be generic::
type ListOrSet[T] = list[T] | set[T]
In this case, the type parameters of the alias are stored in the
__type_params__ attribute.
See PEP 695 for more information.
OpenAIResponseContentPart:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
@ -9464,6 +9645,36 @@ components:
id:
type: string
description: Unique identifier for this response
incomplete_details:
$ref: '#/components/schemas/OpenAIResponseIncompleteDetails'
description: >-
(Optional) Incomplete details if the response is incomplete
instructions:
oneOf:
- type: string
- type: array
items:
type: string
description: >-
(Optional) A system (or developer) message inserted into the model's context.
max_output_tokens:
type: integer
description: >-
(Optional) An upper bound for the number of tokens that can be generated
for a response, including visible output tokens and reasoning tokens.
max_tool_calls:
type: integer
description: >-
(Optional) The maximum number of total calls to built-in tools that can
be processed in a response.
metadata:
type: object
additionalProperties:
type: string
description: >-
(Optional) Set of 16 key-value pairs that can be attached to an object.
This can be useful for storing additional information about the object
in a structured format, and querying for objects via API or the dashboard.
model:
type: string
description: Model identifier used for generation
@ -9488,6 +9699,28 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
prompt_cache_key:
type: string
description: >-
(Optional)Used to cache responses for similar requests to optimize your
cache hit rates. Replaces the user field.
reasoning:
$ref: '#/components/schemas/OpenAIResponseReasoning'
description: >-
(Optional) Configuration options for reasoning models.
safety_identifier:
type: string
description: >-
(Optional) A stable identifier used to help detect users of your application
that may be violating OpenAI's usage policies.
service_tier:
type: string
description: >-
(Optional) Specifies the processing type used for serving the request.
status:
type: string
description: >-
@ -9500,18 +9733,26 @@ components:
$ref: '#/components/schemas/OpenAIResponseText'
description: >-
Text formatting configuration for the response
tool_choice:
$ref: '#/components/schemas/OpenAIResponsesToolChoice'
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponsesTool'
top_logprobs:
type: integer
top_p:
type: number
description: >-
(Optional) Nucleus sampling parameter used for generation
+ truncation:
+ type: string
+ description: >-
+ (Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
- truncation:
- type: string
- description: >-
- (Optional) Truncation strategy applied to the response
input:
type: array
items:
@ -9527,7 +9768,6 @@ components:
- output
- parallel_tool_calls
- status
-   - text
- input
title: OpenAIResponseObjectWithInput
description: >-
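To make the OpenAIResponsePrompt shape above concrete, a hypothetical prompt reference is sketched below; the id, version and variable names are invented for illustration only:

prompt_reference = {
    "id": "pmpt_abc123",   # hypothetical prompt template id
    "version": "2",        # optional template version
    "variables": {         # optional substitutions; values may be strings or other Response input types
        "customer_name": "Ada",
        "include_citations": True,
    },
}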

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates. # Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved. # All rights reserved.
# #

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
-from typing import Annotated, Any, Literal, Optional
+from typing import Annotated, Any, Literal, Union
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing_extensions import TypedDict from typing_extensions import TypedDict
@ -19,9 +19,16 @@ from llama_stack.apis.tools.openai_tool_choice import (
from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
from llama_stack.schema_utils import json_schema_type, register_schema
- type OpenAIResponsesToolChoice = (
-     ToolChoiceTypes | ToolChoiceAllowed | ToolChoiceFunction | ToolChoiceMcp | ToolChoiceCustom
- )
+ type OpenAIResponsesToolChoice = Annotated[
+     Union[
+         ToolChoiceTypes,
+         ToolChoiceAllowed,
+         ToolChoiceFunction,
+         ToolChoiceMcp,
+         ToolChoiceCustom
+     ],
+     Field(discriminator="type"),
+ ]
register_schema(OpenAIResponsesToolChoice, name="OpenAIResponsesToolChoice")
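The Annotated[..., Field(discriminator="type")] form lets pydantic select the concrete tool-choice model from the "type" key during validation. A self-contained sketch of the same pattern with two made-up variants (the real ToolChoice* models are imported from llama_stack.apis.tools and are not reproduced here):

from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class DemoChoiceFunction(BaseModel):      # made-up stand-in for ToolChoiceFunction
    type: Literal["function"] = "function"
    name: str


class DemoChoiceMcp(BaseModel):           # made-up stand-in for ToolChoiceMcp
    type: Literal["mcp"] = "mcp"
    server_label: str


DemoToolChoice = Annotated[
    Union[DemoChoiceFunction, DemoChoiceMcp],
    Field(discriminator="type"),
]

# The "type" value selects the variant; an unknown type fails validation.
choice = TypeAdapter(DemoToolChoice).validate_python({"type": "function", "name": "get_weather"})
assert isinstance(choice, DemoChoiceFunction)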
@ -420,30 +427,30 @@ class OpenAIResponseObject(BaseModel):
created_at: int
error: OpenAIResponseError | None = None
id: str
- incomplete_details: Optional[OpenAIResponseIncompleteDetails] = None
- instructions: Optional[str | list[str]] = None
- max_output_tokens: Optional[int] = None
- max_tool_calls: Optional[int] = None
- metadata: Optional[dict[str, str]] = None
+ incomplete_details: OpenAIResponseIncompleteDetails | None = None
+ instructions: str | list[str] | None = None
+ max_output_tokens: int | None = None
+ max_tool_calls: int | None = None
+ metadata: dict[str, str] | None = None
model: str
object: Literal["response"] = "response"
output: list[OpenAIResponseOutput]
parallel_tool_calls: bool = False
- previous_response_id: Optional[str] = None
- prompt: Optional[OpenAIResponsePrompt] = None
- prompt_cache_key: Optional[str] = None
- reasoning: Optional[OpenAIResponseReasoning] = None
- safety_identifier: Optional[str] = None
- service_tier: Optional[str] = None
+ previous_response_id: str | None = None
+ prompt: OpenAIResponsePrompt | None = None
+ prompt_cache_key: str | None = None
+ reasoning: OpenAIResponseReasoning | None = None
+ safety_identifier: str | None = None
+ service_tier: str | None = None
status: str
temperature: float | None = None
- text: Optional[OpenAIResponseText] = None
- tool_choice: Optional[OpenAIResponsesToolChoice] = None
- tools: Optional[list[OpenAIResponsesTool]] = None
- top_logprobs: Optional[int] = None
- top_p: Optional[float] = None
- user: Optional[str] = None  # Deprecated: This field is being replaced by safety_identifier and prompt_cache_key
- truncation: Optional[str] = None
+ text: OpenAIResponseText | None = None
+ tool_choice: OpenAIResponsesToolChoice | None = None
+ tools: list[OpenAIResponsesTool] | None = None
+ top_logprobs: int | None = None
+ top_p: float | None = None
+ user: str | None = None  # Deprecated: This field is being replaced by safety_identifier and prompt_cache_key
+ truncation: str | None = None
@json_schema_type

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import importlib.resources import importlib.resources
import logging
import sys import sys
from pydantic import BaseModel from pydantic import BaseModel
@ -17,9 +16,10 @@ from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.distributions.template import DistributionTemplate from llama_stack.distributions.template import DistributionTemplate
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="core")
# These are the dependencies needed by the distribution server. # These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script. # `llama-stack` is automatically installed by the installation script.
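The same replacement recurs through the files below: a module-level stdlib logger becomes a get_logger call with a per-module category. A before/after sketch (the category string is whatever fits the module; "core" is only one of the values used in this commit):

# Before (stdlib logger, now rejected by the check-log-usage hook):
# import logging
# log = logging.getLogger(__name__)

# After (pattern repeated throughout this commit, with a per-subsystem category):
from llama_stack.log import get_logger

log = get_logger(name=__name__, category="core")
# other categories appearing below include "agents", "inference", "post_training",
# "safety", "models" and "models::llama"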

View file

@ -3,7 +3,6 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import textwrap import textwrap
from typing import Any from typing import Any
@ -21,9 +20,10 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.core.utils.prompt_for_config import prompt_for_config
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.datatypes import Api, ProviderSpec
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="core")
def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider: def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider:

View file

@ -7,7 +7,7 @@
import asyncio import asyncio
import inspect import inspect
import json import json
-import logging
+import logging  # allow-direct-logging
import os import os
import sys import sys
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@ -48,6 +48,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook from llama_stack.core.utils.exec import in_notebook
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry.tracing import ( from llama_stack.providers.utils.telemetry.tracing import (
CURRENT_TRACE_CONTEXT, CURRENT_TRACE_CONTEXT,
end_trace, end_trace,
@ -55,7 +56,7 @@ from llama_stack.providers.utils.telemetry.tracing import (
start_trace, start_trace,
) )
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="core")
T = TypeVar("T") T = TypeVar("T")

View file

@ -6,15 +6,15 @@
import contextvars import contextvars
import json import json
import logging
from contextlib import AbstractContextManager from contextlib import AbstractContextManager
from typing import Any from typing import Any
from llama_stack.core.datatypes import User from llama_stack.core.datatypes import User
from llama_stack.log import get_logger
from .utils.dynamic import instantiate_class_type from .utils.dynamic import instantiate_class_type
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="core")
# Context variable for request provider data and auth attributes # Context variable for request provider data and auth attributes
PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None) PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)

View file

@ -9,7 +9,7 @@ import asyncio
import functools import functools
import inspect import inspect
import json import json
-import logging
+import logging  # allow-direct-logging
import os import os
import ssl import ssl
import sys import sys

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
-import logging
+import importlib
import os import os
import signal import signal
import subprocess import subprocess
@ -12,9 +12,9 @@ import sys
from termcolor import cprint from termcolor import cprint
-log = logging.getLogger(__name__)
-import importlib
+from llama_stack.log import get_logger
+log = get_logger(name=__name__, category="core")
def formulate_run_args(image_type: str, image_name: str) -> list: def formulate_run_args(image_type: str, image_name: str) -> list:

View file

@ -6,7 +6,6 @@
import inspect import inspect
import json import json
import logging
from enum import Enum from enum import Enum
from typing import Annotated, Any, Literal, Union, get_args, get_origin from typing import Annotated, Any, Literal, Union, get_args, get_origin
@ -14,7 +13,9 @@ from pydantic import BaseModel
from pydantic.fields import FieldInfo from pydantic.fields import FieldInfo
from pydantic_core import PydanticUndefinedType from pydantic_core import PydanticUndefinedType
-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
log = get_logger(name=__name__, category="core")
def is_list_of_primitives(field_type): def is_list_of_primitives(field_type):

View file

@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
-import logging
+import logging  # allow-direct-logging
import os import os
import re import re
-from logging.config import dictConfig
+from logging.config import dictConfig  # allow-direct-logging
from rich.console import Console from rich.console import Console
from rich.errors import MarkupError from rich.errors import MarkupError

View file

@ -13,14 +13,15 @@
# Copyright (c) Meta Platforms, Inc. and its affiliates. # Copyright (c) Meta Platforms, Inc. and its affiliates.
import math import math
from logging import getLogger
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from llama_stack.log import get_logger
from .utils import get_negative_inf_value, to_2tuple from .utils import get_negative_inf_value, to_2tuple
-logger = getLogger()
+logger = get_logger(name=__name__, category="models::llama")
def resize_local_position_embedding(orig_pos_embed, grid_size): def resize_local_position_embedding(orig_pos_embed, grid_size):

View file

@ -13,7 +13,6 @@
import math import math
from collections import defaultdict from collections import defaultdict
from logging import getLogger
from typing import Any from typing import Any
import torch import torch
@ -21,9 +20,11 @@ import torchvision.transforms as tv
from PIL import Image from PIL import Image
from torchvision.transforms import functional as F from torchvision.transforms import functional as F
from llama_stack.log import get_logger
IMAGE_RES = 224 IMAGE_RES = 224
-logger = getLogger()
+logger = get_logger(name=__name__, category="models::llama")
class VariableSizeImageTransform: class VariableSizeImageTransform:

View file

@ -3,8 +3,6 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import math import math
from collections.abc import Callable from collections.abc import Callable
from functools import partial from functools import partial
@ -22,6 +20,8 @@ from PIL import Image as PIL_Image
from torch import Tensor, nn from torch import Tensor, nn
from torch.distributed import _functional_collectives as funcol from torch.distributed import _functional_collectives as funcol
from llama_stack.log import get_logger
from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis
from .encoder_utils import ( from .encoder_utils import (
build_encoder_attention_mask, build_encoder_attention_mask,
@ -34,9 +34,10 @@ from .encoder_utils import (
from .image_transform import VariableSizeImageTransform from .image_transform import VariableSizeImageTransform
from .utils import get_negative_inf_value, to_2tuple from .utils import get_negative_inf_value, to_2tuple
logger = logging.getLogger(__name__)
MP_SCALE = 8 MP_SCALE = 8
logger = get_logger(name=__name__, category="models")
def reduce_from_tensor_model_parallel_region(input_): def reduce_from_tensor_model_parallel_region(input_):
"""All-reduce the input tensor across model parallel group.""" """All-reduce the input tensor across model parallel group."""
@ -771,7 +772,7 @@ class TilePositionEmbedding(nn.Module):
if embed is not None: if embed is not None:
# reshape the weights to the correct shape # reshape the weights to the correct shape
nt_old, nt_old, _, w = embed.shape
-logging.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
+logger.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles) embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles)
# assign the weights to the module # assign the weights to the module
state_dict[prefix + "embedding"] = embed_new state_dict[prefix + "embedding"] = embed_new

View file

@ -4,8 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import Collection, Iterator, Sequence, Set from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path from pathlib import Path
from typing import ( from typing import (
Literal, Literal,
@ -14,11 +14,9 @@ from typing import (
import tiktoken import tiktoken
from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
# The tiktoken tokenizer can handle <=400k chars without # The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException. # pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000 TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@ -31,6 +29,8 @@ MAX_NO_WHITESPACES_CHARS = 25_000
_INSTANCE = None _INSTANCE = None
logger = get_logger(name=__name__, category="models::llama")
class Tokenizer: class Tokenizer:
""" """

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import os import os
from collections.abc import Callable from collections.abc import Callable
@ -13,11 +12,13 @@ from fairscale.nn.model_parallel.initialize import get_model_parallel_rank
from torch import Tensor, nn from torch import Tensor, nn
from torch.nn import functional as F from torch.nn import functional as F
from llama_stack.log import get_logger
from ...datatypes import QuantizationMode from ...datatypes import QuantizationMode
from ..model import Transformer, TransformerBlock from ..model import Transformer, TransformerBlock
from ..moe import MoE from ..moe import MoE
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="models")
def swiglu_wrapper_no_reduce( def swiglu_wrapper_no_reduce(

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import Collection, Iterator, Sequence, Set from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path from pathlib import Path
from typing import ( from typing import (
Literal, Literal,
@ -14,11 +13,9 @@ from typing import (
import tiktoken import tiktoken
from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
# The tiktoken tokenizer can handle <=400k chars without # The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException. # pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000 TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@ -101,6 +98,8 @@ BASIC_SPECIAL_TOKENS = [
"<|fim_suffix|>", "<|fim_suffix|>",
] ]
logger = get_logger(name=__name__, category="models::llama")
class Tokenizer: class Tokenizer:
""" """

View file

@ -6,9 +6,10 @@
# type: ignore # type: ignore
import collections import collections
import logging
-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
log = get_logger(name=__name__, category="llama")
try: try:
import fbgemm_gpu.experimental.gen_ai # noqa: F401 import fbgemm_gpu.experimental.gen_ai # noqa: F401

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import uuid import uuid
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from datetime import UTC, datetime from datetime import UTC, datetime
@ -42,6 +41,7 @@ from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@ -51,7 +51,7 @@ from .config import MetaReferenceAgentsImplConfig
from .persistence import AgentInfo from .persistence import AgentInfo
from .responses.openai_responses import OpenAIResponsesImpl from .responses.openai_responses import OpenAIResponsesImpl
-logger = logging.getLogger()
+logger = get_logger(name=__name__, category="agents")
class MetaReferenceAgentsImpl(Agents): class MetaReferenceAgentsImpl(Agents):

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import json import json
import logging
import uuid import uuid
from datetime import UTC, datetime from datetime import UTC, datetime
@ -15,9 +14,10 @@ from llama_stack.core.access_control.access_control import AccessDeniedError, is
from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.datatypes import User from llama_stack.core.datatypes import User
from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore from llama_stack.providers.utils.kvstore import KVStore
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="agents")
class AgentSessionInfo(Session): class AgentSessionInfo(Session):

View file

@ -5,13 +5,13 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import logging
from llama_stack.apis.inference import Message from llama_stack.apis.inference import Message
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry import tracing from llama_stack.providers.utils.telemetry import tracing
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="agents")
class SafetyException(Exception): # noqa: N818 class SafetyException(Exception): # noqa: N818

View file

@ -12,7 +12,6 @@
import copy import copy
import json import json
import logging
import multiprocessing import multiprocessing
import os import os
import tempfile import tempfile
@ -32,13 +31,14 @@ from fairscale.nn.model_parallel.initialize import (
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from torch.distributed.launcher.api import LaunchConfig, elastic_launch from torch.distributed.launcher.api import LaunchConfig, elastic_launch
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import GenerationResult from llama_stack.models.llama.datatypes import GenerationResult
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
ChatCompletionRequestWithRawContent, ChatCompletionRequestWithRawContent,
CompletionRequestWithRawContent, CompletionRequestWithRawContent,
) )
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
class ProcessingMessageName(str, Enum): class ProcessingMessageName(str, Enum):

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from llama_stack.apis.inference import ( from llama_stack.apis.inference import (
@ -21,6 +20,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.embedding_mixin import ( from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin, SentenceTransformerEmbeddingMixin,
@ -32,7 +32,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
from .config import SentenceTransformersInferenceConfig from .config import SentenceTransformersInferenceConfig
-log = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")
class SentenceTransformersInferenceImpl( class SentenceTransformersInferenceImpl(

View file

@ -6,7 +6,6 @@
import gc import gc
import json import json
import logging
import multiprocessing import multiprocessing
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -28,6 +27,7 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig, LoraFinetuningConfig,
TrainingConfig, TrainingConfig,
) )
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig from ..config import HuggingFacePostTrainingConfig
@ -44,7 +44,7 @@ from ..utils import (
split_dataset, split_dataset,
) )
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
class HFFinetuningSingleDevice: class HFFinetuningSingleDevice:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import gc import gc
import logging
import multiprocessing import multiprocessing
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -24,6 +23,7 @@ from llama_stack.apis.post_training import (
DPOAlignmentConfig, DPOAlignmentConfig,
TrainingConfig, TrainingConfig,
) )
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig from ..config import HuggingFacePostTrainingConfig
@ -40,7 +40,7 @@ from ..utils import (
split_dataset, split_dataset,
) )
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
class HFDPOAlignmentSingleDevice: class HFDPOAlignmentSingleDevice:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import os import os
import signal import signal
import sys import sys
@ -19,10 +18,11 @@ from transformers import AutoConfig, AutoModelForCausalLM
from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.post_training import Checkpoint, TrainingConfig from llama_stack.apis.post_training import Checkpoint, TrainingConfig
from llama_stack.log import get_logger
from .config import HuggingFacePostTrainingConfig from .config import HuggingFacePostTrainingConfig
-logger = logging.getLogger(__name__)
+logger = get_logger(name=__name__, category="post_training")
def setup_environment(): def setup_environment():

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import os import os
import time import time
from datetime import UTC, datetime from datetime import UTC, datetime
@ -19,6 +18,7 @@ from torch.utils.data import DataLoader, DistributedSampler
from torchtune import modules, training from torchtune import modules, training
from torchtune import utils as torchtune_utils from torchtune import utils as torchtune_utils
from torchtune.data import padded_collate_sft from torchtune.data import padded_collate_sft
from torchtune.models.llama3._tokenizer import Llama3Tokenizer
from torchtune.modules.loss import CEWithChunkedOutputLoss from torchtune.modules.loss import CEWithChunkedOutputLoss
from torchtune.modules.peft import ( from torchtune.modules.peft import (
get_adapter_params, get_adapter_params,
@ -45,6 +45,7 @@ from llama_stack.apis.post_training import (
) )
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from llama_stack.providers.inline.post_training.torchtune.common import utils from llama_stack.providers.inline.post_training.torchtune.common import utils
@ -56,9 +57,7 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
) )
from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
-log = logging.getLogger(__name__)
-from torchtune.models.llama3._tokenizer import Llama3Tokenizer
+log = get_logger(name=__name__, category="post_training")
class LoraFinetuningSingleDevice: class LoraFinetuningSingleDevice:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import uuid import uuid
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
@ -20,13 +19,14 @@ from llama_stack.apis.safety import (
) )
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str, interleaved_content_as_str,
) )
from .config import CodeScannerConfig from .config import CodeScannerConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="safety")
ALLOWED_CODE_SCANNER_MODEL_IDS = [ ALLOWED_CODE_SCANNER_MODEL_IDS = [
"code-scanner", "code-scanner",

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import re import re
import uuid import uuid
from string import Template from string import Template
@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.core.datatypes import Api from llama_stack.core.datatypes import Api
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.datatypes import Role
from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.datatypes import ShieldsProtocolPrivate
@ -132,6 +132,8 @@ Provide your safety assessment for ONLY THE LAST $agent_type message in the abov
PROMPT_TEMPLATE = Template(f"{PROMPT_TASK}{SAFETY_CATEGORIES}{PROMPT_CONVERSATION}{PROMPT_INSTRUCTIONS}") PROMPT_TEMPLATE = Template(f"{PROMPT_TASK}{SAFETY_CATEGORIES}{PROMPT_CONVERSATION}{PROMPT_INSTRUCTIONS}")
logger = get_logger(name=__name__, category="safety")
class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate): class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
def __init__(self, config: LlamaGuardConfig, deps) -> None: def __init__(self, config: LlamaGuardConfig, deps) -> None:
@ -407,7 +409,7 @@ class LlamaGuardShield:
unsafe_code_list = [code.strip() for code in unsafe_code.split(",")] unsafe_code_list = [code.strip() for code in unsafe_code.split(",")]
invalid_codes = [code for code in unsafe_code_list if code not in SAFETY_CODE_TO_CATEGORIES_MAP] invalid_codes = [code for code in unsafe_code_list if code not in SAFETY_CODE_TO_CATEGORIES_MAP]
if invalid_codes: if invalid_codes:
logging.warning(f"Invalid safety codes returned: {invalid_codes}") logger.warning(f"Invalid safety codes returned: {invalid_codes}")
# just returning safe object, as we don't know what the invalid codes can map to # just returning safe object, as we don't know what the invalid codes can map to
return ModerationObject( return ModerationObject(
id=f"modr-{uuid.uuid4()}", id=f"modr-{uuid.uuid4()}",

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from typing import Any from typing import Any
import torch import torch
@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject from llama_stack.apis.safety.safety import ModerationObject
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str, interleaved_content_as_str,
@ -28,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import PromptGuardConfig, PromptGuardType from .config import PromptGuardConfig, PromptGuardType
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="safety")
PROMPT_GUARD_MODEL = "Prompt-Guard-86M" PROMPT_GUARD_MODEL = "Prompt-Guard-86M"

View file

@ -7,7 +7,6 @@
import collections import collections
import functools import functools
import json import json
import logging
import random import random
import re import re
import string import string
@ -20,7 +19,9 @@ import nltk
from pythainlp.tokenize import sent_tokenize as sent_tokenize_thai from pythainlp.tokenize import sent_tokenize as sent_tokenize_thai
from pythainlp.tokenize import word_tokenize as word_tokenize_thai from pythainlp.tokenize import word_tokenize as word_tokenize_thai
logger = logging.getLogger() from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="scoring")
WORD_LIST = [ WORD_LIST = [
"western", "western",

View file

@ -4,13 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import threading import threading
from typing import Any from typing import Any
from opentelemetry import metrics, trace from opentelemetry import metrics, trace
logger = logging.getLogger(__name__)
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics import MeterProvider
@ -40,6 +37,7 @@ from llama_stack.apis.telemetry import (
UnstructuredLogEvent, UnstructuredLogEvent,
) )
from llama_stack.core.datatypes import Api from llama_stack.core.datatypes import Api
from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import ( from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
ConsoleSpanProcessor, ConsoleSpanProcessor,
) )
@ -61,6 +59,8 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
_global_lock = threading.Lock() _global_lock = threading.Lock()
_TRACER_PROVIDER = None _TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")
def is_tracing_enabled(tracer): def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span: with tracer.start_as_current_span("check_tracing") as span:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import logging
import secrets import secrets
import string import string
from typing import Any from typing import Any
@ -32,6 +31,7 @@ from llama_stack.apis.tools import (
ToolRuntime, ToolRuntime,
) )
from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
@ -42,7 +42,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import RagToolRuntimeConfig from .config import RagToolRuntimeConfig
from .context_retriever import generate_rag_query from .context_retriever import generate_rag_query
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8): def make_random_string(length: int = 8):

View file

@ -8,7 +8,6 @@ import asyncio
import base64 import base64
import io import io
import json import json
import logging
from typing import Any from typing import Any
import faiss import faiss
@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
VectorIO, VectorIO,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ( from llama_stack.providers.datatypes import (
HealthResponse, HealthResponse,
HealthStatus, HealthStatus,
@ -40,7 +40,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import FaissVectorIOConfig from .config import FaissVectorIOConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import logging
import re import re
import sqlite3 import sqlite3
import struct import struct
@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
VectorIO, VectorIO,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
VectorDBWithIndex, VectorDBWithIndex,
) )
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="vector_io")
# Specifying search mode is dependent on the VectorIO provider. # Specifying search mode is dependent on the VectorIO provider.
VECTOR_SEARCH = "vector" VECTOR_SEARCH = "vector"

View file

@ -3,15 +3,14 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .models import MODEL_ENTRIES from .models import MODEL_ENTRIES
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="inference")
class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):

View file

@ -77,6 +77,10 @@ print(f"Response: {response.completion_message.content}")
``` ```
### Create Embeddings ### Create Embeddings
> Note on OpenAI embeddings compatibility
>
> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
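Beyond the basic example below, passage embeddings would go through the native `embeddings` API with `task_type="document"`. A minimal sketch, assuming `client` is an already-configured Llama Stack client and the NVIDIA embedding model is registered (parameter names follow the `inference.embeddings` API referenced above):

```python
# Passage (document) embeddings - assumes `client` is a configured
# LlamaStackClient and the NVIDIA embedding model is available.
response = client.inference.embeddings(
    model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
    contents=["Llama Stack unifies inference, safety, and vector IO providers."],
    task_type="document",
)
# EmbeddingsResponse.embeddings is a list of float vectors, one per input.
print(len(response.embeddings[0]))
```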
```python ```python
response = client.inference.embeddings( response = client.inference.embeddings(
model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",

View file

@ -4,11 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import warnings import warnings
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from openai import APIConnectionError, BadRequestError from openai import NOT_GIVEN, APIConnectionError, BadRequestError
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
InterleavedContent, InterleavedContent,
@ -27,12 +26,16 @@ from llama_stack.apis.inference import (
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
ToolChoice, ToolChoice,
ToolConfig, ToolConfig,
) )
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
@ -54,7 +57,7 @@ from .openai_utils import (
) )
from .utils import _is_nvidia_hosted from .utils import _is_nvidia_hosted
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="inference")
class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
@ -210,6 +213,57 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
# #
return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data]) return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data])
async def openai_embeddings(
self,
model: str,
input: str | list[str],
encoding_format: str | None = "float",
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
"""
OpenAI-compatible embeddings for NVIDIA NIM.
Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API.
We default this to "query" to ensure requests succeed when using the
OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with
`task_type='document'`.
"""
extra_body: dict[str, object] = {"input_type": "query"}
logger.warning(
"NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. "
"For passage embeddings, use the embeddings API with task_type='document'."
)
response = await self.client.embeddings.create(
model=await self._get_provider_model_id(model),
input=input,
encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
dimensions=dimensions if dimensions is not None else NOT_GIVEN,
user=user if user is not None else NOT_GIVEN,
extra_body=extra_body,
)
data = []
for i, embedding_data in enumerate(response.data):
data.append(
OpenAIEmbeddingData(
embedding=embedding_data.embedding,
index=i,
)
)
usage = OpenAIEmbeddingUsage(
prompt_tokens=response.usage.prompt_tokens,
total_tokens=response.usage.total_tokens,
)
return OpenAIEmbeddingsResponse(
data=data,
model=response.model,
usage=usage,
)
async def chat_completion( async def chat_completion(
self, self,
model_id: str, model_id: str,

View file

@ -4,13 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import httpx import httpx
from llama_stack.log import get_logger
from . import NVIDIAConfig from . import NVIDIAConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="inference")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:

View file

@ -4,15 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import OpenAIConfig from .config import OpenAIConfig
from .models import MODEL_ENTRIES from .models import MODEL_ENTRIES
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="inference")
# #

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import logging
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from huggingface_hub import AsyncInferenceClient, HfApi from huggingface_hub import AsyncInferenceClient, HfApi
@ -34,6 +33,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="inference")
def build_hf_repo_model_entries(): def build_hf_repo_model_entries():

View file

@ -4,18 +4,18 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import warnings import warnings
from typing import Any from typing import Any
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.post_training import TrainingConfig from llama_stack.apis.post_training import TrainingConfig
from llama_stack.log import get_logger
from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
from .config import NvidiaPostTrainingConfig from .config import NvidiaPostTrainingConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="integration")
def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None: def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import json import json
import logging
from typing import Any from typing import Any
from llama_stack.apis.inference import Message from llama_stack.apis.inference import Message
@ -16,12 +15,13 @@ from llama_stack.apis.safety import (
ViolationLevel, ViolationLevel,
) )
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.bedrock.client import create_bedrock_client from llama_stack.providers.utils.bedrock.client import create_bedrock_client
from .config import BedrockSafetyConfig from .config import BedrockSafetyConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="safety")
class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate): class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from typing import Any from typing import Any
import requests import requests
@ -12,12 +11,13 @@ import requests
from llama_stack.apis.inference import Message from llama_stack.apis.inference import Message
from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import NVIDIASafetyConfig from .config import NVIDIASafetyConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="safety")
class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import json import json
import logging
from typing import Any from typing import Any
import litellm import litellm
@ -20,12 +19,13 @@ from llama_stack.apis.safety import (
) )
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import SambaNovaSafetyConfig from .config import SambaNovaSafetyConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="safety")
CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import json import json
import logging
from typing import Any from typing import Any
from urllib.parse import urlparse from urllib.parse import urlparse
@ -20,6 +19,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
VectorIO, VectorIO,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
@ -33,7 +33,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="vector_io")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import logging
import os import os
from typing import Any from typing import Any
@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
VectorIO, VectorIO,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
@ -36,7 +36,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
@ -413,15 +413,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
index = await self._get_and_cache_vector_db_index(vector_db_id) index = await self._get_and_cache_vector_db_index(vector_db_id)
if not index: if not index:
raise VectorStoreNotFoundError(vector_db_id) raise VectorStoreNotFoundError(vector_db_id)
if params and params.get("mode") == "keyword":
# Check if this is inline Milvus (Milvus-Lite)
if hasattr(self.config, "db_path"):
raise NotImplementedError(
"Keyword search is not supported in Milvus-Lite. "
"Please use a remote Milvus server for keyword search functionality."
)
return await index.query_chunks(query, params) return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from typing import Any from typing import Any
import psycopg2 import psycopg2
@ -22,6 +21,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
VectorIO, VectorIO,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
@ -34,7 +34,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import PGVectorVectorIOConfig from .config import PGVectorVectorIOConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="vector_io")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import logging
import uuid import uuid
from typing import Any from typing import Any
@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy, VectorStoreChunkingStrategy,
VectorStoreFileObject, VectorStoreFileObject,
) )
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="vector_io")
CHUNK_ID_KEY = "_chunk_id" CHUNK_ID_KEY = "_chunk_id"
# KV store prefixes for vector databases # KV store prefixes for vector databases

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import json import json
import logging
from typing import Any from typing import Any
import weaviate import weaviate
@ -19,6 +18,7 @@ from llama_stack.apis.files.files import Files
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
@ -34,7 +34,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import WeaviateVectorIOConfig from .config import WeaviateVectorIOConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="vector_io")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::"

View file

@ -5,10 +5,11 @@
# the root directory of this source tree. # the root directory of this source tree.
import base64 import base64
import logging
import struct import struct
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from llama_stack.log import get_logger
if TYPE_CHECKING: if TYPE_CHECKING:
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
@ -27,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con
EMBEDDING_MODELS = {} EMBEDDING_MODELS = {}
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="inference")
class SentenceTransformerEmbeddingMixin: class SentenceTransformerEmbeddingMixin:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import base64 import base64
import json import json
import logging
import struct import struct
import time import time
import uuid import uuid
@ -122,6 +121,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.inference import ( from llama_stack.apis.inference import (
OpenAIChoice as OpenAIChatCompletionChoice, OpenAIChoice as OpenAIChatCompletionChoice,
) )
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
BuiltinTool, BuiltinTool,
StopReason, StopReason,
@ -134,7 +134,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
decode_assistant_message, decode_assistant_message,
) )
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="inference")
class OpenAICompatCompletionChoiceDelta(BaseModel): class OpenAICompatCompletionChoiceDelta(BaseModel):

View file

@ -4,16 +4,16 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from datetime import datetime from datetime import datetime
from pymongo import AsyncMongoClient from pymongo import AsyncMongoClient
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore from llama_stack.providers.utils.kvstore import KVStore
from ..config import MongoDBKVStoreConfig from ..config import MongoDBKVStoreConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="kvstore")
class MongoDBKVStoreImpl(KVStore): class MongoDBKVStoreImpl(KVStore):

View file

@ -4,16 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
from datetime import datetime from datetime import datetime
import psycopg2 import psycopg2
from psycopg2.extras import DictCursor from psycopg2.extras import DictCursor
from llama_stack.log import get_logger
from ..api import KVStore from ..api import KVStore
from ..config import PostgresKVStoreConfig from ..config import PostgresKVStoreConfig
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="kvstore")
class PostgresKVStoreImpl(KVStore): class PostgresKVStoreImpl(KVStore):

View file

@ -44,7 +44,7 @@ from llama_stack.providers.utils.memory.vector_store import (
make_overlapped_chunks, make_overlapped_chunks,
) )
logger = get_logger(__name__, category="vector_io") logger = get_logger(name=__name__, category="memory")
# Constants for OpenAI vector stores # Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5 CHUNK_MULTIPLIER = 5

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import base64 import base64
import io import io
import logging
import re import re
import time import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
@ -26,6 +25,7 @@ from llama_stack.apis.common.content_types import (
from llama_stack.apis.tools import RAGDocument from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
) )
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
log = logging.getLogger(__name__) log = get_logger(name=__name__, category="memory")
class ChunkForDeletion(BaseModel): class ChunkForDeletion(BaseModel):

View file

@ -6,7 +6,7 @@
import asyncio import asyncio
import contextvars import contextvars
import logging import logging # allow-direct-logging
import queue import queue
import random import random
import sys import sys

View file

@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"framer-motion": "^11.18.2", "framer-motion": "^11.18.2",
"llama-stack-client": "^0.2.17", "llama-stack-client": "^0.2.18",
"lucide-react": "^0.510.0", "lucide-react": "^0.510.0",
"next": "15.3.3", "next": "15.3.3",
"next-auth": "^4.24.11", "next-auth": "^4.24.11",

View file

@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project] [project]
name = "llama_stack" name = "llama_stack"
version = "0.2.17" version = "0.2.18"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack" description = "Llama Stack"
readme = "README.md" readme = "README.md"
@ -31,7 +31,7 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0", "huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6", "jinja2>=3.1.6",
"jsonschema", "jsonschema",
"llama-stack-client>=0.2.17", "llama-stack-client>=0.2.18",
"llama-api-client>=0.1.2", "llama-api-client>=0.1.2",
"openai>=1.99.6,<1.100.0", "openai>=1.99.6,<1.100.0",
"prompt-toolkit", "prompt-toolkit",
@ -56,7 +56,7 @@ dependencies = [
ui = [ ui = [
"streamlit", "streamlit",
"pandas", "pandas",
"llama-stack-client>=0.2.17", "llama-stack-client>=0.2.18",
"streamlit-option-menu", "streamlit-option-menu",
] ]
@ -93,6 +93,7 @@ unit = [
"blobfile", "blobfile",
"faiss-cpu", "faiss-cpu",
"pymilvus>=2.5.12", "pymilvus>=2.5.12",
"milvus-lite>=2.5.0",
"litellm", "litellm",
"together", "together",
"coverage", "coverage",
@ -118,6 +119,7 @@ test = [
"sqlalchemy[asyncio]>=2.0.41", "sqlalchemy[asyncio]>=2.0.41",
"requests", "requests",
"pymilvus>=2.5.12", "pymilvus>=2.5.12",
"milvus-lite>=2.5.0",
"weaviate-client>=4.16.4", "weaviate-client>=4.16.4",
] ]
docs = [ docs = [

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import sys import sys
import time import time
import uuid import uuid
@ -19,10 +18,10 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig, LoraFinetuningConfig,
TrainingConfig, TrainingConfig,
) )
from llama_stack.log import get_logger
# Configure logging # Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True) logger = get_logger(name=__name__, category="post_training")
logger = logging.getLogger(__name__)
skip_because_resource_intensive = pytest.mark.skip( skip_because_resource_intensive = pytest.mark.skip(

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import logging
import time import time
from io import BytesIO from io import BytesIO
@ -14,8 +13,9 @@ from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.vector_io import Chunk from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
logger = logging.getLogger(__name__) logger = get_logger(name=__name__, category="vector_io")
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models): def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
@ -56,6 +56,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
"keyword": [ "keyword": [
"inline::sqlite-vec", "inline::sqlite-vec",
"remote::milvus", "remote::milvus",
"inline::milvus",
], ],
"hybrid": [ "hybrid": [
"inline::sqlite-vec", "inline::sqlite-vec",

View file

@ -45,7 +45,6 @@ from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
class TestConvertChatChoiceToResponseMessage: class TestConvertChatChoiceToResponseMessage:
@pytest.mark.asyncio
async def test_convert_string_content(self): async def test_convert_string_content(self):
choice = OpenAIChoice( choice = OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Test message"), message=OpenAIAssistantMessageParam(content="Test message"),
@ -61,7 +60,6 @@ class TestConvertChatChoiceToResponseMessage:
assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText) assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText)
assert result.content[0].text == "Test message" assert result.content[0].text == "Test message"
@pytest.mark.asyncio
async def test_convert_text_param_content(self): async def test_convert_text_param_content(self):
choice = OpenAIChoice( choice = OpenAIChoice(
message=OpenAIAssistantMessageParam( message=OpenAIAssistantMessageParam(
@ -78,12 +76,10 @@ class TestConvertChatChoiceToResponseMessage:
class TestConvertResponseContentToChatContent: class TestConvertResponseContentToChatContent:
@pytest.mark.asyncio
async def test_convert_string_content(self): async def test_convert_string_content(self):
result = await convert_response_content_to_chat_content("Simple string") result = await convert_response_content_to_chat_content("Simple string")
assert result == "Simple string" assert result == "Simple string"
@pytest.mark.asyncio
async def test_convert_text_content_parts(self): async def test_convert_text_content_parts(self):
content = [ content = [
OpenAIResponseInputMessageContentText(text="First part"), OpenAIResponseInputMessageContentText(text="First part"),
@ -98,7 +94,6 @@ class TestConvertResponseContentToChatContent:
assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam) assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
assert result[1].text == "Second part" assert result[1].text == "Second part"
@pytest.mark.asyncio
async def test_convert_image_content(self): async def test_convert_image_content(self):
content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")] content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]
@ -111,7 +106,6 @@ class TestConvertResponseContentToChatContent:
class TestConvertResponseInputToChatMessages: class TestConvertResponseInputToChatMessages:
@pytest.mark.asyncio
async def test_convert_string_input(self): async def test_convert_string_input(self):
result = await convert_response_input_to_chat_messages("User message") result = await convert_response_input_to_chat_messages("User message")
@ -119,7 +113,6 @@ class TestConvertResponseInputToChatMessages:
assert isinstance(result[0], OpenAIUserMessageParam) assert isinstance(result[0], OpenAIUserMessageParam)
assert result[0].content == "User message" assert result[0].content == "User message"
@pytest.mark.asyncio
async def test_convert_function_tool_call_output(self): async def test_convert_function_tool_call_output(self):
input_items = [ input_items = [
OpenAIResponseInputFunctionToolCallOutput( OpenAIResponseInputFunctionToolCallOutput(
@ -135,7 +128,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].content == "Tool output" assert result[0].content == "Tool output"
assert result[0].tool_call_id == "call_123" assert result[0].tool_call_id == "call_123"
@pytest.mark.asyncio
async def test_convert_function_tool_call(self): async def test_convert_function_tool_call(self):
input_items = [ input_items = [
OpenAIResponseOutputMessageFunctionToolCall( OpenAIResponseOutputMessageFunctionToolCall(
@ -154,7 +146,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].tool_calls[0].function.name == "test_function" assert result[0].tool_calls[0].function.name == "test_function"
assert result[0].tool_calls[0].function.arguments == '{"param": "value"}' assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'
@pytest.mark.asyncio
async def test_convert_response_message(self): async def test_convert_response_message(self):
input_items = [ input_items = [
OpenAIResponseMessage( OpenAIResponseMessage(
@ -173,7 +164,6 @@ class TestConvertResponseInputToChatMessages:
class TestConvertResponseTextToChatResponseFormat: class TestConvertResponseTextToChatResponseFormat:
@pytest.mark.asyncio
async def test_convert_text_format(self): async def test_convert_text_format(self):
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
result = await convert_response_text_to_chat_response_format(text) result = await convert_response_text_to_chat_response_format(text)
@ -181,14 +171,12 @@ class TestConvertResponseTextToChatResponseFormat:
assert isinstance(result, OpenAIResponseFormatText) assert isinstance(result, OpenAIResponseFormatText)
assert result.type == "text" assert result.type == "text"
@pytest.mark.asyncio
async def test_convert_json_object_format(self): async def test_convert_json_object_format(self):
text = OpenAIResponseText(format={"type": "json_object"}) text = OpenAIResponseText(format={"type": "json_object"})
result = await convert_response_text_to_chat_response_format(text) result = await convert_response_text_to_chat_response_format(text)
assert isinstance(result, OpenAIResponseFormatJSONObject) assert isinstance(result, OpenAIResponseFormatJSONObject)
@pytest.mark.asyncio
async def test_convert_json_schema_format(self): async def test_convert_json_schema_format(self):
schema_def = {"type": "object", "properties": {"test": {"type": "string"}}} schema_def = {"type": "object", "properties": {"test": {"type": "string"}}}
text = OpenAIResponseText( text = OpenAIResponseText(
@ -204,7 +192,6 @@ class TestConvertResponseTextToChatResponseFormat:
assert result.json_schema["name"] == "test_schema" assert result.json_schema["name"] == "test_schema"
assert result.json_schema["schema"] == schema_def assert result.json_schema["schema"] == schema_def
@pytest.mark.asyncio
async def test_default_text_format(self): async def test_default_text_format(self):
text = OpenAIResponseText() text = OpenAIResponseText()
result = await convert_response_text_to_chat_response_format(text) result = await convert_response_text_to_chat_response_format(text)
@ -214,27 +201,22 @@ class TestConvertResponseTextToChatResponseFormat:
class TestGetMessageTypeByRole: class TestGetMessageTypeByRole:
@pytest.mark.asyncio
async def test_user_role(self): async def test_user_role(self):
result = await get_message_type_by_role("user") result = await get_message_type_by_role("user")
assert result == OpenAIUserMessageParam assert result == OpenAIUserMessageParam
@pytest.mark.asyncio
async def test_system_role(self): async def test_system_role(self):
result = await get_message_type_by_role("system") result = await get_message_type_by_role("system")
assert result == OpenAISystemMessageParam assert result == OpenAISystemMessageParam
@pytest.mark.asyncio
async def test_assistant_role(self): async def test_assistant_role(self):
result = await get_message_type_by_role("assistant") result = await get_message_type_by_role("assistant")
assert result == OpenAIAssistantMessageParam assert result == OpenAIAssistantMessageParam
@pytest.mark.asyncio
async def test_developer_role(self): async def test_developer_role(self):
result = await get_message_type_by_role("developer") result = await get_message_type_by_role("developer")
assert result == OpenAIDeveloperMessageParam assert result == OpenAIDeveloperMessageParam
@pytest.mark.asyncio
async def test_unknown_role(self): async def test_unknown_role(self):
result = await get_message_type_by_role("unknown") result = await get_message_type_by_role("unknown")
assert result is None assert result is None

View file

@ -6,7 +6,7 @@
import asyncio import asyncio
import json import json
import logging import logging # allow-direct-logging
import threading import threading
import time import time
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer

uv.lock generated
View file

@ -1719,7 +1719,7 @@ wheels = [
[[package]] [[package]]
name = "llama-stack" name = "llama-stack"
version = "0.2.17" version = "0.2.18"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "aiohttp" }, { name = "aiohttp" },
@ -1809,6 +1809,7 @@ test = [
{ name = "chardet" }, { name = "chardet" },
{ name = "datasets" }, { name = "datasets" },
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite" },
{ name = "openai" }, { name = "openai" },
{ name = "pymilvus" }, { name = "pymilvus" },
{ name = "pypdf" }, { name = "pypdf" },
@ -1831,6 +1832,7 @@ unit = [
{ name = "faiss-cpu" }, { name = "faiss-cpu" },
{ name = "litellm" }, { name = "litellm" },
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite" },
{ name = "ollama" }, { name = "ollama" },
{ name = "openai" }, { name = "openai" },
{ name = "pymilvus" }, { name = "pymilvus" },
@ -1854,8 +1856,8 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" }, { name = "jsonschema" },
{ name = "llama-api-client", specifier = ">=0.1.2" }, { name = "llama-api-client", specifier = ">=0.1.2" },
{ name = "llama-stack-client", specifier = ">=0.2.17" }, { name = "llama-stack-client", specifier = ">=0.2.18" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.18" },
{ name = "openai", specifier = ">=1.99.6,<1.100.0" }, { name = "openai", specifier = ">=1.99.6,<1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@ -1925,6 +1927,7 @@ test = [
{ name = "chardet" }, { name = "chardet" },
{ name = "datasets" }, { name = "datasets" },
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai" }, { name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" }, { name = "pymilvus", specifier = ">=2.5.12" },
{ name = "pypdf" }, { name = "pypdf" },
@ -1946,6 +1949,7 @@ unit = [
{ name = "faiss-cpu" }, { name = "faiss-cpu" },
{ name = "litellm" }, { name = "litellm" },
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "ollama" }, { name = "ollama" },
{ name = "openai" }, { name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" }, { name = "pymilvus", specifier = ">=2.5.12" },
@ -1959,7 +1963,7 @@ unit = [
[[package]] [[package]]
name = "llama-stack-client" name = "llama-stack-client"
version = "0.2.17" version = "0.2.18"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "anyio" }, { name = "anyio" },
@ -1978,9 +1982,9 @@ dependencies = [
{ name = "tqdm" }, { name = "tqdm" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/c5/2a/bb2949d6a5c494d21da0c185d426e25eaa8016f8287b689249afc6c96fb5/llama_stack_client-0.2.17.tar.gz", hash = "sha256:1fe2070133c6356761e394fa346045e9b6b567d4c63157b9bc6be89b9a6e7a41", size = 257636, upload-time = "2025-08-05T01:42:55.911Z" } sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/81/fc/5eccc86b83c5ced3a3bca071d250a86ccafa4ff17546cf781deb7758ab74/llama_stack_client-0.2.17-py3-none-any.whl", hash = "sha256:336c32f8688700ff64717b8109f405dc87a990fbe310c2027ac9ed6d39d67d16", size = 350329, upload-time = "2025-08-05T01:42:54.381Z" }, { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" },
] ]
[[package]] [[package]]