Merge branch 'main' into responses_object

This commit is contained in:
Emilio Garcia 2025-08-20 11:33:41 -04:00 committed by Emilio Garcia
commit 8fb17ba18e
67 changed files with 794 additions and 218 deletions

View file

@ -36,6 +36,21 @@ jobs:
**/requirements*.txt
.pre-commit-config.yaml
# npm ci may fail with:
# npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
# npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
# - name: Set up Node.js
# uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
# with:
# node-version: '20'
# cache: 'npm'
# cache-dependency-path: 'llama_stack/ui/'
# - name: Install npm dependencies
# run: npm ci
# working-directory: llama_stack/ui
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:

View file

@ -146,20 +146,50 @@ repos:
pass_filenames: false
require_serial: true
files: ^.github/workflows/.*$
- id: ui-prettier
name: Format UI code with Prettier
entry: bash -c 'cd llama_stack/ui && npm run format'
# ui-prettier and ui-eslint are disabled until we can avoid `npm ci`, which is slow and may fail with:
# npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing.
# npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18
# and until we have infra for installing prettier and next via npm:
# Lint UI code with ESLint.....................................................Failed
# - hook id: ui-eslint
# - exit code: 127
# > ui@0.1.0 lint
# > next lint --fix --quiet
# sh: line 1: next: command not found
#
# - id: ui-prettier
# name: Format UI code with Prettier
# entry: bash -c 'cd llama_stack/ui && npm ci && npm run format'
# language: system
# files: ^llama_stack/ui/.*\.(ts|tsx)$
# pass_filenames: false
# require_serial: true
# - id: ui-eslint
# name: Lint UI code with ESLint
# entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
# language: system
# files: ^llama_stack/ui/.*\.(ts|tsx)$
# pass_filenames: false
# require_serial: true
- id: check-log-usage
name: Ensure 'llama_stack.log' usage for logging
entry: bash
language: system
files: ^llama_stack/ui/.*\.(ts|tsx)$
pass_filenames: false
require_serial: true
- id: ui-eslint
name: Lint UI code with ESLint
entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet'
language: system
files: ^llama_stack/ui/.*\.(ts|tsx)$
pass_filenames: false
require_serial: true
types: [python]
pass_filenames: true
args:
- -c
- |
matches=$(grep -EnH '^[^#]*\b(import\s+logging|from\s+logging\b)' "$@" | grep -v -e '#\s*allow-direct-logging' || true)
if [ -n "$matches" ]; then
# GitHub Actions annotation format
while IFS=: read -r file line_num rest; do
echo "::error file=$file,line=$line_num::Do not use 'import logging' or 'from logging import' in $file. Use the custom log instead: from llama_stack.log import get_logger; logger = get_logger(). If direct logging is truly needed, add: # allow-direct-logging"
done <<< "$matches"
exit 1
fi
exit 0
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
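
For reference, a minimal sketch of the migration this hook enforces (the pattern used throughout this commit; the `category` value varies per module):

```python
# Flagged by check-log-usage:
#   import logging
#   logger = logging.getLogger(__name__)

# Enforced replacement:
from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="core")
logger.info("hello from the structured logger")

# Escape hatch when direct logging is genuinely required:
import logging  # allow-direct-logging
```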

View file

@ -8585,6 +8585,21 @@
"title": "OpenAIResponseError",
"description": "Error details for failed OpenAI response requests."
},
"OpenAIResponseIncompleteDetails": {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "Reason for the response being incomplete"
}
},
"additionalProperties": false,
"required": [
"reason"
],
"title": "OpenAIResponseIncompleteDetails",
"description": "Incomplete details for OpenAI responses."
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
@ -8600,6 +8615,39 @@
"type": "string",
"description": "Unique identifier for this response"
},
"incomplete_details": {
"$ref": "#/components/schemas/OpenAIResponseIncompleteDetails",
"description": "(Optional) Incomplete details if the response is incomplete"
},
"instructions": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) A system (or developer) message inserted into the model's context."
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens."
},
"max_tool_calls": {
"type": "integer",
"description": "(Optional) The maximum number of total calls to built-in tools that can be processed in a response."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "(Optional) Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard."
},
"model": {
"type": "string",
"description": "Model identifier used for generation"
@ -8626,6 +8674,26 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"prompt_cache_key": {
"type": "string",
"description": "(Optional)Used to cache responses for similar requests to optimize your cache hit rates. Replaces the user field."
},
"reasoning": {
"$ref": "#/components/schemas/OpenAIResponseReasoning",
"description": "(Optional) Configuration options for reasoning models."
},
"safety_identifier": {
"type": "string",
"description": "(Optional) A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies."
},
"service_tier": {
"type": "string",
"description": "(Optional) Specifies the processing type used for serving the request."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -8638,17 +8706,29 @@
"$ref": "#/components/schemas/OpenAIResponseText",
"description": "Text formatting configuration for the response"
},
"tool_choice": {
"$ref": "#/components/schemas/OpenAIResponsesToolChoice"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponsesTool"
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": {
"type": "number",
"description": "(Optional) Nucleus sampling parameter used for generation"
},
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"user": {
"type": "string",
"description": "(Optional) User identifier associated with the request"
},
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
}
},
"additionalProperties": false,
@ -8659,11 +8739,10 @@
"object",
"output",
"parallel_tool_calls",
"status",
"text"
"status"
],
"title": "OpenAIResponseObject",
"description": "Complete OpenAI response object containing generation results and metadata."
"description": "Complete OpenAI response object containing generation results and metadata.\nBased on OpenAI Responses API schema: https://github.com/openai/openai-python/blob/34014aedbb8946c03e97e5c8d72e03ad2259cd7c/src/openai/types/responses/response.py#L38"
},
"OpenAIResponseOutput": {
"oneOf": [
@ -8821,6 +8900,103 @@
"title": "OpenAIResponseOutputMessageMCPListTools",
"description": "MCP list tools output message containing available tools from an MCP server."
},
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the prompt template to use."
},
"variables": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Map of values to substitute in for variables in your prompt. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "(Optional) Version of the prompt template."
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "Reference to a prompt template and its variables."
},
"OpenAIResponseReasoning": {
"type": "object",
"properties": {
"effort": {
"type": "string",
"enum": [
"low",
"medium",
"high",
"minimal"
],
"description": "(Optional) The effort level to use for reasoning."
},
"generate_summary": {
"type": "string",
"description": "Deprecated. Use the generate_summary_text field instead. (Optional) Whether to generate a summary of the reasoning process."
},
"summary": {
"type": "string"
}
},
"additionalProperties": false,
"title": "OpenAIResponseReasoning",
"description": "Configuration options for reasoning models."
},
"OpenAIResponsesTool": {
"type": "object",
"properties": {
"description": {
"type": "string"
},
"name": {
"type": "string"
},
"parameters": {
"type": "object",
"title": "object",
"description": "The base class of the class hierarchy.\nWhen called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any."
},
"type": {
"type": "string",
"const": "function"
}
},
"additionalProperties": false,
"title": "OpenAIResponsesTool"
},
"OpenAIResponsesToolChoice": {
"type": "object",
"title": "OpenAIResponsesToolChoice",
"description": "Type alias.\nType aliases are created through the type statement::\n\n type Alias = int\n\nIn this example, Alias and int will be treated equivalently by static\ntype checkers.\n\nAt runtime, Alias is an instance of TypeAliasType. The __name__\nattribute holds the name of the type alias. The value of the type alias\nis stored in the __value__ attribute. It is evaluated lazily, so the\nvalue is computed only if the attribute is accessed.\n\nType aliases can also be generic::\n\n type ListOrSet[T] = list[T] | set[T]\n\nIn this case, the type parameters of the alias are stored in the\n__type_params__ attribute.\n\nSee PEP 695 for more information."
},
"OpenAIResponseContentPart": {
"oneOf": [
{
@ -12744,6 +12920,39 @@
"type": "string",
"description": "Unique identifier for this response"
},
"incomplete_details": {
"$ref": "#/components/schemas/OpenAIResponseIncompleteDetails",
"description": "(Optional) Incomplete details if the response is incomplete"
},
"instructions": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) A system (or developer) message inserted into the model's context."
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens."
},
"max_tool_calls": {
"type": "integer",
"description": "(Optional) The maximum number of total calls to built-in tools that can be processed in a response."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "(Optional) Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard."
},
"model": {
"type": "string",
"description": "Model identifier used for generation"
@ -12770,6 +12979,26 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"prompt_cache_key": {
"type": "string",
"description": "(Optional)Used to cache responses for similar requests to optimize your cache hit rates. Replaces the user field."
},
"reasoning": {
"$ref": "#/components/schemas/OpenAIResponseReasoning",
"description": "(Optional) Configuration options for reasoning models."
},
"safety_identifier": {
"type": "string",
"description": "(Optional) A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies."
},
"service_tier": {
"type": "string",
"description": "(Optional) Specifies the processing type used for serving the request."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -12782,18 +13011,30 @@
"$ref": "#/components/schemas/OpenAIResponseText",
"description": "Text formatting configuration for the response"
},
"tool_choice": {
"$ref": "#/components/schemas/OpenAIResponsesToolChoice"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponsesTool"
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": {
"type": "number",
"description": "(Optional) Nucleus sampling parameter used for generation"
},
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"user": {
"type": "string",
"description": "(Optional) User identifier associated with the request"
},
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"input": {
"type": "array",
"items": {
@ -12811,7 +13052,6 @@
"output",
"parallel_tool_calls",
"status",
"text",
"input"
],
"title": "OpenAIResponseObjectWithInput",

View file

@ -6249,6 +6249,17 @@ components:
title: OpenAIResponseError
description: >-
Error details for failed OpenAI response requests.
OpenAIResponseIncompleteDetails:
type: object
properties:
reason:
type: string
description: Reason for the response being incomplete
additionalProperties: false
required:
- reason
title: OpenAIResponseIncompleteDetails
description: Incomplete details for OpenAI responses.
OpenAIResponseObject:
type: object
properties:
@ -6263,6 +6274,36 @@ components:
id:
type: string
description: Unique identifier for this response
incomplete_details:
$ref: '#/components/schemas/OpenAIResponseIncompleteDetails'
description: >-
(Optional) Incomplete details if the response is incomplete
instructions:
oneOf:
- type: string
- type: array
items:
type: string
description: >-
(Optional) A system (or developer) message inserted into the model's context.
max_output_tokens:
type: integer
description: >-
(Optional) An upper bound for the number of tokens that can be generated
for a response, including visible output tokens and reasoning tokens.
max_tool_calls:
type: integer
description: >-
(Optional) The maximum number of total calls to built-in tools that can
be processed in a response.
metadata:
type: object
additionalProperties:
type: string
description: >-
(Optional) Set of 16 key-value pairs that can be attached to an object.
This can be useful for storing additional information about the object
in a structured format, and querying for objects via API or the dashboard.
model:
type: string
description: Model identifier used for generation
@ -6287,6 +6328,28 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
prompt_cache_key:
type: string
description: >-
(Optional) Used to cache responses for similar requests to optimize your
cache hit rates. Replaces the user field.
reasoning:
$ref: '#/components/schemas/OpenAIResponseReasoning'
description: >-
(Optional) Configuration options for reasoning models.
safety_identifier:
type: string
description: >-
(Optional) A stable identifier used to help detect users of your application
that may be violating OpenAI's usage policies.
service_tier:
type: string
description: >-
(Optional) Specifies the processing type used for serving the request.
status:
type: string
description: >-
@ -6299,18 +6362,26 @@ components:
$ref: '#/components/schemas/OpenAIResponseText'
description: >-
Text formatting configuration for the response
tool_choice:
$ref: '#/components/schemas/OpenAIResponsesToolChoice'
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponsesTool'
top_logprobs:
type: integer
top_p:
type: number
description: >-
(Optional) Nucleus sampling parameter used for generation
truncation:
type: string
description: >-
(Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
truncation:
type: string
description: >-
(Optional) Truncation strategy applied to the response
additionalProperties: false
required:
- created_at
@ -6320,10 +6391,11 @@ components:
- output
- parallel_tool_calls
- status
- text
title: OpenAIResponseObject
description: >-
Complete OpenAI response object containing generation results and metadata.
Based on OpenAI Responses API schema: https://github.com/openai/openai-python/blob/34014aedbb8946c03e97e5c8d72e03ad2259cd7c/src/openai/types/responses/response.py#L38
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseMessage'
@ -6441,6 +6513,115 @@ components:
title: OpenAIResponseOutputMessageMCPListTools
description: >-
MCP list tools output message containing available tools from an MCP server.
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: >-
The unique identifier of the prompt template to use.
variables:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Map of values to substitute in for variables in your prompt.
The substitution values can either be strings, or other Response input
types like images or files.
version:
type: string
description: >-
(Optional) Version of the prompt template.
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
Reference to a prompt template and its variables.
OpenAIResponseReasoning:
type: object
properties:
effort:
type: string
enum:
- low
- medium
- high
- minimal
description: >-
(Optional) The effort level to use for reasoning.
generate_summary:
type: string
description: >-
Deprecated. Use the generate_summary_text field instead. (Optional) Whether
to generate a summary of the reasoning process.
summary:
type: string
additionalProperties: false
title: OpenAIResponseReasoning
description: >-
Configuration options for reasoning models.
OpenAIResponsesTool:
type: object
properties:
description:
type: string
name:
type: string
parameters:
type: object
title: object
description: >-
JSON schema describing the function's parameters.
type:
type: string
const: function
additionalProperties: false
title: OpenAIResponsesTool
OpenAIResponsesToolChoice:
type: object
title: OpenAIResponsesToolChoice
description: >-
Union of tool choice options (ToolChoiceTypes, ToolChoiceAllowed,
ToolChoiceFunction, ToolChoiceMcp, ToolChoiceCustom), discriminated by type.
OpenAIResponseContentPart:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
@ -9464,6 +9645,36 @@ components:
id:
type: string
description: Unique identifier for this response
incomplete_details:
$ref: '#/components/schemas/OpenAIResponseIncompleteDetails'
description: >-
(Optional) Incomplete details if the response is incomplete
instructions:
oneOf:
- type: string
- type: array
items:
type: string
description: >-
(Optional) A system (or developer) message inserted into the model's context.
max_output_tokens:
type: integer
description: >-
(Optional) An upper bound for the number of tokens that can be generated
for a response, including visible output tokens and reasoning tokens.
max_tool_calls:
type: integer
description: >-
(Optional) The maximum number of total calls to built-in tools that can
be processed in a response.
metadata:
type: object
additionalProperties:
type: string
description: >-
(Optional) Set of 16 key-value pairs that can be attached to an object.
This can be useful for storing additional information about the object
in a structured format, and querying for objects via API or the dashboard.
model:
type: string
description: Model identifier used for generation
@ -9488,6 +9699,28 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
prompt_cache_key:
type: string
description: >-
(Optional) Used to cache responses for similar requests to optimize your
cache hit rates. Replaces the user field.
reasoning:
$ref: '#/components/schemas/OpenAIResponseReasoning'
description: >-
(Optional) Configuration options for reasoning models.
safety_identifier:
type: string
description: >-
(Optional) A stable identifier used to help detect users of your application
that may be violating OpenAI's usage policies.
service_tier:
type: string
description: >-
(Optional) Specifies the processing type used for serving the request.
status:
type: string
description: >-
@ -9500,18 +9733,26 @@ components:
$ref: '#/components/schemas/OpenAIResponseText'
description: >-
Text formatting configuration for the response
tool_choice:
$ref: '#/components/schemas/OpenAIResponsesToolChoice'
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponsesTool'
top_logprobs:
type: integer
top_p:
type: number
description: >-
(Optional) Nucleus sampling parameter used for generation
truncation:
type: string
description: >-
(Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
truncation:
type: string
description: >-
(Optional) Truncation strategy applied to the response
input:
type: array
items:
@ -9527,7 +9768,6 @@ components:
- output
- parallel_tool_calls
- status
- text
- input
title: OpenAIResponseObjectWithInput
description: >-

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Annotated, Any, Literal, Optional
from typing import Annotated, Any, Literal, Union
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
@ -19,9 +19,16 @@ from llama_stack.apis.tools.openai_tool_choice import (
from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
from llama_stack.schema_utils import json_schema_type, register_schema
type OpenAIResponsesToolChoice = (
ToolChoiceTypes | ToolChoiceAllowed | ToolChoiceFunction | ToolChoiceMcp | ToolChoiceCustom
)
type OpenAIResponsesToolChoice = Annotated[
Union[
ToolChoiceTypes,
ToolChoiceAllowed,
ToolChoiceFunction,
ToolChoiceMcp,
ToolChoiceCustom
],
Field(discriminator="type"),
]
register_schema(OpenAIResponsesToolChoice, name="OpenAIResponsesToolChoice")
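
The switch from a bare `type` alias to an `Annotated` discriminated union is what lets the schema generator emit a real union schema instead of the generic `TypeAliasType` docstring. A self-contained sketch of the pydantic behavior, using hypothetical stand-in classes rather than the repository's tool-choice types:

```python
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class FunctionChoice(BaseModel):  # stand-in for ToolChoiceFunction
    type: Literal["function"]
    name: str


class McpChoice(BaseModel):  # stand-in for ToolChoiceMcp
    type: Literal["mcp"]
    server_label: str


# Field(discriminator="type") tells pydantic to dispatch on the "type" key
# rather than trying each union member in declaration order.
Choice = Annotated[Union[FunctionChoice, McpChoice], Field(discriminator="type")]

choice = TypeAdapter(Choice).validate_python({"type": "function", "name": "get_weather"})
assert isinstance(choice, FunctionChoice)
```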
@ -420,30 +427,30 @@ class OpenAIResponseObject(BaseModel):
created_at: int
error: OpenAIResponseError | None = None
id: str
incomplete_details: Optional[OpenAIResponseIncompleteDetails] = None
instructions: Optional[str | list[str]] = None
max_output_tokens: Optional[int] = None
max_tool_calls: Optional[int] = None
metadata: Optional[dict[str, str]] = None
incomplete_details: OpenAIResponseIncompleteDetails | None = None
instructions: str | list[str] | None = None
max_output_tokens: int | None = None
max_tool_calls: int | None = None
metadata: dict[str, str] | None = None
model: str
object: Literal["response"] = "response"
output: list[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: Optional[str] = None
prompt: Optional[OpenAIResponsePrompt] = None
prompt_cache_key: Optional[str] = None
reasoning: Optional[OpenAIResponseReasoning] = None
safety_identifier: Optional[str] = None
service_tier: Optional[str] = None
previous_response_id: str | None = None
prompt: OpenAIResponsePrompt | None = None
prompt_cache_key: str | None = None
reasoning: OpenAIResponseReasoning | None = None
safety_identifier: str | None = None
service_tier: str | None = None
status: str
temperature: float | None = None
text: Optional[OpenAIResponseText] = None
tool_choice: Optional[OpenAIResponsesToolChoice] = None
tools: Optional[list[OpenAIResponsesTool]] = None
top_logprobs: Optional[int] = None
top_p: Optional[float] = None
user: Optional[str] = None # Deprecated: This field is being replaced by safety_identifier and prompt_cache_key
truncation: Optional[str] = None
text: OpenAIResponseText | None = None
tool_choice: OpenAIResponsesToolChoice | None = None
tools: list[OpenAIResponsesTool] | None = None
top_logprobs: int | None = None
top_p: float | None = None
user: str | None = None # Deprecated: This field is being replaced by safety_identifier and prompt_cache_key
truncation: str | None = None
@json_schema_type
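
A minimal construction sketch for the updated model (the import path is assumed; values are placeholders), illustrating that `text` is no longer required and the new fields default to `None`:

```python
from llama_stack.apis.agents.openai_responses import OpenAIResponseObject  # path assumed

resp = OpenAIResponseObject(
    created_at=1724160821,  # placeholder Unix timestamp
    id="resp_123",
    model="llama-3.1-8b-instruct",
    output=[],
    status="completed",
)
assert resp.text is None                # "text" was dropped from the required fields
assert resp.incomplete_details is None  # new optional fields default to None
assert resp.tool_choice is None
```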

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import importlib.resources
import logging
import sys
from pydantic import BaseModel
@ -17,9 +16,10 @@ from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.distributions.template import DistributionTemplate
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="core")
# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.

View file

@ -3,7 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import textwrap
from typing import Any
@ -21,9 +20,10 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, ProviderSpec
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="core")
def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider:

View file

@ -7,7 +7,7 @@
import asyncio
import inspect
import json
import logging
import logging # allow-direct-logging
import os
import sys
from concurrent.futures import ThreadPoolExecutor
@ -48,6 +48,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry.tracing import (
CURRENT_TRACE_CONTEXT,
end_trace,
@ -55,7 +56,7 @@ from llama_stack.providers.utils.telemetry.tracing import (
start_trace,
)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="core")
T = TypeVar("T")

View file

@ -6,15 +6,15 @@
import contextvars
import json
import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.core.datatypes import User
from llama_stack.log import get_logger
from .utils.dynamic import instantiate_class_type
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="core")
# Context variable for request provider data and auth attributes
PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)

View file

@ -9,7 +9,7 @@ import asyncio
import functools
import inspect
import json
import logging
import logging # allow-direct-logging
import os
import ssl
import sys

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import importlib
import os
import signal
import subprocess
@ -12,9 +12,9 @@ import sys
from termcolor import cprint
log = logging.getLogger(__name__)
from llama_stack.log import get_logger
import importlib
log = get_logger(name=__name__, category="core")
def formulate_run_args(image_type: str, image_name: str) -> list:

View file

@ -6,7 +6,6 @@
import inspect
import json
import logging
from enum import Enum
from typing import Annotated, Any, Literal, Union, get_args, get_origin
@ -14,7 +13,9 @@ from pydantic import BaseModel
from pydantic.fields import FieldInfo
from pydantic_core import PydanticUndefinedType
log = logging.getLogger(__name__)
from llama_stack.log import get_logger
log = get_logger(name=__name__, category="core")
def is_list_of_primitives(field_type):

View file

@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import logging # allow-direct-logging
import os
import re
from logging.config import dictConfig
from logging.config import dictConfig # allow-direct-logging
from rich.console import Console
from rich.errors import MarkupError

View file

@ -13,14 +13,15 @@
# Copyright (c) Meta Platforms, Inc. and its affiliates.
import math
from logging import getLogger
import torch
import torch.nn.functional as F
from llama_stack.log import get_logger
from .utils import get_negative_inf_value, to_2tuple
logger = getLogger()
logger = get_logger(name=__name__, category="models::llama")
def resize_local_position_embedding(orig_pos_embed, grid_size):

View file

@ -13,7 +13,6 @@
import math
from collections import defaultdict
from logging import getLogger
from typing import Any
import torch
@ -21,9 +20,11 @@ import torchvision.transforms as tv
from PIL import Image
from torchvision.transforms import functional as F
from llama_stack.log import get_logger
IMAGE_RES = 224
logger = getLogger()
logger = get_logger(name=__name__, category="models::llama")
class VariableSizeImageTransform:

View file

@ -3,8 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import math
from collections.abc import Callable
from functools import partial
@ -22,6 +20,8 @@ from PIL import Image as PIL_Image
from torch import Tensor, nn
from torch.distributed import _functional_collectives as funcol
from llama_stack.log import get_logger
from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis
from .encoder_utils import (
build_encoder_attention_mask,
@ -34,9 +34,10 @@ from .encoder_utils import (
from .image_transform import VariableSizeImageTransform
from .utils import get_negative_inf_value, to_2tuple
logger = logging.getLogger(__name__)
MP_SCALE = 8
logger = get_logger(name=__name__, category="models")
def reduce_from_tensor_model_parallel_region(input_):
"""All-reduce the input tensor across model parallel group."""
@ -771,7 +772,7 @@ class TilePositionEmbedding(nn.Module):
if embed is not None:
# reshape the weights to the correct shape
nt_old, nt_old, _, w = embed.shape
logging.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
logger.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles)
# assign the weights to the module
state_dict[prefix + "embedding"] = embed_new

View file

@ -4,8 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
from typing import (
Literal,
@ -14,11 +14,9 @@ from typing import (
import tiktoken
from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
# The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@ -31,6 +29,8 @@ MAX_NO_WHITESPACES_CHARS = 25_000
_INSTANCE = None
logger = get_logger(name=__name__, category="models::llama")
class Tokenizer:
"""

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import os
from collections.abc import Callable
@ -13,11 +12,13 @@ from fairscale.nn.model_parallel.initialize import get_model_parallel_rank
from torch import Tensor, nn
from torch.nn import functional as F
from llama_stack.log import get_logger
from ...datatypes import QuantizationMode
from ..model import Transformer, TransformerBlock
from ..moe import MoE
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="models")
def swiglu_wrapper_no_reduce(

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
from typing import (
Literal,
@ -14,11 +13,9 @@ from typing import (
import tiktoken
from llama_stack.log import get_logger
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
# The tiktoken tokenizer can handle <=400k chars without
# pyo3_runtime.PanicException.
TIKTOKEN_MAX_ENCODE_CHARS = 400_000
@ -101,6 +98,8 @@ BASIC_SPECIAL_TOKENS = [
"<|fim_suffix|>",
]
logger = get_logger(name=__name__, category="models::llama")
class Tokenizer:
"""

View file

@ -6,9 +6,10 @@
# type: ignore
import collections
import logging
log = logging.getLogger(__name__)
from llama_stack.log import get_logger
log = get_logger(name=__name__, category="llama")
try:
import fbgemm_gpu.experimental.gen_ai # noqa: F401

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import uuid
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
@ -42,6 +41,7 @@ from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.datatypes import AccessRule
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@ -51,7 +51,7 @@ from .config import MetaReferenceAgentsImplConfig
from .persistence import AgentInfo
from .responses.openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger = get_logger(name=__name__, category="agents")
class MetaReferenceAgentsImpl(Agents):

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
import logging
import uuid
from datetime import UTC, datetime
@ -15,9 +14,10 @@ from llama_stack.core.access_control.access_control import AccessDeniedError, is
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.datatypes import User
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="agents")
class AgentSessionInfo(Session):

View file

@ -5,13 +5,13 @@
# the root directory of this source tree.
import asyncio
import logging
from llama_stack.apis.inference import Message
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry import tracing
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="agents")
class SafetyException(Exception): # noqa: N818

View file

@ -12,7 +12,6 @@
import copy
import json
import logging
import multiprocessing
import os
import tempfile
@ -32,13 +31,14 @@ from fairscale.nn.model_parallel.initialize import (
from pydantic import BaseModel, Field
from torch.distributed.launcher.api import LaunchConfig, elastic_launch
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import GenerationResult
from llama_stack.providers.utils.inference.prompt_adapter import (
ChatCompletionRequestWithRawContent,
CompletionRequestWithRawContent,
)
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="inference")
class ProcessingMessageName(str, Enum):

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from collections.abc import AsyncGenerator
from llama_stack.apis.inference import (
@ -21,6 +20,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin,
@ -32,7 +32,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
from .config import SentenceTransformersInferenceConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="inference")
class SentenceTransformersInferenceImpl(

View file

@ -6,7 +6,6 @@
import gc
import json
import logging
import multiprocessing
from pathlib import Path
from typing import Any
@ -28,6 +27,7 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig,
TrainingConfig,
)
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig
@ -44,7 +44,7 @@ from ..utils import (
split_dataset,
)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="post_training")
class HFFinetuningSingleDevice:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import gc
import logging
import multiprocessing
from pathlib import Path
from typing import Any
@ -24,6 +23,7 @@ from llama_stack.apis.post_training import (
DPOAlignmentConfig,
TrainingConfig,
)
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from ..config import HuggingFacePostTrainingConfig
@ -40,7 +40,7 @@ from ..utils import (
split_dataset,
)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="post_training")
class HFDPOAlignmentSingleDevice:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import os
import signal
import sys
@ -19,10 +18,11 @@ from transformers import AutoConfig, AutoModelForCausalLM
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.post_training import Checkpoint, TrainingConfig
from llama_stack.log import get_logger
from .config import HuggingFacePostTrainingConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="post_training")
def setup_environment():

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import os
import time
from datetime import UTC, datetime
@ -19,6 +18,7 @@ from torch.utils.data import DataLoader, DistributedSampler
from torchtune import modules, training
from torchtune import utils as torchtune_utils
from torchtune.data import padded_collate_sft
from torchtune.models.llama3._tokenizer import Llama3Tokenizer
from torchtune.modules.loss import CEWithChunkedOutputLoss
from torchtune.modules.peft import (
get_adapter_params,
@ -45,6 +45,7 @@ from llama_stack.apis.post_training import (
)
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
from llama_stack.providers.inline.post_training.torchtune.common import utils
@ -56,9 +57,7 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
)
from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
log = logging.getLogger(__name__)
from torchtune.models.llama3._tokenizer import Llama3Tokenizer
log = get_logger(name=__name__, category="post_training")
class LoraFinetuningSingleDevice:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import uuid
from typing import TYPE_CHECKING, Any
@ -20,13 +19,14 @@ from llama_stack.apis.safety import (
)
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
from .config import CodeScannerConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="safety")
ALLOWED_CODE_SCANNER_MODEL_IDS = [
"code-scanner",

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import re
import uuid
from string import Template
@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
from llama_stack.core.datatypes import Api
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import Role
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
@ -132,6 +132,8 @@ Provide your safety assessment for ONLY THE LAST $agent_type message in the abov
PROMPT_TEMPLATE = Template(f"{PROMPT_TASK}{SAFETY_CATEGORIES}{PROMPT_CONVERSATION}{PROMPT_INSTRUCTIONS}")
logger = get_logger(name=__name__, category="safety")
class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
def __init__(self, config: LlamaGuardConfig, deps) -> None:
@ -407,7 +409,7 @@ class LlamaGuardShield:
unsafe_code_list = [code.strip() for code in unsafe_code.split(",")]
invalid_codes = [code for code in unsafe_code_list if code not in SAFETY_CODE_TO_CATEGORIES_MAP]
if invalid_codes:
logging.warning(f"Invalid safety codes returned: {invalid_codes}")
logger.warning(f"Invalid safety codes returned: {invalid_codes}")
# just returning safe object, as we don't know what the invalid codes can map to
return ModerationObject(
id=f"modr-{uuid.uuid4()}",

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from typing import Any
import torch
@ -21,6 +20,7 @@ from llama_stack.apis.safety import (
from llama_stack.apis.safety.safety import ModerationObject
from llama_stack.apis.shields import Shield
from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
@ -28,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import PromptGuardConfig, PromptGuardType
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="safety")
PROMPT_GUARD_MODEL = "Prompt-Guard-86M"

View file

@ -7,7 +7,6 @@
import collections
import functools
import json
import logging
import random
import re
import string
@ -20,7 +19,9 @@ import nltk
from pythainlp.tokenize import sent_tokenize as sent_tokenize_thai
from pythainlp.tokenize import word_tokenize as word_tokenize_thai
logger = logging.getLogger()
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="scoring")
WORD_LIST = [
"western",

View file

@ -4,13 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import threading
from typing import Any
from opentelemetry import metrics, trace
logger = logging.getLogger(__name__)
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
@ -40,6 +37,7 @@ from llama_stack.apis.telemetry import (
UnstructuredLogEvent,
)
from llama_stack.core.datatypes import Api
from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
ConsoleSpanProcessor,
)
@ -61,6 +59,8 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")
def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import logging
import secrets
import string
from typing import Any
@ -32,6 +31,7 @@ from llama_stack.apis.tools import (
ToolRuntime,
)
from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.memory.vector_store import (
@ -42,7 +42,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import RagToolRuntimeConfig
from .context_retriever import generate_rag_query
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8):

View file

@ -8,7 +8,6 @@ import asyncio
import base64
import io
import json
import logging
from typing import Any
import faiss
@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
HealthResponse,
HealthStatus,
@ -40,7 +40,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import FaissVectorIOConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import logging
import re
import sqlite3
import struct
@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
VectorDBWithIndex,
)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="vector_io")
# Specifying search mode is dependent on the VectorIO provider.
VECTOR_SEARCH = "vector"

View file

@ -3,15 +3,14 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .models import MODEL_ENTRIES
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="inference")
class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):

View file

@ -77,6 +77,10 @@ print(f"Response: {response.completion_message.content}")
```
### Create Embeddings
> Note on OpenAI embeddings compatibility
>
> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
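
To make the mechanism concrete, here is a minimal sketch of the equivalent raw OpenAI-compatible request (the base URL and key are placeholders; `extra_body` is the standard `openai` client hook for passing non-standard fields):

```python
from openai import OpenAI

client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key="<NVIDIA_API_KEY>")
response = client.embeddings.create(
    model="nvidia/llama-3.2-nv-embedqa-1b-v2",
    input=["What is the capital of France?"],
    # Non-standard NVIDIA field; the adapter injects this automatically.
    extra_body={"input_type": "query"},
)
```

The adapter-level call through the Llama Stack client looks like this: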
```python
response = client.inference.embeddings(
model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",

View file

@ -4,11 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import warnings
from collections.abc import AsyncIterator
from openai import APIConnectionError, BadRequestError
from openai import NOT_GIVEN, APIConnectionError, BadRequestError
from llama_stack.apis.common.content_types import (
InterleavedContent,
@ -27,12 +26,16 @@ from llama_stack.apis.inference import (
Inference,
LogProbConfig,
Message,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
ResponseFormat,
SamplingParams,
TextTruncation,
ToolChoice,
ToolConfig,
)
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
@ -54,7 +57,7 @@ from .openai_utils import (
)
from .utils import _is_nvidia_hosted
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="inference")
class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
@ -210,6 +213,57 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
#
return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data])
async def openai_embeddings(
self,
model: str,
input: str | list[str],
encoding_format: str | None = "float",
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
"""
OpenAI-compatible embeddings for NVIDIA NIM.
Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API.
We default this to "query" to ensure requests succeed when using the
OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with
`task_type='document'`.
"""
extra_body: dict[str, object] = {"input_type": "query"}
logger.warning(
"NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. "
"For passage embeddings, use the embeddings API with task_type='document'."
)
response = await self.client.embeddings.create(
model=await self._get_provider_model_id(model),
input=input,
encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
dimensions=dimensions if dimensions is not None else NOT_GIVEN,
user=user if user is not None else NOT_GIVEN,
extra_body=extra_body,
)
data = []
for i, embedding_data in enumerate(response.data):
data.append(
OpenAIEmbeddingData(
embedding=embedding_data.embedding,
index=i,
)
)
usage = OpenAIEmbeddingUsage(
prompt_tokens=response.usage.prompt_tokens,
total_tokens=response.usage.total_tokens,
)
return OpenAIEmbeddingsResponse(
data=data,
model=response.model,
usage=usage,
)
async def chat_completion(
self,
model_id: str,

View file

@ -4,13 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import httpx
from llama_stack.log import get_logger
from . import NVIDIAConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="inference")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:

View file

@ -4,15 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import OpenAIConfig
from .models import MODEL_ENTRIES
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="inference")
#

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import logging
from collections.abc import AsyncGenerator
from huggingface_hub import AsyncInferenceClient, HfApi
@ -34,6 +33,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import Model
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="inference")
def build_hf_repo_model_entries():

View file

@ -4,18 +4,18 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import warnings
from typing import Any
from pydantic import BaseModel
from llama_stack.apis.post_training import TrainingConfig
from llama_stack.log import get_logger
from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
from .config import NvidiaPostTrainingConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="integration")
def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None:

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
import logging
from typing import Any
from llama_stack.apis.inference import Message
@ -16,12 +15,13 @@ from llama_stack.apis.safety import (
ViolationLevel,
)
from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.bedrock.client import create_bedrock_client
from .config import BedrockSafetyConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="safety")
class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from typing import Any
import requests
@ -12,12 +11,13 @@ import requests
from llama_stack.apis.inference import Message
from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import NVIDIASafetyConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="safety")
class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import json
import logging
from typing import Any
import litellm
@ -20,12 +19,13 @@ from llama_stack.apis.safety import (
)
from llama_stack.apis.shields import Shield
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import SambaNovaSafetyConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="safety")
CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"

View file

@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import json
import logging
from typing import Any
from urllib.parse import urlparse
@@ -20,6 +19,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="vector_io")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI

View file

@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import logging
import os
from typing import Any
@@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -36,7 +36,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
@@ -413,15 +413,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
index = await self._get_and_cache_vector_db_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
if params and params.get("mode") == "keyword":
# Check if this is inline Milvus (Milvus-Lite)
if hasattr(self.config, "db_path"):
raise NotImplementedError(
"Keyword search is not supported in Milvus-Lite. "
"Please use a remote Milvus server for keyword search functionality."
)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
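
Besides the logger swap, this hunk deletes the Milvus-Lite guard that rejected keyword mode, so the inline (db_path-based) configuration now takes the same path as a remote Milvus server; the integration-test matrix further down adds inline::milvus to the keyword list, and milvus-lite>=2.5.0 joins the test dependencies to match. A hedged sketch of the call this unblocks, with the method shape mirrored from the hunk and the adapter, database id, and query invented:

async def demo(adapter):  # adapter: a constructed MilvusVectorIOAdapter (assumed)
    # Keyword-mode queries on Milvus-Lite are now forwarded to the index
    # instead of raising NotImplementedError.
    return await adapter.query_chunks(
        vector_db_id="my_vector_db",   # invented id
        query="deployment checklist",  # invented query text
        params={"mode": "keyword"},    # previously rejected when config had db_path
    )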

View file

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from typing import Any
import psycopg2
@@ -22,6 +21,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -34,7 +34,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import PGVectorVectorIOConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"

View file

@@ -5,7 +5,6 @@
# the root directory of this source tree.
import asyncio
import logging
import uuid
from typing import Any
@@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreFileObject,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
@@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import (
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="vector_io")
CHUNK_ID_KEY = "_chunk_id"
# KV store prefixes for vector databases

View file

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import logging
from typing import Any
import weaviate
@@ -19,6 +18,7 @@ from llama_stack.apis.files.files import Files
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -34,7 +34,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti
from .config import WeaviateVectorIOConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::"

View file

@@ -5,10 +5,11 @@
# the root directory of this source tree.
import base64
import logging
import struct
from typing import TYPE_CHECKING
from llama_stack.log import get_logger
if TYPE_CHECKING:
from sentence_transformers import SentenceTransformer
@@ -27,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con
EMBEDDING_MODELS = {}
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="inference")
class SentenceTransformerEmbeddingMixin:

View file

@@ -5,7 +5,6 @@
# the root directory of this source tree.
import base64
import json
import logging
import struct
import time
import uuid
@@ -122,6 +121,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.inference import (
OpenAIChoice as OpenAIChatCompletionChoice,
)
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import (
BuiltinTool,
StopReason,
@@ -134,7 +134,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
decode_assistant_message,
)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="inference")
class OpenAICompatCompletionChoiceDelta(BaseModel):

View file

@@ -4,16 +4,16 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from datetime import datetime
from pymongo import AsyncMongoClient
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore
from ..config import MongoDBKVStoreConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="kvstore")
class MongoDBKVStoreImpl(KVStore):

View file

@@ -4,16 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from datetime import datetime
import psycopg2
from psycopg2.extras import DictCursor
from llama_stack.log import get_logger
from ..api import KVStore
from ..config import PostgresKVStoreConfig
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="kvstore")
class PostgresKVStoreImpl(KVStore):

View file

@@ -44,7 +44,7 @@ from llama_stack.providers.utils.memory.vector_store import (
make_overlapped_chunks,
)
logger = get_logger(__name__, category="vector_io")
logger = get_logger(name=__name__, category="memory")
# Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5

View file

@@ -5,7 +5,6 @@
# the root directory of this source tree.
import base64
import io
import logging
import re
import time
from abc import ABC, abstractmethod
@@ -26,6 +25,7 @@ from llama_stack.apis.common.content_types import (
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
log = logging.getLogger(__name__)
log = get_logger(name=__name__, category="memory")
class ChunkForDeletion(BaseModel):

View file

@@ -6,7 +6,7 @@
import asyncio
import contextvars
import logging
import logging # allow-direct-logging
import queue
import random
import sys
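
Here the stdlib import survives, but gains the allow-direct-logging marker so it is exempt from the repository's check against direct logging imports. A minimal sketch of the escape hatch (the usage line is invented):

import logging  # allow-direct-logging

root = logging.getLogger()  # hypothetical: this module bridges into stdlib handlers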

View file

@@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^11.18.2",
"llama-stack-client": "^0.2.17",
"llama-stack-client": "^0.2.18",
"lucide-react": "^0.510.0",
"next": "15.3.3",
"next-auth": "^4.24.11",

View file

@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
version = "0.2.17"
version = "0.2.18"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -31,7 +31,7 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.17",
"llama-stack-client>=0.2.18",
"llama-api-client>=0.1.2",
"openai>=1.99.6,<1.100.0",
"prompt-toolkit",
@@ -56,7 +56,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
"llama-stack-client>=0.2.17",
"llama-stack-client>=0.2.18",
"streamlit-option-menu",
]
@@ -93,6 +93,7 @@ unit = [
"blobfile",
"faiss-cpu",
"pymilvus>=2.5.12",
"milvus-lite>=2.5.0",
"litellm",
"together",
"coverage",
@@ -118,6 +119,7 @@ test = [
"sqlalchemy[asyncio]>=2.0.41",
"requests",
"pymilvus>=2.5.12",
"milvus-lite>=2.5.0",
"weaviate-client>=4.16.4",
]
docs = [
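
The package version and the client pins move in lockstep here: llama_stack 0.2.18 requires llama-stack-client>=0.2.18 both in the core dependencies and in the ui extra. A small post-upgrade sanity check, assuming both distributions are installed:

from importlib.metadata import version

print(version("llama_stack"))         # expect 0.2.18 after this bump
print(version("llama-stack-client"))  # must satisfy >=0.2.18 per the new pins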

View file

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import sys
import time
import uuid
@@ -19,10 +18,10 @@ from llama_stack.apis.post_training import (
LoraFinetuningConfig,
TrainingConfig,
)
from llama_stack.log import get_logger
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="post_training")
skip_because_resource_intensive = pytest.mark.skip(

View file

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import time
from io import BytesIO
@@ -14,8 +13,9 @@ from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
logger = logging.getLogger(__name__)
logger = get_logger(name=__name__, category="vector_io")
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
@@ -56,6 +56,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
"keyword": [
"inline::sqlite-vec",
"remote::milvus",
"inline::milvus",
],
"hybrid": [
"inline::sqlite-vec",

View file

@@ -45,7 +45,6 @@ from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
class TestConvertChatChoiceToResponseMessage:
@pytest.mark.asyncio
async def test_convert_string_content(self):
choice = OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Test message"),
@@ -61,7 +60,6 @@ class TestConvertChatChoiceToResponseMessage:
assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText)
assert result.content[0].text == "Test message"
@pytest.mark.asyncio
async def test_convert_text_param_content(self):
choice = OpenAIChoice(
message=OpenAIAssistantMessageParam(
@@ -78,12 +76,10 @@ class TestConvertChatChoiceToResponseMessage:
class TestConvertResponseContentToChatContent:
@pytest.mark.asyncio
async def test_convert_string_content(self):
result = await convert_response_content_to_chat_content("Simple string")
assert result == "Simple string"
@pytest.mark.asyncio
async def test_convert_text_content_parts(self):
content = [
OpenAIResponseInputMessageContentText(text="First part"),
@@ -98,7 +94,6 @@ class TestConvertResponseContentToChatContent:
assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
assert result[1].text == "Second part"
@pytest.mark.asyncio
async def test_convert_image_content(self):
content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]
@@ -111,7 +106,6 @@ class TestConvertResponseContentToChatContent:
class TestConvertResponseInputToChatMessages:
@pytest.mark.asyncio
async def test_convert_string_input(self):
result = await convert_response_input_to_chat_messages("User message")
@@ -119,7 +113,6 @@ class TestConvertResponseInputToChatMessages:
assert isinstance(result[0], OpenAIUserMessageParam)
assert result[0].content == "User message"
@pytest.mark.asyncio
async def test_convert_function_tool_call_output(self):
input_items = [
OpenAIResponseInputFunctionToolCallOutput(
@@ -135,7 +128,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].content == "Tool output"
assert result[0].tool_call_id == "call_123"
@pytest.mark.asyncio
async def test_convert_function_tool_call(self):
input_items = [
OpenAIResponseOutputMessageFunctionToolCall(
@@ -154,7 +146,6 @@ class TestConvertResponseInputToChatMessages:
assert result[0].tool_calls[0].function.name == "test_function"
assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'
@pytest.mark.asyncio
async def test_convert_response_message(self):
input_items = [
OpenAIResponseMessage(
@@ -173,7 +164,6 @@ class TestConvertResponseInputToChatMessages:
class TestConvertResponseTextToChatResponseFormat:
@pytest.mark.asyncio
async def test_convert_text_format(self):
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
result = await convert_response_text_to_chat_response_format(text)
@@ -181,14 +171,12 @@ class TestConvertResponseTextToChatResponseFormat:
assert isinstance(result, OpenAIResponseFormatText)
assert result.type == "text"
@pytest.mark.asyncio
async def test_convert_json_object_format(self):
text = OpenAIResponseText(format={"type": "json_object"})
result = await convert_response_text_to_chat_response_format(text)
assert isinstance(result, OpenAIResponseFormatJSONObject)
@pytest.mark.asyncio
async def test_convert_json_schema_format(self):
schema_def = {"type": "object", "properties": {"test": {"type": "string"}}}
text = OpenAIResponseText(
@@ -204,7 +192,6 @@ class TestConvertResponseTextToChatResponseFormat:
assert result.json_schema["name"] == "test_schema"
assert result.json_schema["schema"] == schema_def
@pytest.mark.asyncio
async def test_default_text_format(self):
text = OpenAIResponseText()
result = await convert_response_text_to_chat_response_format(text)
@@ -214,27 +201,22 @@ class TestConvertResponseTextToChatResponseFormat:
class TestGetMessageTypeByRole:
@pytest.mark.asyncio
async def test_user_role(self):
result = await get_message_type_by_role("user")
assert result == OpenAIUserMessageParam
@pytest.mark.asyncio
async def test_system_role(self):
result = await get_message_type_by_role("system")
assert result == OpenAISystemMessageParam
@pytest.mark.asyncio
async def test_assistant_role(self):
result = await get_message_type_by_role("assistant")
assert result == OpenAIAssistantMessageParam
@pytest.mark.asyncio
async def test_developer_role(self):
result = await get_message_type_by_role("developer")
assert result == OpenAIDeveloperMessageParam
@pytest.mark.asyncio
async def test_unknown_role(self):
result = await get_message_type_by_role("unknown")
assert result is None
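
Every line removed from this test file is a @pytest.mark.asyncio decorator. Dropping them wholesale only works if async tests are still collected and awaited, which points to pytest-asyncio's auto mode; the configuration below is an assumption (it is not part of this diff), shown only to make the pattern concrete:

# Assumed, not shown in this commit:
#     [tool.pytest.ini_options]
#     asyncio_mode = "auto"
# With auto mode enabled, an async test needs no per-function marker:
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
    get_message_type_by_role,  # same helper exercised by the tests above
)

async def test_unknown_role_returns_none():  # hypothetical name
    assert await get_message_type_by_role("unknown") is None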

View file

@@ -6,7 +6,7 @@
import asyncio
import json
import logging
import logging # allow-direct-logging
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer

uv.lock generated
View file

@@ -1719,7 +1719,7 @@ wheels = [
[[package]]
name = "llama-stack"
version = "0.2.17"
version = "0.2.18"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@@ -1809,6 +1809,7 @@ test = [
{ name = "chardet" },
{ name = "datasets" },
{ name = "mcp" },
{ name = "milvus-lite" },
{ name = "openai" },
{ name = "pymilvus" },
{ name = "pypdf" },
@@ -1831,6 +1832,7 @@ unit = [
{ name = "faiss-cpu" },
{ name = "litellm" },
{ name = "mcp" },
{ name = "milvus-lite" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pymilvus" },
@@ -1854,8 +1856,8 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "llama-api-client", specifier = ">=0.1.2" },
{ name = "llama-stack-client", specifier = ">=0.2.17" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" },
{ name = "llama-stack-client", specifier = ">=0.2.18" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.18" },
{ name = "openai", specifier = ">=1.99.6,<1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@@ -1925,6 +1927,7 @@ test = [
{ name = "chardet" },
{ name = "datasets" },
{ name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" },
{ name = "pypdf" },
@@ -1946,6 +1949,7 @@ unit = [
{ name = "faiss-cpu" },
{ name = "litellm" },
{ name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" },
@@ -1959,7 +1963,7 @@ unit = [
[[package]]
name = "llama-stack-client"
version = "0.2.17"
version = "0.2.18"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1978,9 +1982,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c5/2a/bb2949d6a5c494d21da0c185d426e25eaa8016f8287b689249afc6c96fb5/llama_stack_client-0.2.17.tar.gz", hash = "sha256:1fe2070133c6356761e394fa346045e9b6b567d4c63157b9bc6be89b9a6e7a41", size = 257636, upload-time = "2025-08-05T01:42:55.911Z" }
sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/fc/5eccc86b83c5ced3a3bca071d250a86ccafa4ff17546cf781deb7758ab74/llama_stack_client-0.2.17-py3-none-any.whl", hash = "sha256:336c32f8688700ff64717b8109f405dc87a990fbe310c2027ac9ed6d39d67d16", size = 350329, upload-time = "2025-08-05T01:42:54.381Z" },
{ url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" },
]
[[package]]