Hardik Shah 2025-06-26 16:10:38 -07:00
commit a1033ba805
184 changed files with 1691 additions and 1138 deletions

View file

@ -11,6 +11,8 @@ on:
- 'llama_stack/distribution/*.sh' - 'llama_stack/distribution/*.sh'
- '.github/workflows/providers-build.yml' - '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**' - 'llama_stack/templates/**'
- 'pyproject.toml'
pull_request: pull_request:
paths: paths:
- 'llama_stack/cli/stack/build.py' - 'llama_stack/cli/stack/build.py'
@ -19,6 +21,7 @@ on:
- 'llama_stack/distribution/*.sh' - 'llama_stack/distribution/*.sh'
- '.github/workflows/providers-build.yml' - '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**' - 'llama_stack/templates/**'
- 'pyproject.toml'
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}

View file

@ -7390,6 +7390,147 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"OpenAIResponseAnnotationCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "url_citation",
"default": "url_citation"
},
"end_index": {
"type": "integer"
},
"start_index": {
"type": "integer"
},
"title": {
"type": "string"
},
"url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"end_index",
"start_index",
"title",
"url"
],
"title": "OpenAIResponseAnnotationCitation"
},
"OpenAIResponseAnnotationContainerFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "container_file_citation",
"default": "container_file_citation"
},
"container_id": {
"type": "string"
},
"end_index": {
"type": "integer"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"start_index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"container_id",
"end_index",
"file_id",
"filename",
"start_index"
],
"title": "OpenAIResponseAnnotationContainerFileCitation"
},
"OpenAIResponseAnnotationFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_citation",
"default": "file_citation"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"filename",
"index"
],
"title": "OpenAIResponseAnnotationFileCitation"
},
"OpenAIResponseAnnotationFilePath": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_path",
"default": "file_path"
},
"file_id": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"index"
],
"title": "OpenAIResponseAnnotationFilePath"
},
"OpenAIResponseAnnotations": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
"url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
"container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
}
},
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
@ -7764,6 +7905,10 @@
"type": "string", "type": "string",
"const": "web_search" "const": "web_search"
}, },
{
"type": "string",
"const": "web_search_preview"
},
{ {
"type": "string", "type": "string",
"const": "web_search_preview_2025_03_11" "const": "web_search_preview_2025_03_11"
@ -7855,12 +8000,19 @@
"type": "string", "type": "string",
"const": "output_text", "const": "output_text",
"default": "output_text" "default": "output_text"
},
"annotations": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseAnnotations"
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"text", "text",
"type" "type",
"annotations"
], ],
"title": "OpenAIResponseOutputMessageContentOutputText" "title": "OpenAIResponseOutputMessageContentOutputText"
}, },

View file

@ -5263,6 +5263,106 @@ components:
- event_type - event_type
- turn_id - turn_id
title: AgentTurnResponseTurnStartPayload title: AgentTurnResponseTurnStartPayload
OpenAIResponseAnnotationCitation:
type: object
properties:
type:
type: string
const: url_citation
default: url_citation
end_index:
type: integer
start_index:
type: integer
title:
type: string
url:
type: string
additionalProperties: false
required:
- type
- end_index
- start_index
- title
- url
title: OpenAIResponseAnnotationCitation
"OpenAIResponseAnnotationContainerFileCitation":
type: object
properties:
type:
type: string
const: container_file_citation
default: container_file_citation
container_id:
type: string
end_index:
type: integer
file_id:
type: string
filename:
type: string
start_index:
type: integer
additionalProperties: false
required:
- type
- container_id
- end_index
- file_id
- filename
- start_index
title: >-
OpenAIResponseAnnotationContainerFileCitation
OpenAIResponseAnnotationFileCitation:
type: object
properties:
type:
type: string
const: file_citation
default: file_citation
file_id:
type: string
filename:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- filename
- index
title: OpenAIResponseAnnotationFileCitation
OpenAIResponseAnnotationFilePath:
type: object
properties:
type:
type: string
const: file_path
default: file_path
file_id:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- index
title: OpenAIResponseAnnotationFilePath
OpenAIResponseAnnotations:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
discriminator:
propertyName: type
mapping:
file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@ -5488,6 +5588,8 @@ components:
oneOf: oneOf:
- type: string - type: string
const: web_search const: web_search
- type: string
const: web_search_preview
- type: string - type: string
const: web_search_preview_2025_03_11 const: web_search_preview_2025_03_11
default: web_search default: web_search
@ -5547,10 +5649,15 @@ components:
type: string type: string
const: output_text const: output_text
default: output_text default: output_text
annotations:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseAnnotations'
additionalProperties: false additionalProperties: false
required: required:
- text - text
- type - type
- annotations
title: >- title: >-
OpenAIResponseOutputMessageContentOutputText OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageFileSearchToolCall": "OpenAIResponseOutputMessageFileSearchToolCall":

View file

@ -18,7 +18,7 @@ providers:
- provider_id: ollama - provider_id: ollama
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:http://localhost:11434} url: ${env.OLLAMA_URL:=http://localhost:11434}
vector_io: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
@ -26,7 +26,7 @@ providers:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -38,7 +38,7 @@ providers:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
telemetry: telemetry:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
@ -46,7 +46,7 @@ providers:
metadata_store: metadata_store:
namespace: null namespace: null
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
@ -85,7 +85,7 @@ providers:
# config is a dictionary that contains the configuration for the provider. # config is a dictionary that contains the configuration for the provider.
# in this case, the configuration is the url of the ollama server # in this case, the configuration is the url of the ollama server
config: config:
url: ${env.OLLAMA_URL:http://localhost:11434} url: ${env.OLLAMA_URL:=http://localhost:11434}
``` ```
A few things to note: A few things to note:
- A _provider instance_ is identified with an (id, type, configuration) triplet. - A _provider instance_ is identified with an (id, type, configuration) triplet.
@ -94,6 +94,95 @@ A few things to note:
- The configuration dictionary is provider-specific. - The configuration dictionary is provider-specific.
- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
### Environment Variable Substitution
Llama Stack supports environment variable substitution in configuration values using the
`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide
different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html)
and follows similar patterns.
#### Basic Syntax
The basic syntax for environment variable substitution is:
```yaml
config:
api_key: ${env.API_KEY}
url: ${env.SERVICE_URL}
```
If the environment variable is not set, the server will raise an error during startup.
#### Default Values
You can provide default values using the `:=` operator:
```yaml
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
port: ${env.PORT:=8321}
timeout: ${env.TIMEOUT:=60}
```
If an environment variable is not set, the value after `:=` is used as the default (for example, `http://localhost:11434` for `OLLAMA_URL` above).
Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set.
#### Conditional Values
You can use the `:+` operator to provide a value only when the environment variable is set:
```yaml
config:
# Only include this field if ENVIRONMENT is set
environment: ${env.ENVIRONMENT:+production}
```
If the environment variable is set, the value after `:+` is used. If it is not set, the field resolves to
`None` and is omitted.
An empty conditional such as `${env.ENVIRONMENT:+}` is also supported: the field is omitted when the environment
variable is not set. This makes a field optional by default and lets you enable it at runtime when desired.
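The resolution rule can be sketched in a few lines of Python. This is only an illustration of the behavior described above, not Llama Stack's actual implementation; the helper name `resolve_conditional` is hypothetical, though the regular expression mirrors the `:+` handling added in this commit.
```python
import os
import re

def resolve_conditional(expr: str) -> str | None:
    """Resolve a single ${env.VAR:+value} expression (illustrative only)."""
    match = re.fullmatch(r"\$\{env\.([A-Z0-9_]+):\+([^}]*)\}", expr)
    if match is None:
        raise ValueError(f"not a conditional expression: {expr}")
    var_name, value_if_set = match.group(1), match.group(2)
    # The value after ':+' is used only when the variable is set;
    # otherwise the field resolves to None and is effectively omitted.
    return value_if_set if os.environ.get(var_name) else None

os.environ["ENVIRONMENT"] = "staging"
print(resolve_conditional("${env.ENVIRONMENT:+production}"))  # -> 'production'
os.environ.pop("ENVIRONMENT")
print(resolve_conditional("${env.ENVIRONMENT:+production}"))  # -> None
```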
#### Examples
Here are some common patterns:
```yaml
# Required environment variable (will error if not set)
api_key: ${env.OPENAI_API_KEY}
# Optional with default
base_url: ${env.API_BASE_URL:=https://api.openai.com/v1}
# Conditional field
debug_mode: ${env.DEBUG:+true}
# Optional field that becomes None if not set
optional_token: ${env.OPTIONAL_TOKEN:+}
```
#### Runtime Override
You can override environment variables at runtime when starting the server:
```bash
# Override specific environment variables
llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
# Or set them in your shell
export API_KEY=sk-123
export BASE_URL=https://custom-api.com
llama stack run --config run.yaml
```
#### Type Safety
The environment variable substitution system is type-safe (see the sketch after this list):
- String values remain strings
- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None`
- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer)
- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean)
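As a rough sketch of these rules, the following mirrors the string-to-type conversion this commit applies to resolved values; the function name `convert_resolved_value` is illustrative and not part of the Llama Stack API.
```python
def convert_resolved_value(value: str) -> bool | int | float | str | None:
    """Coerce a resolved substitution string to a typed value (illustrative only)."""
    if value == "":
        return None            # empty conditional values resolve to None
    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    try:
        return int(value)      # e.g. "${env.PORT:=8321}" resolves to 8321
    except ValueError:
        pass
    try:
        return float(value)
    except ValueError:
        return value           # anything else stays a string

print(convert_resolved_value("8321"))   # 8321
print(convert_resolved_value("false"))  # False
print(convert_resolved_value(""))       # None
```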
## Resources ## Resources
Finally, let's look at the `models` section: Finally, let's look at the `models` section:
@ -152,7 +241,7 @@ server:
config: config:
jwks: jwks:
uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" uri: "https://kubernetes.default.svc:8443/openid/v1/jwks"
token: "${env.TOKEN:}" token: "${env.TOKEN:+}"
key_recheck_period: 3600 key_recheck_period: 3600
tls_cafile: "/path/to/ca.crt" tls_cafile: "/path/to/ca.crt"
issuer: "https://kubernetes.default.svc" issuer: "https://kubernetes.default.svc"
@ -396,12 +485,12 @@ providers:
- provider_id: vllm-0 - provider_id: vllm-0
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:http://localhost:8000} url: ${env.VLLM_URL:=http://localhost:8000}
# this vLLM server serves the llama-guard model (e.g., llama-guard:3b) # this vLLM server serves the llama-guard model (e.g., llama-guard:3b)
- provider_id: vllm-1 - provider_id: vllm-1
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.SAFETY_VLLM_URL:http://localhost:8001} url: ${env.SAFETY_VLLM_URL:=http://localhost:8001}
... ...
models: models:
- metadata: {} - metadata: {}

View file

@ -15,10 +15,10 @@ data:
- provider_id: vllm-inference - provider_id: vllm-inference
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:http://localhost:8000/v1} url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: vllm-safety - provider_id: vllm-safety
provider_type: remote::vllm provider_type: remote::vllm
config: config:
@ -30,10 +30,10 @@ data:
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
vector_io: vector_io:
- provider_id: ${env.ENABLE_CHROMADB+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:} url: ${env.CHROMADB_URL:+}
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -45,34 +45,34 @@ data:
config: config:
persistence_store: persistence_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store: responses_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry: telemetry:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: ${env.OTEL_SERVICE_NAME:} service_name: ${env.OTEL_SERVICE_NAME:+}
sinks: ${env.TELEMETRY_SINKS:console} sinks: ${env.TELEMETRY_SINKS:console}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
config: config:
api_key: ${env.BRAVE_SEARCH_API_KEY:} api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: tavily-search - provider_id: tavily-search
provider_type: remote::tavily-search provider_type: remote::tavily-search
config: config:
api_key: ${env.TAVILY_SEARCH_API_KEY:} api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
@ -82,19 +82,19 @@ data:
config: {} config: {}
metadata_store: metadata_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: llamastack_kvstore table_name: llamastack_kvstore
inference_store: inference_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
models: models:
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
@ -106,11 +106,11 @@ data:
provider_id: vllm-inference provider_id: vllm-inference
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
provider_id: vllm-safety provider_id: vllm-safety
model_type: llm model_type: llm
shields: shields:
- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
vector_dbs: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []

View file

@ -12,25 +12,25 @@ providers:
- provider_id: vllm-inference - provider_id: vllm-inference
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:http://localhost:8000/v1} url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: vllm-safety - provider_id: vllm-safety
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
vector_io: vector_io:
- provider_id: ${env.ENABLE_CHROMADB+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:} url: ${env.CHROMADB_URL:+}
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -42,34 +42,34 @@ providers:
config: config:
persistence_store: persistence_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store: responses_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry: telemetry:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: ${env.OTEL_SERVICE_NAME:} service_name: ${env.OTEL_SERVICE_NAME:+console}
sinks: ${env.TELEMETRY_SINKS:console} sinks: ${env.TELEMETRY_SINKS:+console}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
config: config:
api_key: ${env.BRAVE_SEARCH_API_KEY:} api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: tavily-search - provider_id: tavily-search
provider_type: remote::tavily-search provider_type: remote::tavily-search
config: config:
api_key: ${env.TAVILY_SEARCH_API_KEY:} api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
@ -79,19 +79,19 @@ providers:
config: {} config: {}
metadata_store: metadata_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: llamastack_kvstore table_name: llamastack_kvstore
inference_store: inference_store:
type: postgres type: postgres
host: ${env.POSTGRES_HOST:localhost} host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:5432} port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
models: models:
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
@ -103,11 +103,11 @@ models:
provider_id: vllm-inference provider_id: vllm-inference
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
provider_id: vllm-safety provider_id: vllm-safety
model_type: llm model_type: llm
shields: shields:
- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
vector_dbs: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .agents import * # noqa: F401 F403 from .agents import *

View file

@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
type: Literal["file_citation"] = "file_citation"
file_id: str
filename: str
index: int
@json_schema_type
class OpenAIResponseAnnotationCitation(BaseModel):
type: Literal["url_citation"] = "url_citation"
end_index: int
start_index: int
title: str
url: str
@json_schema_type
class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
type: Literal["container_file_citation"] = "container_file_citation"
container_id: str
end_index: int
file_id: str
filename: str
start_index: int
@json_schema_type
class OpenAIResponseAnnotationFilePath(BaseModel):
type: Literal["file_path"] = "file_path"
file_id: str
index: int
OpenAIResponseAnnotations = Annotated[
OpenAIResponseAnnotationFileCitation
| OpenAIResponseAnnotationCitation
| OpenAIResponseAnnotationContainerFileCitation
| OpenAIResponseAnnotationFilePath,
Field(discriminator="type"),
]
register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
@json_schema_type @json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel): class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str text: str
type: Literal["output_text"] = "output_text" type: Literal["output_text"] = "output_text"
annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
OpenAIResponseOutputMessageContent = Annotated[ OpenAIResponseOutputMessageContent = Annotated[
@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
register_schema(OpenAIResponseInput, name="OpenAIResponseInput") register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
# Must match type Literals of OpenAIResponseInputToolWebSearch below
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
@json_schema_type @json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel): class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" # Must match values of WebSearchToolTypes above
type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
"web_search"
)
# TODO: actually use search_context_size somewhere... # TODO: actually use search_context_size somewhere...
search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location # TODO: add user_location

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .batch_inference import * # noqa: F401 F403 from .batch_inference import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .benchmarks import * # noqa: F401 F403 from .benchmarks import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .datasetio import * # noqa: F401 F403 from .datasetio import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .datasets import * # noqa: F401 F403 from .datasets import *

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import Enum, StrEnum
from typing import Annotated, Any, Literal, Protocol from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
class DatasetPurpose(str, Enum): class DatasetPurpose(StrEnum):
""" """
Purpose of the dataset. Each purpose has a required input data schema. Purpose of the dataset. Each purpose has a required input data schema.

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .eval import * # noqa: F401 F403 from .eval import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .files import * # noqa: F401 F403 from .files import *

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from typing import Annotated, Literal, Protocol, runtime_checkable from typing import Annotated, Literal, Protocol, runtime_checkable
from fastapi import File, Form, Response, UploadFile from fastapi import File, Form, Response, UploadFile
@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod
# OpenAI Files API Models # OpenAI Files API Models
class OpenAIFilePurpose(str, Enum): class OpenAIFilePurpose(StrEnum):
""" """
Valid purpose values for OpenAI Files API. Valid purpose values for OpenAI Files API.
""" """

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .inference import * # noqa: F401 F403 from .inference import *

View file

@ -20,7 +20,7 @@ from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.apis.telemetry.telemetry import MetricResponseMixin from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
BuiltinTool, BuiltinTool,
StopReason, StopReason,

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .inspect import * # noqa: F401 F403 from .inspect import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .models import * # noqa: F401 F403 from .models import *

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from typing import Any, Literal, Protocol, runtime_checkable from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
@ -22,7 +22,7 @@ class CommonModelFields(BaseModel):
@json_schema_type @json_schema_type
class ModelType(str, Enum): class ModelType(StrEnum):
llm = "llm" llm = "llm"
embedding = "embedding" embedding = "embedding"

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .post_training import * # noqa: F401 F403 from .post_training import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .providers import * # noqa: F401 F403 from .providers import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .safety import * # noqa: F401 F403 from .safety import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .scoring import * # noqa: F401 F403 from .scoring import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .scoring_functions import * # noqa: F401 F403 from .scoring_functions import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .shields import * # noqa: F401 F403 from .shields import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .synthetic_data_generation import * # noqa: F401 F403 from .synthetic_data_generation import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .telemetry import * # noqa: F401 F403 from .telemetry import *

View file

@ -4,5 +4,5 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .rag_tool import * # noqa: F401 F403 from .rag_tool import *
from .tools import * # noqa: F401 F403 from .tools import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .vector_dbs import * # noqa: F401 F403 from .vector_dbs import *

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .vector_io import * # noqa: F401 F403 from .vector_io import *

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from typing import Self from typing import Self
from pydantic import BaseModel, model_validator from pydantic import BaseModel, model_validator
@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator
from .conditions import parse_conditions from .conditions import parse_conditions
class Action(str, Enum): class Action(StrEnum):
CREATE = "create" CREATE = "create"
READ = "read" READ = "read"
UPDATE = "update" UPDATE = "update"

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from pathlib import Path from pathlib import Path
from typing import Annotated, Any from typing import Annotated, Any
@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = "2" LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = "2" LLAMA_STACK_RUN_CONFIG_VERSION = 2
RoutingKey = str | list[str] RoutingKey = str | list[str]
@ -159,7 +159,7 @@ class LoggingConfig(BaseModel):
) )
class AuthProviderType(str, Enum): class AuthProviderType(StrEnum):
"""Supported authentication provider types.""" """Supported authentication provider types."""
OAUTH2_TOKEN = "oauth2_token" OAUTH2_TOKEN = "oauth2_token"
@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception):
pass pass
class QuotaPeriod(str, Enum): class QuotaPeriod(StrEnum):
DAY = "day" DAY = "day"
@ -229,7 +229,7 @@ class ServerConfig(BaseModel):
class StackRunConfig(BaseModel): class StackRunConfig(BaseModel):
version: str = LLAMA_STACK_RUN_CONFIG_VERSION version: int = LLAMA_STACK_RUN_CONFIG_VERSION
image_name: str = Field( image_name: str = Field(
..., ...,
@ -300,7 +300,7 @@ a default SQLite store will be used.""",
class BuildConfig(BaseModel): class BuildConfig(BaseModel):
version: str = LLAMA_STACK_BUILD_CONFIG_VERSION version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
image_type: str = Field( image_type: str = Field(

View file

@ -30,7 +30,13 @@ from llama_stack.apis.inference import (
ListOpenAIChatCompletionResponse, ListOpenAIChatCompletionResponse,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAICompletionWithInputMessages, OpenAICompletionWithInputMessages,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
Order, Order,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
@ -41,14 +47,6 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import Model, ModelType from llama_stack.apis.models import Model, ModelType
from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
from llama_stack.log import get_logger from llama_stack.log import get_logger

View file

@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse, QueryChunksResponse,
SearchRankingOptions, SearchRankingOptions,
VectorIO, VectorIO,
VectorStoreDeleteResponse,
VectorStoreListResponse,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_io.vector_io import (
VectorStoreChunkingStrategy, VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse, VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFileStatus, VectorStoreFileStatus,
VectorStoreListResponse,
VectorStoreObject,
VectorStoreSearchResponsePage,
) )
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

View file

@ -127,7 +127,12 @@ class EnvVarError(Exception):
def __init__(self, var_name: str, path: str = ""): def __init__(self, var_name: str, path: str = ""):
self.var_name = var_name self.var_name = var_name
self.path = path self.path = path
super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}") super().__init__(
f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
f"or ensure the environment variable is set."
)
def replace_env_vars(config: Any, path: str = "") -> Any: def replace_env_vars(config: Any, path: str = "") -> Any:
@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
return result return result
elif isinstance(config, str): elif isinstance(config, str):
# Updated pattern to support both default values (:) and conditional values (+) # Pattern supports bash-like syntax: := for default and :+ for conditional, with an optional value
pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}" pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
def get_env_var(match): def get_env_var(match: re.Match):
env_var = match.group(1) env_var = match.group(1)
operator = match.group(2) # ':' for default, '+' for conditional operator = match.group(2) # '=' for default, '+' for conditional
value_expr = match.group(3) value_expr = match.group(3)
env_value = os.environ.get(env_var) env_value = os.environ.get(env_var)
if operator == ":": # Default value syntax: ${env.FOO:default} if operator == "=": # Default value syntax: ${env.FOO:=default}
if not env_value: if not env_value:
if value_expr is None: # value_expr returns empty string (not None) when not matched
# This means ${env.FOO:=} is an error
if value_expr == "":
raise EnvVarError(env_var, path) raise EnvVarError(env_var, path)
else: else:
value = value_expr value = value_expr
else: else:
value = env_value value = env_value
elif operator == "+": # Conditional value syntax: ${env.FOO+value_if_set} elif operator == "+": # Conditional value syntax: ${env.FOO:+value_if_set}
if env_value: if env_value:
value = value_expr value = value_expr
else: else:
@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
return os.path.expanduser(value) return os.path.expanduser(value)
try: try:
return re.sub(pattern, get_env_var, config) result = re.sub(pattern, get_env_var, config)
return _convert_string_to_proper_type(result)
except EnvVarError as e: except EnvVarError as e:
raise EnvVarError(e.var_name, e.path) from None raise EnvVarError(e.var_name, e.path) from None
return config return config
def _convert_string_to_proper_type(value: str) -> Any:
# This might be tricky depending on what the config type is, if 'str | None' we are
# good, if 'str' we need to keep the empty string... 'str | None' is more common and
# providers config should be typed this way.
# TODO: we could try to load the config class and see if the config has a field with type 'str | None'
# and then convert the empty string to None or not
if value == "":
return None
lowered = value.lower()
if lowered == "true":
return True
elif lowered == "false":
return False
try:
return int(value)
except ValueError:
pass
try:
return float(value)
except ValueError:
pass
return value
def validate_env_pair(env_pair: str) -> tuple[str, str]: def validate_env_pair(env_pair: str) -> tuple[str, str]:
"""Validate and split an environment variable key-value pair.""" """Validate and split an environment variable key-value pair."""
try: try:

View file

@ -25,7 +25,7 @@ class LlamaStackApi:
def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
"""Run scoring on a single row""" """Run scoring on a single row"""
if not scoring_params: if not scoring_params:
scoring_params = {fn_id: None for fn_id in scoring_function_ids} scoring_params = dict.fromkeys(scoring_function_ids)
return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)

View file

@ -33,7 +33,7 @@ CATEGORIES = [
] ]
# Initialize category levels with default level # Initialize category levels with default level
_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} _category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
def config_to_category_levels(category: str, level: str): def config_to_category_levels(category: str, level: str):

View file

@ -5,7 +5,7 @@
# the root directory of this source tree. # the root directory of this source tree.
import base64 import base64
from enum import Enum from enum import Enum, StrEnum
from io import BytesIO from io import BytesIO
from typing import Annotated, Any, Literal from typing import Annotated, Any, Literal
@ -171,7 +171,7 @@ class GenerationResult(BaseModel):
ignore_token: bool ignore_token: bool
class QuantizationMode(str, Enum): class QuantizationMode(StrEnum):
none = "none" none = "none"
fp8_mixed = "fp8_mixed" fp8_mixed = "fp8_mixed"
int4_mixed = "int4_mixed" int4_mixed = "int4_mixed"

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from typing import Any, Protocol from typing import Any, Protocol
from urllib.parse import urlparse from urllib.parse import urlparse
@ -225,7 +225,7 @@ def remote_provider_spec(
) )
class HealthStatus(str, Enum): class HealthStatus(StrEnum):
OK = "OK" OK = "OK"
ERROR = "Error" ERROR = "Error"
NOT_IMPLEMENTED = "Not Implemented" NOT_IMPLEMENTED = "Not Implemented"

View file

@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
WebSearchToolTypes,
) )
from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.common.content_types import TextContentItem
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference import (
Inference, Inference,
OpenAIAssistantMessageParam, OpenAIAssistantMessageParam,
OpenAIChatCompletion, OpenAIChatCompletion,
@ -583,7 +584,7 @@ class OpenAIResponsesImpl:
from llama_stack.apis.agents.openai_responses import ( from llama_stack.apis.agents.openai_responses import (
MCPListToolsTool, MCPListToolsTool,
) )
from llama_stack.apis.tools.tools import Tool from llama_stack.apis.tools import Tool
mcp_tool_to_server = {} mcp_tool_to_server = {}
@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
# TODO: Handle other tool types # TODO: Handle other tool types
if input_tool.type == "function": if input_tool.type == "function":
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
elif input_tool.type == "web_search": elif input_tool.type in WebSearchToolTypes:
tool_name = "web_search" tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name) tool = await self.tool_groups_api.get_tool(tool_name)
if not tool: if not tool:

View file

@ -208,7 +208,7 @@ class MetaReferenceEvalImpl(
for scoring_fn_id in scoring_functions for scoring_fn_id in scoring_functions
} }
else: else:
scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} scoring_functions_dict = dict.fromkeys(scoring_functions)
score_response = await self.scoring_api.score( score_response = await self.scoring_api.score(
input_rows=score_input_rows, scoring_functions=scoring_functions_dict input_rows=score_input_rows, scoring_functions=scoring_functions_dict

View file

@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return { return {
"storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
"metadata_store": SqliteSqlStoreConfig.sample_run_config( "metadata_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__, __distro_dir__=__distro_dir__,
db_name="files_metadata.db", db_name="files_metadata.db",

View file

@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel):
def sample_run_config( def sample_run_config(
cls, cls,
model: str = "Llama3.2-3B-Instruct", model: str = "Llama3.2-3B-Instruct",
checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}",
quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}", quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}",
model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}",
max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}",
max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}",
**kwargs, **kwargs,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {

View file

@ -44,10 +44,10 @@ class VLLMConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return { return {
"tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}",
"max_tokens": "${env.MAX_TOKENS:4096}", "max_tokens": "${env.MAX_TOKENS:=4096}",
"max_model_len": "${env.MAX_MODEL_LEN:4096}", "max_model_len": "${env.MAX_MODEL_LEN:=4096}",
"max_num_seqs": "${env.MAX_NUM_SEQS:4}", "max_num_seqs": "${env.MAX_NUM_SEQS:=4}",
"enforce_eager": "${env.ENFORCE_EAGER:False}", "enforce_eager": "${env.ENFORCE_EAGER:=False}",
"gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}",
} }

View file

@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"openai_api_key": "${env.OPENAI_API_KEY:}", "openai_api_key": "${env.OPENAI_API_KEY:+}",
} }

View file

@ -7,7 +7,7 @@ from typing import Any
from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference.inference import Inference from llama_stack.apis.inference import Inference
from llama_stack.apis.scoring import ( from llama_stack.apis.scoring import (
ScoreBatchResponse, ScoreBatchResponse,
ScoreResponse, ScoreResponse,

View file

@ -6,7 +6,7 @@
import re import re
from typing import Any from typing import Any
from llama_stack.apis.inference.inference import Inference, UserMessage from llama_stack.apis.inference import Inference, UserMessage
from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from enum import Enum from enum import StrEnum
from typing import Any from typing import Any
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator
@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator
from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
class TelemetrySink(str, Enum): class TelemetrySink(StrEnum):
OTEL_TRACE = "otel_trace" OTEL_TRACE = "otel_trace"
OTEL_METRIC = "otel_metric" OTEL_METRIC = "otel_metric"
SQLITE = "sqlite" SQLITE = "sqlite"
@ -20,12 +20,12 @@ class TelemetrySink(str, Enum):
class TelemetryConfig(BaseModel): class TelemetryConfig(BaseModel):
otel_trace_endpoint: str = Field( otel_trace_endpoint: str | None = Field(
default="http://localhost:4318/v1/traces", default=None,
description="The OpenTelemetry collector endpoint URL for traces", description="The OpenTelemetry collector endpoint URL for traces",
) )
otel_metric_endpoint: str = Field( otel_metric_endpoint: str | None = Field(
default="http://localhost:4318/v1/metrics", default=None,
description="The OpenTelemetry collector endpoint URL for metrics", description="The OpenTelemetry collector endpoint URL for metrics",
) )
service_name: str = Field( service_name: str = Field(
@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
return { return {
"service_name": "${env.OTEL_SERVICE_NAME:\u200b}", "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
"sinks": "${env.TELEMETRY_SINKS:console,sqlite}", "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
"sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
} }

View file

@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.trace import Span from opentelemetry.trace import Span
from opentelemetry.trace.span import format_span_id, format_trace_id from opentelemetry.trace.span import format_span_id, format_trace_id
from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER
class SQLiteSpanProcessor(SpanProcessor): class SQLiteSpanProcessor(SpanProcessor):
def __init__(self, conn_string): def __init__(self, conn_string):
@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor):
( (
trace_id, trace_id,
service_name, service_name,
(span_id if span.attributes.get("__root_span__") == "true" else None), (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None),
datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(), datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(), datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
), ),

View file

@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
trace.set_tracer_provider(provider) trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider _TRACER_PROVIDER = provider
if TelemetrySink.OTEL_TRACE in self.config.sinks: if TelemetrySink.OTEL_TRACE in self.config.sinks:
if self.config.otel_trace_endpoint is None:
raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
span_exporter = OTLPSpanExporter( span_exporter = OTLPSpanExporter(
endpoint=self.config.otel_trace_endpoint, endpoint=self.config.otel_trace_endpoint,
) )
span_processor = BatchSpanProcessor(span_exporter) span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor) trace.get_tracer_provider().add_span_processor(span_processor)
if TelemetrySink.OTEL_METRIC in self.config.sinks: if TelemetrySink.OTEL_METRIC in self.config.sinks:
if self.config.otel_metric_endpoint is None:
raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
metric_reader = PeriodicExportingMetricReader( metric_reader = PeriodicExportingMetricReader(
OTLPMetricExporter( OTLPMetricExporter(
endpoint=self.config.otel_metric_endpoint, endpoint=self.config.otel_metric_endpoint,
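
With the endpoints made optional above, the adapter now fails fast when a sink is enabled but its endpoint is missing. A minimal sketch of the same guard pattern, assuming a config shaped like TelemetryConfig; require_endpoint is an illustrative helper, not part of the codebase:

def require_endpoint(endpoint: str | None, sink: str) -> str:
    # Raise a clear configuration error instead of handing None to the OTLP exporter.
    if endpoint is None:
        raise ValueError(f"{sink} endpoint is required when that sink is enabled")
    return endpoint

# usage sketch:
# exporter = OTLPSpanExporter(endpoint=require_endpoint(config.otel_trace_endpoint, "otel_trace"))
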

View file

@ -16,8 +16,7 @@ import numpy as np
from numpy.typing import NDArray from numpy.typing import NDArray
from llama_stack.apis.files import Files from llama_stack.apis.files import Files
from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.inference.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import ( from llama_stack.apis.vector_io import (
Chunk, Chunk,

View file

@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return { return {
"path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
} }

View file

@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return { return {
"db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
} }

View file

@ -15,8 +15,8 @@ import numpy as np
import sqlite_vec import sqlite_vec
from numpy.typing import NDArray from numpy.typing import NDArray
from llama_stack.apis.files.files import Files from llama_stack.apis.files import Files
from llama_stack.apis.inference.inference import Inference from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import ( from llama_stack.apis.vector_io import (
Chunk, Chunk,
@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
score_range = max_score - min_score score_range = max_score - min_score
if score_range > 0: if score_range > 0:
return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
return {doc_id: 1.0 for doc_id in scores} return dict.fromkeys(scores, 1.0)
def _weighted_rerank( def _weighted_rerank(
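
The rewritten fallback is a behavior-preserving idiom: dict.fromkeys(scores, 1.0) builds the same mapping as the comprehension it replaces. A quick check with made-up scores:

scores = {"doc-a": 0.7, "doc-b": 0.7}
# With identical scores the range is zero, so every document falls back to 1.0.
assert dict.fromkeys(scores, 1.0) == {doc_id: 1.0 for doc_id in scores}
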

View file

@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference, api=Api.inference,
adapter=AdapterSpec( adapter=AdapterSpec(
adapter_type="ollama", adapter_type="ollama",
pip_packages=["ollama", "aiohttp"], pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
module="llama_stack.providers.remote.inference.ollama", module="llama_stack.providers.remote.inference.ollama",
), ),

View file

@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.safety, api=Api.safety,
adapter=AdapterSpec( adapter=AdapterSpec(
adapter_type="sambanova", adapter_type="sambanova",
pip_packages=["litellm"], pip_packages=["litellm", "requests"],
module="llama_stack.providers.remote.safety.sambanova", module="llama_stack.providers.remote.safety.sambanova",
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",

View file

@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec( InlineProviderSpec(
api=Api.scoring, api=Api.scoring,
provider_type="inline::basic", provider_type="inline::basic",
pip_packages=[], pip_packages=["requests"],
module="llama_stack.providers.inline.scoring.basic", module="llama_stack.providers.inline.scoring.basic",
config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
api_dependencies=[ api_dependencies=[

View file

@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"api_key": "${env.NVIDIA_API_KEY:}", "api_key": "${env.NVIDIA_API_KEY:+}",
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
"project_id": "${env.NVIDIA_PROJECT_ID:test-project}", "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
"datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
} }

View file

@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter:
Returns: Returns:
Dataset Dataset
""" """
## add warnings for unsupported params # add warnings for unsupported params
request_body = { request_body = {
"name": dataset_def.identifier, "name": dataset_def.identifier,
"namespace": self.config.dataset_namespace, "namespace": self.config.dataset_namespace,

View file

@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}",
} }

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,
) )

View file

@ -24,6 +24,12 @@ from llama_stack.apis.inference import (
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
ResponseFormatType, ResponseFormatType,
SamplingParams, SamplingParams,
@ -33,14 +39,6 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,
) )

View file

@ -9,7 +9,7 @@ from typing import Any
from openai import AsyncOpenAI from openai import AsyncOpenAI
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference import (
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIChatCompletionChunk, OpenAIChatCompletionChunk,
OpenAIChoiceDelta, OpenAIChoiceDelta,

View file

@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
"api_key": "${env.NVIDIA_API_KEY:}", "api_key": "${env.NVIDIA_API_KEY:+}",
"append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
} }

View file

@ -29,20 +29,18 @@ from llama_stack.apis.inference import (
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
ToolChoice, ToolChoice,
ToolConfig, ToolConfig,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import Model, ModelType from llama_stack.apis.models import Model, ModelType
from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference import ( from llama_stack.providers.utils.inference import (

View file

@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "url": url,

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,

View file

@ -32,15 +32,6 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
LogProbConfig, LogProbConfig,
Message, Message,
ResponseFormat,
SamplingParams,
TextTruncation,
ToolChoice,
ToolConfig,
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIChatCompletionChunk, OpenAIChatCompletionChunk,
OpenAICompletion, OpenAICompletion,
@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import (
OpenAIEmbeddingUsage, OpenAIEmbeddingUsage,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseFormatParam, OpenAIResponseFormatParam,
ResponseFormat,
SamplingParams,
TextTruncation,
ToolChoice,
ToolConfig,
ToolDefinition,
ToolPromptFormat,
) )
from llama_stack.apis.models import Model, ModelType from llama_stack.apis.models import Model, ModelType
from llama_stack.log import get_logger from llama_stack.log import get_logger

View file

@ -6,7 +6,7 @@
from dataclasses import dataclass from dataclasses import dataclass
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,
) )

View file

@ -10,7 +10,7 @@ from typing import Any
from openai import AsyncOpenAI from openai import AsyncOpenAI
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference import (
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIChatCompletionChunk, OpenAIChatCompletionChunk,
OpenAICompletion, OpenAICompletion,

View file

@ -19,7 +19,12 @@ from llama_stack.apis.inference import (
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
@ -28,13 +33,6 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

View file

@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return { return {
"url": "${env.RUNPOD_URL:}", "url": "${env.RUNPOD_URL:+}",
"api_token": "${env.RUNPOD_API_TOKEN:}", "api_token": "${env.RUNPOD_API_TOKEN:+}",
} }

View file

@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator
from openai import OpenAI from openai import OpenAI
from llama_stack.apis.inference import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403
from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse from llama_stack.apis.inference import OpenAIEmbeddingsResponse
# from llama_stack.providers.datatypes import ModelsProtocolPrivate # from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

View file

@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.together.xyz/v1", "url": "https://api.together.xyz/v1",
"api_key": "${env.TOGETHER_API_KEY:}", "api_key": "${env.TOGETHER_API_KEY:+}",
} }

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry, ProviderModelEntry,

View file

@ -23,7 +23,12 @@ from llama_stack.apis.inference import (
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
ResponseFormatType, ResponseFormatType,
SamplingParams, SamplingParams,
@ -33,13 +38,6 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

View file

@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel):
@classmethod @classmethod
def validate_tls_verify(cls, v): def validate_tls_verify(cls, v):
if isinstance(v, str): if isinstance(v, str):
# Check if it's a boolean string
if v.lower() in ("true", "false"):
return v.lower() == "true"
# Otherwise, treat it as a cert path # Otherwise, treat it as a cert path
cert_path = Path(v).expanduser().resolve() cert_path = Path(v).expanduser().resolve()
if not cert_path.exists(): if not cert_path.exists():
@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
): ):
return { return {
"url": url, "url": url,
"max_tokens": "${env.VLLM_MAX_TOKENS:4096}", "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
"api_token": "${env.VLLM_API_TOKEN:fake}", "api_token": "${env.VLLM_API_TOKEN:=fake}",
"tls_verify": "${env.VLLM_TLS_VERIFY:true}", "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
} }

View file

@ -38,9 +38,13 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAICompletion,
OpenAIEmbeddingData, OpenAIEmbeddingData,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage, OpenAIEmbeddingUsage,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
@ -49,12 +53,6 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAICompletion,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import Model, ModelType from llama_stack.apis.models import Model, ModelType
from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.models.llama.sku_list import all_registered_models

View file

@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
"api_key": "${env.WATSONX_API_KEY:}", "api_key": "${env.WATSONX_API_KEY:+}",
"project_id": "${env.WATSONX_PROJECT_ID:}", "project_id": "${env.WATSONX_PROJECT_ID:+}",
} }

View file

@ -18,10 +18,16 @@ from llama_stack.apis.inference import (
CompletionRequest, CompletionRequest,
EmbeddingsResponse, EmbeddingsResponse,
EmbeddingTaskType, EmbeddingTaskType,
GreedySamplingStrategy,
Inference, Inference,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
@ -29,14 +35,6 @@ from llama_stack.apis.inference import (
ToolConfig, ToolConfig,
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
)
from llama_stack.apis.inference.inference import (
GreedySamplingStrategy,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIMessageParam,
OpenAIResponseFormatParam,
TopKSamplingStrategy, TopKSamplingStrategy,
TopPSamplingStrategy, TopPSamplingStrategy,
) )

View file

@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"api_key": "${env.NVIDIA_API_KEY:}", "api_key": "${env.NVIDIA_API_KEY:+}",
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
"project_id": "${env.NVIDIA_PROJECT_ID:test-project}", "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
"customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}",
} }

View file

@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}",
"config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}",
} }

View file

@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return { return {
"api_key": "${env.BRAVE_SEARCH_API_KEY:}", "api_key": "${env.BRAVE_SEARCH_API_KEY:+}",
"max_results": 3, "max_results": 3,
} }

View file

@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return { return {
"api_key": "${env.TAVILY_SEARCH_API_KEY:}", "api_key": "${env.TAVILY_SEARCH_API_KEY:+}",
"max_results": 3, "max_results": 3,
} }

View file

@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return { return {
"api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}",
} }

View file

@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
host: str = "${env.PGVECTOR_HOST:localhost}", host: str = "${env.PGVECTOR_HOST:=localhost}",
port: int = "${env.PGVECTOR_PORT:5432}", port: int = "${env.PGVECTOR_PORT:=5432}",
db: str = "${env.PGVECTOR_DB}", db: str = "${env.PGVECTOR_DB}",
user: str = "${env.PGVECTOR_USER}", user: str = "${env.PGVECTOR_USER}",
password: str = "${env.PGVECTOR_PASSWORD}", password: str = "${env.PGVECTOR_PASSWORD}",

View file

@ -23,6 +23,13 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
LogProbConfig, LogProbConfig,
Message, Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat, ResponseFormat,
SamplingParams, SamplingParams,
TextTruncation, TextTruncation,
@ -31,16 +38,7 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.inference.inference import ( from llama_stack.apis.models import Model
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models.models import Model
from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

View file

@ -8,7 +8,7 @@ from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference import ( from llama_stack.providers.utils.inference import (

View file

@ -95,27 +95,25 @@ from llama_stack.apis.inference import (
CompletionResponse, CompletionResponse,
CompletionResponseStreamChunk, CompletionResponseStreamChunk,
GreedySamplingStrategy, GreedySamplingStrategy,
Message,
SamplingParams,
SystemMessage,
TokenLogProbs,
ToolChoice,
ToolResponseMessage,
TopKSamplingStrategy,
TopPSamplingStrategy,
UserMessage,
)
from llama_stack.apis.inference.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
Message,
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAICompletion, OpenAICompletion,
OpenAICompletionChoice, OpenAICompletionChoice,
OpenAIEmbeddingData, OpenAIEmbeddingData,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseFormatParam, OpenAIResponseFormatParam,
SamplingParams,
SystemMessage,
TokenLogProbs,
ToolChoice,
ToolConfig, ToolConfig,
ToolResponseMessage,
TopKSamplingStrategy,
TopPSamplingStrategy,
UserMessage,
) )
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference import (
OpenAIChoice as OpenAIChatCompletionChoice, OpenAIChoice as OpenAIChatCompletionChoice,
) )
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (

View file

@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig):
return { return {
"type": "redis", "type": "redis",
"namespace": None, "namespace": None,
"host": "${env.REDIS_HOST:localhost}", "host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:6379}", "port": "${env.REDIS_PORT:=6379}",
} }
@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig):
return { return {
"type": "sqlite", "type": "sqlite",
"namespace": None, "namespace": None,
"db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
} }
@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig):
return { return {
"type": "postgres", "type": "postgres",
"namespace": None, "namespace": None,
"host": "${env.POSTGRES_HOST:localhost}", "host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:5432}", "port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:llamastack}", "db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:llamastack}", "user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:llamastack}", "password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
} }
@classmethod @classmethod
@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig):
return { return {
"type": "mongodb", "type": "mongodb",
"namespace": None, "namespace": None,
"host": "${env.MONGODB_HOST:localhost}", "host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:5432}", "port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}", "db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}", "user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}", "password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}", "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
} }

View file

@ -12,8 +12,7 @@ import uuid
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any from typing import Any
from llama_stack.apis.files import Files from llama_stack.apis.files import Files, OpenAIFileObject
from llama_stack.apis.files.files import OpenAIFileObject
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import ( from llama_stack.apis.vector_io import (
Chunk, Chunk,

View file

@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return cls( return cls(
type="sqlite", type="sqlite",
db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
) )
@property @property
@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def sample_run_config(cls, **kwargs): def sample_run_config(cls, **kwargs):
return cls( return cls(
type="postgres", type="postgres",
host="${env.POSTGRES_HOST:localhost}", host="${env.POSTGRES_HOST:=localhost}",
port="${env.POSTGRES_PORT:5432}", port="${env.POSTGRES_PORT:=5432}",
db="${env.POSTGRES_DB:llamastack}", db="${env.POSTGRES_DB:=llamastack}",
user="${env.POSTGRES_USER:llamastack}", user="${env.POSTGRES_USER:=llamastack}",
password="${env.POSTGRES_PASSWORD:llamastack}", password="${env.POSTGRES_PASSWORD:=llamastack}",
) )

View file

@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000 INVALID_TRACE_ID = 0x00000000000000000000000000000000
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
# The logical root span may not be visible to this process if a parent context
# is passed in. The local root span is the first local span in a trace.
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
def trace_id_to_str(trace_id: int) -> str: def trace_id_to_str(trace_id: int) -> str:
@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont
trace_id = generate_trace_id() trace_id = generate_trace_id()
context = TraceContext(BACKGROUND_LOGGER, trace_id) context = TraceContext(BACKGROUND_LOGGER, trace_id)
attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) # Mark this span as the root for the trace for now. The processing of
# traceparent context if supplied comes later and will result in the
# ROOT_SPAN_MARKERS being removed. Also mark this as the 'local' root,
# i.e. the root of the spans originating in this process as this is
# needed to ensure that we insert this 'local' root span's id into
# the trace record in sqlite store.
attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
context.push_span(name, attributes) context.push_span(name, attributes)
CURRENT_TRACE_CONTEXT.set(context) CURRENT_TRACE_CONTEXT.set(context)
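
The marker added here is what the SQLite span processor (earlier in this diff) now reads back to decide which span id to store as the trace's local root. A small sketch of that read side, using only names that appear in the diff:

LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"

def local_root_span_id(attributes: dict, span_id: str) -> str | None:
    # Only the first span created in this process carries the local-root marker;
    # its id is the one recorded against the trace row.
    return span_id if attributes.get(LOCAL_ROOT_SPAN_MARKER) else None

assert local_root_span_id({LOCAL_ROOT_SPAN_MARKER: True}, "a1b2") == "a1b2"
assert local_root_span_id({}, "a1b2") is None
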

View file

@ -1,4 +1,4 @@
version: '2' version: 2
distribution_spec: distribution_spec:
description: Use AWS Bedrock for running LLM inference and safety description: Use AWS Bedrock for running LLM inference and safety
providers: providers:
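
Dropping the quotes changes the scalar type the templates declare: version: 2 loads as an integer while version: '2' loads as a string. A quick check, assuming PyYAML is available:

import yaml

assert yaml.safe_load("version: 2") == {"version": 2}
assert yaml.safe_load("version: '2'") == {"version": "2"}
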

View file

@ -1,4 +1,4 @@
version: '2' version: 2
image_name: bedrock image_name: bedrock
apis: apis:
- agents - agents
@ -22,7 +22,7 @@ providers:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
safety: safety:
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
@ -34,17 +34,17 @@ providers:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db
telemetry: telemetry:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}" service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
@ -52,7 +52,7 @@ providers:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
@ -60,14 +60,14 @@ providers:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -78,17 +78,17 @@ providers:
- provider_id: braintrust - provider_id: braintrust
provider_type: inline::braintrust provider_type: inline::braintrust
config: config:
openai_api_key: ${env.OPENAI_API_KEY:} openai_api_key: ${env.OPENAI_API_KEY:+}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
config: config:
api_key: ${env.BRAVE_SEARCH_API_KEY:} api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: tavily-search - provider_id: tavily-search
provider_type: remote::tavily-search provider_type: remote::tavily-search
config: config:
api_key: ${env.TAVILY_SEARCH_API_KEY:} api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
@ -98,10 +98,10 @@ providers:
config: {} config: {}
metadata_store: metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db
inference_store: inference_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db
models: models:
- metadata: {} - metadata: {}
model_id: meta.llama3-1-8b-instruct-v1:0 model_id: meta.llama3-1-8b-instruct-v1:0

View file

@ -1,4 +1,4 @@
version: '2' version: 2
distribution_spec: distribution_spec:
description: Use Cerebras for running LLM inference description: Use Cerebras for running LLM inference
providers: providers:

Some files were not shown because too many files have changed in this diff.