Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)

Merge branch 'vit' of https://github.com/hardikjshah/llama-stack into vit

This commit is contained in commit a1033ba805.
184 changed files with 1691 additions and 1138 deletions.
.github/workflows/providers-build.yml (3 changes)
@@ -11,6 +11,8 @@ on:
       - 'llama_stack/distribution/*.sh'
       - '.github/workflows/providers-build.yml'
+      - 'llama_stack/templates/**'
+      - 'pyproject.toml'
 
   pull_request:
     paths:
       - 'llama_stack/cli/stack/build.py'
@@ -19,6 +21,7 @@ on:
       - 'llama_stack/distribution/*.sh'
      - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
+      - 'pyproject.toml'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
docs/_static/llama-stack-spec.html (154 changes)
@@ -7390,6 +7390,147 @@
       ],
       "title": "AgentTurnResponseTurnStartPayload"
     },
+    "OpenAIResponseAnnotationCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "url_citation",
+          "default": "url_citation"
+        },
+        "end_index": {
+          "type": "integer"
+        },
+        "start_index": {
+          "type": "integer"
+        },
+        "title": {
+          "type": "string"
+        },
+        "url": {
+          "type": "string"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "end_index",
+        "start_index",
+        "title",
+        "url"
+      ],
+      "title": "OpenAIResponseAnnotationCitation"
+    },
+    "OpenAIResponseAnnotationContainerFileCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "container_file_citation",
+          "default": "container_file_citation"
+        },
+        "container_id": {
+          "type": "string"
+        },
+        "end_index": {
+          "type": "integer"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "filename": {
+          "type": "string"
+        },
+        "start_index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "container_id",
+        "end_index",
+        "file_id",
+        "filename",
+        "start_index"
+      ],
+      "title": "OpenAIResponseAnnotationContainerFileCitation"
+    },
+    "OpenAIResponseAnnotationFileCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "file_citation",
+          "default": "file_citation"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "filename": {
+          "type": "string"
+        },
+        "index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "file_id",
+        "filename",
+        "index"
+      ],
+      "title": "OpenAIResponseAnnotationFileCitation"
+    },
+    "OpenAIResponseAnnotationFilePath": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "file_path",
+          "default": "file_path"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "file_id",
+        "index"
+      ],
+      "title": "OpenAIResponseAnnotationFilePath"
+    },
+    "OpenAIResponseAnnotations": {
+      "oneOf": [
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+        }
+      ],
+      "discriminator": {
+        "propertyName": "type",
+        "mapping": {
+          "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
+          "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
+          "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
+          "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+        }
+      }
+    },
     "OpenAIResponseInput": {
       "oneOf": [
         {
@@ -7764,6 +7905,10 @@
           "type": "string",
           "const": "web_search"
         },
+        {
+          "type": "string",
+          "const": "web_search_preview"
+        },
         {
           "type": "string",
           "const": "web_search_preview_2025_03_11"
@@ -7855,12 +8000,19 @@
           "type": "string",
           "const": "output_text",
           "default": "output_text"
         },
+        "annotations": {
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/OpenAIResponseAnnotations"
+          }
+        }
       },
       "additionalProperties": false,
       "required": [
         "text",
-        "type"
+        "type",
+        "annotations"
       ],
       "title": "OpenAIResponseOutputMessageContentOutputText"
     },
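As an editorial illustration (not part of the diff): a content item that conforms to the updated `OpenAIResponseOutputMessageContentOutputText` schema, where `annotations` is now required and discriminated on `type`. Field values are invented for the example.

```python
# Sketch of a payload matching the updated schema; values are illustrative only.
output_text_content = {
    "type": "output_text",
    "text": "Llama Stack supports environment variable substitution [1].",
    "annotations": [
        {
            "type": "url_citation",  # discriminator value selects the citation schema
            "start_index": 0,
            "end_index": 11,
            "title": "Llama Stack documentation",
            "url": "https://llama-stack.readthedocs.io/",
        }
    ],
}
```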
docs/_static/llama-stack-spec.yaml (107 changes)
@@ -5263,6 +5263,106 @@ components:
         - event_type
         - turn_id
       title: AgentTurnResponseTurnStartPayload
+    OpenAIResponseAnnotationCitation:
+      type: object
+      properties:
+        type:
+          type: string
+          const: url_citation
+          default: url_citation
+        end_index:
+          type: integer
+        start_index:
+          type: integer
+        title:
+          type: string
+        url:
+          type: string
+      additionalProperties: false
+      required:
+        - type
+        - end_index
+        - start_index
+        - title
+        - url
+      title: OpenAIResponseAnnotationCitation
+    "OpenAIResponseAnnotationContainerFileCitation":
+      type: object
+      properties:
+        type:
+          type: string
+          const: container_file_citation
+          default: container_file_citation
+        container_id:
+          type: string
+        end_index:
+          type: integer
+        file_id:
+          type: string
+        filename:
+          type: string
+        start_index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - container_id
+        - end_index
+        - file_id
+        - filename
+        - start_index
+      title: >-
+        OpenAIResponseAnnotationContainerFileCitation
+    OpenAIResponseAnnotationFileCitation:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_citation
+          default: file_citation
+        file_id:
+          type: string
+        filename:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - filename
+        - index
+      title: OpenAIResponseAnnotationFileCitation
+    OpenAIResponseAnnotationFilePath:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_path
+          default: file_path
+        file_id:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - index
+      title: OpenAIResponseAnnotationFilePath
+    OpenAIResponseAnnotations:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+      discriminator:
+        propertyName: type
+        mapping:
+          file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+          url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+          container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+          file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
     OpenAIResponseInput:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@@ -5488,6 +5588,8 @@ components:
       oneOf:
         - type: string
           const: web_search
+        - type: string
+          const: web_search_preview
         - type: string
           const: web_search_preview_2025_03_11
       default: web_search
@@ -5547,10 +5649,15 @@ components:
           type: string
           const: output_text
           default: output_text
+        annotations:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseAnnotations'
       additionalProperties: false
       required:
         - text
         - type
+        - annotations
       title: >-
         OpenAIResponseOutputMessageContentOutputText
     "OpenAIResponseOutputMessageFileSearchToolCall":
@@ -18,7 +18,7 @@ providers:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -26,7 +26,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,7 +38,7 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -46,7 +46,7 @@ providers:
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
@@ -85,7 +85,7 @@ providers:
   # config is a dictionary that contains the configuration for the provider.
   # in this case, the configuration is the url of the ollama server
   config:
-    url: ${env.OLLAMA_URL:http://localhost:11434}
+    url: ${env.OLLAMA_URL:=http://localhost:11434}
 ```
 A few things to note:
 - A _provider instance_ is identified with an (id, type, configuration) triplet.
@@ -94,6 +94,95 @@ A few things to note:
 - The configuration dictionary is provider-specific.
 - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
 
+### Environment Variable Substitution
+
+Llama Stack supports environment variable substitution in configuration values using the
+`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide
+different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html)
+and follows similar patterns.
+
+#### Basic Syntax
+
+The basic syntax for environment variable substitution is:
+
+```yaml
+config:
+  api_key: ${env.API_KEY}
+  url: ${env.SERVICE_URL}
+```
+
+If the environment variable is not set, the server will raise an error during startup.
+
+#### Default Values
+
+You can provide default values using the `:=` operator:
+
+```yaml
+config:
+  url: ${env.OLLAMA_URL:=http://localhost:11434}
+  port: ${env.PORT:=8321}
+  timeout: ${env.TIMEOUT:=60}
+```
+
+If the environment variable is not set, the default value is used (for example, `http://localhost:11434` for `OLLAMA_URL` above).
+Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` raises an error when the environment variable is not set.
+
+#### Conditional Values
+
+You can use the `:+` operator to provide a value only when the environment variable is set:
+
+```yaml
+config:
+  # Only include this field if ENVIRONMENT is set
+  environment: ${env.ENVIRONMENT:+production}
+```
+
+If the environment variable is set, the value after `:+` is used. If it is not set, the field
+is omitted with a `None` value.
+`${env.ENVIRONMENT:+}` is also supported: the field is omitted when the environment variable is
+not set, which lets you declare an optional field and enable it at runtime when desired.
+A Python sketch of these semantics follows this section.
+
+#### Examples
+
+Here are some common patterns:
+
+```yaml
+# Required environment variable (will error if not set)
+api_key: ${env.OPENAI_API_KEY}
+
+# Optional with default
+base_url: ${env.API_BASE_URL:=https://api.openai.com/v1}
+
+# Conditional field
+debug_mode: ${env.DEBUG:+true}
+
+# Optional field that becomes None if not set
+optional_token: ${env.OPTIONAL_TOKEN:+}
+```
+
+#### Runtime Override
+
+You can override environment variables at runtime when starting the server:
+
+```bash
+# Override specific environment variables
+llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
+
+# Or set them in your shell
+export API_KEY=sk-123
+export BASE_URL=https://custom-api.com
+llama stack run --config run.yaml
+```
+
+#### Type Safety
+
+The environment variable substitution system is type-safe:
+
+- String values remain strings
+- Empty conditionals (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None`
+- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer)
+- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean)
+
 ## Resources
 
 Finally, let's look at the `models` section:
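Editorial illustration of the operators documented above. The actual implementation is `replace_env_vars` in `llama_stack/distribution/stack.py` (its diff appears later on this page); this is a minimal self-contained sketch, with the helper name and example values invented here.

```python
import os
import re

# Mirrors the := and :+ operators described in the docs above.
PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::([=+])([^}]*))?\}")

def substitute(text: str) -> str | None:
    def repl(m: re.Match) -> str:
        name, op, rhs = m.group(1), m.group(2), m.group(3)
        val = os.environ.get(name)
        if op == "=":                 # ${env.FOO:=default}
            if not val:
                if rhs == "":         # empty default is an error
                    raise ValueError(f"{name} not set and no default given")
                return rhs
            return val
        if op == "+":                 # ${env.FOO:+value_if_set}
            return rhs if val else ""
        if not val:                   # bare ${env.FOO} is required
            raise ValueError(f"{name} not set")
        return val

    result = PATTERN.sub(repl, text)
    return None if result == "" else result  # empty result becomes None

os.environ.pop("OLLAMA_URL", None)
print(substitute("${env.OLLAMA_URL:=http://localhost:11434}"))  # default used
print(substitute("${env.OPTIONAL_TOKEN:+}"))                    # -> None
```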
@@ -152,7 +241,7 @@ server:
     config:
       jwks:
         uri: "https://kubernetes.default.svc:8443/openid/v1/jwks"
-        token: "${env.TOKEN:}"
+        token: "${env.TOKEN:+}"
       key_recheck_period: 3600
       tls_cafile: "/path/to/ca.crt"
       issuer: "https://kubernetes.default.svc"
@@ -396,12 +485,12 @@ providers:
   - provider_id: vllm-0
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000}
+      url: ${env.VLLM_URL:=http://localhost:8000}
   # this vLLM server serves the llama-guard model (e.g., llama-guard:3b)
   - provider_id: vllm-1
     provider_type: remote::vllm
     config:
-      url: ${env.SAFETY_VLLM_URL:http://localhost:8001}
+      url: ${env.SAFETY_VLLM_URL:=http://localhost:8001}
 ...
 models:
 - metadata: {}
@@ -15,10 +15,10 @@ data:
     - provider_id: vllm-inference
       provider_type: remote::vllm
       config:
-        url: ${env.VLLM_URL:http://localhost:8000/v1}
-        max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-        api_token: ${env.VLLM_API_TOKEN:fake}
-        tls_verify: ${env.VLLM_TLS_VERIFY:true}
+        url: ${env.VLLM_URL:=http://localhost:8000/v1}
+        max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+        api_token: ${env.VLLM_API_TOKEN:=fake}
+        tls_verify: ${env.VLLM_TLS_VERIFY:=true}
     - provider_id: vllm-safety
       provider_type: remote::vllm
       config:
@@ -30,10 +30,10 @@ data:
       provider_type: inline::sentence-transformers
       config: {}
     vector_io:
-    - provider_id: ${env.ENABLE_CHROMADB+chromadb}
+    - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
       provider_type: remote::chromadb
       config:
-        url: ${env.CHROMADB_URL:}
+        url: ${env.CHROMADB_URL:+}
     safety:
     - provider_id: llama-guard
       provider_type: inline::llama-guard
@@ -45,34 +45,34 @@ data:
       config:
         persistence_store:
           type: postgres
-          host: ${env.POSTGRES_HOST:localhost}
-          port: ${env.POSTGRES_PORT:5432}
-          db: ${env.POSTGRES_DB:llamastack}
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:llamastack}
-          password: ${env.POSTGRES_PASSWORD:llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
         responses_store:
           type: postgres
-          host: ${env.POSTGRES_HOST:localhost}
-          port: ${env.POSTGRES_PORT:5432}
-          db: ${env.POSTGRES_DB:llamastack}
-          user: ${env.POSTGRES_USER:llamastack}
-          password: ${env.POSTGRES_PASSWORD:llamastack}
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
     telemetry:
     - provider_id: meta-reference
       provider_type: inline::meta-reference
       config:
-        service_name: ${env.OTEL_SERVICE_NAME:}
+        service_name: ${env.OTEL_SERVICE_NAME:+}
         sinks: ${env.TELEMETRY_SINKS:console}
     tool_runtime:
     - provider_id: brave-search
       provider_type: remote::brave-search
       config:
-        api_key: ${env.BRAVE_SEARCH_API_KEY:}
+        api_key: ${env.BRAVE_SEARCH_API_KEY:+}
         max_results: 3
     - provider_id: tavily-search
       provider_type: remote::tavily-search
       config:
-        api_key: ${env.TAVILY_SEARCH_API_KEY:}
+        api_key: ${env.TAVILY_SEARCH_API_KEY:+}
         max_results: 3
     - provider_id: rag-runtime
       provider_type: inline::rag-runtime
@@ -82,19 +82,19 @@ data:
       config: {}
     metadata_store:
       type: postgres
-      host: ${env.POSTGRES_HOST:localhost}
-      port: ${env.POSTGRES_PORT:5432}
-      db: ${env.POSTGRES_DB:llamastack}
-      user: ${env.POSTGRES_USER:llamastack}
-      password: ${env.POSTGRES_PASSWORD:llamastack}
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
       table_name: llamastack_kvstore
     inference_store:
       type: postgres
-      host: ${env.POSTGRES_HOST:localhost}
-      port: ${env.POSTGRES_PORT:5432}
-      db: ${env.POSTGRES_DB:llamastack}
-      user: ${env.POSTGRES_USER:llamastack}
-      password: ${env.POSTGRES_PASSWORD:llamastack}
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
     models:
     - metadata:
         embedding_dimension: 384
@@ -106,11 +106,11 @@ data:
       provider_id: vllm-inference
       model_type: llm
     - metadata: {}
-      model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+      model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
       provider_id: vllm-safety
       model_type: llm
     shields:
-    - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
     vector_dbs: []
     datasets: []
     scoring_fns: []
@@ -12,25 +12,25 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+      url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMADB_URL:}
+      url: ${env.CHROMADB_URL:+}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -42,34 +42,34 @@ providers:
     config:
       persistence_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
       responses_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:}
-      sinks: ${env.TELEMETRY_SINKS:console}
+      service_name: ${env.OTEL_SERVICE_NAME:+console}
+      sinks: ${env.TELEMETRY_SINKS:+console}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
      max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
@@ -79,19 +79,19 @@ providers:
     config: {}
   metadata_store:
     type: postgres
-    host: ${env.POSTGRES_HOST:localhost}
-    port: ${env.POSTGRES_PORT:5432}
-    db: ${env.POSTGRES_DB:llamastack}
-    user: ${env.POSTGRES_USER:llamastack}
-    password: ${env.POSTGRES_PASSWORD:llamastack}
+    host: ${env.POSTGRES_HOST:=localhost}
+    port: ${env.POSTGRES_PORT:=5432}
+    db: ${env.POSTGRES_DB:=llamastack}
+    user: ${env.POSTGRES_USER:=llamastack}
+    password: ${env.POSTGRES_PASSWORD:=llamastack}
     table_name: llamastack_kvstore
   inference_store:
     type: postgres
-    host: ${env.POSTGRES_HOST:localhost}
-    port: ${env.POSTGRES_PORT:5432}
-    db: ${env.POSTGRES_DB:llamastack}
-    user: ${env.POSTGRES_USER:llamastack}
-    password: ${env.POSTGRES_PASSWORD:llamastack}
+    host: ${env.POSTGRES_HOST:=localhost}
+    port: ${env.POSTGRES_PORT:=5432}
+    db: ${env.POSTGRES_DB:=llamastack}
+    user: ${env.POSTGRES_USER:=llamastack}
+    password: ${env.POSTGRES_PASSWORD:=llamastack}
 models:
 - metadata:
     embedding_dimension: 384
@@ -103,11 +103,11 @@ models:
   provider_id: vllm-inference
   model_type: llm
 - metadata: {}
-  model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
   provider_id: vllm-safety
   model_type: llm
 shields:
-- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .agents import *  # noqa: F401 F403
+from .agents import *
@@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
 
 
+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+    type: Literal["file_citation"] = "file_citation"
+    file_id: str
+    filename: str
+    index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+    type: Literal["url_citation"] = "url_citation"
+    end_index: int
+    start_index: int
+    title: str
+    url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+    type: Literal["container_file_citation"] = "container_file_citation"
+    container_id: str
+    end_index: int
+    file_id: str
+    filename: str
+    start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+    type: Literal["file_path"] = "file_path"
+    file_id: str
+    index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+    OpenAIResponseAnnotationFileCitation
+    | OpenAIResponseAnnotationCitation
+    | OpenAIResponseAnnotationContainerFileCitation
+    | OpenAIResponseAnnotationFilePath,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
 @json_schema_type
 class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     text: str
     type: Literal["output_text"] = "output_text"
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
 
 
 OpenAIResponseOutputMessageContent = Annotated[
@@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
 
 
+# Must match type Literals of OpenAIResponseInputToolWebSearch below
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+
+
 @json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+    # Must match values of WebSearchToolTypes above
+    type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
+        "web_search"
+    )
     # TODO: actually use search_context_size somewhere...
     search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
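Editorial sketch (not part of the diff): how the new discriminated union behaves when validating content with pydantic v2's `TypeAdapter`. The model and module names come from the diff above; the payload values are invented.

```python
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseOutputMessageContentOutputText,
)

content = TypeAdapter(OpenAIResponseOutputMessageContentOutputText).validate_python(
    {
        "text": "See the attached report.",
        "annotations": [
            # The "type" discriminator selects OpenAIResponseAnnotationFileCitation.
            {"type": "file_citation", "file_id": "file-123", "filename": "report.pdf", "index": 0}
        ],
    }
)
assert content.annotations[0].filename == "report.pdf"
```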
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .batch_inference import *  # noqa: F401 F403
+from .batch_inference import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .benchmarks import *  # noqa: F401 F403
+from .benchmarks import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .datasetio import *  # noqa: F401 F403
+from .datasetio import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .datasets import *  # noqa: F401 F403
+from .datasets import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import Enum, StrEnum
 from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
@@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
-class DatasetPurpose(str, Enum):
+class DatasetPurpose(StrEnum):
     """
     Purpose of the dataset. Each purpose has a required input data schema.
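A brief editorial note on the `(str, Enum)` to `StrEnum` migration seen here and in several files below: on Python 3.11+, `StrEnum` members format as their plain values, while `(str, Enum)` mixins include the class name, which matters wherever enum values end up in serialized configs or URLs. A minimal sketch with invented names:

```python
from enum import Enum, StrEnum  # StrEnum requires Python 3.11+

class OldPurpose(str, Enum):
    eval = "eval/question-answer"

class NewPurpose(StrEnum):
    eval = "eval/question-answer"

# On Python >= 3.11, f-strings of (str, Enum) mixins include the class name;
# StrEnum always formats as the bare value.
print(f"{OldPurpose.eval}")  # OldPurpose.eval
print(f"{NewPurpose.eval}")  # eval/question-answer
```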
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .eval import *  # noqa: F401 F403
+from .eval import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .files import *  # noqa: F401 F403
+from .files import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Annotated, Literal, Protocol, runtime_checkable
 
 from fastapi import File, Form, Response, UploadFile
@@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod
 
 
 # OpenAI Files API Models
-class OpenAIFilePurpose(str, Enum):
+class OpenAIFilePurpose(StrEnum):
     """
     Valid purpose values for OpenAI Files API.
     """
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .inference import *  # noqa: F401 F403
+from .inference import *
@@ -20,7 +20,7 @@ from typing_extensions import TypedDict
 from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
+from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .inspect import *  # noqa: F401 F403
+from .inspect import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .models import *  # noqa: F401 F403
+from .models import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field
@@ -22,7 +22,7 @@ class CommonModelFields(BaseModel):
 
 
 @json_schema_type
-class ModelType(str, Enum):
+class ModelType(StrEnum):
     llm = "llm"
     embedding = "embedding"
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .post_training import *  # noqa: F401 F403
+from .post_training import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .providers import *  # noqa: F401 F403
+from .providers import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .safety import *  # noqa: F401 F403
+from .safety import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring import *  # noqa: F401 F403
+from .scoring import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring_functions import *  # noqa: F401 F403
+from .scoring_functions import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .shields import *  # noqa: F401 F403
+from .shields import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .synthetic_data_generation import *  # noqa: F401 F403
+from .synthetic_data_generation import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .telemetry import *  # noqa: F401 F403
+from .telemetry import *

@@ -4,5 +4,5 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .rag_tool import *  # noqa: F401 F403
-from .tools import *  # noqa: F401 F403
+from .rag_tool import *
+from .tools import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_dbs import *  # noqa: F401 F403
+from .vector_dbs import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_io import *  # noqa: F401 F403
+from .vector_io import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Self
 
 from pydantic import BaseModel, model_validator
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator
 from .conditions import parse_conditions
 
 
-class Action(str, Enum):
+class Action(StrEnum):
     CREATE = "create"
     READ = "read"
     UPDATE = "update"
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any
 
@@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
 
-LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
-LLAMA_STACK_RUN_CONFIG_VERSION = "2"
+LLAMA_STACK_BUILD_CONFIG_VERSION = 2
+LLAMA_STACK_RUN_CONFIG_VERSION = 2
 
 
 RoutingKey = str | list[str]
@@ -159,7 +159,7 @@ class LoggingConfig(BaseModel):
     )
 
 
-class AuthProviderType(str, Enum):
+class AuthProviderType(StrEnum):
     """Supported authentication provider types."""
 
     OAUTH2_TOKEN = "oauth2_token"
@@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception):
     pass
 
 
-class QuotaPeriod(str, Enum):
+class QuotaPeriod(StrEnum):
     DAY = "day"
 
 
@@ -229,7 +229,7 @@ class ServerConfig(BaseModel):
 
 
 class StackRunConfig(BaseModel):
-    version: str = LLAMA_STACK_RUN_CONFIG_VERSION
+    version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
     image_name: str = Field(
         ...,
@@ -300,7 +300,7 @@ a default SQLite store will be used.""",
 
 
 class BuildConfig(BaseModel):
-    version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
+    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
 
     distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
     image_type: str = Field(
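Editorial observation: the version constants change from `"2"` to `2`, consistent with the new substitution layer coercing numeric strings to integers (see `_convert_string_to_proper_type` later in this diff). Pydantic v2 in its default lax mode also coerces digit strings, so configs written with a string version still validate; a quick sketch with an invented stand-in model:

```python
from pydantic import BaseModel

class RunConfig(BaseModel):  # stand-in for StackRunConfig, for illustration only
    version: int = 2

print(RunConfig(version="2").version)  # 2 -- lax mode coerces digit strings
print(RunConfig().version)             # 2
```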
@@ -30,7 +30,13 @@ from llama_stack.apis.inference import (
     ListOpenAIChatCompletionResponse,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAICompletionWithInputMessages,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     Order,
     ResponseFormat,
     SamplingParams,
@@ -41,14 +47,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.log import get_logger
@@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
+    VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileDeleteResponse,
-    VectorStoreFileObject,
-    VectorStoreFileStatus,
-    VectorStoreListResponse,
-    VectorStoreObject,
-    VectorStoreSearchResponsePage,
-)
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@@ -127,7 +127,12 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
+        super().__init__(
+            f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
+            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
+            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
+            f"or ensure the environment variable is set."
+        )
 
 
 def replace_env_vars(config: Any, path: str = "") -> Any:
@@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
         return result
 
     elif isinstance(config, str):
-        # Updated pattern to support both default values (:) and conditional values (+)
-        pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}"
+        # Pattern supports bash-like syntax: := for default and :+ for conditional, with an optional value
+        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
 
-        def get_env_var(match):
+        def get_env_var(match: re.Match):
             env_var = match.group(1)
-            operator = match.group(2)  # ':' for default, '+' for conditional
+            operator = match.group(2)  # '=' for default, '+' for conditional
             value_expr = match.group(3)
 
             env_value = os.environ.get(env_var)
 
-            if operator == ":":  # Default value syntax: ${env.FOO:default}
+            if operator == "=":  # Default value syntax: ${env.FOO:=default}
                 if not env_value:
-                    if value_expr is None:
+                    # value_expr returns empty string (not None) when not matched
+                    # This means ${env.FOO:=} is an error
+                    if value_expr == "":
                         raise EnvVarError(env_var, path)
                     else:
                         value = value_expr
                 else:
                     value = env_value
-            elif operator == "+":  # Conditional value syntax: ${env.FOO+value_if_set}
+            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
                 if env_value:
                     value = value_expr
                 else:
@@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return os.path.expanduser(value)
 
         try:
-            return re.sub(pattern, get_env_var, config)
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None
 
     return config
 
 
+def _convert_string_to_proper_type(value: str) -> Any:
+    # This might be tricky depending on what the config type is, if 'str | None' we are
+    # good, if 'str' we need to keep the empty string... 'str | None' is more common and
+    # providers config should be typed this way.
+    # TODO: we could try to load the config class and see if the config has a field with type 'str | None'
+    # and then convert the empty string to None or not
+    if value == "":
+        return None
+
+    lowered = value.lower()
+    if lowered == "true":
+        return True
+    elif lowered == "false":
+        return False
+
+    try:
+        return int(value)
+    except ValueError:
+        pass
+
+    try:
+        return float(value)
+    except ValueError:
+        pass
+
+    return value
+
+
 def validate_env_pair(env_pair: str) -> tuple[str, str]:
     """Validate and split an environment variable key-value pair."""
     try:
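Editorial illustration (not part of the commit) of the conversions `_convert_string_to_proper_type` performs after substitution, assuming the environment variables shown:

```python
import os

os.environ["PORT"] = "8321"
os.environ["DEBUG"] = "false"

config = {
    "port": "${env.PORT:=8321}",        # -> 8321 (int)
    "debug": "${env.DEBUG:=true}",      # -> False (bool)
    "token": "${env.MISSING_TOKEN:+}",  # -> None (empty result becomes None)
}
# replace_env_vars(config) would return:
# {"port": 8321, "debug": False, "token": None}
```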
@@ -25,7 +25,7 @@ class LlamaStackApi:
     def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
         """Run scoring on a single row"""
         if not scoring_params:
-            scoring_params = {fn_id: None for fn_id in scoring_function_ids}
+            scoring_params = dict.fromkeys(scoring_function_ids)
         return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
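Several call sites in this commit replace `{k: None for k in keys}` with `dict.fromkeys(keys)`. The two are equivalent for `None` (the default fill); one well-known caveat applies when a mutable fill value is passed. A short editorial sketch:

```python
scoring_function_ids = ["accuracy", "f1"]

# Equivalent ways to build {"accuracy": None, "f1": None}:
a = {fn_id: None for fn_id in scoring_function_ids}
b = dict.fromkeys(scoring_function_ids)
assert a == b

# Caveat: dict.fromkeys shares one value object across all keys,
# which matters for mutable fills (not an issue for None).
shared = dict.fromkeys(scoring_function_ids, [])
shared["accuracy"].append(1)
assert shared["f1"] == [1]  # both keys point at the same list
```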
@@ -33,7 +33,7 @@ CATEGORIES = [
 ]
 
 # Initialize category levels with default level
-_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES}
+_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
 
 
 def config_to_category_levels(category: str, level: str):
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import base64
-from enum import Enum
+from enum import Enum, StrEnum
 from io import BytesIO
 from typing import Annotated, Any, Literal
 
@@ -171,7 +171,7 @@ class GenerationResult(BaseModel):
     ignore_token: bool
 
 
-class QuantizationMode(str, Enum):
+class QuantizationMode(StrEnum):
     none = "none"
     fp8_mixed = "fp8_mixed"
     int4_mixed = "int4_mixed"
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse
 
@@ -225,7 +225,7 @@ def remote_provider_spec(
 )
 
 
-class HealthStatus(str, Enum):
+class HealthStatus(StrEnum):
     OK = "OK"
     ERROR = "Error"
     NOT_IMPLEMENTED = "Not Implemented"
@@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    WebSearchToolTypes,
 )
 from llama_stack.apis.common.content_types import TextContentItem
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     Inference,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
@@ -583,7 +584,7 @@ class OpenAIResponsesImpl:
         from llama_stack.apis.agents.openai_responses import (
             MCPListToolsTool,
         )
-        from llama_stack.apis.tools.tools import Tool
+        from llama_stack.apis.tools import Tool
 
         mcp_tool_to_server = {}
 
@@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
             # TODO: Handle other tool types
             if input_tool.type == "function":
                 chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
-            elif input_tool.type == "web_search":
+            elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 tool = await self.tool_groups_api.get_tool(tool_name)
                 if not tool:
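With `WebSearchToolTypes` exported from the API package, all three OpenAI tool-type aliases now route to the same built-in tool. An editorial sketch of the dispatch this enables (the list comes from the diff; the loop is illustrative):

```python
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]

for requested in ("web_search_preview", "web_search_preview_2025_03_11"):
    # Mirrors the change above: any alias in WebSearchToolTypes resolves
    # to the single "web_search" tool implementation.
    if requested in WebSearchToolTypes:
        tool_name = "web_search"
        print(requested, "->", tool_name)
```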
@@ -208,7 +208,7 @@ class MetaReferenceEvalImpl(
                 for scoring_fn_id in scoring_functions
             }
         else:
-            scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions}
+            scoring_functions_dict = dict.fromkeys(scoring_functions)
 
         score_response = await self.scoring_api.score(
             input_rows=score_input_rows, scoring_functions=scoring_functions_dict
@@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
+            "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
             "metadata_store": SqliteSqlStoreConfig.sample_run_config(
                 __distro_dir__=__distro_dir__,
                 db_name="files_metadata.db",
@@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel):
     def sample_run_config(
         cls,
         model: str = "Llama3.2-3B-Instruct",
-        checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
-        quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}",
-        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}",
-        max_batch_size: str = "${env.MAX_BATCH_SIZE:1}",
-        max_seq_len: str = "${env.MAX_SEQ_LEN:4096}",
+        checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}",
+        quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}",
+        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}",
+        max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}",
+        max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
@@ -44,10 +44,10 @@ class VLLMConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}",
-            "max_tokens": "${env.MAX_TOKENS:4096}",
-            "max_model_len": "${env.MAX_MODEL_LEN:4096}",
-            "max_num_seqs": "${env.MAX_NUM_SEQS:4}",
-            "enforce_eager": "${env.ENFORCE_EAGER:False}",
-            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}",
+            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}",
+            "max_tokens": "${env.MAX_TOKENS:=4096}",
+            "max_model_len": "${env.MAX_MODEL_LEN:=4096}",
+            "max_num_seqs": "${env.MAX_NUM_SEQS:=4}",
+            "enforce_eager": "${env.ENFORCE_EAGER:=False}",
+            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}",
         }
@@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "openai_api_key": "${env.OPENAI_API_KEY:}",
+            "openai_api_key": "${env.OPENAI_API_KEY:+}",
         }
@@ -7,7 +7,7 @@ from typing import Any
 
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import (
     ScoreBatchResponse,
     ScoreResponse,
@@ -6,7 +6,7 @@
 import re
 from typing import Any
 
-from llama_stack.apis.inference.inference import Inference, UserMessage
+from llama_stack.apis.inference import Inference, UserMessage
 from llama_stack.apis.scoring import ScoringResultRow
 from llama_stack.apis.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any
 
 from pydantic import BaseModel, Field, field_validator
@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator
 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
 
 
-class TelemetrySink(str, Enum):
+class TelemetrySink(StrEnum):
     OTEL_TRACE = "otel_trace"
     OTEL_METRIC = "otel_metric"
     SQLITE = "sqlite"
@@ -20,12 +20,12 @@ class TelemetrySink(StrEnum):
 
 
 class TelemetryConfig(BaseModel):
-    otel_trace_endpoint: str = Field(
-        default="http://localhost:4318/v1/traces",
+    otel_trace_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for traces",
     )
-    otel_metric_endpoint: str = Field(
-        default="http://localhost:4318/v1/metrics",
+    otel_metric_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for metrics",
     )
     service_name: str = Field(
@@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
         return {
-            "service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
-            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
-            "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
+            "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
+            "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }
@@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor
 from opentelemetry.trace import Span
 from opentelemetry.trace.span import format_span_id, format_trace_id
 
+from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER
+
 
 class SQLiteSpanProcessor(SpanProcessor):
     def __init__(self, conn_string):
@@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor):
                     (
                         trace_id,
                         service_name,
-                        (span_id if span.attributes.get("__root_span__") == "true" else None),
+                        (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None),
                         datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
                         datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
                     ),
@@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
             trace.set_tracer_provider(provider)
             _TRACER_PROVIDER = provider
             if TelemetrySink.OTEL_TRACE in self.config.sinks:
+                if self.config.otel_trace_endpoint is None:
+                    raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
                 span_exporter = OTLPSpanExporter(
                     endpoint=self.config.otel_trace_endpoint,
                 )
                 span_processor = BatchSpanProcessor(span_exporter)
                 trace.get_tracer_provider().add_span_processor(span_processor)
             if TelemetrySink.OTEL_METRIC in self.config.sinks:
+                if self.config.otel_metric_endpoint is None:
+                    raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
                 metric_reader = PeriodicExportingMetricReader(
                     OTLPMetricExporter(
                         endpoint=self.config.otel_metric_endpoint,
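Because the OTLP endpoints now default to `None` (see the `TelemetryConfig` change above), the adapter validates them only when the corresponding sink is enabled. A compact editorial sketch of this fail-fast pattern, with names simplified for illustration:

```python
def check_sink_endpoint(sinks: list[str], trace_endpoint: str | None) -> None:
    # Mirrors the adapter change: an endpoint is only required when its
    # sink is actually enabled; otherwise None is a valid default.
    if "otel_trace" in sinks and trace_endpoint is None:
        raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")

check_sink_endpoint(["console"], None)  # fine: trace sink not enabled
```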
@@ -16,8 +16,7 @@ import numpy as np
 from numpy.typing import NDArray
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference, InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
+            "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
         }
@@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db",
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
         }
@@ -15,8 +15,8 @@ import numpy as np
 import sqlite_vec
 from numpy.typing import NDArray
 
-from llama_stack.apis.files.files import Files
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.files import Files
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
     score_range = max_score - min_score
     if score_range > 0:
         return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return {doc_id: 1.0 for doc_id in scores}
+    return dict.fromkeys(scores, 1.0)
 
 
 def _weighted_rerank(
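The `_normalize_scores` change keeps min-max normalization and uses `dict.fromkeys(scores, 1.0)` for the degenerate case where all scores are equal (range 0). A worked editorial example restating the function from the diff:

```python
def normalize(scores: dict[str, float]) -> dict[str, float]:
    # Editorial restatement of _normalize_scores above.
    min_score, max_score = min(scores.values()), max(scores.values())
    score_range = max_score - min_score
    if score_range > 0:
        return {d: (s - min_score) / score_range for d, s in scores.items()}
    return dict.fromkeys(scores, 1.0)

print(normalize({"a": 2.0, "b": 4.0, "c": 3.0}))  # {'a': 0.0, 'b': 1.0, 'c': 0.5}
print(normalize({"a": 2.0, "b": 2.0}))            # {'a': 1.0, 'b': 1.0}
```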
@@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="ollama",
-            pip_packages=["ollama", "aiohttp"],
+            pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
             config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
             module="llama_stack.providers.remote.inference.ollama",
         ),
@@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.safety,
         adapter=AdapterSpec(
             adapter_type="sambanova",
-            pip_packages=["litellm"],
+            pip_packages=["litellm", "requests"],
             module="llama_stack.providers.remote.safety.sambanova",
             config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
             provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
@@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.scoring,
             provider_type="inline::basic",
-            pip_packages=[],
+            pip_packages=["requests"],
             module="llama_stack.providers.inline.scoring.basic",
             config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
             api_dependencies=[
@@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
-            "project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
-            "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
+            "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
+            "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
         }
@@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter:
         Returns:
             Dataset
         """
-        ## add warnings for unsupported params
+        # add warnings for unsupported params
         request_body = {
             "name": dataset_def.identifier,
             "namespace": self.config.dataset_namespace,
@@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
+            "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}",
         }
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )
@@ -24,6 +24,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     ResponseFormatType,
     SamplingParams,
@@ -33,14 +39,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import (
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )
@@ -9,7 +9,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChoiceDelta,
@@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}",
+            "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
         }
@@ -29,20 +29,18 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
     ToolChoice,
     ToolConfig,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference import (
@@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel):

     @classmethod
     def sample_run_config(
-        cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
+        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
     ) -> dict[str, Any]:
         return {
             "url": url,
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
@@ -32,15 +32,6 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
-    ResponseFormat,
-    SamplingParams,
-    TextTruncation,
-    ToolChoice,
-    ToolConfig,
-    ToolDefinition,
-    ToolPromptFormat,
-)
-from llama_stack.apis.inference.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAICompletion,
@@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
+    ResponseFormat,
+    SamplingParams,
+    TextTruncation,
+    ToolChoice,
+    ToolConfig,
+    ToolDefinition,
+    ToolPromptFormat,
 )
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
@@ -6,7 +6,7 @@

 from dataclasses import dataclass

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )
@@ -10,7 +10,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAICompletion,
@@ -19,7 +19,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -28,13 +33,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model
 from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:}",
-            "api_token": "${env.RUNPOD_API_TOKEN:}",
+            "url": "${env.RUNPOD_URL:+}",
+            "api_token": "${env.RUNPOD_API_TOKEN:+}",
         }
@@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator
 from openai import OpenAI

 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import OpenAIEmbeddingsResponse

 # from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY:}",
+            "api_key": "${env.TOGETHER_API_KEY:+}",
         }
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
@@ -23,7 +23,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     ResponseFormatType,
     SamplingParams,
@@ -33,13 +38,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel):
     @classmethod
     def validate_tls_verify(cls, v):
         if isinstance(v, str):
-            # Check if it's a boolean string
-            if v.lower() in ("true", "false"):
-                return v.lower() == "true"
             # Otherwise, treat it as a cert path
             cert_path = Path(v).expanduser().resolve()
             if not cert_path.exists():
@@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
     ):
         return {
             "url": url,
-            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
-            "api_token": "${env.VLLM_API_TOKEN:fake}",
-            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
+            "api_token": "${env.VLLM_API_TOKEN:=fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
         }
@@ -38,9 +38,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAICompletion,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -49,12 +53,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
 from llama_stack.models.llama.sku_list import all_registered_models
@@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
-            "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}",
+            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
+            "api_key": "${env.WATSONX_API_KEY:+}",
+            "project_id": "${env.WATSONX_PROJECT_ID:+}",
         }
@@ -18,10 +18,16 @@ from llama_stack.apis.inference import (
     CompletionRequest,
     EmbeddingsResponse,
     EmbeddingTaskType,
+    GreedySamplingStrategy,
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -29,14 +35,6 @@ from llama_stack.apis.inference import (
     ToolConfig,
     ToolDefinition,
     ToolPromptFormat,
-)
-from llama_stack.apis.inference.inference import (
-    GreedySamplingStrategy,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )
@@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
-            "project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
-            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
+            "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
+            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}",
         }
@@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
-            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}",
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}",
+            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}",
         }
@@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.BRAVE_SEARCH_API_KEY:}",
+            "api_key": "${env.BRAVE_SEARCH_API_KEY:+}",
             "max_results": 3,
         }
@@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.TAVILY_SEARCH_API_KEY:}",
+            "api_key": "${env.TAVILY_SEARCH_API_KEY:+}",
             "max_results": 3,
         }
@@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}",
+            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}",
         }
@@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(
         cls,
-        host: str = "${env.PGVECTOR_HOST:localhost}",
-        port: int = "${env.PGVECTOR_PORT:5432}",
+        host: str = "${env.PGVECTOR_HOST:=localhost}",
+        port: int = "${env.PGVECTOR_PORT:=5432}",
         db: str = "${env.PGVECTOR_DB}",
         user: str = "${env.PGVECTOR_USER}",
         password: str = "${env.PGVECTOR_PASSWORD}",
@@ -23,6 +23,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -31,16 +38,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
-from llama_stack.apis.models.models import Model
+from llama_stack.apis.models import Model
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
@@ -8,7 +8,7 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (
@@ -95,27 +95,25 @@ from llama_stack.apis.inference import (
     CompletionResponse,
     CompletionResponseStreamChunk,
     GreedySamplingStrategy,
-    Message,
-    SamplingParams,
-    SystemMessage,
-    TokenLogProbs,
-    ToolChoice,
-    ToolResponseMessage,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    UserMessage,
-)
-from llama_stack.apis.inference.inference import (
     JsonSchemaResponseFormat,
+    Message,
     OpenAIChatCompletion,
     OpenAICompletion,
     OpenAICompletionChoice,
     OpenAIEmbeddingData,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
+    SamplingParams,
+    SystemMessage,
+    TokenLogProbs,
+    ToolChoice,
     ToolConfig,
+    ToolResponseMessage,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    UserMessage,
 )
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChoice as OpenAIChatCompletionChoice,
 )
 from llama_stack.models.llama.datatypes import (
@@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig):
         return {
             "type": "redis",
             "namespace": None,
-            "host": "${env.REDIS_HOST:localhost}",
-            "port": "${env.REDIS_PORT:6379}",
+            "host": "${env.REDIS_HOST:=localhost}",
+            "port": "${env.REDIS_PORT:=6379}",
         }
@@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig):
         return {
             "type": "sqlite",
             "namespace": None,
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }
@@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig):
         return {
             "type": "postgres",
             "namespace": None,
-            "host": "${env.POSTGRES_HOST:localhost}",
-            "port": "${env.POSTGRES_PORT:5432}",
-            "db": "${env.POSTGRES_DB:llamastack}",
-            "user": "${env.POSTGRES_USER:llamastack}",
-            "password": "${env.POSTGRES_PASSWORD:llamastack}",
-            "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
         }

     @classmethod
@@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig):
         return {
             "type": "mongodb",
             "namespace": None,
-            "host": "${env.MONGODB_HOST:localhost}",
-            "port": "${env.MONGODB_PORT:5432}",
+            "host": "${env.MONGODB_HOST:=localhost}",
+            "port": "${env.MONGODB_PORT:=5432}",
             "db": "${env.MONGODB_DB}",
             "user": "${env.MONGODB_USER}",
             "password": "${env.MONGODB_PASSWORD}",
-            "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}",
+            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
         }
@@ -12,8 +12,7 @@ import uuid
 from abc import ABC, abstractmethod
 from typing import Any

-from llama_stack.apis.files import Files
-from llama_stack.apis.files.files import OpenAIFileObject
+from llama_stack.apis.files import Files, OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
         return cls(
             type="sqlite",
-            db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         )

     @property
@@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, **kwargs):
         return cls(
             type="postgres",
-            host="${env.POSTGRES_HOST:localhost}",
-            port="${env.POSTGRES_PORT:5432}",
-            db="${env.POSTGRES_DB:llamastack}",
-            user="${env.POSTGRES_USER:llamastack}",
-            password="${env.POSTGRES_PASSWORD:llamastack}",
+            host="${env.POSTGRES_HOST:=localhost}",
+            port="${env.POSTGRES_PORT:=5432}",
+            db="${env.POSTGRES_DB:=llamastack}",
+            user="${env.POSTGRES_USER:=llamastack}",
+            password="${env.POSTGRES_PASSWORD:=llamastack}",
         )
@@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000

 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
+# The logical root span may not be visible to this process if a parent context
+# is passed in. The local root span is the first local span in a trace.
+LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"


 def trace_id_to_str(trace_id: int) -> str:
@@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont

     trace_id = generate_trace_id()
     context = TraceContext(BACKGROUND_LOGGER, trace_id)
-    attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {})
+    # Mark this span as the root for the trace for now. The processing of
+    # traceparent context if supplied comes later and will result in the
+    # ROOT_SPAN_MARKERS being removed. Also mark this is the 'local' root,
+    # i.e. the root of the spans originating in this process as this is
+    # needed to ensure that we insert this 'local' root span's id into
+    # the trace record in sqlite store.
+    attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
     context.push_span(name, attributes)

     CURRENT_TRACE_CONTEXT.set(context)
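Aside: the merge above relies on PEP 584 dict union, where keys from the right-hand operand win; that ordering is what lets caller-supplied attributes override the marker defaults. Illustrative check (marker names taken from the hunk above):

    markers = dict.fromkeys(["__root__", "__root_span__"], True)
    merged = markers | {"__local_root_span__": True} | {"__root__": False}
    assert merged == {"__root__": False, "__root_span__": True, "__local_root_span__": True}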
@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 distribution_spec:
   description: Use AWS Bedrock for running LLM inference and safety
   providers:
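Aside: the version change is not purely cosmetic: in YAML, the quoted scalar '2' parses as a string while the bare 2 parses as an integer. A quick check (assuming PyYAML):

    import yaml

    assert yaml.safe_load("version: '2'") == {"version": "2"}
    assert yaml.safe_load("version: 2") == {"version": 2}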
@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 image_name: bedrock
 apis:
 - agents
@@ -22,7 +22,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -34,17 +34,17 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
       responses_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -52,7 +52,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
@@ -60,14 +60,14 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -78,17 +78,17 @@ providers:
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
+      openai_api_key: ${env.OPENAI_API_KEY:+}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
@@ -98,10 +98,10 @@ providers:
     config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db
 inference_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db
 models:
 - metadata: {}
   model_id: meta.llama3-1-8b-instruct-v1:0
@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 distribution_spec:
   description: Use Cerebras for running LLM inference
   providers:
Some files were not shown because too many files have changed in this diff.