Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-28 19:04:19 +00:00

Commit a1033ba805: Merge branch 'vit' of https://github.com/hardikjshah/llama-stack into vit

184 changed files with 1691 additions and 1138 deletions
.github/workflows/providers-build.yml (vendored): 3 changes

@@ -11,6 +11,8 @@ on:
       - 'llama_stack/distribution/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
+      - 'pyproject.toml'
 
   pull_request:
     paths:
       - 'llama_stack/cli/stack/build.py'
@@ -19,6 +21,7 @@ on:
       - 'llama_stack/distribution/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
+      - 'pyproject.toml'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
docs/_static/llama-stack-spec.html (vendored): 154 changes

@@ -7390,6 +7390,147 @@
       ],
       "title": "AgentTurnResponseTurnStartPayload"
     },
+    "OpenAIResponseAnnotationCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "url_citation",
+          "default": "url_citation"
+        },
+        "end_index": {
+          "type": "integer"
+        },
+        "start_index": {
+          "type": "integer"
+        },
+        "title": {
+          "type": "string"
+        },
+        "url": {
+          "type": "string"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "end_index",
+        "start_index",
+        "title",
+        "url"
+      ],
+      "title": "OpenAIResponseAnnotationCitation"
+    },
+    "OpenAIResponseAnnotationContainerFileCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "container_file_citation",
+          "default": "container_file_citation"
+        },
+        "container_id": {
+          "type": "string"
+        },
+        "end_index": {
+          "type": "integer"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "filename": {
+          "type": "string"
+        },
+        "start_index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "container_id",
+        "end_index",
+        "file_id",
+        "filename",
+        "start_index"
+      ],
+      "title": "OpenAIResponseAnnotationContainerFileCitation"
+    },
+    "OpenAIResponseAnnotationFileCitation": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "file_citation",
+          "default": "file_citation"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "filename": {
+          "type": "string"
+        },
+        "index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "file_id",
+        "filename",
+        "index"
+      ],
+      "title": "OpenAIResponseAnnotationFileCitation"
+    },
+    "OpenAIResponseAnnotationFilePath": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "type": "string",
+          "const": "file_path",
+          "default": "file_path"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "index": {
+          "type": "integer"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "type",
+        "file_id",
+        "index"
+      ],
+      "title": "OpenAIResponseAnnotationFilePath"
+    },
+    "OpenAIResponseAnnotations": {
+      "oneOf": [
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+        }
+      ],
+      "discriminator": {
+        "propertyName": "type",
+        "mapping": {
+          "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
+          "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
+          "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
+          "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+        }
+      }
+    },
     "OpenAIResponseInput": {
       "oneOf": [
         {
@@ -7764,6 +7905,10 @@
           "type": "string",
           "const": "web_search"
         },
+        {
+          "type": "string",
+          "const": "web_search_preview"
+        },
         {
           "type": "string",
           "const": "web_search_preview_2025_03_11"
@@ -7855,12 +8000,19 @@
           "type": "string",
           "const": "output_text",
           "default": "output_text"
+        },
+        "annotations": {
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/OpenAIResponseAnnotations"
+          }
         }
       },
       "additionalProperties": false,
       "required": [
         "text",
-        "type"
+        "type",
+        "annotations"
       ],
       "title": "OpenAIResponseOutputMessageContentOutputText"
     },
docs/_static/llama-stack-spec.yaml (vendored): 107 changes

@@ -5263,6 +5263,106 @@ components:
         - event_type
         - turn_id
       title: AgentTurnResponseTurnStartPayload
+    OpenAIResponseAnnotationCitation:
+      type: object
+      properties:
+        type:
+          type: string
+          const: url_citation
+          default: url_citation
+        end_index:
+          type: integer
+        start_index:
+          type: integer
+        title:
+          type: string
+        url:
+          type: string
+      additionalProperties: false
+      required:
+        - type
+        - end_index
+        - start_index
+        - title
+        - url
+      title: OpenAIResponseAnnotationCitation
+    "OpenAIResponseAnnotationContainerFileCitation":
+      type: object
+      properties:
+        type:
+          type: string
+          const: container_file_citation
+          default: container_file_citation
+        container_id:
+          type: string
+        end_index:
+          type: integer
+        file_id:
+          type: string
+        filename:
+          type: string
+        start_index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - container_id
+        - end_index
+        - file_id
+        - filename
+        - start_index
+      title: >-
+        OpenAIResponseAnnotationContainerFileCitation
+    OpenAIResponseAnnotationFileCitation:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_citation
+          default: file_citation
+        file_id:
+          type: string
+        filename:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - filename
+        - index
+      title: OpenAIResponseAnnotationFileCitation
+    OpenAIResponseAnnotationFilePath:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_path
+          default: file_path
+        file_id:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - index
+      title: OpenAIResponseAnnotationFilePath
+    OpenAIResponseAnnotations:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+      discriminator:
+        propertyName: type
+        mapping:
+          file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+          url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+          container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+          file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
     OpenAIResponseInput:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@@ -5488,6 +5588,8 @@ components:
       oneOf:
         - type: string
           const: web_search
+        - type: string
+          const: web_search_preview
         - type: string
           const: web_search_preview_2025_03_11
       default: web_search
@@ -5547,10 +5649,15 @@ components:
           type: string
          const: output_text
           default: output_text
+        annotations:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseAnnotations'
       additionalProperties: false
       required:
         - text
         - type
+        - annotations
       title: >-
         OpenAIResponseOutputMessageContentOutputText
     "OpenAIResponseOutputMessageFileSearchToolCall":
@@ -18,7 +18,7 @@ providers:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
 vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -26,7 +26,7 @@ providers:
     kvstore:
       type: sqlite
       namespace: null
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
 safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,7 +38,7 @@ providers:
     persistence_store:
       type: sqlite
       namespace: null
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
 telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -46,7 +46,7 @@ providers:
   metadata_store:
     namespace: null
     type: sqlite
-    db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+    db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
@@ -85,7 +85,7 @@ providers:
   # config is a dictionary that contains the configuration for the provider.
   # in this case, the configuration is the url of the ollama server
   config:
-    url: ${env.OLLAMA_URL:http://localhost:11434}
+    url: ${env.OLLAMA_URL:=http://localhost:11434}
 ```
 A few things to note:
 - A _provider instance_ is identified with an (id, type, configuration) triplet.
@@ -94,6 +94,95 @@ A few things to note:
 - The configuration dictionary is provider-specific.
 - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
 
+### Environment Variable Substitution
+
+Llama Stack supports environment variable substitution in configuration values using the
+`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide
+different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html)
+and follows similar patterns.
+
+#### Basic Syntax
+
+The basic syntax for environment variable substitution is:
+
+```yaml
+config:
+  api_key: ${env.API_KEY}
+  url: ${env.SERVICE_URL}
+```
+
+If the environment variable is not set, the server will raise an error during startup.
+
+#### Default Values
+
+You can provide default values using the `:=` operator:
+
+```yaml
+config:
+  url: ${env.OLLAMA_URL:=http://localhost:11434}
+  port: ${env.PORT:=8321}
+  timeout: ${env.TIMEOUT:=60}
+```
+
+If the environment variable is not set, the value after `:=` is used as the default (for
+example, `http://localhost:11434` for `OLLAMA_URL`).
+Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` raises an error if the environment variable is not set.
+
+#### Conditional Values
+
+You can use the `:+` operator to provide a value only when the environment variable is set:
+
+```yaml
+config:
+  # Only include this field if ENVIRONMENT is set
+  environment: ${env.ENVIRONMENT:+production}
+```
+
+If the environment variable is set, the value after `:+` is used. If it is not set, the field
+is omitted with a `None` value.
+`${env.ENVIRONMENT:+}` is also supported; it omits the field when the environment variable is
+not set, which makes a field optional until it is enabled at runtime.
+
+#### Examples
+
+Here are some common patterns:
+
+```yaml
+# Required environment variable (will error if not set)
+api_key: ${env.OPENAI_API_KEY}
+
+# Optional with default
+base_url: ${env.API_BASE_URL:=https://api.openai.com/v1}
+
+# Conditional field
+debug_mode: ${env.DEBUG:+true}
+
+# Optional field that becomes None if not set
+optional_token: ${env.OPTIONAL_TOKEN:+}
+```
+
+#### Runtime Override
+
+You can override environment variables at runtime when starting the server:
+
+```bash
+# Override specific environment variables
+llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
+
+# Or set them in your shell
+export API_KEY=sk-123
+export BASE_URL=https://custom-api.com
+llama stack run --config run.yaml
+```
+
+#### Type Safety
+
+The environment variable substitution system is type-safe:
+
+- String values remain strings
+- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None`
+- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer)
+- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean)
+
 ## Resources
 
 Finally, let's look at the `models` section:
@@ -152,7 +241,7 @@ server:
   config:
     jwks:
       uri: "https://kubernetes.default.svc:8443/openid/v1/jwks"
-      token: "${env.TOKEN:}"
+      token: "${env.TOKEN:+}"
       key_recheck_period: 3600
     tls_cafile: "/path/to/ca.crt"
     issuer: "https://kubernetes.default.svc"
@@ -396,12 +485,12 @@ providers:
   - provider_id: vllm-0
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000}
+      url: ${env.VLLM_URL:=http://localhost:8000}
   # this vLLM server serves the llama-guard model (e.g., llama-guard:3b)
   - provider_id: vllm-1
     provider_type: remote::vllm
     config:
-      url: ${env.SAFETY_VLLM_URL:http://localhost:8001}
+      url: ${env.SAFETY_VLLM_URL:=http://localhost:8001}
 ...
 models:
 - metadata: {}
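The "Type Safety" bullets above correspond to a post-substitution coercion step that appears later in this commit (`_convert_string_to_proper_type`). A minimal, self-contained sketch of the documented behavior, not the real function:

```python
from typing import Any


def convert(value: str) -> Any:
    """Sketch of the post-substitution type coercion described above."""
    if value == "":          # e.g. ${env.OPTIONAL_TOKEN:+} with the variable unset
        return None
    lowered = value.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value             # plain strings pass through unchanged


assert convert("8321") == 8321       # ${env.PORT:=8321}
assert convert("false") is False     # ${env.DEBUG:=false}
assert convert("") is None           # empty conditional value
```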
@@ -15,10 +15,10 @@ data:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
@@ -30,10 +30,10 @@ data:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMADB_URL:}
+      url: ${env.CHROMADB_URL:+}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -45,34 +45,34 @@ data:
     config:
       persistence_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
         user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
       responses_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:}
+      service_name: ${env.OTEL_SERVICE_NAME:+}
       sinks: ${env.TELEMETRY_SINKS:console}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
@@ -82,19 +82,19 @@ data:
     config: {}
   metadata_store:
     type: postgres
-    host: ${env.POSTGRES_HOST:localhost}
-    port: ${env.POSTGRES_PORT:5432}
-    db: ${env.POSTGRES_DB:llamastack}
-    user: ${env.POSTGRES_USER:llamastack}
-    password: ${env.POSTGRES_PASSWORD:llamastack}
+    host: ${env.POSTGRES_HOST:=localhost}
+    port: ${env.POSTGRES_PORT:=5432}
+    db: ${env.POSTGRES_DB:=llamastack}
+    user: ${env.POSTGRES_USER:=llamastack}
+    password: ${env.POSTGRES_PASSWORD:=llamastack}
     table_name: llamastack_kvstore
   inference_store:
     type: postgres
-    host: ${env.POSTGRES_HOST:localhost}
-    port: ${env.POSTGRES_PORT:5432}
-    db: ${env.POSTGRES_DB:llamastack}
-    user: ${env.POSTGRES_USER:llamastack}
-    password: ${env.POSTGRES_PASSWORD:llamastack}
+    host: ${env.POSTGRES_HOST:=localhost}
+    port: ${env.POSTGRES_PORT:=5432}
+    db: ${env.POSTGRES_DB:=llamastack}
+    user: ${env.POSTGRES_USER:=llamastack}
+    password: ${env.POSTGRES_PASSWORD:=llamastack}
  models:
   - metadata:
       embedding_dimension: 384
@@ -106,11 +106,11 @@ data:
     provider_id: vllm-inference
     model_type: llm
   - metadata: {}
-    model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+    model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
     provider_id: vllm-safety
     model_type: llm
   shields:
-  - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
   vector_dbs: []
   datasets: []
   scoring_fns: []
@@ -12,25 +12,25 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+      url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
 vector_io:
-  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMADB_URL:}
+      url: ${env.CHROMADB_URL:+}
 safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -42,34 +42,34 @@ providers:
     config:
       persistence_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
       responses_store:
         type: postgres
-        host: ${env.POSTGRES_HOST:localhost}
-        port: ${env.POSTGRES_PORT:5432}
-        db: ${env.POSTGRES_DB:llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
-        password: ${env.POSTGRES_PASSWORD:llamastack}
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
 telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:}
-      sinks: ${env.TELEMETRY_SINKS:console}
+      service_name: ${env.OTEL_SERVICE_NAME:+console}
+      sinks: ${env.TELEMETRY_SINKS:+console}
 tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
@@ -79,19 +79,19 @@ providers:
     config: {}
 metadata_store:
   type: postgres
-  host: ${env.POSTGRES_HOST:localhost}
-  port: ${env.POSTGRES_PORT:5432}
-  db: ${env.POSTGRES_DB:llamastack}
-  user: ${env.POSTGRES_USER:llamastack}
-  password: ${env.POSTGRES_PASSWORD:llamastack}
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
   table_name: llamastack_kvstore
 inference_store:
   type: postgres
-  host: ${env.POSTGRES_HOST:localhost}
-  port: ${env.POSTGRES_PORT:5432}
-  db: ${env.POSTGRES_DB:llamastack}
-  user: ${env.POSTGRES_USER:llamastack}
-  password: ${env.POSTGRES_PASSWORD:llamastack}
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
 models:
 - metadata:
     embedding_dimension: 384
@@ -103,11 +103,11 @@ models:
   provider_id: vllm-inference
   model_type: llm
 - metadata: {}
-  model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
   provider_id: vllm-safety
   model_type: llm
 shields:
-- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .agents import *  # noqa: F401 F403
+from .agents import *
@@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
 
 
+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+    type: Literal["file_citation"] = "file_citation"
+    file_id: str
+    filename: str
+    index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+    type: Literal["url_citation"] = "url_citation"
+    end_index: int
+    start_index: int
+    title: str
+    url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+    type: Literal["container_file_citation"] = "container_file_citation"
+    container_id: str
+    end_index: int
+    file_id: str
+    filename: str
+    start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+    type: Literal["file_path"] = "file_path"
+    file_id: str
+    index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+    OpenAIResponseAnnotationFileCitation
+    | OpenAIResponseAnnotationCitation
+    | OpenAIResponseAnnotationContainerFileCitation
+    | OpenAIResponseAnnotationFilePath,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
 @json_schema_type
 class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     text: str
     type: Literal["output_text"] = "output_text"
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
 
 
 OpenAIResponseOutputMessageContent = Annotated[
@@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
 
 
+# Must match type Literals of OpenAIResponseInputToolWebSearch below
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+
+
 @json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+    # Must match values of WebSearchToolTypes above
+    type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
+        "web_search"
+    )
     # TODO: actually use search_context_size somewhere...
     search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
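For illustration, a minimal, self-contained sketch (not part of this commit) of how the discriminated union above behaves under pydantic v2; the two model definitions are copied from the diff, reduced to two union members for brevity:

```python
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class OpenAIResponseAnnotationFileCitation(BaseModel):
    type: Literal["file_citation"] = "file_citation"
    file_id: str
    filename: str
    index: int


class OpenAIResponseAnnotationCitation(BaseModel):
    type: Literal["url_citation"] = "url_citation"
    end_index: int
    start_index: int
    title: str
    url: str


# Same shape as the union in the diff, reduced to two members for brevity
OpenAIResponseAnnotations = Annotated[
    OpenAIResponseAnnotationFileCitation | OpenAIResponseAnnotationCitation,
    Field(discriminator="type"),
]

adapter = TypeAdapter(OpenAIResponseAnnotations)
ann = adapter.validate_python(
    {
        "type": "url_citation",
        "start_index": 0,
        "end_index": 10,
        "title": "Example",
        "url": "https://example.com",
    }
)
# The "type" field selected the concrete model
assert isinstance(ann, OpenAIResponseAnnotationCitation)
```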
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .batch_inference import *  # noqa: F401 F403
+from .batch_inference import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .benchmarks import *  # noqa: F401 F403
+from .benchmarks import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .datasetio import *  # noqa: F401 F403
+from .datasetio import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .datasets import *  # noqa: F401 F403
+from .datasets import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import Enum, StrEnum
 from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
@@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
-class DatasetPurpose(str, Enum):
+class DatasetPurpose(StrEnum):
     """
     Purpose of the dataset. Each purpose has a required input data schema.
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .eval import *  # noqa: F401 F403
+from .eval import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .files import *  # noqa: F401 F403
+from .files import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Annotated, Literal, Protocol, runtime_checkable
 
 from fastapi import File, Form, Response, UploadFile
@@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod
 
 
 # OpenAI Files API Models
-class OpenAIFilePurpose(str, Enum):
+class OpenAIFilePurpose(StrEnum):
     """
     Valid purpose values for OpenAI Files API.
     """
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .inference import *  # noqa: F401 F403
+from .inference import *
@@ -20,7 +20,7 @@ from typing_extensions import TypedDict
 from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
+from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .inspect import *  # noqa: F401 F403
+from .inspect import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .models import *  # noqa: F401 F403
+from .models import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field
@@ -22,7 +22,7 @@ class CommonModelFields(BaseModel):
 
 
 @json_schema_type
-class ModelType(str, Enum):
+class ModelType(StrEnum):
     llm = "llm"
     embedding = "embedding"
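An illustrative aside (not in the diff) on why these `str, Enum` to `StrEnum` migrations matter on Python 3.11+: with the plain mixin, `str()` and f-strings render the member name, while `StrEnum` renders the value, which is what configs and logs expect. The class names below are hypothetical:

```python
from enum import Enum, StrEnum  # StrEnum requires Python 3.11+


class OldModelType(str, Enum):   # the pre-commit pattern
    llm = "llm"


class NewModelType(StrEnum):     # the post-commit pattern
    llm = "llm"


assert f"{OldModelType.llm}" == "OldModelType.llm"  # member name leaks into output
assert f"{NewModelType.llm}" == "llm"               # plain value, as intended
```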
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .post_training import *  # noqa: F401 F403
+from .post_training import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .providers import *  # noqa: F401 F403
+from .providers import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .safety import *  # noqa: F401 F403
+from .safety import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring import *  # noqa: F401 F403
+from .scoring import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring_functions import *  # noqa: F401 F403
+from .scoring_functions import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .shields import *  # noqa: F401 F403
+from .shields import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .synthetic_data_generation import *  # noqa: F401 F403
+from .synthetic_data_generation import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .telemetry import *  # noqa: F401 F403
+from .telemetry import *

@@ -4,5 +4,5 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .rag_tool import *  # noqa: F401 F403
-from .tools import *  # noqa: F401 F403
+from .rag_tool import *
+from .tools import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_dbs import *  # noqa: F401 F403
+from .vector_dbs import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_io import *  # noqa: F401 F403
+from .vector_io import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Self
 
 from pydantic import BaseModel, model_validator
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator
 from .conditions import parse_conditions
 
 
-class Action(str, Enum):
+class Action(StrEnum):
     CREATE = "create"
     READ = "read"
     UPDATE = "update"
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any
 
@@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
 
-LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
-LLAMA_STACK_RUN_CONFIG_VERSION = "2"
+LLAMA_STACK_BUILD_CONFIG_VERSION = 2
+LLAMA_STACK_RUN_CONFIG_VERSION = 2
 
 
 RoutingKey = str | list[str]
@@ -159,7 +159,7 @@ class LoggingConfig(BaseModel):
     )
 
 
-class AuthProviderType(str, Enum):
+class AuthProviderType(StrEnum):
     """Supported authentication provider types."""
 
     OAUTH2_TOKEN = "oauth2_token"
@@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception):
     pass
 
 
-class QuotaPeriod(str, Enum):
+class QuotaPeriod(StrEnum):
     DAY = "day"
 
 
@@ -229,7 +229,7 @@ class ServerConfig(BaseModel):
 
 
 class StackRunConfig(BaseModel):
-    version: str = LLAMA_STACK_RUN_CONFIG_VERSION
+    version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
     image_name: str = Field(
         ...,
@@ -300,7 +300,7 @@ a default SQLite store will be used.""",
 
 
 class BuildConfig(BaseModel):
-    version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
+    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
 
     distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
     image_type: str = Field(
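A hedged aside on the `version` type change from `str` to `int`: pydantic v2 in its default lax mode still coerces numeric strings, so configs carrying `version: "2"` should keep validating. A minimal stub with a hypothetical class name:

```python
from pydantic import BaseModel


class VersionedConfigStub(BaseModel):  # hypothetical stand-in for StackRunConfig
    version: int = 2


assert VersionedConfigStub().version == 2
assert VersionedConfigStub(version="2").version == 2  # lax-mode coercion from str
```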
@@ -30,7 +30,13 @@ from llama_stack.apis.inference import (
     ListOpenAIChatCompletionResponse,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAICompletionWithInputMessages,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     Order,
     ResponseFormat,
     SamplingParams,
@@ -41,14 +47,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.log import get_logger
@@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
-    VectorStoreDeleteResponse,
-    VectorStoreListResponse,
-    VectorStoreObject,
-    VectorStoreSearchResponsePage,
-)
-from llama_stack.apis.vector_io.vector_io import (
     VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
     VectorStoreFileContentsResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -127,7 +127,12 @@ class EnvVarError(Exception):
|
||||||
def __init__(self, var_name: str, path: str = ""):
|
def __init__(self, var_name: str, path: str = ""):
|
||||||
self.var_name = var_name
|
self.var_name = var_name
|
||||||
self.path = path
|
self.path = path
|
||||||
super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
|
super().__init__(
|
||||||
|
f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
|
||||||
|
f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
|
||||||
|
f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
|
||||||
|
f"or ensure the environment variable is set."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def replace_env_vars(config: Any, path: str = "") -> Any:
|
def replace_env_vars(config: Any, path: str = "") -> Any:
|
||||||
|
@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
elif isinstance(config, str):
|
elif isinstance(config, str):
|
||||||
# Updated pattern to support both default values (:) and conditional values (+)
|
# Pattern supports bash-like syntax: := for default and :+ for conditional and a optional value
|
||||||
pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}"
|
pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
|
||||||
|
|
||||||
def get_env_var(match):
|
def get_env_var(match: re.Match):
|
||||||
env_var = match.group(1)
|
env_var = match.group(1)
|
||||||
operator = match.group(2) # ':' for default, '+' for conditional
|
operator = match.group(2) # '=' for default, '+' for conditional
|
||||||
value_expr = match.group(3)
|
value_expr = match.group(3)
|
||||||
|
|
||||||
env_value = os.environ.get(env_var)
|
env_value = os.environ.get(env_var)
|
||||||
|
|
||||||
if operator == ":": # Default value syntax: ${env.FOO:default}
|
if operator == "=": # Default value syntax: ${env.FOO:=default}
|
||||||
if not env_value:
|
if not env_value:
|
||||||
if value_expr is None:
|
# value_expr returns empty string (not None) when not matched
|
||||||
|
# This means ${env.FOO:=} is an error
|
||||||
|
if value_expr == "":
|
||||||
raise EnvVarError(env_var, path)
|
raise EnvVarError(env_var, path)
|
||||||
else:
|
else:
|
||||||
value = value_expr
|
value = value_expr
|
||||||
else:
|
else:
|
||||||
value = env_value
|
value = env_value
|
||||||
elif operator == "+": # Conditional value syntax: ${env.FOO+value_if_set}
|
elif operator == "+": # Conditional value syntax: ${env.FOO:+value_if_set}
|
||||||
if env_value:
|
if env_value:
|
||||||
value = value_expr
|
value = value_expr
|
||||||
else:
|
else:
|
||||||
@@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return os.path.expanduser(value)

         try:
-            return re.sub(pattern, get_env_var, config)
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None

     return config
+
+
+def _convert_string_to_proper_type(value: str) -> Any:
+    # This might be tricky depending on what the config type is, if 'str | None' we are
+    # good, if 'str' we need to keep the empty string... 'str | None' is more common and
+    # providers config should be typed this way.
+    # TODO: we could try to load the config class and see if the config has a field with type 'str | None'
+    # and then convert the empty string to None or not
+    if value == "":
+        return None
+
+    lowered = value.lower()
+    if lowered == "true":
+        return True
+    elif lowered == "false":
+        return False
+
+    try:
+        return int(value)
+    except ValueError:
+        pass
+
+    try:
+        return float(value)
+    except ValueError:
+        pass
+
+    return value


 def validate_env_pair(env_pair: str) -> tuple[str, str]:
     """Validate and split an environment variable key-value pair."""
     try:

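For readers following the substitution hunks above, here is a minimal standalone sketch of the new `:=`/`:+` behavior. It is not the Stack's actual code path; `PATTERN` and `substitute` are illustrative names, and the bare-variable error branch is inferred from the surrounding hunks.

import os
import re

# Illustrative re-implementation of the rules in the replace_env_vars hunk above.
PATTERN = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"

def substitute(text: str) -> str:
    def repl(match: re.Match) -> str:
        name, op, expr = match.group(1), match.group(2), match.group(3)
        value = os.environ.get(name)
        if op == "=":  # ${env.FOO:=default}: use FOO if set, else the default
            if not value:
                if expr == "":  # ${env.FOO:=} with no default is an error
                    raise ValueError(f"{name} not set and no default given")
                return expr
            return value
        if op == "+":  # ${env.FOO:+alt}: use alt if FOO is set, else empty
            return expr if value else ""
        if value is None:  # bare ${env.FOO} must be set
            raise ValueError(f"{name} not set")
        return value

    return re.sub(PATTERN, repl, text)

os.environ.pop("PORT", None)
assert substitute("port=${env.PORT:=8080}") == "port=8080"
os.environ["PORT"] = "9000"
assert substitute("port=${env.PORT:=8080}") == "port=9000"
assert substitute("${env.PORT:+tls}") == "tls"

After substitution, `_convert_string_to_proper_type` (shown above) coerces the resulting string, so "true" becomes True, "8080" becomes 8080, and an empty result becomes None.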
@@ -25,7 +25,7 @@ class LlamaStackApi:
     def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
         """Run scoring on a single row"""
         if not scoring_params:
-            scoring_params = {fn_id: None for fn_id in scoring_function_ids}
+            scoring_params = dict.fromkeys(scoring_function_ids)
         return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)

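A recurring mechanical change in this commit replaces `{k: None for k in keys}` comprehensions with `dict.fromkeys` (the kind of rewrite a lint rule such as flake8-comprehensions suggests). The two forms build identical dicts; the only caveat is the usual shared-value one:

ids = ["exact_match", "subset_of"]

# Equivalent spellings; dict.fromkeys defaults the value to None.
assert dict.fromkeys(ids) == {i: None for i in ids}
assert dict.fromkeys(ids, 0) == {i: 0 for i in ids}

# Caveat: the value argument is evaluated once and shared by every key,
# so avoid mutable defaults like [] or {}.
shared = dict.fromkeys(ids, [])
shared["exact_match"].append(1)
assert shared["subset_of"] == [1]  # same list object under both keys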
@@ -33,7 +33,7 @@ CATEGORIES = [
 ]

 # Initialize category levels with default level
-_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES}
+_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)


 def config_to_category_levels(category: str, level: str):

@@ -5,7 +5,7 @@
 # the root directory of this source tree.

 import base64
-from enum import Enum
+from enum import Enum, StrEnum
 from io import BytesIO
 from typing import Annotated, Any, Literal

@@ -171,7 +171,7 @@ class GenerationResult(BaseModel):
     ignore_token: bool


-class QuantizationMode(str, Enum):
+class QuantizationMode(StrEnum):
     none = "none"
     fp8_mixed = "fp8_mixed"
     int4_mixed = "int4_mixed"

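The `(str, Enum)` → `StrEnum` migrations in this commit rely on Python 3.11's `enum.StrEnum`, which keeps equality with raw strings but stringifies to the bare value. A quick contrast (illustrative class names):

from enum import Enum, StrEnum  # StrEnum requires Python 3.11+

class OldStatus(str, Enum):
    OK = "OK"

class NewStatus(StrEnum):
    OK = "OK"

# Both compare equal to the raw string...
assert OldStatus.OK == "OK" and NewStatus.OK == "OK"

# ...but only StrEnum renders as the bare value; Enum.__str__ wins for the
# (str, Enum) mixin, which leaks enum names into logs and serialized output.
assert str(NewStatus.OK) == "OK"
assert str(OldStatus.OK) == "OldStatus.OK"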
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
+from enum import StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse

@@ -225,7 +225,7 @@ def remote_provider_spec(
 )


-class HealthStatus(str, Enum):
+class HealthStatus(StrEnum):
     OK = "OK"
     ERROR = "Error"
     NOT_IMPLEMENTED = "Not Implemented"

@@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    WebSearchToolTypes,
 )
 from llama_stack.apis.common.content_types import TextContentItem
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     Inference,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,

@@ -583,7 +584,7 @@ class OpenAIResponsesImpl:
         from llama_stack.apis.agents.openai_responses import (
             MCPListToolsTool,
         )
-        from llama_stack.apis.tools.tools import Tool
+        from llama_stack.apis.tools import Tool

         mcp_tool_to_server = {}

@@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
             # TODO: Handle other tool types
             if input_tool.type == "function":
                 chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
-            elif input_tool.type == "web_search":
+            elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 tool = await self.tool_groups_api.get_tool(tool_name)
                 if not tool:

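The membership change above lets the Responses implementation accept any of the OpenAI web-search tool-type aliases, not just the literal "web_search". Roughly (the alias list here is assumed; the diff only shows the `WebSearchToolTypes` name):

# Assumed contents of WebSearchToolTypes for illustration.
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]

def resolve_tool_name(input_tool_type: str) -> str | None:
    # The old code compared against "web_search" only; membership also
    # matches the dated preview aliases OpenAI clients may send.
    return "web_search" if input_tool_type in WebSearchToolTypes else None

assert resolve_tool_name("web_search_preview") == "web_search"
assert resolve_tool_name("function") is None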
@@ -208,7 +208,7 @@ class MetaReferenceEvalImpl(
                 for scoring_fn_id in scoring_functions
             }
         else:
-            scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions}
+            scoring_functions_dict = dict.fromkeys(scoring_functions)

         score_response = await self.scoring_api.score(
             input_rows=score_input_rows, scoring_functions=scoring_functions_dict

@@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
+            "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
             "metadata_store": SqliteSqlStoreConfig.sample_run_config(
                 __distro_dir__=__distro_dir__,
                 db_name="files_metadata.db",

@@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel):
     def sample_run_config(
         cls,
         model: str = "Llama3.2-3B-Instruct",
-        checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
-        quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}",
-        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}",
-        max_batch_size: str = "${env.MAX_BATCH_SIZE:1}",
-        max_seq_len: str = "${env.MAX_SEQ_LEN:4096}",
+        checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}",
+        quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}",
+        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}",
+        max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}",
+        max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}",
         **kwargs,
     ) -> dict[str, Any]:
         return {

@@ -44,10 +44,10 @@ class VLLMConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}",
-            "max_tokens": "${env.MAX_TOKENS:4096}",
-            "max_model_len": "${env.MAX_MODEL_LEN:4096}",
-            "max_num_seqs": "${env.MAX_NUM_SEQS:4}",
-            "enforce_eager": "${env.ENFORCE_EAGER:False}",
-            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}",
+            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}",
+            "max_tokens": "${env.MAX_TOKENS:=4096}",
+            "max_model_len": "${env.MAX_MODEL_LEN:=4096}",
+            "max_num_seqs": "${env.MAX_NUM_SEQS:=4}",
+            "enforce_eager": "${env.ENFORCE_EAGER:=False}",
+            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}",
         }

@@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "openai_api_key": "${env.OPENAI_API_KEY:}",
+            "openai_api_key": "${env.OPENAI_API_KEY:+}",
         }

@@ -7,7 +7,7 @@ from typing import Any

 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import (
     ScoreBatchResponse,
     ScoreResponse,

@@ -6,7 +6,7 @@
 import re
 from typing import Any

-from llama_stack.apis.inference.inference import Inference, UserMessage
+from llama_stack.apis.inference import Inference, UserMessage
 from llama_stack.apis.scoring import ScoringResultRow
 from llama_stack.apis.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field, field_validator

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator
 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR


-class TelemetrySink(str, Enum):
+class TelemetrySink(StrEnum):
     OTEL_TRACE = "otel_trace"
     OTEL_METRIC = "otel_metric"
     SQLITE = "sqlite"

@@ -20,12 +20,12 @@ class TelemetrySink(str, Enum):


 class TelemetryConfig(BaseModel):
-    otel_trace_endpoint: str = Field(
-        default="http://localhost:4318/v1/traces",
+    otel_trace_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for traces",
     )
-    otel_metric_endpoint: str = Field(
-        default="http://localhost:4318/v1/metrics",
+    otel_metric_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for metrics",
     )
     service_name: str = Field(

@@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
         return {
-            "service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
-            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
-            "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
+            "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
+            "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }

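One subtlety in the sample config above: the `service_name` default is a zero-width space (`\u200b`), not an empty string. Under the new rules an empty `:=` default is an error, and an empty substitution result would be coerced to None anyway, so a visually-empty but non-empty sentinel keeps the field a valid string. This reading is inferred from the `replace_env_vars` hunk earlier, not stated in the commit:

# The sentinel is invisible but non-empty, so it survives both the
# empty-default check and the empty-string -> None coercion.
sentinel = "\u200b"
assert sentinel != "" and len(sentinel) == 1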
@@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor
 from opentelemetry.trace import Span
 from opentelemetry.trace.span import format_span_id, format_trace_id

+from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER
+

 class SQLiteSpanProcessor(SpanProcessor):
     def __init__(self, conn_string):

@@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor):
                     (
                         trace_id,
                         service_name,
-                        (span_id if span.attributes.get("__root_span__") == "true" else None),
+                        (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None),
                         datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
                         datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
                     ),

@@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
             trace.set_tracer_provider(provider)
             _TRACER_PROVIDER = provider
             if TelemetrySink.OTEL_TRACE in self.config.sinks:
+                if self.config.otel_trace_endpoint is None:
+                    raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
                 span_exporter = OTLPSpanExporter(
                     endpoint=self.config.otel_trace_endpoint,
                 )
                 span_processor = BatchSpanProcessor(span_exporter)
                 trace.get_tracer_provider().add_span_processor(span_processor)
             if TelemetrySink.OTEL_METRIC in self.config.sinks:
+                if self.config.otel_metric_endpoint is None:
+                    raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
                 metric_reader = PeriodicExportingMetricReader(
                     OTLPMetricExporter(
                         endpoint=self.config.otel_metric_endpoint,

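Usage-wise, the two guards above make misconfiguration fail at adapter startup instead of silently exporting to a default URL. A rough sketch (the import path and field defaults are assumed from the hunks, not verified):

from llama_stack.providers.inline.telemetry.meta_reference.config import (  # assumed path
    TelemetryConfig,
    TelemetrySink,
)

# Enabling an OTEL sink without its endpoint now raises at adapter init:
#   ValueError: otel_trace_endpoint is required when OTEL_TRACE is enabled
bad = TelemetryConfig(sinks=[TelemetrySink.OTEL_TRACE])

good = TelemetryConfig(
    sinks=[TelemetrySink.OTEL_TRACE],
    otel_trace_endpoint="http://localhost:4318/v1/traces",
)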
@@ -16,8 +16,7 @@ import numpy as np
 from numpy.typing import NDArray

 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference, InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
+            "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
         }

@@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db",
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
         }

@@ -15,8 +15,8 @@ import numpy as np
 import sqlite_vec
 from numpy.typing import NDArray

-from llama_stack.apis.files.files import Files
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.files import Files
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
     score_range = max_score - min_score
     if score_range > 0:
         return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return {doc_id: 1.0 for doc_id in scores}
+    return dict.fromkeys(scores, 1.0)


 def _weighted_rerank(

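For reference, `_normalize_scores` above is plain min-max normalization with a degenerate-range fallback; a self-contained restatement:

def normalize(scores: dict[str, float]) -> dict[str, float]:
    # Map scores into [0, 1]; when every score is identical the range is 0,
    # so each document gets 1.0 (the dict.fromkeys branch in the hunk).
    lo, hi = min(scores.values()), max(scores.values())
    if hi - lo > 0:
        return {doc: (s - lo) / (hi - lo) for doc, s in scores.items()}
    return dict.fromkeys(scores, 1.0)

assert normalize({"a": 2.0, "b": 4.0, "c": 6.0}) == {"a": 0.0, "b": 0.5, "c": 1.0}
assert normalize({"a": 3.0, "b": 3.0}) == {"a": 1.0, "b": 1.0}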
@@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]:
             api=Api.inference,
             adapter=AdapterSpec(
                 adapter_type="ollama",
-                pip_packages=["ollama", "aiohttp"],
+                pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                 config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                 module="llama_stack.providers.remote.inference.ollama",
             ),

@@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]:
             api=Api.safety,
             adapter=AdapterSpec(
                 adapter_type="sambanova",
-                pip_packages=["litellm"],
+                pip_packages=["litellm", "requests"],
                 module="llama_stack.providers.remote.safety.sambanova",
                 config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
                 provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",

@@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.scoring,
             provider_type="inline::basic",
-            pip_packages=[],
+            pip_packages=["requests"],
             module="llama_stack.providers.inline.scoring.basic",
             config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
             api_dependencies=[

@@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
-            "project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
-            "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
+            "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
+            "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
         }

@@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter:
         Returns:
             Dataset
         """
-        ## add warnings for unsupported params
+        # add warnings for unsupported params
         request_body = {
             "name": dataset_def.identifier,
             "namespace": self.config.dataset_namespace,

@@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
+            "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}",
         }

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )

@@ -24,6 +24,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     ResponseFormatType,
     SamplingParams,

@@ -33,14 +39,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import (

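Many hunks in this commit are the same one-line change: importing from the API package root instead of the `*.inference`-style submodule. That only works if the package re-exports its public names; a minimal sketch of the pattern (the real `__init__.py` contents are assumed, not quoted):

# llama_stack/apis/inference/__init__.py -- illustrative, not the actual file.
# Re-exporting at the package root lets callers write
#     from llama_stack.apis.inference import Inference, OpenAIChatCompletion
# instead of reaching into the inference.inference submodule.
from .inference import *  # noqa: F401,F403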
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )

@@ -9,7 +9,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChoiceDelta,

@@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}",
+            "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
         }

@@ -29,20 +29,18 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
     ToolChoice,
     ToolConfig,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference import (

@@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel):

     @classmethod
     def sample_run_config(
-        cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
+        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
     ) -> dict[str, Any]:
         return {
             "url": url,

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -32,15 +32,6 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
-    ResponseFormat,
-    SamplingParams,
-    TextTruncation,
-    ToolChoice,
-    ToolConfig,
-    ToolDefinition,
-    ToolPromptFormat,
-)
-from llama_stack.apis.inference.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAICompletion,

@@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
+    ResponseFormat,
+    SamplingParams,
+    TextTruncation,
+    ToolChoice,
+    ToolConfig,
+    ToolDefinition,
+    ToolPromptFormat,
 )
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger

@@ -6,7 +6,7 @@

 from dataclasses import dataclass

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )

@@ -10,7 +10,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAICompletion,

@@ -19,7 +19,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,

@@ -28,13 +33,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model
 from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:}",
-            "api_token": "${env.RUNPOD_API_TOKEN:}",
+            "url": "${env.RUNPOD_URL:+}",
+            "api_token": "${env.RUNPOD_API_TOKEN:+}",
         }

@@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator

 from openai import OpenAI

 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import OpenAIEmbeddingsResponse

 # from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY:}",
+            "api_key": "${env.TOGETHER_API_KEY:+}",
         }

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -23,7 +23,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     ResponseFormatType,
     SamplingParams,

@@ -33,13 +38,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel):
     @classmethod
     def validate_tls_verify(cls, v):
         if isinstance(v, str):
-            # Check if it's a boolean string
-            if v.lower() in ("true", "false"):
-                return v.lower() == "true"
             # Otherwise, treat it as a cert path
             cert_path = Path(v).expanduser().resolve()
             if not cert_path.exists():

@@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
     ):
         return {
             "url": url,
-            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
-            "api_token": "${env.VLLM_API_TOKEN:fake}",
-            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
+            "api_token": "${env.VLLM_API_TOKEN:=fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
         }

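The deleted boolean-string branch is redundant under the new substitution rules: `_convert_string_to_proper_type` (from the earlier hunk) already turns "true"/"false" into booleans before pydantic validation runs, so any string that reaches `validate_tls_verify` should be a certificate path. Assuming that function is in scope:

# "true"/"false" from ${env.VLLM_TLS_VERIFY:=true} become real booleans during
# substitution, so the validator's isinstance(v, str) branch only ever sees paths.
assert _convert_string_to_proper_type("true") is True
assert _convert_string_to_proper_type("false") is False
assert _convert_string_to_proper_type("/etc/ssl/certs/ca.pem") == "/etc/ssl/certs/ca.pem"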
@@ -38,9 +38,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAICompletion,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,

@@ -49,12 +53,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
 from llama_stack.models.llama.sku_list import all_registered_models

@@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
-            "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}",
+            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
+            "api_key": "${env.WATSONX_API_KEY:+}",
+            "project_id": "${env.WATSONX_PROJECT_ID:+}",
         }

@@ -18,10 +18,16 @@ from llama_stack.apis.inference import (
     CompletionRequest,
     EmbeddingsResponse,
     EmbeddingTaskType,
+    GreedySamplingStrategy,
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,

@@ -29,14 +35,6 @@ from llama_stack.apis.inference import (
     ToolConfig,
     ToolDefinition,
     ToolPromptFormat,
-)
-from llama_stack.apis.inference.inference import (
-    GreedySamplingStrategy,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )

@@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
-            "project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
-            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
+            "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
+            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}",
         }

@@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
-            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}",
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}",
+            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}",
         }

@@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.BRAVE_SEARCH_API_KEY:}",
+            "api_key": "${env.BRAVE_SEARCH_API_KEY:+}",
             "max_results": 3,
         }

@@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.TAVILY_SEARCH_API_KEY:}",
+            "api_key": "${env.TAVILY_SEARCH_API_KEY:+}",
             "max_results": 3,
         }

@@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}",
+            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}",
         }

@@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(
         cls,
-        host: str = "${env.PGVECTOR_HOST:localhost}",
-        port: int = "${env.PGVECTOR_PORT:5432}",
+        host: str = "${env.PGVECTOR_HOST:=localhost}",
+        port: int = "${env.PGVECTOR_PORT:=5432}",
         db: str = "${env.PGVECTOR_DB}",
         user: str = "${env.PGVECTOR_USER}",
         password: str = "${env.PGVECTOR_PASSWORD}",

@@ -23,6 +23,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,

@@ -31,16 +38,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
-from llama_stack.apis.models.models import Model
+from llama_stack.apis.models import Model
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -8,7 +8,7 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (

@@ -95,27 +95,25 @@ from llama_stack.apis.inference import (
     CompletionResponse,
     CompletionResponseStreamChunk,
     GreedySamplingStrategy,
-    Message,
-    SamplingParams,
-    SystemMessage,
-    TokenLogProbs,
-    ToolChoice,
-    ToolResponseMessage,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    UserMessage,
-)
-from llama_stack.apis.inference.inference import (
     JsonSchemaResponseFormat,
+    Message,
     OpenAIChatCompletion,
     OpenAICompletion,
     OpenAICompletionChoice,
     OpenAIEmbeddingData,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
+    SamplingParams,
+    SystemMessage,
+    TokenLogProbs,
+    ToolChoice,
     ToolConfig,
+    ToolResponseMessage,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    UserMessage,
 )
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChoice as OpenAIChatCompletionChoice,
 )
 from llama_stack.models.llama.datatypes import (

@@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig):
         return {
             "type": "redis",
             "namespace": None,
-            "host": "${env.REDIS_HOST:localhost}",
-            "port": "${env.REDIS_PORT:6379}",
+            "host": "${env.REDIS_HOST:=localhost}",
+            "port": "${env.REDIS_PORT:=6379}",
         }


@@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig):
         return {
             "type": "sqlite",
             "namespace": None,
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }


@@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig):
         return {
             "type": "postgres",
             "namespace": None,
-            "host": "${env.POSTGRES_HOST:localhost}",
-            "port": "${env.POSTGRES_PORT:5432}",
-            "db": "${env.POSTGRES_DB:llamastack}",
-            "user": "${env.POSTGRES_USER:llamastack}",
-            "password": "${env.POSTGRES_PASSWORD:llamastack}",
-            "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
         }

     @classmethod

@@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig):
         return {
             "type": "mongodb",
             "namespace": None,
-            "host": "${env.MONGODB_HOST:localhost}",
-            "port": "${env.MONGODB_PORT:5432}",
+            "host": "${env.MONGODB_HOST:=localhost}",
+            "port": "${env.MONGODB_PORT:=5432}",
             "db": "${env.MONGODB_DB}",
             "user": "${env.MONGODB_USER}",
             "password": "${env.MONGODB_PASSWORD}",
-            "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}",
+            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
         }

@@ -12,8 +12,7 @@ import uuid
 from abc import ABC, abstractmethod
 from typing import Any

-from llama_stack.apis.files import Files
-from llama_stack.apis.files.files import OpenAIFileObject
+from llama_stack.apis.files import Files, OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
         return cls(
             type="sqlite",
-            db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         )

     @property

@@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, **kwargs):
         return cls(
             type="postgres",
-            host="${env.POSTGRES_HOST:localhost}",
-            port="${env.POSTGRES_PORT:5432}",
-            db="${env.POSTGRES_DB:llamastack}",
-            user="${env.POSTGRES_USER:llamastack}",
-            password="${env.POSTGRES_PASSWORD:llamastack}",
+            host="${env.POSTGRES_HOST:=localhost}",
+            port="${env.POSTGRES_PORT:=5432}",
+            db="${env.POSTGRES_DB:=llamastack}",
+            user="${env.POSTGRES_USER:=llamastack}",
+            password="${env.POSTGRES_PASSWORD:=llamastack}",
         )

@@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000

 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
+# The logical root span may not be visible to this process if a parent context
+# is passed in. The local root span is the first local span in a trace.
+LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"


 def trace_id_to_str(trace_id: int) -> str:

@@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont

     trace_id = generate_trace_id()
     context = TraceContext(BACKGROUND_LOGGER, trace_id)
-    attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {})
+    # Mark this span as the root for the trace for now. The processing of
+    # traceparent context, if supplied, comes later and will result in the
+    # ROOT_SPAN_MARKERS being removed. Also mark this as the 'local' root,
+    # i.e. the root of the spans originating in this process, as this is
+    # needed to ensure that we insert this 'local' root span's id into
+    # the trace record in the sqlite store.
+    attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
     context.push_span(name, attributes)

     CURRENT_TRACE_CONTEXT.set(context)

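On the `start_trace` change: the `|` dict union is left-to-right, so caller-supplied attributes override the marker defaults on key collisions. A small check of that ordering:

ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"

# Rightmost operand wins in dict union, so user attributes take precedence.
attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | {"service": "demo"}
assert attributes == {
    "__root__": True,
    "__root_span__": True,
    "__local_root_span__": True,
    "service": "demo",
}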
@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 distribution_spec:
   description: Use AWS Bedrock for running LLM inference and safety
   providers:

@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 image_name: bedrock
 apis:
 - agents

@@ -22,7 +22,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock

@@ -34,17 +34,17 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
       responses_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference

@@ -52,7 +52,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface

@@ -60,14 +60,14 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
   scoring:
   - provider_id: basic
     provider_type: inline::basic

@@ -78,17 +78,17 @@ providers:
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
+      openai_api_key: ${env.OPENAI_API_KEY:+}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime

@@ -98,10 +98,10 @@ providers:
     config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db
|
||||||
inference_store:
|
inference_store:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db
|
||||||
models:
|
models:
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta.llama3-1-8b-instruct-v1:0
|
model_id: meta.llama3-1-8b-instruct-v1:0
|
||||||
|
|
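Across these configs the env-placeholder syntax migrates from ${env.VAR:default} to the bash-style ${env.VAR:=default} (use the default when the variable is unset) and, where the default used to be empty, to ${env.VAR:+...} (expansion conditional on the variable being set). A rough sketch of a resolver for the := form only, assuming bash-like semantics; resolve_env_defaults and the regex are illustrative, not the actual llama-stack implementation:

import os
import re

# Matches ${env.NAME:=default}; hypothetical and simplified: no nesting,
# no `:+` conditional handling, and the default may be empty.
_ENV_DEFAULT = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):=([^}]*)\}")

def resolve_env_defaults(text: str) -> str:
    # Substitute each placeholder with the environment value when set,
    # otherwise with the literal default that follows ':='.
    return _ENV_DEFAULT.sub(lambda m: os.environ.get(m.group(1), m.group(2)), text)

print(resolve_env_defaults("db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db"))
# With SQLITE_STORE_DIR unset: db_path: ~/.llama/distributions/bedrock/registry.db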
@@ -1,4 +1,4 @@
-version: '2'
+version: 2
 distribution_spec:
   description: Use Cerebras for running LLM inference
   providers:
Some files were not shown because too many files have changed in this diff.