Merge branch 'main' into routeur

commit 3770963130
Sébastien Han, 2025-11-24 14:58:43 +01:00 (committed by GitHub)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
255 changed files with 18366 additions and 1909 deletions

.github/CODEOWNERS

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern


@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{ inputs.client-version == 'latest' }}
+      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      with:
+        repository: llamastack/llama-stack-client-typescript
+        ref: main
+        path: .ts-client-checkout
+    - name: Set TS_CLIENT_PATH
+      id: set-path
+      shell: bash
+      run: |
+        if [ "${{ inputs.client-version }}" = "latest" ]; then
+          echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
+        else
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi


@@ -93,11 +93,27 @@ jobs:
           suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

+      - name: Setup Node.js for TypeScript client tests
+        if: ${{ matrix.client == 'server' }}
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: tests/integration/client-typescript/package-lock.json
+
+      - name: Setup TypeScript client
+        if: ${{ matrix.client == 'server' }}
+        id: setup-ts-client
+        uses: ./.github/actions/setup-typescript-client
+        with:
+          client-version: ${{ matrix.client-version }}
+
       - name: Run tests
         if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
+          TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
         with:
           stack-config: >-
             ${{ matrix.config.stack_config


@@ -43,7 +43,41 @@ env:
   # Stainless organization dashboard

 jobs:
+  compute-branch:
+    runs-on: ubuntu-latest
+    outputs:
+      preview_branch: ${{ steps.compute.outputs.preview_branch }}
+      base_branch: ${{ steps.compute.outputs.base_branch }}
+      merge_branch: ${{ steps.compute.outputs.merge_branch }}
+    steps:
+      - name: Compute branch names
+        id: compute
+        run: |
+          HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
+          BASE_REPO="${{ github.repository }}"
+          BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
+          FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
+
+          if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
+            # Fork PR: prefix with fork owner for isolation
+            if [ -z "$FORK_OWNER" ]; then
+              echo "Error: Fork PR detected but fork owner is empty" >&2
+              exit 1
+            fi
+            PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
+          else
+            # Same-repo PR
+            PREVIEW_BRANCH="preview/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${BRANCH_NAME}"
+          fi
+
+          echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+          echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
+          echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+
   preview:
+    needs: compute-branch
     if: github.event.action != 'closed'
     runs-on: ubuntu-latest
     permissions:
@@ -59,8 +93,6 @@ jobs:
           ref: ${{ github.event.pull_request.head.sha }}
           fetch-depth: 2

-      # This action builds preview SDKs from the OpenAPI spec changes and
-      # posts/updates a comment on the PR with build results and links to the preview.
       - name: Run preview builds
         uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
         with:
@@ -73,8 +105,11 @@ jobs:
           base_sha: ${{ github.event.pull_request.base.sha }}
           base_ref: ${{ github.event.pull_request.base.ref }}
           head_sha: ${{ github.event.pull_request.head.sha }}
+          branch: ${{ needs.compute-branch.outputs.preview_branch }}
+          base_branch: ${{ needs.compute-branch.outputs.base_branch }}

   merge:
+    needs: compute-branch
     if: github.event.action == 'closed' && github.event.pull_request.merged == true
     runs-on: ubuntu-latest
     permissions:
@@ -91,11 +126,11 @@ jobs:
           fetch-depth: 2

       # Note that this only merges in changes that happened on the last build on
-      # preview/${{ github.head_ref }}. It's possible that there are OAS/config
-      # changes that haven't been built, if the preview-sdk job didn't finish
+      # the computed preview branch. It's possible that there are OAS/config
+      # changes that haven't been built, if the preview job didn't finish
       # before this step starts. In theory we want to wait for all builds
-      # against preview/${{ github.head_ref }} to complete, but assuming that
-      # the preview-sdk job happens before the PR merge, it should be fine.
+      # against the preview branch to complete, but assuming that
+      # the preview job happens before the PR merge, it should be fine.
       - name: Run merge build
         uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
         with:
@@ -108,3 +143,4 @@ jobs:
           base_sha: ${{ github.event.pull_request.base.sha }}
           base_ref: ${{ github.event.pull_request.base.ref }}
           head_sha: ${{ github.event.pull_request.head.sha }}
+          merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
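The effect of the compute-branch job is easiest to see with concrete values. A minimal local sketch of the same naming scheme (the repository and branch names below are made up; in CI they come from the pull request event payload):

```bash
# Fork PR: head repo differs from base repo, so branches are namespaced by fork owner.
HEAD_REPO="octocat/llama-stack"      # hypothetical fork
BASE_REPO="llamastack/llama-stack"   # hypothetical base repo
BRANCH_NAME="fix-routing"
FORK_OWNER="octocat"

if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
  PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"    # preview/octocat/fix-routing
  BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"  # preview/base/octocat/fix-routing
else
  PREVIEW_BRANCH="preview/${BRANCH_NAME}"                  # preview/fix-routing
  BASE_BRANCH="preview/base/${BRANCH_NAME}"                # preview/base/fix-routing
fi

echo "preview=${PREVIEW_BRANCH} base=${BASE_BRANCH}"
```

The merge branch is always set to the preview branch, so the merge job replays whatever the last preview build produced on that branch.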

.gitignore

@@ -35,3 +35,5 @@ docs/static/imported-files/
 docs/docs/api-deprecated/
 docs/docs/api-experimental/
 docs/docs/api/
+tests/integration/client-typescript/node_modules/
+.ts-client-checkout/


@@ -199,6 +199,27 @@ repos:
           echo;
           exit 1;
         } || true
+      - id: check-api-independence
+        name: Ensure llama_stack_api does not import llama_stack
+        entry: bash
+        language: system
+        pass_filenames: false
+        require_serial: true
+        always_run: true
+        files: ^src/llama_stack_api/.*$
+        args:
+          - -c
+          - |
+            API_DIR="src/llama_stack_api"
+            grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' "$API_DIR" 2>/dev/null && {
+              echo "llama_stack_api must not import llama_stack";
+              exit 1;
+            }
+            [ -f "$API_DIR/pyproject.toml" ] && grep -n 'llama_stack[^_]' "$API_DIR/pyproject.toml" && {
+              echo "llama_stack_api must not depend on llama_stack in pyproject.toml";
+              exit 1;
+            }
+            exit 0

 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
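The new hook is plain grep, so the same check can be exercised by hand outside pre-commit. A sketch of a manual run from the repository root (the pattern and path are copied from the hook above; the success/failure messages are illustrative):

```bash
API_DIR="src/llama_stack_api"
# Flag any non-comment `import llama_stack` / `from llama_stack` in the API package.
if grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' "$API_DIR"; then
  echo "FAIL: llama_stack_api must not import llama_stack" >&2
  exit 1
fi
echo "OK: llama_stack_api is independent of llama_stack"
```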


@@ -10,83 +10,6 @@
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

-### ✨🎉 Llama 4 Support 🎉✨
-
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-
-### CLI
-
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-  inference chat-completion \
-  --model-id meta-llama/$MODEL \
-  --message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-
-### Python SDK
-
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-</details>
-
 ### 🚀 One-Line Installer 🚀

 To try Llama Stack locally, run:


@@ -5,4 +5,7 @@ These are the source-of-truth configuration files used to generate the Stainless
 A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files.

-These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `scripts/run_openapi_generator.sh` script.
+These files go hand-in-hand. Both `openapi.yml` and `config.yml` are generated by `scripts/run_openapi_generator.sh`:
+
+- `openapi.yml` comes from the FastAPI-based generator.
+- `config.yml` is rendered from `scripts/openapi_generator/stainless_config/config_data.py` so the Stainless config stays in lock-step with the spec.
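A minimal regeneration run, assuming a checkout of the repository root (the script path comes from the README above; exactly which files show up as modified is an assumption):

```bash
# Regenerate the OpenAPI spec and the derived Stainless config in one step.
./scripts/run_openapi_generator.sh

# If the API definitions changed, the generated files should now show as modified.
git status --short
```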


@@ -1,20 +1,16 @@
 # yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json

 organization:
-  # Name of your organization or company, used to determine the name of the client
-  # and headings.
   name: llama-stack-client
   docs: https://llama-stack.readthedocs.io/en/latest/
   contact: llamastack@meta.com

 security:
   - {}
   - BearerAuth: []
 security_schemes:
   BearerAuth:
     type: http
     scheme: bearer

-# `targets` define the output targets and their customization options, such as
-# whether to emit the Node SDK and what it's package name should be.
 targets:
   node:
     package_name: llama-stack-client
@@ -40,27 +36,19 @@ targets:
     options:
       enable_v2: true
       back_compat_use_shared_package: false

-# `client_settings` define settings for the API client, such as extra constructor
-# arguments (used for authentication), retry behavior, idempotency, etc.
 client_settings:
   default_env_prefix: LLAMA_STACK_CLIENT
   opts:
     api_key:
       type: string
       read_env: LLAMA_STACK_CLIENT_API_KEY
-      auth: { security_scheme: BearerAuth }
+      auth:
+        security_scheme: BearerAuth
       nullable: true

-# `environments` are a map of the name of the environment (e.g. "sandbox",
-# "production") to the corresponding url to use.
 environments:
   production: http://any-hosted-llama-stack.com

-# `pagination` defines [pagination schemes] which provides a template to match
-# endpoints and generate next-page and auto-pagination helpers in the SDKs.
 pagination:
   - name: datasets_iterrows
     type: offset
     request:
       dataset_id:
@@ -80,7 +68,7 @@ pagination:
         type: integer
       x-stainless-pagination-property:
         purpose: offset_count_start_field
   - name: openai_cursor_page
     type: cursor
     request:
       limit:
@@ -99,12 +87,72 @@ pagination:
         type: string
       x-stainless-pagination-property:
         purpose: next_cursor_field

-# `resources` define the structure and organziation for your API, such as how
-# methods and models are grouped together and accessed. See the [configuration
-# guide] for more information.
-#
-# [configuration guide]:
-#   https://app.stainlessapi.com/docs/guides/configure#resources
+settings:
+  license: MIT
+  unwrap_response_fields:
+    - data
+  file_header: 'Copyright (c) Meta Platforms, Inc. and affiliates.
+    All rights reserved.
+    This source code is licensed under the terms described in the LICENSE file in
+    the root directory of this source tree.
+    '
+openapi:
+  transformations:
+    - command: mergeObject
+      reason: Better return_type using enum
+      args:
+        target:
+          - $.components.schemas
+        object:
+          ReturnType:
+            additionalProperties: false
+            properties:
+              type:
+                enum:
+                  - string
+                  - number
+                  - boolean
+                  - array
+                  - object
+                  - json
+                  - union
+                  - chat_completion_input
+                  - completion_input
+                  - agent_turn_input
+            required:
+              - type
+            type: object
+    - command: replaceProperties
+      reason: Replace return type properties with better model (see above)
+      args:
+        filter:
+          only:
+            - $.components.schemas.ScoringFn.properties.return_type
+            - $.components.schemas.RegisterScoringFunctionRequest.properties.return_type
+        value:
+          $ref: '#/components/schemas/ReturnType'
+    - command: oneOfToAnyOf
+      reason: Prism (mock server) doesn't like one of our requests as it technically
+        matches multiple variants
+readme:
+  example_requests:
+    default:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: {}
+    headline:
+      type: request
+      endpoint: get /v1/models
+      params: {}
+    pagination:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: {}
 resources:
   $shared:
     models:
@@ -128,19 +176,17 @@ resources:
     methods:
       get: get /v1/tools/{tool_name}
       list:
-        endpoint: get /v1/tools
         paginated: false
+        endpoint: get /v1/tools
   tool_runtime:
     models:
       tool_def: ToolDef
       tool_invocation_result: ToolInvocationResult
     methods:
       list_tools:
-        endpoint: get /v1/tool-runtime/list-tools
         paginated: false
+        endpoint: get /v1/tool-runtime/list-tools
       invoke_tool: post /v1/tool-runtime/invoke
   responses:
     models:
       response_object_stream: OpenAIResponseObjectStream
@@ -148,10 +194,10 @@ resources:
     methods:
       create:
         type: http
-        endpoint: post /v1/responses
         streaming:
           stream_event_model: responses.response_object_stream
           param_discriminator: stream
+        endpoint: post /v1/responses
       retrieve: get /v1/responses/{response_id}
       list:
         type: http
@@ -164,9 +210,8 @@ resources:
         methods:
           list:
             type: http
-            endpoint: get /v1/responses/{response_id}/input_items
            paginated: false
+            endpoint: get /v1/responses/{response_id}/input_items
   prompts:
     models:
       prompt: Prompt
@@ -174,8 +219,8 @@ resources:
     methods:
       create: post /v1/prompts
       list:
-        endpoint: get /v1/prompts
         paginated: false
+        endpoint: get /v1/prompts
       retrieve: get /v1/prompts/{prompt_id}
       update: post /v1/prompts/{prompt_id}
       delete: delete /v1/prompts/{prompt_id}
@@ -184,9 +229,8 @@ resources:
       versions:
         methods:
           list:
-            endpoint: get /v1/prompts/{prompt_id}/versions
            paginated: false
+            endpoint: get /v1/prompts/{prompt_id}/versions
   conversations:
     models:
       conversation_object: Conversation
@@ -216,7 +260,6 @@ resources:
         delete:
           type: http
           endpoint: delete /v1/conversations/{conversation_id}/items/{item_id}
   inspect:
     models:
       healthInfo: HealthInfo
@@ -226,13 +269,11 @@ resources:
     methods:
       health: get /v1/health
       version: get /v1/version
   embeddings:
     models:
       create_embeddings_response: OpenAIEmbeddingsResponse
     methods:
       create: post /v1/embeddings
   chat:
     models:
       chat_completion_chunk: OpenAIChatCompletionChunk
@@ -241,14 +282,14 @@ resources:
       methods:
         create:
           type: http
-          endpoint: post /v1/chat/completions
           streaming:
             stream_event_model: chat.chat_completion_chunk
             param_discriminator: stream
+          endpoint: post /v1/chat/completions
         list:
           type: http
-          endpoint: get /v1/chat/completions
          paginated: false
+          endpoint: get /v1/chat/completions
         retrieve:
           type: http
           endpoint: get /v1/chat/completions/{completion_id}
@@ -256,17 +297,15 @@ resources:
     methods:
       create:
         type: http
-        endpoint: post /v1/completions
         streaming:
           param_discriminator: stream
+        endpoint: post /v1/completions
   vector_io:
     models:
       queryChunksResponse: QueryChunksResponse
     methods:
       insert: post /v1/vector-io/insert
       query: post /v1/vector-io/query
   vector_stores:
     models:
       vector_store: VectorStoreObject
@@ -275,8 +314,7 @@ resources:
       vector_store_search_response: VectorStoreSearchResponsePage
     methods:
       create: post /v1/vector_stores
-      list:
-        endpoint: get /v1/vector_stores
+      list: get /v1/vector_stores
       retrieve: get /v1/vector_stores/{vector_store_id}
       update: post /v1/vector_stores/{vector_store_id}
       delete: delete /v1/vector_stores/{vector_store_id}
@@ -301,15 +339,14 @@ resources:
         retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
         list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
         cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
   models:
     models:
       model: OpenAIModel
       list_models_response: OpenAIListModelsResponse
     methods:
       list:
-        endpoint: get /v1/models
         paginated: false
+        endpoint: get /v1/models
       retrieve: get /v1/models/{model_id}
       register: post /v1/models
       unregister: delete /v1/models/{model_id}
@@ -317,38 +354,33 @@ resources:
       openai:
         methods:
           list:
-            endpoint: get /v1/models
            paginated: false
+            endpoint: get /v1/models
   providers:
     models:
       list_providers_response: ListProvidersResponse
     methods:
       list:
-        endpoint: get /v1/providers
         paginated: false
+        endpoint: get /v1/providers
       retrieve: get /v1/providers/{provider_id}
   routes:
     models:
       list_routes_response: ListRoutesResponse
     methods:
       list:
-        endpoint: get /v1/inspect/routes
         paginated: false
+        endpoint: get /v1/inspect/routes
   moderations:
     models:
       create_response: ModerationObject
     methods:
       create: post /v1/moderations
   safety:
     models:
       run_shield_response: RunShieldResponse
     methods:
       run_shield: post /v1/safety/run-shield
   shields:
     models:
       shield: Shield
@@ -356,53 +388,48 @@ resources:
     methods:
       retrieve: get /v1/shields/{identifier}
       list:
-        endpoint: get /v1/shields
         paginated: false
+        endpoint: get /v1/shields
       register: post /v1/shields
       delete: delete /v1/shields/{identifier}
   scoring:
     methods:
       score: post /v1/scoring/score
       score_batch: post /v1/scoring/score-batch
   scoring_functions:
-    methods:
-      retrieve: get /v1/scoring-functions/{scoring_fn_id}
-      list:
-        endpoint: get /v1/scoring-functions
-        paginated: false
-      register: post /v1/scoring-functions
-      unregister: delete /v1/scoring-functions/{scoring_fn_id}
     models:
       scoring_fn: ScoringFn
       scoring_fn_params: ScoringFnParams
       list_scoring_functions_response: ListScoringFunctionsResponse
+    methods:
+      retrieve: get /v1/scoring-functions/{scoring_fn_id}
+      list:
+        paginated: false
+        endpoint: get /v1/scoring-functions
+      register: post /v1/scoring-functions
+      unregister: delete /v1/scoring-functions/{scoring_fn_id}
   files:
+    models:
+      file: OpenAIFileObject
+      list_files_response: ListOpenAIFileResponse
+      delete_file_response: OpenAIFileDeleteResponse
     methods:
       create: post /v1/files
       list: get /v1/files
       retrieve: get /v1/files/{file_id}
       delete: delete /v1/files/{file_id}
       content: get /v1/files/{file_id}/content
-    models:
-      file: OpenAIFileObject
-      list_files_response: ListOpenAIFileResponse
-      delete_file_response: OpenAIFileDeleteResponse
   batches:
     methods:
       create: post /v1/batches
       list: get /v1/batches
       retrieve: get /v1/batches/{batch_id}
       cancel: post /v1/batches/{batch_id}/cancel
   alpha:
     subresources:
       inference:
         methods:
           rerank: post /v1alpha/inference/rerank
       post_training:
         models:
           algorithm_config: AlgorithmConfig
@@ -418,39 +445,35 @@ resources:
               cancel: post /v1alpha/post-training/job/cancel
               status: get /v1alpha/post-training/job/status
               list:
+                paginated: false
                 endpoint: get /v1alpha/post-training/jobs
-                paginated: false
       benchmarks:
-        methods:
-          retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
-          list:
-            endpoint: get /v1alpha/eval/benchmarks
-            paginated: false
-          register: post /v1alpha/eval/benchmarks
-          unregister: delete /v1alpha/eval/benchmarks/{benchmark_id}
         models:
           benchmark: Benchmark
           list_benchmarks_response: ListBenchmarksResponse
+        methods:
+          retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
+          list:
+            paginated: false
+            endpoint: get /v1alpha/eval/benchmarks
+          register: post /v1alpha/eval/benchmarks
+          unregister: delete /v1alpha/eval/benchmarks/{benchmark_id}
       eval:
+        models:
+          evaluate_response: EvaluateResponse
+          benchmark_config: BenchmarkConfig
+          job: Job
         methods:
           evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
           run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
           evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
           run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
         subresources:
           jobs:
             methods:
               cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
               status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
               retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
-        models:
-          evaluate_response: EvaluateResponse
-          benchmark_config: BenchmarkConfig
-          job: Job
   beta:
     subresources:
       datasets:
@@ -460,74 +483,8 @@ resources:
         methods:
           register: post /v1beta/datasets
           retrieve: get /v1beta/datasets/{dataset_id}
           list:
-            endpoint: get /v1beta/datasets
             paginated: false
+            endpoint: get /v1beta/datasets
           unregister: delete /v1beta/datasets/{dataset_id}
           iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
           appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
-
-settings:
-  license: MIT
-  unwrap_response_fields: [data]
-  file_header: |
-    Copyright (c) Meta Platforms, Inc. and affiliates.
-    All rights reserved.
-    This source code is licensed under the terms described in the LICENSE file in
-    the root directory of this source tree.
-openapi:
-  transformations:
-    - command: mergeObject
-      reason: Better return_type using enum
-      args:
-        target:
-          - "$.components.schemas"
-        object:
-          ReturnType:
-            additionalProperties: false
-            properties:
-              type:
-                enum:
-                  - string
-                  - number
-                  - boolean
-                  - array
-                  - object
-                  - json
-                  - union
-                  - chat_completion_input
-                  - completion_input
-                  - agent_turn_input
-            required:
-              - type
-            type: object
-    - command: replaceProperties
-      reason: Replace return type properties with better model (see above)
-      args:
-        filter:
-          only:
-            - "$.components.schemas.ScoringFn.properties.return_type"
-            - "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type"
-        value:
-          $ref: "#/components/schemas/ReturnType"
-    - command: oneOfToAnyOf
-      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
-
-# `readme` is used to configure the code snippets that will be rendered in the
-# README.md of various SDKs. In particular, you can change the `headline`
-# snippet's endpoint and the arguments to call it with.
-readme:
-  example_requests:
-    default:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: &ref_0 {}
-    headline:
-      type: request
-      endpoint: get /v1/models
-      params: *ref_0
-    pagination:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: {}


@@ -1820,7 +1820,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose'
+              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
         required: true
       deprecated: true
   /v1/scoring-functions/{scoring_fn_id}:
@@ -3310,7 +3310,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterDatasetRequestLoose'
+              $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
       deprecated: true
   /v1beta/datasets/{dataset_id}:
@@ -3567,7 +3567,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/BenchmarkConfig'
+              $ref: '#/components/schemas/RunEvalRequest'
         required: true
   /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
@@ -6739,9 +6739,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
             - type: string
@@ -7141,6 +7142,11 @@ components:
           anyOf:
             - type: string
            - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
             - type: string
@@ -7267,9 +7273,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
             - type: string
@@ -9871,9 +9878,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
@@ -10602,6 +10621,14 @@ components:
         - scores
       title: EvaluateResponse
       description: The response from an evaluation.
+    RunEvalRequest:
+      properties:
+        benchmark_config:
+          $ref: '#/components/schemas/BenchmarkConfig'
+      type: object
+      required:
+        - benchmark_config
+      title: RunEvalRequest
     Job:
       properties:
         job_id:
@@ -11185,6 +11212,67 @@ components:
         - $ref: '#/components/schemas/CompletionInputType'
           title: CompletionInputType
       title: StringType | ... (9 variants)
+    RegisterScoringFunctionRequest:
+      properties:
+        scoring_fn_id:
+          type: string
+          title: Scoring Fn Id
+        description:
+          type: string
+          title: Description
+        return_type:
+          anyOf:
+            - $ref: '#/components/schemas/StringType'
+              title: StringType
+            - $ref: '#/components/schemas/NumberType'
+              title: NumberType
+            - $ref: '#/components/schemas/BooleanType'
+              title: BooleanType
+            - $ref: '#/components/schemas/ArrayType'
+              title: ArrayType
+            - $ref: '#/components/schemas/ObjectType'
+              title: ObjectType
+            - $ref: '#/components/schemas/JsonType'
+              title: JsonType
+            - $ref: '#/components/schemas/UnionType'
+              title: UnionType
+            - $ref: '#/components/schemas/ChatCompletionInputType'
+              title: ChatCompletionInputType
+            - $ref: '#/components/schemas/CompletionInputType'
+              title: CompletionInputType
+          title: StringType | ... (9 variants)
+        provider_scoring_fn_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        provider_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+        params:
+          anyOf:
+            - oneOf:
+                - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  title: LLMAsJudgeScoringFnParams
+                - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                  title: RegexParserScoringFnParams
+                - $ref: '#/components/schemas/BasicScoringFnParams'
+                  title: BasicScoringFnParams
+              discriminator:
+                propertyName: type
+                mapping:
+                  basic: '#/components/schemas/BasicScoringFnParams'
+                  llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+              title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+            - type: 'null'
+          title: Params
+      type: object
+      required:
+        - scoring_fn_id
+        - description
+        - return_type
+      title: RegisterScoringFunctionRequest
     RegisterShieldRequest:
       properties:
         shield_id:
@@ -11243,6 +11331,31 @@ components:
         - $ref: '#/components/schemas/RowsDataSource'
           title: RowsDataSource
       title: URIDataSource | RowsDataSource
+    RegisterDatasetRequest:
+      properties:
+        purpose:
+          $ref: '#/components/schemas/DatasetPurpose'
+        source:
+          anyOf:
+            - $ref: '#/components/schemas/URIDataSource'
+              title: URIDataSource
+            - $ref: '#/components/schemas/RowsDataSource'
+              title: RowsDataSource
+          title: URIDataSource | RowsDataSource
+        metadata:
+          anyOf:
+            - additionalProperties: true
+              type: object
+            - type: 'null'
+        dataset_id:
+          anyOf:
+            - type: string
+            - type: 'null'
+      type: object
+      required:
+        - purpose
+        - source
+      title: RegisterDatasetRequest
     RegisterBenchmarkRequest:
       properties:
         benchmark_id:
@@ -11979,41 +12092,6 @@ components:
       required:
         - reasoning_tokens
       title: OutputTokensDetails
-    RegisterDatasetRequestLoose:
-      properties:
-        purpose:
-          title: Purpose
-        source:
-          title: Source
-        metadata:
-          title: Metadata
-        dataset_id:
-          title: Dataset Id
-      type: object
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequestLoose
-    RegisterScoringFunctionRequestLoose:
-      properties:
-        scoring_fn_id:
-          title: Scoring Fn Id
-        description:
-          title: Description
-        return_type:
-          title: Return Type
-        provider_scoring_fn_id:
-          title: Provider Scoring Fn Id
-        provider_id:
-          title: Provider Id
-        params:
-          title: Params
-      type: object
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequestLoose
     SearchRankingOptions:
       properties:
         ranker:
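For reference, a dataset registration call shaped by the new typed RegisterDatasetRequest schema might look like this. The endpoint and required fields come from the spec above; the server URL matches the examples elsewhere in the repo, while the purpose and source values are illustrative assumptions:

```bash
# Hypothetical request: purpose and source are required; metadata and dataset_id are nullable.
curl -s http://localhost:8321/v1beta/datasets \
  -H 'Content-Type: application/json' \
  -d '{
        "purpose": "eval/question-answer",
        "source": {"type": "uri", "uri": "https://example.com/my-dataset.jsonl"},
        "dataset_id": "my-eval-dataset"
      }'
```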


@@ -104,23 +104,19 @@ client.toolgroups.register(
 )
 ```

-Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide authorization headers to send to the MCP server using the "Provider Data" abstraction provided by Llama Stack. When making an agent call,
+Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide the authorization token when creating the Agent:

 ```python
 agent = Agent(
     ...,
-    tools=["mcp::deepwiki"],
-    extra_headers={
-        "X-LlamaStack-Provider-Data": json.dumps(
-            {
-                "mcp_headers": {
-                    "http://mcp.deepwiki.com/sse": {
-                        "Authorization": "Bearer <your_access_token>",
-                    },
-                },
-            }
-        ),
-    },
+    tools=[
+        {
+            "type": "mcp",
+            "server_url": "https://mcp.deepwiki.com/sse",
+            "server_label": "mcp::deepwiki",
+            "authorization": "<your_access_token>",  # OAuth token (without "Bearer " prefix)
+        }
+    ],
 )

 agent.create_turn(...)
 ```


@@ -1,7 +1,8 @@
 ---
-description: "Agents
+description: |
+  Agents

-  APIs for creating and interacting with agentic systems."
+  APIs for creating and interacting with agentic systems.
 sidebar_label: Agents
 title: Agents
 ---


@@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No | | |
+| `persistence` | `AgentPersistenceConfig` | No | | |

 ## Sample Configuration


@@ -1,5 +1,6 @@
 ---
-description: "The Batches API enables efficient processing of multiple requests in a single operation,
+description: |
+  The Batches API enables efficient processing of multiple requests in a single operation,
   particularly useful for processing large datasets, batch evaluation workflows, and
   cost-effective inference at scale.
@@ -8,7 +9,7 @@ description: "The Batches API enables efficient processing of multiple requests
   This API provides the following extensions:
   - idempotent batch creation

-  Note: This API is currently under active development and may undergo changes."
+  Note: This API is currently under active development and may undergo changes.
 sidebar_label: Batches
 title: Batches
 ---


@@ -14,9 +14,9 @@ Reference implementation of batches API with KVStore persistence.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Configuration for the key-value store backend. |
-| `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
-| `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |
+| `kvstore` | `KVStoreReference` | No | | Configuration for the key-value store backend. |
+| `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
+| `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. |

 ## Sample Configuration


@@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
+| `kvstore` | `KVStoreReference` | No | | |

 ## Sample Configuration


@@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
+| `kvstore` | `KVStoreReference` | No | | |

 ## Sample Configuration


@@ -17,7 +17,7 @@ NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform
 | `api_key` | `str \| None` | No | | The NVIDIA API key. |
 | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
 | `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. |
-| `datasets_url` | `<class 'str'>` | No | http://nemo.test | Base URL for the NeMo Dataset API |
+| `datasets_url` | `str` | No | http://nemo.test | Base URL for the NeMo Dataset API |

 ## Sample Configuration


@@ -1,7 +1,8 @@
 ---
-description: "Evaluations
+description: |
+  Evaluations

-  Llama Stack Evaluation API for running evaluations on model and agent candidates."
+  Llama Stack Evaluation API for running evaluations on model and agent candidates.
 sidebar_label: Eval
 title: Eval
 ---


@@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
+| `kvstore` | `KVStoreReference` | No | | |

 ## Sample Configuration


@@ -14,7 +14,7 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `evaluator_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |
+| `evaluator_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |

 ## Sample Configuration


@@ -1,7 +1,8 @@
 ---
-description: "Files
+description: |
+  Files

-  This API is used to upload documents that can be used with other Llama Stack APIs."
+  This API is used to upload documents that can be used with other Llama Stack APIs.
 sidebar_label: Files
 title: Files
 ---


@@ -14,9 +14,9 @@ Local filesystem-based file storage provider for managing files and documents lo
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
-| `ttl_secs` | `<class 'int'>` | No | 31536000 | |
+| `storage_dir` | `str` | No | | Directory to store uploaded files |
+| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata |
+| `ttl_secs` | `int` | No | 31536000 | |

 ## Sample Configuration


@@ -14,8 +14,8 @@ OpenAI Files API provider for managing files through OpenAI's native file storag
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
+| `api_key` | `str` | No | | OpenAI API key for authentication |
+| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata |

 ## Sample Configuration


@@ -14,13 +14,13 @@ AWS S3-based file storage provider for scalable cloud file management with metad
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `bucket_name` | `<class 'str'>` | No | | S3 bucket name to store files |
-| `region` | `<class 'str'>` | No | us-east-1 | AWS region where the bucket is located |
+| `bucket_name` | `str` | No | | S3 bucket name to store files |
+| `region` | `str` | No | us-east-1 | AWS region where the bucket is located |
 | `aws_access_key_id` | `str \| None` | No | | AWS access key ID (optional if using IAM roles) |
 | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
 | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
-| `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
+| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist |
+| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata |

 ## Sample Configuration


@@ -1,12 +1,13 @@
 ---
-description: "Inference
+description: |
+  Inference

   Llama Stack Inference API for generating completions, chat completions, and embeddings.

   This API provides the raw interface to the underlying models. Three kinds of models are supported:
-  - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
+  - LLM models: these models generate "raw" and "chat" (conversational) completions.
   - Embedding models: these models generate embeddings to be used for semantic search.
-  - Rerank models: these models reorder the documents based on their relevance to a query."
+  - Rerank models: these models reorder the documents based on their relevance to a query.
 sidebar_label: Inference
 title: Inference
 ---


@@ -16,12 +16,12 @@ Meta's reference implementation of inference with support for various model form
 |-------|------|----------|---------|-------------|
 | `model` | `str \| None` | No | | |
 | `torch_seed` | `int \| None` | No | | |
-| `max_seq_len` | `<class 'int'>` | No | 4096 | |
-| `max_batch_size` | `<class 'int'>` | No | 1 | |
+| `max_seq_len` | `int` | No | 4096 | |
+| `max_batch_size` | `int` | No | 1 | |
 | `model_parallel_size` | `int \| None` | No | | |
-| `create_distributed_process_group` | `<class 'bool'>` | No | True | |
+| `create_distributed_process_group` | `bool` | No | True | |
 | `checkpoint_dir` | `str \| None` | No | | |
-| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | |
+| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig \| None` | No | | |

 ## Sample Configuration


@@ -14,9 +14,9 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |

 ## Sample Configuration


@@ -21,10 +21,10 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 ```yaml
 api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
 api_version: ${env.AZURE_API_VERSION:=}
 api_type: ${env.AZURE_API_TYPE:=}
 ```


@@ -14,14 +14,14 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
-| `region_name` | `<class 'str'>` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
+| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |

 ## Sample Configuration

 ```yaml
-api_key: ${env.AWS_BEDROCK_API_KEY:=}
+api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
 region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
 ```


@@ -14,14 +14,14 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |

 ## Sample Configuration

 ```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
 api_key: ${env.CEREBRAS_API_KEY:=}
 ```


@ -14,14 +14,14 @@ Databricks inference provider for running models on Databricks' unified analytic
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | The Databricks API token | | `api_token` | `SecretStr \| None` | No | | The Databricks API token |
| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | | `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.DATABRICKS_HOST:=} base_url: ${env.DATABRICKS_HOST:=}
api_token: ${env.DATABRICKS_TOKEN:=} api_token: ${env.DATABRICKS_TOKEN:=}
``` ```
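The renamed `base_url` is expected to carry the full `/serving-endpoints` path, not just the workspace host; a sketch with a hypothetical workspace URL:

```yaml
# Hypothetical workspace host; only the /serving-endpoints suffix is prescribed
base_url: https://dbc-1234abcd-5678.cloud.databricks.com/serving-endpoints
api_token: ${env.DATABRICKS_TOKEN:=}
```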
@ -14,14 +14,14 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | | `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
``` ```
@ -14,9 +14,9 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
## Sample Configuration ## Sample Configuration
@ -14,14 +14,14 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server | | `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
``` ```
@ -14,8 +14,8 @@ HuggingFace Inference Endpoints provider for dedicated model serving.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `endpoint_name` | `<class 'str'>` | No | | The name of the Hugging Face Inference Endpoint in the format of '&#123;namespace&#125;/&#123;endpoint_name&#125;' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | | `endpoint_name` | `str` | No | | The name of the Hugging Face Inference Endpoint in the format of '&#123;namespace&#125;/&#123;endpoint_name&#125;' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | | `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration ## Sample Configuration
@ -14,8 +14,8 @@ HuggingFace Inference API serverless provider for on-demand model inference.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `huggingface_repo` | `<class 'str'>` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | | `huggingface_repo` | `str` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | | `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration ## Sample Configuration
@ -14,14 +14,14 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | | `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
openai_compat_api_base: https://api.llama.com/compat/v1/ base_url: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY} api_key: ${env.LLAMA_API_KEY}
``` ```
@ -14,18 +14,16 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
``` ```
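The `rerank_model_to_url` defaults can be overridden or extended in YAML like any other dict field; a sketch adding a hypothetical self-hosted rerank NIM (endpoint URL illustrative):

```yaml
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=}
rerank_model_to_url:
  # Hypothetical self-hosted endpoint replacing the hosted default
  nvidia/llama-3.2-nv-rerankqa-1b-v2: http://rerank.internal:8000/v1/ranking
```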
@ -21,14 +21,14 @@ https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) | | `oci_auth_type` | `str` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
| `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) | | `oci_region` | `str` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
| `oci_compartment_id` | `<class 'str'>` | No | | OCI compartment ID for the Generative AI service | | `oci_compartment_id` | `str` | No | | OCI compartment ID for the Generative AI service |
| `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) | | `oci_config_file_path` | `str` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
| `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) | | `oci_config_profile` | `str` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
## Sample Configuration ## Sample Configuration
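For the `config_file` auth type, the config-file fields become mandatory in practice; a sketch with a hypothetical compartment OCID:

```yaml
oci_auth_type: config_file
oci_config_file_path: ~/.oci/config
oci_config_profile: DEFAULT
oci_region: us-ashburn-1
# Hypothetical compartment OCID
oci_compartment_id: ocid1.compartment.oc1..aaaaexample
```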
@ -14,12 +14,12 @@ Ollama inference provider for running local models through the Ollama runtime.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | http://localhost:11434 | | | `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
``` ```
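The default now targets Ollama's OpenAI-compatible `/v1` path; pointing at a non-local host only changes the host portion (hostname below is illustrative):

```yaml
base_url: ${env.OLLAMA_URL:=http://ollama.internal:11434/v1}
```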
@ -14,10 +14,10 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API | | `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
## Sample Configuration ## Sample Configuration
@ -14,14 +14,14 @@ Passthrough inference provider for connecting to any external inference service
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint | | `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.PASSTHROUGH_URL} base_url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY} api_key: ${env.PASSTHROUGH_API_KEY}
``` ```
@ -14,14 +14,14 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | | `api_token` | `SecretStr \| None` | No | | The API token |
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | | `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.RUNPOD_URL:=} base_url: ${env.RUNPOD_URL:=}
api_token: ${env.RUNPOD_API_TOKEN} api_token: ${env.RUNPOD_API_TOKEN}
``` ```
@ -14,14 +14,14 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | | `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
``` ```
@ -14,12 +14,12 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint | | `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
``` ```
@ -14,14 +14,14 @@ Together AI inference provider for open-source models and collaborative AI devel
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server | | `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
``` ```
@ -53,10 +53,10 @@ Available Models:
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI | | `project` | `str` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI | | `location` | `str` | No | us-central1 | Google Cloud location for Vertex AI |
## Sample Configuration ## Sample Configuration
@ -14,17 +14,17 @@ Remote vLLM inference provider for connecting to vLLM servers.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | | `api_token` | `SecretStr \| None` | No | | The API token |
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | | `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. | | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
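Since `tls_verify` accepts a boolean or a CA bundle path, a self-signed vLLM deployment can pin its CA instead of disabling verification; a sketch (host and path hypothetical):

```yaml
base_url: https://vllm.internal:8443/v1
api_token: ${env.VLLM_API_TOKEN:=fake}
# Path to a CA certificate file instead of true/false
tls_verify: /etc/ssl/certs/internal-ca.pem
```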
@ -14,17 +14,17 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
| `project_id` | `str \| None` | No | | The watsonx.ai project ID | | `project_id` | `str \| None` | No | | The watsonx.ai project ID |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=} api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=} project_id: ${env.WATSONX_PROJECT_ID:=}
``` ```
@ -14,23 +14,23 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `device` | `<class 'str'>` | No | cuda | | | `device` | `str` | No | cuda | |
| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | | `distributed_backend` | `Literal[fsdp, deepspeed] \| None` | No | | |
| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | | `checkpoint_format` | `Literal[full_state, huggingface] \| None` | No | huggingface | |
| `chat_template` | `<class 'str'>` | No | `&lt;|user|&gt;`<br/>`{input}`<br/>`&lt;|assistant|&gt;`<br/>`{output}` | | | `chat_template` | `str` | No | `&lt;|user|&gt;`<br/>`{input}`<br/>`&lt;|assistant|&gt;`<br/>`{output}` | |
| `model_specific_config` | `<class 'dict'>` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | | `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | |
| `max_seq_length` | `<class 'int'>` | No | 2048 | | | `max_seq_length` | `int` | No | 2048 | |
| `gradient_checkpointing` | `<class 'bool'>` | No | False | | | `gradient_checkpointing` | `bool` | No | False | |
| `save_total_limit` | `<class 'int'>` | No | 3 | | | `save_total_limit` | `int` | No | 3 | |
| `logging_steps` | `<class 'int'>` | No | 10 | | | `logging_steps` | `int` | No | 10 | |
| `warmup_ratio` | `<class 'float'>` | No | 0.1 | | | `warmup_ratio` | `float` | No | 0.1 | |
| `weight_decay` | `<class 'float'>` | No | 0.01 | | | `weight_decay` | `float` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | | | `dataloader_num_workers` | `int` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | | | `dataloader_pin_memory` | `bool` | No | True | |
| `dpo_beta` | `<class 'float'>` | No | 0.1 | | | `dpo_beta` | `float` | No | 0.1 | |
| `use_reference_model` | `<class 'bool'>` | No | True | | | `use_reference_model` | `bool` | No | True | |
| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | | | `dpo_loss_type` | `Literal[sigmoid, hinge, ipo, kto_pair]` | No | sigmoid | |
| `dpo_output_dir` | `<class 'str'>` | No | | | | `dpo_output_dir` | `str` | No | | |
## Sample Configuration ## Sample Configuration
@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `torch_seed` | `int \| None` | No | | | | `torch_seed` | `int \| None` | No | | |
| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | | `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | |
## Sample Configuration ## Sample Configuration
@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `torch_seed` | `int \| None` | No | | | | `torch_seed` | `int \| None` | No | | |
| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | | `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | |
## Sample Configuration ## Sample Configuration
@ -18,9 +18,9 @@ NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. | | `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. |
| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | | `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API |
| `timeout` | `<class 'int'>` | No | 300 | Timeout for the NVIDIA Post Training API | | `timeout` | `int` | No | 300 | Timeout for the NVIDIA Post Training API |
| `max_retries` | `<class 'int'>` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | | `max_retries` | `int` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
| `output_model_dir` | `<class 'str'>` | No | test-example-model@v1 | Directory to save the output model | | `output_model_dir` | `str` | No | test-example-model@v1 | Directory to save the output model |
## Sample Configuration ## Sample Configuration
@ -1,7 +1,8 @@
--- ---
description: "Safety description: |
Safety
OpenAI-compatible Moderations API." OpenAI-compatible Moderations API.
sidebar_label: Safety sidebar_label: Safety
title: Safety title: Safety
--- ---
@ -14,7 +14,7 @@ Llama Guard safety provider for content moderation and safety filtering using Me
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `excluded_categories` | `list[str` | No | [] | | | `excluded_categories` | `list[str]` | No | [] | |
## Sample Configuration ## Sample Configuration
@ -14,7 +14,7 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `guard_type` | `<class 'str'>` | No | injection | | | `guard_type` | `str` | No | injection | |
## Sample Configuration ## Sample Configuration
@ -14,8 +14,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
@ -14,7 +14,7 @@ NVIDIA's safety provider for content moderation and safety filtering.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `guardrails_service_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | | `guardrails_service_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service |
| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | | `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store |
## Sample Configuration ## Sample Configuration
@ -14,8 +14,8 @@ SambaNova's safety provider for content moderation and safety filtering.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | | `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | | `api_key` | `SecretStr \| None` | No | | The SambaNova cloud API Key |
## Sample Configuration ## Sample Configuration
@ -15,7 +15,7 @@ Bing Search tool for web search capabilities using Microsoft's search engine.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | | | `api_key` | `str \| None` | No | | |
| `top_k` | `<class 'int'>` | No | 3 | | | `top_k` | `int` | No | 3 | |
## Sample Configuration ## Sample Configuration
@ -15,7 +15,7 @@ Brave Search tool for web search capabilities with privacy-focused results.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Brave Search API Key | | `api_key` | `str \| None` | No | | The Brave Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return | | `max_results` | `int` | No | 3 | The maximum number of results to return |
## Sample Configuration ## Sample Configuration
@ -15,7 +15,7 @@ Tavily Search tool for AI-optimized web search with structured results.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Tavily Search API Key | | `api_key` | `str \| None` | No | | The Tavily Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return | | `max_results` | `int` | No | 3 | The maximum number of results to return |
## Sample Configuration ## Sample Configuration
@ -78,8 +78,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | | | `db_path` | `str` | No | | |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend | | `persistence` | `KVStoreReference` | No | | Config for KV store backend |
## Sample Configuration ## Sample Configuration
@ -95,7 +95,7 @@ more details about Faiss in general.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | | | `persistence` | `KVStoreReference` | No | | |
## Sample Configuration ## Sample Configuration
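`persistence` is a `KVStoreReference` rather than an inline store config; a sketch assuming the namespace-plus-named-backend shape used by the bundled distributions (both names illustrative):

```yaml
persistence:
  namespace: vector_io::faiss   # illustrative namespace
  backend: kv_default           # illustrative backend name
```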
@ -14,7 +14,7 @@ Meta's reference implementation of a vector database.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | | | `persistence` | `KVStoreReference` | No | | |
## Sample Configuration ## Sample Configuration
@ -16,9 +16,9 @@ Please refer to the remote provider documentation.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | | | `db_path` | `str` | No | | |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server | | `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server |
## Sample Configuration ## Sample Configuration
@ -97,8 +97,8 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `path` | `<class 'str'>` | No | | | | `path` | `str` | No | | |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | | | `persistence` | `KVStoreReference` | No | | |
## Sample Configuration ## Sample Configuration
@ -407,8 +407,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file | | `db_path` | `str` | No | | Path to the SQLite database file |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
@ -16,8 +16,8 @@ Please refer to the sqlite-vec provider documentation.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file | | `db_path` | `str` | No | | Path to the SQLite database file |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | | | `url` | `str \| None` | No | | |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend | | `persistence` | `KVStoreReference` | No | | Config for KV store backend |
## Sample Configuration ## Sample Configuration
@ -405,10 +405,10 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `uri` | `<class 'str'>` | No | | The URI of the Milvus server | | `uri` | `str` | No | | The URI of the Milvus server |
| `token` | `str \| None` | No | | The token of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server | | `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend | | `persistence` | `KVStoreReference` | No | | Config for KV store backend |
| `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
:::note :::note
@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
| `db` | `str \| None` | No | postgres | | | `db` | `str \| None` | No | postgres | |
| `user` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | |
| `password` | `str \| None` | No | mysecretpassword | | | `password` | `str \| None` | No | mysecretpassword | |
| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
@ -19,14 +19,14 @@ Please refer to the inline provider documentation.
| `location` | `str \| None` | No | | | | `location` | `str \| None` | No | | |
| `url` | `str \| None` | No | | | | `url` | `str \| None` | No | | |
| `port` | `int \| None` | No | 6333 | | | `port` | `int \| None` | No | 6333 | |
| `grpc_port` | `<class 'int'>` | No | 6334 | | | `grpc_port` | `int` | No | 6334 | |
| `prefer_grpc` | `<class 'bool'>` | No | False | | | `prefer_grpc` | `bool` | No | False | |
| `https` | `bool \| None` | No | | | | `https` | `bool \| None` | No | | |
| `api_key` | `str \| None` | No | | | | `api_key` | `str \| None` | No | | |
| `prefix` | `str \| None` | No | | | | `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | | | `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | | | `host` | `str \| None` | No | | |
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | | | `persistence` | `KVStoreReference` | No | | |
## Sample Configuration ## Sample Configuration
@ -75,7 +75,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
| `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
docs/package-lock.json generated
@ -10712,12 +10712,6 @@
"integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==", "integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==",
"license": "Unlicense" "license": "Unlicense"
}, },
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
"license": "ISC"
},
"node_modules/fsevents": { "node_modules/fsevents": {
"version": "2.3.3", "version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
@ -10821,21 +10815,20 @@
"license": "ISC" "license": "ISC"
}, },
"node_modules/glob": { "node_modules/glob": {
"version": "7.2.3", "version": "10.5.0",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
"integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
"deprecated": "Glob versions prior to v9 are no longer supported",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"fs.realpath": "^1.0.0", "foreground-child": "^3.1.0",
"inflight": "^1.0.4", "jackspeak": "^3.1.2",
"inherits": "2", "minimatch": "^9.0.4",
"minimatch": "^3.1.1", "minipass": "^7.1.2",
"once": "^1.3.0", "package-json-from-dist": "^1.0.0",
"path-is-absolute": "^1.0.0" "path-scurry": "^1.11.1"
}, },
"engines": { "bin": {
"node": "*" "glob": "dist/esm/bin.mjs"
}, },
"funding": { "funding": {
"url": "https://github.com/sponsors/isaacs" "url": "https://github.com/sponsors/isaacs"
@ -10859,26 +10852,19 @@
"integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
"license": "BSD-2-Clause" "license": "BSD-2-Clause"
}, },
"node_modules/glob/node_modules/brace-expansion": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
}
},
"node_modules/glob/node_modules/minimatch": { "node_modules/glob/node_modules/minimatch": {
"version": "3.1.2", "version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"brace-expansion": "^1.1.7" "brace-expansion": "^2.0.1"
}, },
"engines": { "engines": {
"node": "*" "node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
} }
}, },
"node_modules/global-dirs": { "node_modules/global-dirs": {
@ -11792,17 +11778,6 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/inflight": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
"integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
"deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
"license": "ISC",
"dependencies": {
"once": "^1.3.0",
"wrappy": "1"
}
},
"node_modules/inherits": { "node_modules/inherits": {
"version": "2.0.4", "version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
@ -15570,15 +15545,6 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"license": "ISC",
"dependencies": {
"wrappy": "1"
}
},
"node_modules/onetime": { "node_modules/onetime": {
"version": "5.1.2", "version": "5.1.2",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@ -15955,15 +15921,6 @@
"node": "^12.20.0 || ^14.13.1 || >=16.0.0" "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
} }
}, },
"node_modules/path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/path-is-inside": { "node_modules/path-is-inside": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz", "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz",
@ -20038,41 +19995,6 @@
"node": ">= 6" "node": ">= 6"
} }
}, },
"node_modules/sucrase/node_modules/glob": {
"version": "10.4.5",
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
"license": "ISC",
"dependencies": {
"foreground-child": "^3.1.0",
"jackspeak": "^3.1.2",
"minimatch": "^9.0.4",
"minipass": "^7.1.2",
"package-json-from-dist": "^1.0.0",
"path-scurry": "^1.11.1"
},
"bin": {
"glob": "dist/esm/bin.mjs"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/sucrase/node_modules/minimatch": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/supports-color": { "node_modules/supports-color": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
@ -21620,12 +21542,6 @@
"url": "https://github.com/chalk/strip-ansi?sponsor=1" "url": "https://github.com/chalk/strip-ansi?sponsor=1"
} }
}, },
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"license": "ISC"
},
"node_modules/write-file-atomic": { "node_modules/write-file-atomic": {
"version": "3.0.3", "version": "3.0.3",
"resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz", "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz",
@ -31,6 +31,9 @@
"react-dom": "^19.0.0", "react-dom": "^19.0.0",
"remark-code-import": "^1.2.0" "remark-code-import": "^1.2.0"
}, },
"overrides": {
"glob": "^10.5.0"
},
"browserslist": { "browserslist": {
"production": [ "production": [
">0.5%", ">0.5%",
@ -193,7 +193,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequestLoose' $ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true required: true
deprecated: true deprecated: true
/v1/scoring-functions/{scoring_fn_id}: /v1/scoring-functions/{scoring_fn_id}:
@ -549,7 +549,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/RegisterDatasetRequestLoose' $ref: '#/components/schemas/RegisterDatasetRequest'
required: true required: true
deprecated: true deprecated: true
/v1beta/datasets/{dataset_id}: /v1beta/datasets/{dataset_id}:
@ -3572,9 +3572,10 @@ components:
type: array type: array
title: Output title: Output
parallel_tool_calls: parallel_tool_calls:
type: boolean anyOf:
title: Parallel Tool Calls - type: boolean
default: false - type: 'null'
default: true
previous_response_id: previous_response_id:
anyOf: anyOf:
- type: string - type: string
@ -3974,6 +3975,11 @@ components:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
parallel_tool_calls:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id: previous_response_id:
anyOf: anyOf:
- type: string - type: string
@ -4100,9 +4106,10 @@ components:
type: array type: array
title: Output title: Output
parallel_tool_calls: parallel_tool_calls:
type: boolean anyOf:
title: Parallel Tool Calls - type: boolean
default: false - type: 'null'
default: true
previous_response_id: previous_response_id:
anyOf: anyOf:
- type: string - type: string
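`parallel_tool_calls` is now nullable and defaults to `true`, so callers that relied on the old `false` default must opt out explicitly; a sketch of a request body (model id illustrative):

```yaml
model: llama-3.2-3b-instruct   # illustrative model id
input: "What's the weather in Paris and in Tokyo?"
# Previously defaulted to false; omit the field to get the new default (true)
parallel_tool_calls: false
```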
@ -6704,9 +6711,21 @@ components:
title: Object title: Object
default: vector_store.file default: vector_store.file
attributes: attributes:
additionalProperties: true additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
@@ -7435,6 +7454,14 @@ components:
- scores
title: EvaluateResponse
description: The response from an evaluation.
RunEvalRequest:
properties:
benchmark_config:
$ref: '#/components/schemas/BenchmarkConfig'
type: object
required:
- benchmark_config
title: RunEvalRequest
Job:
properties:
job_id:
@@ -8018,6 +8045,67 @@ components:
- $ref: '#/components/schemas/CompletionInputType'
title: CompletionInputType
title: StringType | ... (9 variants)
RegisterScoringFunctionRequest:
properties:
scoring_fn_id:
type: string
title: Scoring Fn Id
description:
type: string
title: Description
return_type:
anyOf:
- $ref: '#/components/schemas/StringType'
title: StringType
- $ref: '#/components/schemas/NumberType'
title: NumberType
- $ref: '#/components/schemas/BooleanType'
title: BooleanType
- $ref: '#/components/schemas/ArrayType'
title: ArrayType
- $ref: '#/components/schemas/ObjectType'
title: ObjectType
- $ref: '#/components/schemas/JsonType'
title: JsonType
- $ref: '#/components/schemas/UnionType'
title: UnionType
- $ref: '#/components/schemas/ChatCompletionInputType'
title: ChatCompletionInputType
- $ref: '#/components/schemas/CompletionInputType'
title: CompletionInputType
title: StringType | ... (9 variants)
provider_scoring_fn_id:
anyOf:
- type: string
- type: 'null'
provider_id:
anyOf:
- type: string
- type: 'null'
params:
anyOf:
- oneOf:
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
title: LLMAsJudgeScoringFnParams
- $ref: '#/components/schemas/RegexParserScoringFnParams'
title: RegexParserScoringFnParams
- $ref: '#/components/schemas/BasicScoringFnParams'
title: BasicScoringFnParams
discriminator:
propertyName: type
mapping:
basic: '#/components/schemas/BasicScoringFnParams'
llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams'
title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
- type: 'null'
title: Params
type: object
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequest
RegisterShieldRequest:
properties:
shield_id:
@@ -8076,6 +8164,31 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
source:
anyOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
dataset_id:
anyOf:
- type: string
- type: 'null'
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
RegisterBenchmarkRequest:
properties:
benchmark_id:
@@ -8812,41 +8925,6 @@ components:
required:
- reasoning_tokens
title: OutputTokensDetails
RegisterDatasetRequestLoose:
properties:
purpose:
title: Purpose
source:
title: Source
metadata:
title: Metadata
dataset_id:
title: Dataset Id
type: object
required:
- purpose
- source
title: RegisterDatasetRequestLoose
RegisterScoringFunctionRequestLoose:
properties:
scoring_fn_id:
title: Scoring Fn Id
description:
title: Description
return_type:
title: Return Type
provider_scoring_fn_id:
title: Provider Scoring Fn Id
provider_id:
title: Provider Id
params:
title: Params
type: object
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequestLoose
SearchRankingOptions:
properties:
ranker:
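
Note on the schema swap above: the deprecated registration routes now validate against the fully-typed RegisterScoringFunctionRequest and RegisterDatasetRequest schemas instead of the old untyped *Loose variants. A minimal sketch of a dataset-registration body that satisfies the strict schema; the URI, metadata, and dataset_id values are hypothetical, and "purpose" must be a valid DatasetPurpose value:

# Illustrative payload for POST /v1beta/datasets under the strict schema.
register_dataset_body = {
    "purpose": "eval/question-answer",  # assumed DatasetPurpose value
    "source": {  # URIDataSource variant of the source union
        "type": "uri",
        "uri": "https://example.com/qa.jsonl",  # hypothetical URI
    },
    "metadata": {"owner": "docs-example"},  # optional, nullable
    "dataset_id": "my-eval-dataset",  # optional, nullable
}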

View file

@@ -300,7 +300,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/BenchmarkConfig'
$ref: '#/components/schemas/RunEvalRequest'
required: true
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get:
@@ -3297,9 +3297,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -3696,9 +3697,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -6093,9 +6095,21 @@ components:
title: Object
default: vector_store.file
attributes:
additionalProperties: true
additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
@@ -6745,6 +6759,14 @@ components:
- scores
title: EvaluateResponse
description: The response from an evaluation.
RunEvalRequest:
properties:
benchmark_config:
$ref: '#/components/schemas/BenchmarkConfig'
type: object
required:
- benchmark_config
title: RunEvalRequest
Job:
properties:
job_id:
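
With the change above, starting an eval job no longer takes a bare BenchmarkConfig body; the config is wrapped under a "benchmark_config" key per the new RunEvalRequest schema. A hedged sketch of the new body shape (the inner contents are elided rather than invented; see the BenchmarkConfig schema for the real fields):

# Illustrative body for POST /v1alpha/eval/benchmarks/{benchmark_id}/jobs.
run_eval_body = {
    "benchmark_config": {
        # ... BenchmarkConfig fields go here; previously this object was
        # sent as the entire request body, unwrapped.
    },
}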

View file

@@ -5760,9 +5760,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -6162,6 +6163,11 @@ components:
anyOf:
- type: string
- type: 'null'
parallel_tool_calls:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -6288,9 +6294,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -8892,9 +8899,21 @@ components:
title: Object
default: vector_store.file
attributes:
additionalProperties: true
additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
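
The tightened "attributes" schema above encodes the constraint spelled out in its description: at most 16 key-value pairs, string keys of at most 64 characters, and values that are strings (at most 512 characters), numbers, or booleans. A small sketch of an equivalent client-side check, mirroring the schema:

def validate_attributes(attributes: dict) -> None:
    # Mirrors the schema: maxProperties 16, key length <= 64,
    # value is str (<= 512 chars) | number | boolean.
    if len(attributes) > 16:
        raise ValueError("attributes may hold at most 16 key-value pairs")
    for key, value in attributes.items():
        if not isinstance(key, str) or len(key) > 64:
            raise ValueError(f"invalid key {key!r}: keys are strings of at most 64 characters")
        if isinstance(value, (bool, int, float)):
            continue
        if isinstance(value, str) and len(value) <= 512:
            continue
        raise ValueError(f"invalid value for {key!r}: expected str (<= 512 chars), number, or boolean")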

View file

@@ -1820,7 +1820,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequestLoose'
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true
deprecated: true
/v1/scoring-functions/{scoring_fn_id}:
@@ -3310,7 +3310,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterDatasetRequestLoose'
$ref: '#/components/schemas/RegisterDatasetRequest'
required: true
deprecated: true
/v1beta/datasets/{dataset_id}:
@@ -3567,7 +3567,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/BenchmarkConfig'
$ref: '#/components/schemas/RunEvalRequest'
required: true
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get:
@@ -6739,9 +6739,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7141,6 +7142,11 @@ components:
anyOf:
- type: string
- type: 'null'
parallel_tool_calls:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7267,9 +7273,10 @@ components:
type: array
title: Output
parallel_tool_calls:
type: boolean
title: Parallel Tool Calls
default: false
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -9871,9 +9878,21 @@ components:
title: Object
default: vector_store.file
attributes:
additionalProperties: true
additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
@@ -10602,6 +10621,14 @@ components:
- scores
title: EvaluateResponse
description: The response from an evaluation.
RunEvalRequest:
properties:
benchmark_config:
$ref: '#/components/schemas/BenchmarkConfig'
type: object
required:
- benchmark_config
title: RunEvalRequest
Job:
properties:
job_id:
@@ -11185,6 +11212,67 @@ components:
- $ref: '#/components/schemas/CompletionInputType'
title: CompletionInputType
title: StringType | ... (9 variants)
RegisterScoringFunctionRequest:
properties:
scoring_fn_id:
type: string
title: Scoring Fn Id
description:
type: string
title: Description
return_type:
anyOf:
- $ref: '#/components/schemas/StringType'
title: StringType
- $ref: '#/components/schemas/NumberType'
title: NumberType
- $ref: '#/components/schemas/BooleanType'
title: BooleanType
- $ref: '#/components/schemas/ArrayType'
title: ArrayType
- $ref: '#/components/schemas/ObjectType'
title: ObjectType
- $ref: '#/components/schemas/JsonType'
title: JsonType
- $ref: '#/components/schemas/UnionType'
title: UnionType
- $ref: '#/components/schemas/ChatCompletionInputType'
title: ChatCompletionInputType
- $ref: '#/components/schemas/CompletionInputType'
title: CompletionInputType
title: StringType | ... (9 variants)
provider_scoring_fn_id:
anyOf:
- type: string
- type: 'null'
provider_id:
anyOf:
- type: string
- type: 'null'
params:
anyOf:
- oneOf:
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
title: LLMAsJudgeScoringFnParams
- $ref: '#/components/schemas/RegexParserScoringFnParams'
title: RegexParserScoringFnParams
- $ref: '#/components/schemas/BasicScoringFnParams'
title: BasicScoringFnParams
discriminator:
propertyName: type
mapping:
basic: '#/components/schemas/BasicScoringFnParams'
llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams'
title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
- type: 'null'
title: Params
type: object
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequest
RegisterShieldRequest:
properties:
shield_id:
@@ -11243,6 +11331,31 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterDatasetRequest:
properties:
purpose:
$ref: '#/components/schemas/DatasetPurpose'
source:
anyOf:
- $ref: '#/components/schemas/URIDataSource'
title: URIDataSource
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
dataset_id:
anyOf:
- type: string
- type: 'null'
type: object
required:
- purpose
- source
title: RegisterDatasetRequest
RegisterBenchmarkRequest:
properties:
benchmark_id:
@@ -11979,41 +12092,6 @@ components:
required:
- reasoning_tokens
title: OutputTokensDetails
RegisterDatasetRequestLoose:
properties:
purpose:
title: Purpose
source:
title: Source
metadata:
title: Metadata
dataset_id:
title: Dataset Id
type: object
required:
- purpose
- source
title: RegisterDatasetRequestLoose
RegisterScoringFunctionRequestLoose:
properties:
scoring_fn_id:
title: Scoring Fn Id
description:
title: Description
return_type:
title: Return Type
provider_scoring_fn_id:
title: Provider Scoring Fn Id
provider_id:
title: Provider Id
params:
title: Params
type: object
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequestLoose
SearchRankingOptions:
properties:
ranker:
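
A side effect of the recurring parallel_tool_calls change in these specs: the field is now nullable and its default flips from false to true, so clients that omit it get parallel tool calls, and only an explicit false opts out. An illustrative request body under that reading (model id and input are placeholders):

# Illustrative only: omit parallel_tool_calls to accept the new default of true.
create_response_body = {
    "model": "example-model",  # hypothetical model id
    "input": "What is the weather in Paris and in Rome?",
    "parallel_tool_calls": False,  # send only when opting out
}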

View file

@@ -38,7 +38,6 @@ dependencies = [
"pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
"pydantic>=2.11.9",
"rich",
"starlette",
"termcolor", "termcolor",
"tiktoken", "tiktoken",
"pillow", "pillow",
@ -50,7 +49,6 @@ dependencies = [
"aiosqlite>=0.21.0", # server - for metadata store "aiosqlite>=0.21.0", # server - for metadata store
"asyncpg", # for metadata store "asyncpg", # for metadata store
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations "sqlalchemy[asyncio]>=2.0.41", # server - for conversations
"pyyaml>=6.0.2",
"starlette>=0.49.1", "starlette>=0.49.1",
] ]
@ -358,6 +356,10 @@ exclude = [
module = [ module = [
"yaml", "yaml",
"fire", "fire",
"redis.asyncio",
"psycopg2",
"psycopg2.extras",
"psycopg2.extensions",
"torchtune.*", "torchtune.*",
"fairscale.*", "fairscale.*",
"torchvision.*", "torchvision.*",

View file

@@ -287,9 +287,9 @@ start_container() {
# On macOS/Windows, use host.docker.internal to reach host from container
# On Linux with --network host, use localhost
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
else
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
fi
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
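
The /v1 suffix added above points OLLAMA_URL at Ollama's OpenAI-compatible API rather than its native root, so downstream code can append OpenAI-style paths directly. A sketch of the resulting URL composition, assuming that convention:

import os

# With the new default, routes compose like OpenAI endpoints.
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434/v1")
chat_completions_url = f"{ollama_url}/chat/completions"  # .../v1/chat/completions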

View file

@@ -16,16 +16,16 @@ import sys
from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
def get_setup_env_vars(setup_name, suite_name=None):
def get_setup_config(setup_name, suite_name=None):
"""
Get environment variables for a setup, with optional suite default fallback.
Get full configuration (env vars + defaults) for a setup.
Args:
setup_name: Name of the setup (e.g., 'ollama', 'gpt')
suite_name: Optional suite name to get default setup if setup_name is None
Returns:
Dictionary of environment variables
Dictionary with 'env' and 'defaults' keys
"""
# If no setup specified, try to get default from suite
if not setup_name and suite_name:
@@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None):
setup_name = suite.default_setup
if not setup_name:
return {}
return {"env": {}, "defaults": {}}
setup = SETUP_DEFINITIONS.get(setup_name)
if not setup:
@@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None):
)
sys.exit(1)
return setup.env
return {"env": setup.env, "defaults": setup.defaults}
def main():
parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup")
parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
args = parser.parse_args()
env_vars = get_setup_env_vars(args.setup, args.suite)
config = get_setup_config(args.setup, args.suite)
if args.format == "bash":
# Output as bash export statements
# Output env vars as bash export statements
for key, value in env_vars.items():
for key, value in config["env"].items():
print(f"export {key}='{value}'")
# Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix
for key, value in config["defaults"].items():
env_key = f"LLAMA_STACK_TEST_{key.upper()}"
print(f"export {env_key}='{value}'")
elif args.format == "json": elif args.format == "json":
import json import json
print(json.dumps(env_vars)) print(json.dumps(config))
if __name__ == "__main__": if __name__ == "__main__":

View file

@@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
--network llama-net \
-p "${PORT}:${PORT}" \
"${server_env_opts[@]}" \
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
"${SERVER_IMAGE}" --port "${PORT}")
log "🦙 Starting Llama Stack..."

View file

@@ -20,6 +20,7 @@ TEST_PATTERN=""
INFERENCE_MODE="replay"
EXTRA_PARAMS=""
COLLECT_ONLY=false
TYPESCRIPT_ONLY=false
# Function to display usage
usage() {
@@ -34,6 +35,7 @@ Options:
--subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--pattern STRING Regex pattern to pass to pytest -k
--collect-only Collect tests only without running them (skips server startup)
--typescript-only Skip Python tests and run only TypeScript client tests
--help Show this help message
Suites are defined in tests/integration/suites.py and define which tests to run.
@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
COLLECT_ONLY=true
shift
;;
--typescript-only)
TYPESCRIPT_ONLY=true
shift
;;
--help)
usage
exit 0
@@ -181,6 +187,10 @@ echo "$SETUP_ENV"
eval "$SETUP_ENV"
echo ""
# Export suite and setup names for TypeScript tests
export LLAMA_STACK_TEST_SUITE="$TEST_SUITE"
export LLAMA_STACK_TEST_SETUP="$TEST_SETUP"
ROOT_DIR="$THIS_DIR/.." ROOT_DIR="$THIS_DIR/.."
cd $ROOT_DIR cd $ROOT_DIR
@ -212,6 +222,71 @@ find_available_port() {
return 1 return 1
} }
run_client_ts_tests() {
if ! command -v npm &>/dev/null; then
echo "npm could not be found; ensure Node.js is installed"
return 1
fi
pushd tests/integration/client-typescript >/dev/null
# Determine if TS_CLIENT_PATH is a directory path or an npm version
if [[ -d "$TS_CLIENT_PATH" ]]; then
# It's a directory path - use local checkout
if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then
echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)"
popd >/dev/null
return 1
fi
echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH"
# Build the TypeScript client first
echo "Building TypeScript client..."
pushd "$TS_CLIENT_PATH" >/dev/null
npm install --silent
npm run build --silent
popd >/dev/null
# Install other dependencies first
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
else
npm install --silent
fi
# Then install the client from local directory
echo "Installing llama-stack-client from: $TS_CLIENT_PATH"
npm install "$TS_CLIENT_PATH" --silent
else
# It's an npm version specifier - install from npm
echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm"
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
else
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
fi
fi
# Verify installation
echo "Verifying llama-stack-client installation..."
if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then
echo "✅ llama-stack-client successfully installed"
npm list llama-stack-client
else
echo "❌ llama-stack-client not found in node_modules"
echo "Installed packages:"
npm list --depth=0
popd >/dev/null
return 1
fi
echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)"
npm test
popd >/dev/null
}
# Start Llama Stack Server if needed
if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
# Find an available port for the server
@@ -221,6 +296,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
exit 1
fi
export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
stop_server() { stop_server() {
@ -298,6 +374,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
exit 1 exit 1
fi fi
export LLAMA_STACK_PORT export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
echo "=== Building Docker Image for distribution: $DISTRO ===" echo "=== Building Docker Image for distribution: $DISTRO ==="
@ -473,7 +550,9 @@ if [[ -n "$STACK_CONFIG" ]]; then
STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG" STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
fi fi
pytest -s -v $PYTEST_TARGET \ # Run Python tests unless typescript-only mode
if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
pytest -s -v $PYTEST_TARGET \
$STACK_CONFIG_ARG \
--inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \
@@ -482,7 +561,12 @@ pytest -s -v $PYTEST_TARGET \
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys
exit_code=$?
else
echo "Skipping Python tests (--typescript-only mode)"
exit_code=0
fi
set +x
set -e
@@ -506,5 +590,10 @@ else
exit 1
fi
# Run TypeScript client tests if TS_CLIENT_PATH is set
if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
run_client_ts_tests
fi
echo "" echo ""
echo "=== Integration Tests Complete ===" echo "=== Integration Tests Complete ==="

View file

@@ -11,6 +11,13 @@ This module provides functionality to generate OpenAPI specifications
from FastAPI applications.
"""
from .main import generate_openapi_spec, main
__all__ = ["generate_openapi_spec", "main"] __all__ = ["generate_openapi_spec", "main"]
def __getattr__(name: str):
if name in {"generate_openapi_spec", "main"}:
from .main import generate_openapi_spec as _gos
from .main import main as _main
return {"generate_openapi_spec": _gos, "main": _main}[name]
raise AttributeError(name)
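
The replacement above swaps an eager "from .main import ..." for a module-level __getattr__ (PEP 562), so importing the package no longer pulls in .main and its dependencies until one of the two names is actually accessed. A rough equivalent of the same pattern using importlib, for reference:

import importlib

def __getattr__(name: str):
    # Defer the heavy import until generate_openapi_spec or main is touched.
    if name in {"generate_openapi_spec", "main"}:
        module = importlib.import_module(".main", __package__)
        return getattr(module, name)
    raise AttributeError(name)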

View file

@@ -15,6 +15,7 @@ import typing
from typing import Annotated, Any, get_args, get_origin
from fastapi import FastAPI
from fastapi.params import Body as FastAPIBody
from pydantic import Field, create_model
from llama_stack.log import get_logger
@@ -26,6 +27,8 @@ from .state import _extra_body_fields, register_dynamic_model
logger = get_logger(name=__name__, category="core")
type QueryParameter = tuple[str, type, Any, bool]
def _to_pascal_case(segment: str) -> str:
tokens = re.findall(r"[A-Za-z]+|\d+", segment)
@@ -75,12 +78,12 @@ def _create_endpoint_with_request_model(
return endpoint
def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_any: bool = False) -> dict[str, tuple]:
def _build_field_definitions(query_parameters: list[QueryParameter], use_any: bool = False) -> dict[str, tuple]:
"""Build field definitions for a Pydantic model from query parameters."""
from typing import Any
field_definitions = {}
for param_name, param_type, default_value in query_parameters:
for param_name, param_type, default_value, _ in query_parameters:
if use_any:
field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value)
continue
@@ -108,10 +111,10 @@ def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_
field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value)
# Ensure all parameters are included
expected_params = {name for name, _, _ in query_parameters}
expected_params = {name for name, _, _, _ in query_parameters}
missing = expected_params - set(field_definitions.keys())
if missing:
for param_name, _, default_value in query_parameters:
for param_name, _, default_value, _ in query_parameters:
if param_name in missing:
field_definitions[param_name] = (
Any,
@@ -126,7 +129,7 @@ def _create_dynamic_request_model(
webmethod,
method_name: str,
http_method: str,
query_parameters: list[tuple[str, type, Any]],
query_parameters: list[QueryParameter],
use_any: bool = False,
variant_suffix: str | None = None,
) -> type | None:
@@ -143,12 +146,12 @@ def _create_dynamic_request_model(
def _build_signature_params(
query_parameters: list[tuple[str, type, Any]],
query_parameters: list[QueryParameter],
) -> tuple[list[inspect.Parameter], dict[str, type]]:
"""Build signature parameters and annotations from query parameters."""
signature_params = []
param_annotations = {}
for param_name, param_type, default_value in query_parameters:
for param_name, param_type, default_value, _ in query_parameters:
param_annotations[param_name] = param_type
signature_params.append(
inspect.Parameter(
@@ -219,6 +222,19 @@ def _is_extra_body_field(metadata_item: Any) -> bool:
return isinstance(metadata_item, ExtraBodyField)
def _should_embed_parameter(param_type: Any) -> bool:
"""Determine whether a parameter should be embedded (wrapped) in the request body."""
if get_origin(param_type) is Annotated:
args = get_args(param_type)
metadata = args[1:] if len(args) > 1 else []
for metadata_item in metadata:
if isinstance(metadata_item, FastAPIBody):
# FastAPI treats embed=None as False, so default to False when unset.
return bool(metadata_item.embed)
# Unannotated parameters default to embed=True through create_dynamic_typed_route.
return True
def _is_async_iterator_type(type_obj: Any) -> bool:
"""Check if a type is AsyncIterator or AsyncIterable."""
from collections.abc import AsyncIterable, AsyncIterator
@@ -282,7 +298,7 @@ def _find_models_for_endpoint(
Returns:
tuple: (request_model, response_model, query_parameters, file_form_params, streaming_response_model, response_schema_name)
where query_parameters is a list of (name, type, default_value) tuples
where query_parameters is a list of (name, type, default_value, should_embed) tuples
and file_form_params is a list of inspect.Parameter objects for File()/Form() params
and streaming_response_model is the model for streaming responses (AsyncIterator content)
"""
@@ -299,7 +315,7 @@ def _find_models_for_endpoint(
# Find request model and collect all body parameters
request_model = None
query_parameters = []
query_parameters: list[QueryParameter] = []
file_form_params = []
path_params = set()
extra_body_params = []
@@ -325,6 +341,7 @@ def _find_models_for_endpoint(
# Check if it's a File() or Form() parameter - these need special handling
param_type = param.annotation
param_should_embed = _should_embed_parameter(param_type)
if _is_file_or_form_param(param_type):
# File() and Form() parameters must be in the function signature directly
# They cannot be part of a Pydantic model
@@ -350,30 +367,14 @@ def _find_models_for_endpoint(
# Store as extra body parameter - exclude from request model
extra_body_params.append((param_name, base_type, extra_body_description))
continue
param_type = base_type
# Check if it's a Pydantic model (for POST/PUT requests)
if hasattr(param_type, "model_json_schema"):
# Collect all body parameters including Pydantic models
query_parameters.append((param_name, param_type, param.default, param_should_embed))
# We'll decide later whether to use a single model or create a combined one
query_parameters.append((param_name, param_type, param.default))
elif get_origin(param_type) is Annotated:
# Handle Annotated types - get the base type
args = get_args(param_type)
if args and hasattr(args[0], "model_json_schema"):
# Collect Pydantic models from Annotated types
query_parameters.append((param_name, args[0], param.default))
else:
# Regular annotated parameter (but not File/Form, already handled above)
query_parameters.append((param_name, param_type, param.default))
query_parameters.append((param_name, param_type, param.default, param_should_embed))
else:
# This is likely a body parameter for POST/PUT or query parameter for GET
# Store the parameter info for later use
# Preserve inspect.Parameter.empty to distinguish "no default" from "default=None"
default_value = param.default
# Extract the base type from union types (e.g., str | None -> str)
# Also make it safe for FastAPI to avoid forward reference issues
query_parameters.append((param_name, param_type, default_value))
# Store extra body fields for later use in post-processing
# We'll store them when the endpoint is created, as we need the full path
@@ -385,8 +386,8 @@ def _find_models_for_endpoint(
# Otherwise, we'll create a combined request model from all parameters
# BUT: For GET requests, never create a request body - all parameters should be query parameters
if is_post_put and len(query_parameters) == 1:
param_name, param_type, default_value = query_parameters[0]
param_name, param_type, default_value, should_embed = query_parameters[0]
if hasattr(param_type, "model_json_schema"):
if hasattr(param_type, "model_json_schema") and not should_embed:
request_model = param_type
query_parameters = [] # Clear query_parameters so we use the single model
@@ -495,7 +496,7 @@ def _create_fastapi_endpoint(app: FastAPI, route, webmethod, api: Api):
if file_form_params and is_post_put:
signature_params = list(file_form_params)
param_annotations = {param.name: param.annotation for param in file_form_params}
for param_name, param_type, default_value in query_parameters:
for param_name, param_type, default_value, _ in query_parameters:
signature_params.append(
inspect.Parameter(
param_name,
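
The _should_embed_parameter helper introduced above keys off FastAPI's Body(embed=...) metadata: an embedded model is wrapped under its parameter name in the JSON body, while a non-embedded model is the body itself. A small sketch of the two shapes (handler and model names here are hypothetical):

from typing import Annotated
from fastapi import Body
from pydantic import BaseModel

class CreateItemRequest(BaseModel):
    name: str

# embed=False: the request body is the bare object, {"name": "x"}.
def create_flat(body: Annotated[CreateItemRequest, Body(embed=False)]): ...

# embed=True: the body is wrapped under the parameter name, {"body": {"name": "x"}}.
def create_wrapped(body: Annotated[CreateItemRequest, Body(embed=True)]): ...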

View file

@@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .kvstore import * # noqa: F401, F403
# Package marker for Stainless config generation.

View file

@@ -0,0 +1,821 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import yaml
HEADER = "# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json\n\n"
SECTION_ORDER = [
"organization",
"security",
"security_schemes",
"targets",
"client_settings",
"environments",
"pagination",
"settings",
"openapi",
"readme",
"resources",
]
ORGANIZATION = {
"name": "llama-stack-client",
"docs": "https://llama-stack.readthedocs.io/en/latest/",
"contact": "llamastack@meta.com",
}
SECURITY = [{}, {"BearerAuth": []}]
SECURITY_SCHEMES = {"BearerAuth": {"type": "http", "scheme": "bearer"}}
TARGETS = {
"node": {
"package_name": "llama-stack-client",
"production_repo": "llamastack/llama-stack-client-typescript",
"publish": {"npm": False},
},
"python": {
"package_name": "llama_stack_client",
"production_repo": "llamastack/llama-stack-client-python",
"options": {"use_uv": True},
"publish": {"pypi": True},
"project_name": "llama_stack_client",
},
"kotlin": {
"reverse_domain": "com.llama_stack_client.api",
"production_repo": None,
"publish": {"maven": False},
},
"go": {
"package_name": "llama-stack-client",
"production_repo": "llamastack/llama-stack-client-go",
"options": {"enable_v2": True, "back_compat_use_shared_package": False},
},
}
CLIENT_SETTINGS = {
"default_env_prefix": "LLAMA_STACK_CLIENT",
"opts": {
"api_key": {
"type": "string",
"read_env": "LLAMA_STACK_CLIENT_API_KEY",
"auth": {"security_scheme": "BearerAuth"},
"nullable": True,
}
},
}
ENVIRONMENTS = {"production": "http://any-hosted-llama-stack.com"}
PAGINATION = [
{
"name": "datasets_iterrows",
"type": "offset",
"request": {
"dataset_id": {"type": "string"},
"start_index": {
"type": "integer",
"x-stainless-pagination-property": {"purpose": "offset_count_param"},
},
"limit": {"type": "integer"},
},
"response": {
"data": {"type": "array", "items": {"type": "object"}},
"next_index": {
"type": "integer",
"x-stainless-pagination-property": {"purpose": "offset_count_start_field"},
},
},
},
{
"name": "openai_cursor_page",
"type": "cursor",
"request": {
"limit": {"type": "integer"},
"after": {
"type": "string",
"x-stainless-pagination-property": {"purpose": "next_cursor_param"},
},
},
"response": {
"data": {"type": "array", "items": {}},
"has_more": {"type": "boolean"},
"last_id": {
"type": "string",
"x-stainless-pagination-property": {"purpose": "next_cursor_field"},
},
},
},
]
SETTINGS = {
"license": "MIT",
"unwrap_response_fields": ["data"],
"file_header": "Copyright (c) Meta Platforms, Inc. and affiliates.\n"
"All rights reserved.\n"
"\n"
"This source code is licensed under the terms described in the "
"LICENSE file in\n"
"the root directory of this source tree.\n",
}
OPENAPI = {
"transformations": [
{
"command": "mergeObject",
"reason": "Better return_type using enum",
"args": {
"target": ["$.components.schemas"],
"object": {
"ReturnType": {
"additionalProperties": False,
"properties": {
"type": {
"enum": [
"string",
"number",
"boolean",
"array",
"object",
"json",
"union",
"chat_completion_input",
"completion_input",
"agent_turn_input",
]
}
},
"required": ["type"],
"type": "object",
}
},
},
},
{
"command": "replaceProperties",
"reason": "Replace return type properties with better model (see above)",
"args": {
"filter": {
"only": [
"$.components.schemas.ScoringFn.properties.return_type",
"$.components.schemas.RegisterScoringFunctionRequest.properties.return_type",
]
},
"value": {"$ref": "#/components/schemas/ReturnType"},
},
},
{
"command": "oneOfToAnyOf",
"reason": "Prism (mock server) doesn't like one of our "
"requests as it technically matches multiple "
"variants",
},
]
}
README = {
"example_requests": {
"default": {
"type": "request",
"endpoint": "post /v1/chat/completions",
"params": {},
},
"headline": {"type": "request", "endpoint": "get /v1/models", "params": {}},
"pagination": {
"type": "request",
"endpoint": "post /v1/chat/completions",
"params": {},
},
}
}
ALL_RESOURCES = {
"$shared": {
"models": {
"interleaved_content_item": "InterleavedContentItem",
"interleaved_content": "InterleavedContent",
"param_type": "ParamType",
"safety_violation": "SafetyViolation",
"sampling_params": "SamplingParams",
"scoring_result": "ScoringResult",
"system_message": "SystemMessage",
}
},
"toolgroups": {
"models": {
"tool_group": "ToolGroup",
"list_tool_groups_response": "ListToolGroupsResponse",
},
"methods": {
"register": "post /v1/toolgroups",
"get": "get /v1/toolgroups/{toolgroup_id}",
"list": "get /v1/toolgroups",
"unregister": "delete /v1/toolgroups/{toolgroup_id}",
},
},
"tools": {
"methods": {
"get": "get /v1/tools/{tool_name}",
"list": {"paginated": False, "endpoint": "get /v1/tools"},
}
},
"tool_runtime": {
"models": {
"tool_def": "ToolDef",
"tool_invocation_result": "ToolInvocationResult",
},
"methods": {
"list_tools": {
"paginated": False,
"endpoint": "get /v1/tool-runtime/list-tools",
},
"invoke_tool": "post /v1/tool-runtime/invoke",
},
},
"responses": {
"models": {
"response_object_stream": "OpenAIResponseObjectStream",
"response_object": "OpenAIResponseObject",
},
"methods": {
"create": {
"type": "http",
"streaming": {
"stream_event_model": "responses.response_object_stream",
"param_discriminator": "stream",
},
"endpoint": "post /v1/responses",
},
"retrieve": "get /v1/responses/{response_id}",
"list": {"type": "http", "endpoint": "get /v1/responses"},
"delete": {
"type": "http",
"endpoint": "delete /v1/responses/{response_id}",
},
},
"subresources": {
"input_items": {
"methods": {
"list": {
"type": "http",
"paginated": False,
"endpoint": "get /v1/responses/{response_id}/input_items",
}
}
}
},
},
"prompts": {
"models": {"prompt": "Prompt", "list_prompts_response": "ListPromptsResponse"},
"methods": {
"create": "post /v1/prompts",
"list": {"paginated": False, "endpoint": "get /v1/prompts"},
"retrieve": "get /v1/prompts/{prompt_id}",
"update": "post /v1/prompts/{prompt_id}",
"delete": "delete /v1/prompts/{prompt_id}",
"set_default_version": "post /v1/prompts/{prompt_id}/set-default-version",
},
"subresources": {
"versions": {
"methods": {
"list": {
"paginated": False,
"endpoint": "get /v1/prompts/{prompt_id}/versions",
}
}
}
},
},
"conversations": {
"models": {"conversation_object": "Conversation"},
"methods": {
"create": {"type": "http", "endpoint": "post /v1/conversations"},
"retrieve": "get /v1/conversations/{conversation_id}",
"update": {
"type": "http",
"endpoint": "post /v1/conversations/{conversation_id}",
},
"delete": {
"type": "http",
"endpoint": "delete /v1/conversations/{conversation_id}",
},
},
"subresources": {
"items": {
"methods": {
"get": {
"type": "http",
"endpoint": "get /v1/conversations/{conversation_id}/items/{item_id}",
},
"list": {
"type": "http",
"endpoint": "get /v1/conversations/{conversation_id}/items",
},
"create": {
"type": "http",
"endpoint": "post /v1/conversations/{conversation_id}/items",
},
"delete": {
"type": "http",
"endpoint": "delete /v1/conversations/{conversation_id}/items/{item_id}",
},
}
}
},
},
"inspect": {
"models": {
"healthInfo": "HealthInfo",
"providerInfo": "ProviderInfo",
"routeInfo": "RouteInfo",
"versionInfo": "VersionInfo",
},
"methods": {"health": "get /v1/health", "version": "get /v1/version"},
},
"embeddings": {
"models": {"create_embeddings_response": "OpenAIEmbeddingsResponse"},
"methods": {"create": "post /v1/embeddings"},
},
"chat": {
"models": {"chat_completion_chunk": "OpenAIChatCompletionChunk"},
"subresources": {
"completions": {
"methods": {
"create": {
"type": "http",
"streaming": {
"stream_event_model": "chat.chat_completion_chunk",
"param_discriminator": "stream",
},
"endpoint": "post /v1/chat/completions",
},
"list": {
"type": "http",
"paginated": False,
"endpoint": "get /v1/chat/completions",
},
"retrieve": {
"type": "http",
"endpoint": "get /v1/chat/completions/{completion_id}",
},
}
}
},
},
"completions": {
"methods": {
"create": {
"type": "http",
"streaming": {"param_discriminator": "stream"},
"endpoint": "post /v1/completions",
}
}
},
"vector_io": {
"models": {"queryChunksResponse": "QueryChunksResponse"},
"methods": {
"insert": "post /v1/vector-io/insert",
"query": "post /v1/vector-io/query",
},
},
"vector_stores": {
"models": {
"vector_store": "VectorStoreObject",
"list_vector_stores_response": "VectorStoreListResponse",
"vector_store_delete_response": "VectorStoreDeleteResponse",
"vector_store_search_response": "VectorStoreSearchResponsePage",
},
"methods": {
"create": "post /v1/vector_stores",
"list": "get /v1/vector_stores",
"retrieve": "get /v1/vector_stores/{vector_store_id}",
"update": "post /v1/vector_stores/{vector_store_id}",
"delete": "delete /v1/vector_stores/{vector_store_id}",
"search": "post /v1/vector_stores/{vector_store_id}/search",
},
"subresources": {
"files": {
"models": {"vector_store_file": "VectorStoreFileObject"},
"methods": {
"list": "get /v1/vector_stores/{vector_store_id}/files",
"retrieve": "get /v1/vector_stores/{vector_store_id}/files/{file_id}",
"update": "post /v1/vector_stores/{vector_store_id}/files/{file_id}",
"delete": "delete /v1/vector_stores/{vector_store_id}/files/{file_id}",
"create": "post /v1/vector_stores/{vector_store_id}/files",
"content": "get /v1/vector_stores/{vector_store_id}/files/{file_id}/content",
},
},
"file_batches": {
"models": {
"vector_store_file_batches": "VectorStoreFileBatchObject",
"list_vector_store_files_in_batch_response": "VectorStoreFilesListInBatchResponse",
},
"methods": {
"create": "post /v1/vector_stores/{vector_store_id}/file_batches",
"retrieve": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
"list_files": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
"cancel": "post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
},
},
},
},
"models": {
"models": {
"model": "OpenAIModel",
"list_models_response": "OpenAIListModelsResponse",
},
"methods": {
"list": {"paginated": False, "endpoint": "get /v1/models"},
"retrieve": "get /v1/models/{model_id}",
"register": "post /v1/models",
"unregister": "delete /v1/models/{model_id}",
},
"subresources": {"openai": {"methods": {"list": {"paginated": False, "endpoint": "get /v1/models"}}}},
},
"providers": {
"models": {"list_providers_response": "ListProvidersResponse"},
"methods": {
"list": {"paginated": False, "endpoint": "get /v1/providers"},
"retrieve": "get /v1/providers/{provider_id}",
},
},
"routes": {
"models": {"list_routes_response": "ListRoutesResponse"},
"methods": {"list": {"paginated": False, "endpoint": "get /v1/inspect/routes"}},
},
"moderations": {
"models": {"create_response": "ModerationObject"},
"methods": {"create": "post /v1/moderations"},
},
"safety": {
"models": {"run_shield_response": "RunShieldResponse"},
"methods": {"run_shield": "post /v1/safety/run-shield"},
},
"shields": {
"models": {"shield": "Shield", "list_shields_response": "ListShieldsResponse"},
"methods": {
"retrieve": "get /v1/shields/{identifier}",
"list": {"paginated": False, "endpoint": "get /v1/shields"},
"register": "post /v1/shields",
"delete": "delete /v1/shields/{identifier}",
},
},
"scoring": {
"methods": {
"score": "post /v1/scoring/score",
"score_batch": "post /v1/scoring/score-batch",
}
},
"scoring_functions": {
"models": {
"scoring_fn": "ScoringFn",
"scoring_fn_params": "ScoringFnParams",
"list_scoring_functions_response": "ListScoringFunctionsResponse",
},
"methods": {
"retrieve": "get /v1/scoring-functions/{scoring_fn_id}",
"list": {"paginated": False, "endpoint": "get /v1/scoring-functions"},
"register": "post /v1/scoring-functions",
"unregister": "delete /v1/scoring-functions/{scoring_fn_id}",
},
},
"files": {
"models": {
"file": "OpenAIFileObject",
"list_files_response": "ListOpenAIFileResponse",
"delete_file_response": "OpenAIFileDeleteResponse",
},
"methods": {
"create": "post /v1/files",
"list": "get /v1/files",
"retrieve": "get /v1/files/{file_id}",
"delete": "delete /v1/files/{file_id}",
"content": "get /v1/files/{file_id}/content",
},
},
"batches": {
"methods": {
"create": "post /v1/batches",
"list": "get /v1/batches",
"retrieve": "get /v1/batches/{batch_id}",
"cancel": "post /v1/batches/{batch_id}/cancel",
}
},
"alpha": {
"subresources": {
"inference": {"methods": {"rerank": "post /v1alpha/inference/rerank"}},
"post_training": {
"models": {
"algorithm_config": "AlgorithmConfig",
"post_training_job": "PostTrainingJob",
"list_post_training_jobs_response": "ListPostTrainingJobsResponse",
},
"methods": {
"preference_optimize": "post /v1alpha/post-training/preference-optimize",
"supervised_fine_tune": "post /v1alpha/post-training/supervised-fine-tune",
},
"subresources": {
"job": {
"methods": {
"artifacts": "get /v1alpha/post-training/job/artifacts",
"cancel": "post /v1alpha/post-training/job/cancel",
"status": "get /v1alpha/post-training/job/status",
"list": {
"paginated": False,
"endpoint": "get /v1alpha/post-training/jobs",
},
}
}
},
},
"benchmarks": {
"models": {
"benchmark": "Benchmark",
"list_benchmarks_response": "ListBenchmarksResponse",
},
"methods": {
"retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}",
"list": {
"paginated": False,
"endpoint": "get /v1alpha/eval/benchmarks",
},
"register": "post /v1alpha/eval/benchmarks",
"unregister": "delete /v1alpha/eval/benchmarks/{benchmark_id}",
},
},
"eval": {
"models": {
"evaluate_response": "EvaluateResponse",
"benchmark_config": "BenchmarkConfig",
"job": "Job",
},
"methods": {
"evaluate_rows": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
"run_eval": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs",
"evaluate_rows_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
"run_eval_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs",
},
"subresources": {
"jobs": {
"methods": {
"cancel": "delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
"status": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
"retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
}
}
},
},
}
},
"beta": {
"subresources": {
"datasets": {
"models": {"list_datasets_response": "ListDatasetsResponse"},
"methods": {
"register": "post /v1beta/datasets",
"retrieve": "get /v1beta/datasets/{dataset_id}",
"list": {"paginated": False, "endpoint": "get /v1beta/datasets"},
"unregister": "delete /v1beta/datasets/{dataset_id}",
"iterrows": "get /v1beta/datasetio/iterrows/{dataset_id}",
"appendrows": "post /v1beta/datasetio/append-rows/{dataset_id}",
},
}
}
},
}
HTTP_METHODS = {"get", "post", "put", "patch", "delete", "options", "head"}
@dataclass
class Endpoint:
method: str
path: str
extra: dict[str, Any] = field(default_factory=dict)
@classmethod
def from_config(cls, value: Any) -> Endpoint:
if isinstance(value, str):
method, _, path = value.partition(" ")
return cls._from_parts(method, path)
if isinstance(value, dict) and "endpoint" in value:
method, _, path = value["endpoint"].partition(" ")
extra = {k: v for k, v in value.items() if k != "endpoint"}
endpoint = cls._from_parts(method, path)
endpoint.extra.update(extra)
return endpoint
raise ValueError(f"Unsupported endpoint value: {value!r}")
@classmethod
def _from_parts(cls, method: str, path: str) -> Endpoint:
method = method.strip().lower()
path = path.strip()
if method not in HTTP_METHODS:
raise ValueError(f"Unsupported HTTP method for Stainless config: {method!r}")
if not path.startswith("/"):
raise ValueError(f"Endpoint path must start with '/': {path!r}")
return cls(method=method, path=path)
def to_config(self) -> Any:
if not self.extra:
return f"{self.method} {self.path}"
data = dict(self.extra)
data["endpoint"] = f"{self.method} {self.path}"
return data
def route_key(self) -> str:
return f"{self.method} {self.path}"
@dataclass
class Resource:
models: dict[str, str] | None = None
methods: dict[str, Endpoint] = field(default_factory=dict)
subresources: dict[str, Resource] = field(default_factory=dict)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> Resource:
models = data.get("models")
methods = {name: Endpoint.from_config(value) for name, value in data.get("methods", {}).items()}
subresources = {name: cls.from_dict(value) for name, value in data.get("subresources", {}).items()}
return cls(models=models, methods=methods, subresources=subresources)
def to_config(self) -> dict[str, Any]:
result: dict[str, Any] = {}
if self.models:
result["models"] = self.models
if self.methods:
result["methods"] = {name: endpoint.to_config() for name, endpoint in self.methods.items()}
if self.subresources:
result["subresources"] = {name: resource.to_config() for name, resource in self.subresources.items()}
return result
def collect_endpoint_paths(self) -> set[str]:
paths = {endpoint.route_key() for endpoint in self.methods.values()}
for subresource in self.subresources.values():
paths.update(subresource.collect_endpoint_paths())
return paths
def iter_endpoints(self, prefix: str) -> Iterator[tuple[str, str]]:
for method_name, endpoint in self.methods.items():
label = f"{prefix}.{method_name}" if prefix else method_name
yield endpoint.route_key(), label
for sub_name, subresource in self.subresources.items():
sub_prefix = f"{prefix}.{sub_name}" if prefix else sub_name
yield from subresource.iter_endpoints(sub_prefix)
_RESOURCES = {name: Resource.from_dict(data) for name, data in ALL_RESOURCES.items()}
def _load_openapi_paths(openapi_path: Path) -> set[str]:
spec = yaml.safe_load(openapi_path.read_text()) or {}
paths: set[str] = set()
for path, path_item in (spec.get("paths") or {}).items():
if not isinstance(path_item, dict):
continue
for method, operation in path_item.items():
if not isinstance(operation, dict):
continue
paths.add(f"{str(method).lower()} {path}")
return paths


@dataclass(frozen=True)
class StainlessConfig:
    organization: dict[str, Any]
    security: list[Any]
    security_schemes: dict[str, Any]
    targets: dict[str, Any]
    client_settings: dict[str, Any]
    environments: dict[str, Any]
    pagination: list[dict[str, Any]]
    settings: dict[str, Any]
    openapi: dict[str, Any]
    readme: dict[str, Any]
    resources: dict[str, Resource]

    @classmethod
    def make(cls) -> StainlessConfig:
        return cls(
            organization=ORGANIZATION,
            security=SECURITY,
            security_schemes=SECURITY_SCHEMES,
            targets=TARGETS,
            client_settings=CLIENT_SETTINGS,
            environments=ENVIRONMENTS,
            pagination=PAGINATION,
            settings=SETTINGS,
            openapi=OPENAPI,
            readme=README,
            resources=dict(_RESOURCES),
        )

    def referenced_paths(self) -> set[str]:
        paths: set[str] = set()
        for resource in self.resources.values():
            paths.update(resource.collect_endpoint_paths())
        paths.update(self.readme_endpoint_paths())
        return paths

    def readme_endpoint_paths(self) -> set[str]:
        example_requests = self.readme.get("example_requests", {}) if self.readme else {}
        paths: set[str] = set()
        for entry in example_requests.values():
            endpoint = entry.get("endpoint") if isinstance(entry, dict) else None
            if isinstance(endpoint, str):
                method, _, route = endpoint.partition(" ")
                method = method.strip().lower()
                route = route.strip()
                if method and route:
                    paths.add(f"{method} {route}")
        return paths

    def endpoint_map(self) -> dict[str, list[str]]:
        mapping: dict[str, list[str]] = {}
        for resource_name, resource in self.resources.items():
            for route, label in resource.iter_endpoints(resource_name):
                mapping.setdefault(route, []).append(label)
        return mapping

    def validate_unique_endpoints(self) -> None:
        duplicates: dict[str, list[str]] = {}
        for route, labels in self.endpoint_map().items():
            top_levels = {label.split(".", 1)[0] for label in labels}
            if len(top_levels) > 1:
                duplicates[route] = labels
        if duplicates:
            formatted = "\n".join(
                f" - {route} defined in: {', '.join(sorted(labels))}" for route, labels in sorted(duplicates.items())
            )
            raise ValueError("Duplicate endpoints found across resources:\n" + formatted)

    def validate_readme_endpoints(self) -> None:
        resource_paths: set[str] = set()
        for resource in self.resources.values():
            resource_paths.update(resource.collect_endpoint_paths())
        missing = sorted(path for path in self.readme_endpoint_paths() if path not in resource_paths)
        if missing:
            formatted = "\n".join(f" - {path}" for path in missing)
            raise ValueError("README example endpoints are not present in Stainless resources:\n" + formatted)

    def to_dict(self) -> dict[str, Any]:
        cfg: dict[str, Any] = {}
        for section in SECTION_ORDER:
            if section == "resources":
                cfg[section] = {name: resource.to_config() for name, resource in self.resources.items()}
                continue
            cfg[section] = getattr(self, section)
        return cfg

    def validate_against_openapi(self, openapi_path: Path) -> None:
        if not openapi_path.exists():
            raise FileNotFoundError(f"OpenAPI spec not found at {openapi_path}")
        spec_paths = _load_openapi_paths(openapi_path)
        config_paths = self.referenced_paths()
        missing = sorted(path for path in config_paths if path not in spec_paths)
        if missing:
            formatted = "\n".join(f" - {path}" for path in missing)
            raise ValueError("Stainless config references missing endpoints:\n" + formatted)

    def validate(self, openapi_path: Path | None = None) -> None:
        self.validate_unique_endpoints()
        self.validate_readme_endpoints()
        if openapi_path is not None:
            self.validate_against_openapi(openapi_path)
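

# Editorial sketch of the validation flow used by write_config() below:
#   config = StainlessConfig.make()
#   config.validate(Path("client-sdks/stainless/openapi.yml"))
#   # raises ValueError on duplicate or unknown endpoints, and
#   # FileNotFoundError when the OpenAPI spec is absent.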


def build_config() -> dict[str, Any]:
    return StainlessConfig.make().to_dict()


def write_config(repo_root: Path, openapi_path: Path | None = None) -> Path:
    stainless_config = StainlessConfig.make()
    spec_path = (openapi_path or (repo_root / "client-sdks" / "stainless" / "openapi.yml")).resolve()
    stainless_config.validate(spec_path)
    yaml_text = yaml.safe_dump(stainless_config.to_dict(), sort_keys=False)
    output = repo_root / "client-sdks" / "stainless" / "config.yml"
    output.write_text(HEADER + yaml_text)
    return output


def main() -> None:
    repo_root = Path(__file__).resolve().parents[3]
    output = write_config(repo_root)
    print(f"Wrote Stainless config: {output}")


if __name__ == "__main__":
    main()

View file

@@ -8,7 +8,8 @@
 import subprocess
 import sys
 from pathlib import Path
-from typing import Any
+from types import UnionType
+from typing import Annotated, Any, Union, get_args, get_origin

 from pydantic_core import PydanticUndefined
 from rich.progress import Progress, SpinnerColumn, TextColumn
@@ -51,6 +52,41 @@ class ChangedPathTracker:
         return self._changed_paths


+def extract_type_annotation(annotation: Any) -> str:
+    """extract a type annotation into a clean string representation."""
+    if annotation is None:
+        return "Any"
+    if annotation is type(None):
+        return "None"
+
+    origin = get_origin(annotation)
+    args = get_args(annotation)
+
+    # recursive workaround for Annotated types to ignore FieldInfo part
+    if origin is Annotated and args:
+        return extract_type_annotation(args[0])
+
+    if origin in [Union, UnionType]:
+        non_none_args = [arg for arg in args if arg is not type(None)]
+        has_none = len(non_none_args) < len(args)
+        if len(non_none_args) == 1:
+            formatted = extract_type_annotation(non_none_args[0])
+            return f"{formatted} | None" if has_none else formatted
+        else:
+            formatted_args = [extract_type_annotation(arg) for arg in non_none_args]
+            result = " | ".join(formatted_args)
+            return f"{result} | None" if has_none else result
+
+    if origin is not None and args:
+        origin_name = getattr(origin, "__name__", str(origin))
+        formatted_args = [extract_type_annotation(arg) for arg in args]
+        return f"{origin_name}[{', '.join(formatted_args)}]"
+
+    return annotation.__name__ if hasattr(annotation, "__name__") else str(annotation)
+
+
 def get_config_class_info(config_class_path: str) -> dict[str, Any]:
     """Extract configuration information from a config class."""
     try:
@@ -78,14 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
     for field_name, field in config_class.model_fields.items():
         if getattr(field, "exclude", False):
             continue
-        field_type = str(field.annotation) if field.annotation else "Any"
-
-        # this string replace is ridiculous
-        field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
-        field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-        field_type = field_type.replace("llama_stack_api.inference.", "")
-        field_type = field_type.replace("llama_stack.providers.", "")
-        field_type = field_type.replace("llama_stack_api.datatypes.", "")
+        field_type = extract_type_annotation(field.annotation)

         default_value = field.default
         if field.default_factory is not None:
@@ -345,6 +375,14 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri
     # Add YAML frontmatter for index
     md_lines.append("---")
     if api_docstring:
-        clean_desc = api_docstring.strip().replace('"', '\\"')
-        md_lines.append(f'description: "{clean_desc}"')
+        # Handle multi-line descriptions in YAML
+        if "\n" in api_docstring.strip():
+            md_lines.append("description: |")
+            for line in api_docstring.strip().split("\n"):
+                # Avoid trailing whitespace by only adding spaces to non-empty lines
+                md_lines.append(f"  {line}" if line.strip() else "")
+        else:
+            # For single line descriptions, format properly for YAML
+            clean_desc = api_docstring.strip().replace('"', '\\"')
+            md_lines.append(f'description: "{clean_desc}"')
     md_lines.append(f"sidebar_label: {sidebar_label}")

View file

@@ -17,3 +17,5 @@ PYTHONPATH=$PYTHONPATH:$stack_dir \
     python3 -m scripts.openapi_generator "$stack_dir"/docs/static
 cp "$stack_dir"/docs/static/stainless-llama-stack-spec.yaml "$stack_dir"/client-sdks/stainless/openapi.yml

+PYTHONPATH=$PYTHONPATH:$stack_dir \
+    python3 -m scripts.openapi_generator.stainless_config.generate_config

View file

@@ -11,10 +11,9 @@ from typing import Any, Literal
 from pydantic import BaseModel, TypeAdapter

 from llama_stack.core.datatypes import AccessRule, StackRunConfig
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     Conversation,
     ConversationDeletedResource,
@@ -25,6 +24,7 @@ from llama_stack_api import (
     Conversations,
     Metadata,
 )
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType

 logger = get_logger(name=__name__, category="openai_conversations")

View file

@@ -10,7 +10,7 @@ from typing import Any
 from pydantic import BaseModel

 from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
+from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
 from llama_stack_api import ListPromptsResponse, Prompt, Prompts

View file

@@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta
 from starlette.types import ASGIApp, Receive, Scope, Send

 from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
+from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
+from llama_stack_api.internal.kvstore import KVStore

 logger = get_logger(name=__name__, category="core::server")

View file

@@ -385,8 +385,8 @@ def _initialize_storage(run_config: StackRunConfig):
         else:
             raise ValueError(f"Unknown storage backend type: {type}")

-    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
-    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+    from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
+    from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends

     register_kvstore_backends(kv_backends)
     register_sqlstore_backends(sql_backends)

View file

@@ -12,6 +12,8 @@ from typing import Annotated, Literal

 from pydantic import BaseModel, Field, field_validator

+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+

 class StorageBackendType(StrEnum):
     KV_REDIS = "kv_redis"
@@ -256,15 +258,24 @@ class ResponsesStoreReference(InferenceStoreReference):

 class ServerStoresConfig(BaseModel):
     metadata: KVStoreReference | None = Field(
-        default=None,
+        default=KVStoreReference(
+            backend="kv_default",
+            namespace="registry",
+        ),
         description="Metadata store configuration (uses KV backend)",
     )
     inference: InferenceStoreReference | None = Field(
-        default=None,
+        default=InferenceStoreReference(
+            backend="sql_default",
+            table_name="inference_store",
+        ),
         description="Inference store configuration (uses SQL backend)",
     )
     conversations: SqlStoreReference | None = Field(
-        default=None,
+        default=SqlStoreReference(
+            backend="sql_default",
+            table_name="openai_conversations",
+        ),
         description="Conversations store configuration (uses SQL backend)",
     )
     responses: ResponsesStoreReference | None = Field(
@@ -272,13 +283,21 @@ class ServerStoresConfig(BaseModel):
         description="Responses store configuration (uses SQL backend)",
     )
     prompts: KVStoreReference | None = Field(
-        default=None,
+        default=KVStoreReference(backend="kv_default", namespace="prompts"),
         description="Prompts store configuration (uses KV backend)",
     )


 class StorageConfig(BaseModel):
     backends: dict[str, StorageBackendConfig] = Field(
+        default={
+            "kv_default": SqliteKVStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
+            ),
+            "sql_default": SqliteSqlStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
+            ),
+        },
         description="Named backend configurations (e.g., 'default', 'cache')",
     )
     stores: ServerStoresConfig = Field(
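
With these defaults, an unset stores section now resolves to concrete references instead of None; a minimal editorial sketch (assuming the fields not shown here default similarly):

    from llama_stack.core.storage.datatypes import ServerStoresConfig

    stores = ServerStoresConfig()
    assert stores.metadata is not None and stores.metadata.backend == "kv_default"
    assert stores.inference is not None and stores.inference.table_name == "inference_store"
    assert stores.conversations is not None and stores.conversations.backend == "sql_default"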

View file

@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_api.internal.kvstore import KVStore as KVStore
+
+from .kvstore import *  # noqa: F401, F403

View file

@@ -13,11 +13,19 @@ from __future__ import annotations

 import asyncio
 from collections import defaultdict
+from datetime import datetime
+from typing import cast

-from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType
+from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
+from llama_stack_api.internal.kvstore import KVStore

-from .api import KVStore
-from .config import KVStoreConfig
+from .config import (
+    KVStoreConfig,
+    MongoDBKVStoreConfig,
+    PostgresKVStoreConfig,
+    RedisKVStoreConfig,
+    SqliteKVStoreConfig,
+)


 def kvstore_dependencies():
@@ -33,7 +41,7 @@ def kvstore_dependencies():

 class InmemoryKVStoreImpl(KVStore):
     def __init__(self):
-        self._store = {}
+        self._store: dict[str, str] = {}

     async def initialize(self) -> None:
         pass
@@ -41,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
     async def get(self, key: str) -> str | None:
         return self._store.get(key)

-    async def set(self, key: str, value: str) -> None:
+    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
         self._store[key] = value

     async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
@@ -70,7 +78,8 @@ def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     _KVSTORE_INSTANCES.clear()
     _KVSTORE_LOCKS.clear()
     for name, cfg in backends.items():
-        _KVSTORE_BACKENDS[name] = cfg
+        typed_cfg = cast(KVStoreConfig, cfg)
+        _KVSTORE_BACKENDS[name] = typed_cfg


 async def kvstore_impl(reference: KVStoreReference) -> KVStore:
@@ -94,19 +103,20 @@ async def kvstore_impl(reference: KVStoreReference) -> KVStore:
     config = backend_config.model_copy()
     config.namespace = reference.namespace

-    if config.type == StorageBackendType.KV_REDIS.value:
+    impl: KVStore
+    if isinstance(config, RedisKVStoreConfig):
         from .redis import RedisKVStoreImpl

         impl = RedisKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_SQLITE.value:
+    elif isinstance(config, SqliteKVStoreConfig):
         from .sqlite import SqliteKVStoreImpl

         impl = SqliteKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_POSTGRES.value:
+    elif isinstance(config, PostgresKVStoreConfig):
         from .postgres import PostgresKVStoreImpl

         impl = PostgresKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_MONGODB.value:
+    elif isinstance(config, MongoDBKVStoreConfig):
         from .mongodb import MongoDBKVStoreImpl

         impl = MongoDBKVStoreImpl(config)
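
With isinstance-based dispatch, the backend type now comes from the config class itself; a minimal editorial sketch of the lookup path (backend name, db path, and the config module location are illustrative assumptions), run inside an async context:

    from llama_stack.core.storage.datatypes import KVStoreReference
    from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
    from llama_stack.core.storage.kvstore.kvstore import kvstore_impl, register_kvstore_backends

    register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db")})
    store = await kvstore_impl(KVStoreReference(backend="kv_default", namespace="registry"))
    await store.set("alpha", "1")
    assert await store.get("alpha") == "1"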

Some files were not shown because too many files have changed in this diff.