Merge branch 'main' into add_max_tool_calls

This commit is contained in:
Ashwin Bharambe 2025-11-10 12:01:32 -08:00 committed by GitHub
commit dfa2210319
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 1889 additions and 1700 deletions

View file

@ -998,39 +998,6 @@ paths:
description: List models using the OpenAI API. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Model.
content:
application/json:
schema:
$ref: '#/components/schemas/Model'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Register model.
description: >-
Register model.
Register a model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
deprecated: false
/v1/models/{model_id}: /v1/models/{model_id}:
get: get:
responses: responses:
@ -1065,36 +1032,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Unregister model.
description: >-
Unregister model.
Unregister a model.
parameters:
- name: model_id
in: path
description: >-
The identifier of the model to unregister.
required: true
schema:
type: string
deprecated: false
/v1/moderations: /v1/moderations:
post: post:
responses: responses:
@ -1725,32 +1662,6 @@ paths:
description: List all scoring functions. description: List all scoring functions.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Register a scoring function.
description: Register a scoring function.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true
deprecated: false
/v1/scoring-functions/{scoring_fn_id}: /v1/scoring-functions/{scoring_fn_id}:
get: get:
responses: responses:
@ -1782,33 +1693,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Unregister a scoring function.
description: Unregister a scoring function.
parameters:
- name: scoring_fn_id
in: path
description: >-
The ID of the scoring function to unregister.
required: true
schema:
type: string
deprecated: false
/v1/scoring/score: /v1/scoring/score:
post: post:
responses: responses:
@ -1897,36 +1781,6 @@ paths:
description: List all shields. description: List all shields.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Shield.
content:
application/json:
schema:
$ref: '#/components/schemas/Shield'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Register a shield.
description: Register a shield.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterShieldRequest'
required: true
deprecated: false
/v1/shields/{identifier}: /v1/shields/{identifier}:
get: get:
responses: responses:
@ -1958,33 +1812,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Unregister a shield.
description: Unregister a shield.
parameters:
- name: identifier
in: path
description: >-
The identifier of the shield to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@ -2080,32 +1907,6 @@ paths:
description: List tool groups with optional provider. description: List tool groups with optional provider.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Register a tool group.
description: Register a tool group.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterToolGroupRequest'
required: true
deprecated: false
/v1/toolgroups/{toolgroup_id}: /v1/toolgroups/{toolgroup_id}:
get: get:
responses: responses:
@ -2137,32 +1938,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Unregister a tool group.
description: Unregister a tool group.
parameters:
- name: toolgroup_id
in: path
description: The ID of the tool group to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tools: /v1/tools:
get: get:
responses: responses:
@ -2916,11 +2691,11 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
A list of InterleavedContent representing the file contents. A VectorStoreFileContentResponse representing the file contents.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/VectorStoreFileContentsResponse' $ref: '#/components/schemas/VectorStoreFileContentResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -3171,7 +2946,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterDatasetRequest' $ref: '#/components/schemas/RegisterDatasetRequest'
required: true required: true
deprecated: false deprecated: true
/v1beta/datasets/{dataset_id}: /v1beta/datasets/{dataset_id}:
get: get:
responses: responses:
@ -3228,7 +3003,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
responses: responses:
@ -3279,7 +3054,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest' $ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true required: true
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}: /v1alpha/eval/benchmarks/{benchmark_id}:
get: get:
responses: responses:
@ -3336,7 +3111,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations: /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post: post:
responses: responses:
@ -6280,46 +6055,6 @@ components:
required: required:
- data - data
title: OpenAIListModelsResponse title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -6377,6 +6112,15 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -9130,61 +8874,6 @@ components:
required: required:
- data - data
title: ListScoringFunctionsResponse title: ListScoringFunctionsResponse
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
RegisterScoringFunctionRequest:
type: object
properties:
scoring_fn_id:
type: string
description: >-
The ID of the scoring function to register.
description:
type: string
description: The description of the scoring function.
return_type:
$ref: '#/components/schemas/ParamType'
description: The return type of the scoring function.
provider_scoring_fn_id:
type: string
description: >-
The ID of the provider scoring function to use for the scoring function.
provider_id:
type: string
description: >-
The ID of the provider to use for the scoring function.
params:
$ref: '#/components/schemas/ScoringFnParams'
description: >-
The parameters for the scoring function for benchmark eval, these can
be overridden for app eval.
additionalProperties: false
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequest
ScoreRequest: ScoreRequest:
type: object type: object
properties: properties:
@ -9360,35 +9049,6 @@ components:
required: required:
- data - data
title: ListShieldsResponse title: ListShieldsResponse
RegisterShieldRequest:
type: object
properties:
shield_id:
type: string
description: >-
The identifier of the shield to register.
provider_shield_id:
type: string
description: >-
The identifier of the shield in the provider.
provider_id:
type: string
description: The identifier of the provider.
params:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The parameters of the shield.
additionalProperties: false
required:
- shield_id
title: RegisterShieldRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@ -9649,37 +9309,6 @@ components:
title: ListToolGroupsResponse title: ListToolGroupsResponse
description: >- description: >-
Response containing a list of tool groups. Response containing a list of tool groups.
RegisterToolGroupRequest:
type: object
properties:
toolgroup_id:
type: string
description: The ID of the tool group to register.
provider_id:
type: string
description: >-
The ID of the provider to use for the tool group.
mcp_endpoint:
$ref: '#/components/schemas/URL'
description: >-
The MCP endpoint to use for the tool group.
args:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
A dictionary of arguments to pass to the tool group.
additionalProperties: false
required:
- toolgroup_id
- provider_id
title: RegisterToolGroupRequest
Chunk: Chunk:
type: object type: object
properties: properties:
@ -10480,41 +10109,35 @@ components:
title: VectorStoreContent title: VectorStoreContent
description: >- description: >-
Content item from a vector store file or search result. Content item from a vector store file or search result.
VectorStoreFileContentsResponse: VectorStoreFileContentResponse:
type: object type: object
properties: properties:
file_id: object:
type: string type: string
description: Unique identifier for the file const: vector_store.file_content.page
filename: default: vector_store.file_content.page
type: string
description: Name of the file
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
Key-value attributes associated with the file The object type, which is always `vector_store.file_content.page`
content: data:
type: array type: array
items: items:
$ref: '#/components/schemas/VectorStoreContent' $ref: '#/components/schemas/VectorStoreContent'
description: List of content items from the file description: Parsed content of the file
has_more:
type: boolean
description: >-
Indicates if there are more content pages to fetch
next_page:
type: string
description: The token for the next page, if any
additionalProperties: false additionalProperties: false
required: required:
- file_id - object
- filename - data
- attributes - has_more
- content title: VectorStoreFileContentResponse
title: VectorStoreFileContentsResponse
description: >- description: >-
Response from retrieving the contents of a vector store file. Represents the parsed content of a vector store file.
OpenaiSearchVectorStoreRequest: OpenaiSearchVectorStoreRequest:
type: object type: object
properties: properties:
@ -10831,68 +10454,6 @@ components:
- data - data
title: ListDatasetsResponse title: ListDatasetsResponse
description: Response from listing datasets. description: Response from listing datasets.
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
Benchmark: Benchmark:
type: object type: object
properties: properties:
@ -10960,47 +10521,6 @@ components:
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
BenchmarkConfig: BenchmarkConfig:
type: object type: object
properties: properties:
@ -11862,6 +11382,109 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: SupervisedFineTuneRequest title: SupervisedFineTuneRequest
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed

View file

@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
# Kubernetes Deployment Guide # Kubernetes Deployment Guide
Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS. Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually.
## Prerequisites ## Prerequisites
@ -110,115 +110,176 @@ spec:
EOF EOF
``` ```
### Step 3: Configure Llama Stack ### Step 3: Install Kubernetes Operator
Update your run configuration: Install the Llama Stack Kubernetes operator to manage Llama Stack deployments:
```yaml
providers:
inference:
- provider_id: vllm
provider_type: remote::vllm
config:
url: http://vllm-server.default.svc.cluster.local:8000/v1
max_tokens: 4096
api_token: fake
```
Build container image:
```bash ```bash
tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <<EOF # Install from the latest main branch
FROM distribution-myenv:dev kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml
RUN apt-get update && apt-get install -y git
RUN git clone https://github.com/meta-llama/llama-stack.git /app/llama-stack-source # Or install a specific version (e.g., v0.4.0)
ADD ./vllm-llama-stack-run-k8s.yaml /app/config.yaml # kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/v0.4.0/release/operator.yaml
EOF
podman build -f $tmp_dir/Containerfile.llama-stack-run-k8s -t llama-stack-run-k8s $tmp_dir
``` ```
### Step 4: Deploy Llama Stack Server Verify the operator is running:
```bash
kubectl get pods -n llama-stack-operator-system
```
For more information about the operator, see the [llama-stack-k8s-operator repository](https://github.com/llamastack/llama-stack-k8s-operator).
### Step 4: Deploy Llama Stack Server using Operator
Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).
```yaml ```yaml
cat <<EOF | kubectl apply -f - cat <<EOF | kubectl apply -f -
apiVersion: v1 apiVersion: llamastack.io/v1alpha1
kind: PersistentVolumeClaim kind: LlamaStackDistribution
metadata: metadata:
name: llama-pvc name: llamastack-vllm
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llama-stack-server
spec: spec:
replicas: 1 replicas: 1
selector: server:
matchLabels: distribution:
app.kubernetes.io/name: llama-stack name: starter
template: containerSpec:
metadata: port: 8321
labels: env:
app.kubernetes.io/name: llama-stack - name: VLLM_URL
spec: value: "http://vllm-server.default.svc.cluster.local:8000/v1"
containers: - name: VLLM_MAX_TOKENS
- name: llama-stack value: "4096"
image: localhost/llama-stack-run-k8s:latest - name: VLLM_API_TOKEN
imagePullPolicy: IfNotPresent value: "fake"
command: ["llama", "stack", "run", "/app/config.yaml"] # Optional: override run.yaml from a ConfigMap using userConfig
ports: userConfig:
- containerPort: 5000 configMap:
volumeMounts: name: llama-stack-config
- name: llama-storage storage:
mountPath: /root/.llama size: "20Gi"
volumes: mountPath: "/home/lls/.lls"
- name: llama-storage
persistentVolumeClaim:
claimName: llama-pvc
---
apiVersion: v1
kind: Service
metadata:
name: llama-stack-service
spec:
selector:
app.kubernetes.io/name: llama-stack
ports:
- protocol: TCP
port: 5000
targetPort: 5000
type: ClusterIP
EOF EOF
``` ```
**Configuration Options:**
- `replicas`: Number of Llama Stack server instances to run
- `server.distribution.name`: The distribution to use (e.g., `starter` for the starter distribution). See the [list of supported distributions](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository.
- `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
- `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
- `server.containerSpec.env`: Environment variables to configure providers:
- `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
- `server.storage.size`: Size of the persistent volume for model and data storage
- `server.storage.mountPath`: Where to mount the storage in the container
**Note:** For a complete list of supported distributions, see [distributions.json](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository. To use a custom or non-supported distribution, set the `server.distribution.image` field with your container image instead of `server.distribution.name`.
The operator automatically creates:
- A Deployment for the Llama Stack server
- A Service to access the server
- A PersistentVolumeClaim for storage
- All necessary RBAC resources
Check the status of your deployment:
```bash
kubectl get llamastackdistribution
kubectl describe llamastackdistribution llamastack-vllm
```
### Step 5: Test Deployment ### Step 5: Test Deployment
Wait for the Llama Stack server pod to be ready:
```bash ```bash
# Port forward and test # Check the status of the LlamaStackDistribution
kubectl port-forward service/llama-stack-service 5000:5000 kubectl get llamastackdistribution llamastack-vllm
llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
# Check the pods created by the operator
kubectl get pods -l app.kubernetes.io/name=llama-stack
# Wait for the pod to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=llama-stack --timeout=300s
```
Get the service name created by the operator (it typically follows the pattern `<llamastackdistribution-name>-service`):
```bash
# List services to find the service name
kubectl get services | grep llamastack
# Port forward and test (replace SERVICE_NAME with the actual service name)
kubectl port-forward service/llamastack-vllm-service 8321:8321
```
In another terminal, test the deployment:
```bash
llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?"
``` ```
## Troubleshooting ## Troubleshooting
**Check pod status:** ### vLLM Server Issues
**Check vLLM pod status:**
```bash ```bash
kubectl get pods -l app.kubernetes.io/name=vllm kubectl get pods -l app.kubernetes.io/name=vllm
kubectl logs -l app.kubernetes.io/name=vllm kubectl logs -l app.kubernetes.io/name=vllm
``` ```
**Test service connectivity:** **Test vLLM service connectivity:**
```bash ```bash
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
``` ```
### Llama Stack Server Issues
**Check LlamaStackDistribution status:**
```bash
# Get detailed status
kubectl describe llamastackdistribution llamastack-vllm
# Check for events
kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm
```
**Check operator-managed pods:**
```bash
# List all pods managed by the operator
kubectl get pods -l app.kubernetes.io/name=llama-stack
# Check pod logs (replace POD_NAME with actual pod name)
kubectl logs -l app.kubernetes.io/name=llama-stack
```
**Check operator status:**
```bash
# Verify the operator is running
kubectl get pods -n llama-stack-operator-system
# Check operator logs if issues persist
kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager
```
**Verify service connectivity:**
```bash
# Get the service endpoint
kubectl get svc llamastack-vllm-service
# Test connectivity from within the cluster
kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health
```
## Related Resources ## Related Resources
- **[Deployment Overview](/docs/deploying/)** - Overview of deployment options - **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
- **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions - **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
- **[Configuration](/docs/distributions/configuration)** - Detailed configuration options - **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
- **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of llama-stack kubernetes operator
- **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API Spec of the llama-stack operator Custom Resource.

File diff suppressed because it is too large Load diff

View file

@ -162,7 +162,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterDatasetRequest' $ref: '#/components/schemas/RegisterDatasetRequest'
required: true required: true
deprecated: false deprecated: true
/v1beta/datasets/{dataset_id}: /v1beta/datasets/{dataset_id}:
get: get:
responses: responses:
@ -219,7 +219,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
responses: responses:
@ -270,7 +270,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest' $ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true required: true
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}: /v1alpha/eval/benchmarks/{benchmark_id}:
get: get:
responses: responses:
@ -327,7 +327,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations: /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post: post:
responses: responses:
@ -936,68 +936,6 @@ components:
- data - data
title: ListDatasetsResponse title: ListDatasetsResponse
description: Response from listing datasets. description: Response from listing datasets.
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
Benchmark: Benchmark:
type: object type: object
properties: properties:
@ -1065,47 +1003,6 @@ components:
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
AggregationFunctionType: AggregationFunctionType:
type: string type: string
enum: enum:
@ -2254,6 +2151,109 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: SupervisedFineTuneRequest title: SupervisedFineTuneRequest
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed

View file

@ -995,39 +995,6 @@ paths:
description: List models using the OpenAI API. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Model.
content:
application/json:
schema:
$ref: '#/components/schemas/Model'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Register model.
description: >-
Register model.
Register a model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
deprecated: false
/v1/models/{model_id}: /v1/models/{model_id}:
get: get:
responses: responses:
@ -1062,36 +1029,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Unregister model.
description: >-
Unregister model.
Unregister a model.
parameters:
- name: model_id
in: path
description: >-
The identifier of the model to unregister.
required: true
schema:
type: string
deprecated: false
/v1/moderations: /v1/moderations:
post: post:
responses: responses:
@ -1722,32 +1659,6 @@ paths:
description: List all scoring functions. description: List all scoring functions.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Register a scoring function.
description: Register a scoring function.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true
deprecated: false
/v1/scoring-functions/{scoring_fn_id}: /v1/scoring-functions/{scoring_fn_id}:
get: get:
responses: responses:
@ -1779,33 +1690,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Unregister a scoring function.
description: Unregister a scoring function.
parameters:
- name: scoring_fn_id
in: path
description: >-
The ID of the scoring function to unregister.
required: true
schema:
type: string
deprecated: false
/v1/scoring/score: /v1/scoring/score:
post: post:
responses: responses:
@ -1894,36 +1778,6 @@ paths:
description: List all shields. description: List all shields.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Shield.
content:
application/json:
schema:
$ref: '#/components/schemas/Shield'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Register a shield.
description: Register a shield.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterShieldRequest'
required: true
deprecated: false
/v1/shields/{identifier}: /v1/shields/{identifier}:
get: get:
responses: responses:
@ -1955,33 +1809,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Unregister a shield.
description: Unregister a shield.
parameters:
- name: identifier
in: path
description: >-
The identifier of the shield to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@ -2077,32 +1904,6 @@ paths:
description: List tool groups with optional provider. description: List tool groups with optional provider.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Register a tool group.
description: Register a tool group.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterToolGroupRequest'
required: true
deprecated: false
/v1/toolgroups/{toolgroup_id}: /v1/toolgroups/{toolgroup_id}:
get: get:
responses: responses:
@ -2134,32 +1935,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Unregister a tool group.
description: Unregister a tool group.
parameters:
- name: toolgroup_id
in: path
description: The ID of the tool group to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tools: /v1/tools:
get: get:
responses: responses:
@ -2913,11 +2688,11 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
A list of InterleavedContent representing the file contents. A VectorStoreFileContentResponse representing the file contents.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/VectorStoreFileContentsResponse' $ref: '#/components/schemas/VectorStoreFileContentResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -5564,46 +5339,6 @@ components:
required: required:
- data - data
title: OpenAIListModelsResponse title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -5661,6 +5396,15 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -8414,61 +8158,6 @@ components:
required: required:
- data - data
title: ListScoringFunctionsResponse title: ListScoringFunctionsResponse
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
RegisterScoringFunctionRequest:
type: object
properties:
scoring_fn_id:
type: string
description: >-
The ID of the scoring function to register.
description:
type: string
description: The description of the scoring function.
return_type:
$ref: '#/components/schemas/ParamType'
description: The return type of the scoring function.
provider_scoring_fn_id:
type: string
description: >-
The ID of the provider scoring function to use for the scoring function.
provider_id:
type: string
description: >-
The ID of the provider to use for the scoring function.
params:
$ref: '#/components/schemas/ScoringFnParams'
description: >-
The parameters for the scoring function for benchmark eval, these can
be overridden for app eval.
additionalProperties: false
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequest
ScoreRequest: ScoreRequest:
type: object type: object
properties: properties:
@ -8644,35 +8333,6 @@ components:
required: required:
- data - data
title: ListShieldsResponse title: ListShieldsResponse
RegisterShieldRequest:
type: object
properties:
shield_id:
type: string
description: >-
The identifier of the shield to register.
provider_shield_id:
type: string
description: >-
The identifier of the shield in the provider.
provider_id:
type: string
description: The identifier of the provider.
params:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The parameters of the shield.
additionalProperties: false
required:
- shield_id
title: RegisterShieldRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@ -8933,37 +8593,6 @@ components:
title: ListToolGroupsResponse title: ListToolGroupsResponse
description: >- description: >-
Response containing a list of tool groups. Response containing a list of tool groups.
RegisterToolGroupRequest:
type: object
properties:
toolgroup_id:
type: string
description: The ID of the tool group to register.
provider_id:
type: string
description: >-
The ID of the provider to use for the tool group.
mcp_endpoint:
$ref: '#/components/schemas/URL'
description: >-
The MCP endpoint to use for the tool group.
args:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
A dictionary of arguments to pass to the tool group.
additionalProperties: false
required:
- toolgroup_id
- provider_id
title: RegisterToolGroupRequest
Chunk: Chunk:
type: object type: object
properties: properties:
@ -9764,41 +9393,35 @@ components:
title: VectorStoreContent title: VectorStoreContent
description: >- description: >-
Content item from a vector store file or search result. Content item from a vector store file or search result.
VectorStoreFileContentsResponse: VectorStoreFileContentResponse:
type: object type: object
properties: properties:
file_id: object:
type: string type: string
description: Unique identifier for the file const: vector_store.file_content.page
filename: default: vector_store.file_content.page
type: string
description: Name of the file
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
Key-value attributes associated with the file The object type, which is always `vector_store.file_content.page`
content: data:
type: array type: array
items: items:
$ref: '#/components/schemas/VectorStoreContent' $ref: '#/components/schemas/VectorStoreContent'
description: List of content items from the file description: Parsed content of the file
has_more:
type: boolean
description: >-
Indicates if there are more content pages to fetch
next_page:
type: string
description: The token for the next page, if any
additionalProperties: false additionalProperties: false
required: required:
- file_id - object
- filename - data
- attributes - has_more
- content title: VectorStoreFileContentResponse
title: VectorStoreFileContentsResponse
description: >- description: >-
Response from retrieving the contents of a vector store file. Represents the parsed content of a vector store file.
OpenaiSearchVectorStoreRequest: OpenaiSearchVectorStoreRequest:
type: object type: object
properties: properties:

View file

@ -998,39 +998,6 @@ paths:
description: List models using the OpenAI API. description: List models using the OpenAI API.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Model.
content:
application/json:
schema:
$ref: '#/components/schemas/Model'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Register model.
description: >-
Register model.
Register a model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
deprecated: false
/v1/models/{model_id}: /v1/models/{model_id}:
get: get:
responses: responses:
@ -1065,36 +1032,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Unregister model.
description: >-
Unregister model.
Unregister a model.
parameters:
- name: model_id
in: path
description: >-
The identifier of the model to unregister.
required: true
schema:
type: string
deprecated: false
/v1/moderations: /v1/moderations:
post: post:
responses: responses:
@ -1725,32 +1662,6 @@ paths:
description: List all scoring functions. description: List all scoring functions.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Register a scoring function.
description: Register a scoring function.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true
deprecated: false
/v1/scoring-functions/{scoring_fn_id}: /v1/scoring-functions/{scoring_fn_id}:
get: get:
responses: responses:
@ -1782,33 +1693,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
summary: Unregister a scoring function.
description: Unregister a scoring function.
parameters:
- name: scoring_fn_id
in: path
description: >-
The ID of the scoring function to unregister.
required: true
schema:
type: string
deprecated: false
/v1/scoring/score: /v1/scoring/score:
post: post:
responses: responses:
@ -1897,36 +1781,6 @@ paths:
description: List all shields. description: List all shields.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: A Shield.
content:
application/json:
schema:
$ref: '#/components/schemas/Shield'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Register a shield.
description: Register a shield.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterShieldRequest'
required: true
deprecated: false
/v1/shields/{identifier}: /v1/shields/{identifier}:
get: get:
responses: responses:
@ -1958,33 +1812,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Shields
summary: Unregister a shield.
description: Unregister a shield.
parameters:
- name: identifier
in: path
description: >-
The identifier of the shield to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@ -2080,32 +1907,6 @@ paths:
description: List tool groups with optional provider. description: List tool groups with optional provider.
parameters: [] parameters: []
deprecated: false deprecated: false
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Register a tool group.
description: Register a tool group.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterToolGroupRequest'
required: true
deprecated: false
/v1/toolgroups/{toolgroup_id}: /v1/toolgroups/{toolgroup_id}:
get: get:
responses: responses:
@ -2137,32 +1938,6 @@ paths:
schema: schema:
type: string type: string
deprecated: false deprecated: false
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
summary: Unregister a tool group.
description: Unregister a tool group.
parameters:
- name: toolgroup_id
in: path
description: The ID of the tool group to unregister.
required: true
schema:
type: string
deprecated: false
/v1/tools: /v1/tools:
get: get:
responses: responses:
@ -2916,11 +2691,11 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
A list of InterleavedContent representing the file contents. A VectorStoreFileContentResponse representing the file contents.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/VectorStoreFileContentsResponse' $ref: '#/components/schemas/VectorStoreFileContentResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -3171,7 +2946,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterDatasetRequest' $ref: '#/components/schemas/RegisterDatasetRequest'
required: true required: true
deprecated: false deprecated: true
/v1beta/datasets/{dataset_id}: /v1beta/datasets/{dataset_id}:
get: get:
responses: responses:
@ -3228,7 +3003,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks: /v1alpha/eval/benchmarks:
get: get:
responses: responses:
@ -3279,7 +3054,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest' $ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true required: true
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}: /v1alpha/eval/benchmarks/{benchmark_id}:
get: get:
responses: responses:
@ -3336,7 +3111,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
deprecated: false deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations: /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post: post:
responses: responses:
@ -6280,46 +6055,6 @@ components:
required: required:
- data - data
title: OpenAIListModelsResponse title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model: Model:
type: object type: object
properties: properties:
@ -6377,6 +6112,15 @@ components:
title: Model title: Model
description: >- description: >-
A model resource representing an AI model registered in Llama Stack. A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RunModerationRequest: RunModerationRequest:
type: object type: object
properties: properties:
@ -9130,61 +8874,6 @@ components:
required: required:
- data - data
title: ListScoringFunctionsResponse title: ListScoringFunctionsResponse
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
RegisterScoringFunctionRequest:
type: object
properties:
scoring_fn_id:
type: string
description: >-
The ID of the scoring function to register.
description:
type: string
description: The description of the scoring function.
return_type:
$ref: '#/components/schemas/ParamType'
description: The return type of the scoring function.
provider_scoring_fn_id:
type: string
description: >-
The ID of the provider scoring function to use for the scoring function.
provider_id:
type: string
description: >-
The ID of the provider to use for the scoring function.
params:
$ref: '#/components/schemas/ScoringFnParams'
description: >-
The parameters for the scoring function for benchmark eval, these can
be overridden for app eval.
additionalProperties: false
required:
- scoring_fn_id
- description
- return_type
title: RegisterScoringFunctionRequest
ScoreRequest: ScoreRequest:
type: object type: object
properties: properties:
@ -9360,35 +9049,6 @@ components:
required: required:
- data - data
title: ListShieldsResponse title: ListShieldsResponse
RegisterShieldRequest:
type: object
properties:
shield_id:
type: string
description: >-
The identifier of the shield to register.
provider_shield_id:
type: string
description: >-
The identifier of the shield in the provider.
provider_id:
type: string
description: The identifier of the provider.
params:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The parameters of the shield.
additionalProperties: false
required:
- shield_id
title: RegisterShieldRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@ -9649,37 +9309,6 @@ components:
title: ListToolGroupsResponse title: ListToolGroupsResponse
description: >- description: >-
Response containing a list of tool groups. Response containing a list of tool groups.
RegisterToolGroupRequest:
type: object
properties:
toolgroup_id:
type: string
description: The ID of the tool group to register.
provider_id:
type: string
description: >-
The ID of the provider to use for the tool group.
mcp_endpoint:
$ref: '#/components/schemas/URL'
description: >-
The MCP endpoint to use for the tool group.
args:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
A dictionary of arguments to pass to the tool group.
additionalProperties: false
required:
- toolgroup_id
- provider_id
title: RegisterToolGroupRequest
Chunk: Chunk:
type: object type: object
properties: properties:
@ -10480,41 +10109,35 @@ components:
title: VectorStoreContent title: VectorStoreContent
description: >- description: >-
Content item from a vector store file or search result. Content item from a vector store file or search result.
VectorStoreFileContentsResponse: VectorStoreFileContentResponse:
type: object type: object
properties: properties:
file_id: object:
type: string type: string
description: Unique identifier for the file const: vector_store.file_content.page
filename: default: vector_store.file_content.page
type: string
description: Name of the file
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >- description: >-
Key-value attributes associated with the file The object type, which is always `vector_store.file_content.page`
content: data:
type: array type: array
items: items:
$ref: '#/components/schemas/VectorStoreContent' $ref: '#/components/schemas/VectorStoreContent'
description: List of content items from the file description: Parsed content of the file
has_more:
type: boolean
description: >-
Indicates if there are more content pages to fetch
next_page:
type: string
description: The token for the next page, if any
additionalProperties: false additionalProperties: false
required: required:
- file_id - object
- filename - data
- attributes - has_more
- content title: VectorStoreFileContentResponse
title: VectorStoreFileContentsResponse
description: >- description: >-
Response from retrieving the contents of a vector store file. Represents the parsed content of a vector store file.
OpenaiSearchVectorStoreRequest: OpenaiSearchVectorStoreRequest:
type: object type: object
properties: properties:
@ -10831,68 +10454,6 @@ components:
- data - data
title: ListDatasetsResponse title: ListDatasetsResponse
description: Response from listing datasets. description: Response from listing datasets.
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
Benchmark: Benchmark:
type: object type: object
properties: properties:
@ -10960,47 +10521,6 @@ components:
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
BenchmarkConfig: BenchmarkConfig:
type: object type: object
properties: properties:
@ -11862,6 +11382,109 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: SupervisedFineTuneRequest title: SupervisedFineTuneRequest
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
purpose:
type: string
enum:
- post-training/messages
- eval/question-answer
- eval/messages-answer
description: >-
The purpose of the dataset. One of: - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. {
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
contains a question column and an answer column for evaluation. { "question":
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
The dataset contains a messages column with list of messages and an answer
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
Doe. How can I help you today?"}, {"role": "user", "content": "What's
my name?"}, ], "answer": "John Doe" }
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The metadata for the dataset. - E.g. {"description": "My dataset"}.
dataset_id:
type: string
description: >-
The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false
required:
- purpose
- source
title: RegisterDatasetRequest
RegisterBenchmarkRequest:
type: object
properties:
benchmark_id:
type: string
description: The ID of the benchmark to register.
dataset_id:
type: string
description: >-
The ID of the dataset to use for the benchmark.
scoring_functions:
type: array
items:
type: string
description: >-
The scoring functions to use for the benchmark.
provider_benchmark_id:
type: string
description: >-
The ID of the provider benchmark to use for the benchmark.
provider_id:
type: string
description: >-
The ID of the provider to use for the benchmark.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The metadata to use for the benchmark.
additionalProperties: false
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed

View file

@ -74,7 +74,7 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
async def register_benchmark( async def register_benchmark(
self, self,
benchmark_id: str, benchmark_id: str,
@ -95,7 +95,7 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
async def unregister_benchmark(self, benchmark_id: str) -> None: async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark. """Unregister a benchmark.

View file

@ -146,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol): class Datasets(Protocol):
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
async def register_dataset( async def register_dataset(
self, self,
purpose: DatasetPurpose, purpose: DatasetPurpose,
@ -235,7 +235,7 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA) @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
async def unregister_dataset( async def unregister_dataset(
self, self,
dataset_id: str, dataset_id: str,

View file

@ -136,7 +136,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def register_model( async def register_model(
self, self,
model_id: str, model_id: str,
@ -158,7 +158,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
async def unregister_model( async def unregister_model(
self, self,
model_id: str, model_id: str,

View file

@ -178,7 +178,7 @@ class ScoringFunctions(Protocol):
""" """
... ...
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def register_scoring_function( async def register_scoring_function(
self, self,
scoring_fn_id: str, scoring_fn_id: str,
@ -199,7 +199,9 @@ class ScoringFunctions(Protocol):
""" """
... ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(
route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None: async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
"""Unregister a scoring function. """Unregister a scoring function.

View file

@ -67,7 +67,7 @@ class Shields(Protocol):
""" """
... ...
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def register_shield( async def register_shield(
self, self,
shield_id: str, shield_id: str,
@ -85,7 +85,7 @@ class Shields(Protocol):
""" """
... ...
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
async def unregister_shield(self, identifier: str) -> None: async def unregister_shield(self, identifier: str) -> None:
"""Unregister a shield. """Unregister a shield.

View file

@ -109,7 +109,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@telemetry_traceable @telemetry_traceable
class ToolGroups(Protocol): class ToolGroups(Protocol):
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def register_tool_group( async def register_tool_group(
self, self,
toolgroup_id: str, toolgroup_id: str,
@ -167,7 +167,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
async def unregister_toolgroup( async def unregister_toolgroup(
self, self,
toolgroup_id: str, toolgroup_id: str,

View file

@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
@json_schema_type @json_schema_type
class VectorStoreFileContentsResponse(BaseModel): class VectorStoreFileContentResponse(BaseModel):
"""Response from retrieving the contents of a vector store file. """Represents the parsed content of a vector store file.
:param file_id: Unique identifier for the file :param object: The object type, which is always `vector_store.file_content.page`
:param filename: Name of the file :param data: Parsed content of the file
:param attributes: Key-value attributes associated with the file :param has_more: Indicates if there are more content pages to fetch
:param content: List of content items from the file :param next_page: The token for the next page, if any
""" """
file_id: str object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
filename: str data: list[VectorStoreContent]
attributes: dict[str, Any] has_more: bool
content: list[VectorStoreContent] next_page: str | None = None
@json_schema_type @json_schema_type
@ -732,12 +732,12 @@ class VectorIO(Protocol):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file. """Retrieves the contents of a vector store file.
:param vector_store_id: The ID of the vector store containing the file to retrieve. :param vector_store_id: The ID of the vector store containing the file to retrieve.
:param file_id: The ID of the file to retrieve. :param file_id: The ID of the file to retrieve.
:returns: A list of InterleavedContent representing the file contents. :returns: A VectorStoreFileContentResponse representing the file contents.
""" """
... ...

View file

@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategyStaticConfig, VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFilesListInBatchResponse, VectorStoreFilesListInBatchResponse,
@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id) provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents( return await provider.openai_retrieve_vector_store_file_contents(

View file

@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
SearchRankingOptions, SearchRankingOptions,
VectorStoreChunkingStrategy, VectorStoreChunkingStrategy,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFileStatus, VectorStoreFileStatus,
@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id) await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id) provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents( return await provider.openai_retrieve_vector_store_file_contents(

View file

@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
async def register_vector_store(self, vector_store: VectorStore) -> None: async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json()) await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
return [i.vector_store for i in self.cache.values()] return [i.vector_store for i in self.cache.values()]
async def unregister_vector_store(self, vector_store_id: str) -> None: async def unregister_vector_store(self, vector_store_id: str) -> None:
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
if vector_store_id not in self.cache: if vector_store_id not in self.cache:
return return
@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
del self.cache[vector_store_id] del self.cache[vector_store_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store=vector_store,
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_store_id] = index
return index
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = self.cache.get(vector_store_id) index = self.cache.get(vector_store_id)
if index is None: if index is None:

View file

@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
return [v.vector_store for v in self.cache.values()] return [v.vector_store for v in self.cache.values()]
async def register_vector_store(self, vector_store: VectorStore) -> None: async def register_vector_store(self, vector_store: VectorStore) -> None:
if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
# Save to kvstore for persistence
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
# Create and cache the index
index = await SQLiteVecIndex.create( index = await SQLiteVecIndex.create(
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
) )
@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
if self.vector_store_table is None: # Try to load from kvstore
raise VectorStoreNotFoundError(vector_store_id) if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store: key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex( index = VectorStoreWithIndex(
vector_store=vector_store, vector_store=vector_store,
index=SQLiteVecIndex( index=SQLiteVecIndex(

View file

@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
async def initialize(self) -> None: async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence) self.kvstore = await kvstore_impl(self.config.persistence)
self.vector_store_table = self.kvstore
if isinstance(self.config, RemoteChromaVectorIOConfig): if isinstance(self.config, RemoteChromaVectorIOConfig):
log.info(f"Connecting to Chroma server at: {self.config.url}") log.info(f"Connecting to Chroma server at: {self.config.url}")
@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
vector_store = await self.vector_store_table.get_vector_store(vector_store_id) # Try to load from kvstore
if not vector_store: if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
vector_store = VectorStore.model_validate_json(vector_store_data)
collection = await maybe_await(self.client.get_collection(vector_store_id)) collection = await maybe_await(self.client.get_collection(vector_store_id))
if not collection: if not collection:
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")

View file

@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
if self.vector_store_table is None: # Try to load from kvstore
raise VectorStoreNotFoundError(vector_store_id) if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store: key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex( index = VectorStoreWithIndex(
vector_store=vector_store, vector_store=vector_store,
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),

View file

@ -368,6 +368,22 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
log.exception("Could not connect to PGVector database server") log.exception("Could not connect to PGVector database server")
raise RuntimeError("Could not connect to PGVector database server") from e raise RuntimeError("Could not connect to PGVector database server") from e
# Load existing vector stores from KV store into cache
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
pgvector_index = PGVectorIndex(
vector_store=vector_store,
dimension=vector_store.embedding_dimension,
conn=self.conn,
kvstore=self.kvstore,
)
await pgvector_index.initialize()
index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
self.cache[vector_store.identifier] = index
async def shutdown(self) -> None: async def shutdown(self) -> None:
if self.conn is not None: if self.conn is not None:
self.conn.close() self.conn.close()
@ -377,7 +393,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
async def register_vector_store(self, vector_store: VectorStore) -> None: async def register_vector_store(self, vector_store: VectorStore) -> None:
# Persist vector DB metadata in the KV store # Persist vector DB metadata in the KV store
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
# Save to kvstore for persistence
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
# Upsert model metadata in Postgres # Upsert model metadata in Postgres
upsert_models(self.conn, [(vector_store.identifier, vector_store)]) upsert_models(self.conn, [(vector_store.identifier, vector_store)])
@ -396,7 +418,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
del self.cache[vector_store_id] del self.cache[vector_store_id]
# Delete vector DB metadata from KV store # Delete vector DB metadata from KV store
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
@ -413,13 +436,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
if self.vector_store_table is None: # Try to load from kvstore
raise VectorStoreNotFoundError(vector_store_id) if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store: key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
await index.initialize() await index.initialize()
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)

View file

@ -183,7 +183,8 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
await super().shutdown() await super().shutdown()
async def register_vector_store(self, vector_store: VectorStore) -> None: async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json()) await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@ -200,20 +201,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
await self.cache[vector_store_id].index.delete() await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id] del self.cache[vector_store_id]
assert self.kvstore is not None if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
if self.vector_store_table is None: # Try to load from kvstore
raise ValueError(f"Vector DB not found {vector_store_id}") if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id) key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
if not vector_store: vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex( index = VectorStoreWithIndex(
vector_store=vector_store, vector_store=vector_store,
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),

View file

@ -346,13 +346,16 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
if vector_store_id in self.cache: if vector_store_id in self.cache:
return self.cache[vector_store_id] return self.cache[vector_store_id]
if self.vector_store_table is None: # Try to load from kvstore
raise VectorStoreNotFoundError(vector_store_id) if self.kvstore is None:
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store: key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
vector_store_data = await self.kvstore.get(key)
if not vector_store_data:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
vector_store = VectorStore.model_validate_json(vector_store_data)
client = self._get_client() client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
if not client.collections.exists(sanitized_collection_name): if not client.collections.exists(sanitized_collection_name):

View file

@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
VectorStoreContent, VectorStoreContent,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileCounts, VectorStoreFileCounts,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileLastError, VectorStoreFileLastError,
@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file.""" """Retrieves the contents of a vector store file."""
if vector_store_id not in self.openai_vector_stores: if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks] chunks = [Chunk.model_validate(c) for c in dict_chunks]
content = [] content = []
for chunk in chunks: for chunk in chunks:
content.extend(self._chunk_to_vector_store_content(chunk)) content.extend(self._chunk_to_vector_store_content(chunk))
return VectorStoreFileContentsResponse( return VectorStoreFileContentResponse(
file_id=file_id, object="vector_store.file_content.page",
filename=file_info.get("filename", ""), data=content,
attributes=file_info.get("attributes", {}), has_more=False,
content=content, next_page=None,
) )
async def openai_update_vector_store_file( async def openai_update_vector_store_file(

View file

@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
) )
assert file_contents is not None assert file_contents is not None
assert len(file_contents.content) == 1 assert file_contents.object == "vector_store.file_content.page"
content = file_contents.content[0] assert len(file_contents.data) == 1
content = file_contents.data[0]
# llama-stack-client returns a model, openai-python is a badboy and returns a dict # llama-stack-client returns a model, openai-python is a badboy and returns a dict
if not isinstance(content, dict): if not isinstance(content, dict):
content = content.model_dump() content = content.model_dump()
assert content["type"] == "text" assert content["type"] == "text"
assert content["text"] == test_content.decode("utf-8") assert content["text"] == test_content.decode("utf-8")
assert file_contents.filename == file_name assert file_contents.has_more is False
assert file_contents.attributes == attributes
@vector_provider_wrapper @vector_provider_wrapper
@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
) )
assert file_contents is not None assert file_contents is not None
assert file_contents.filename == file_data[i][0] assert file_contents.object == "vector_store.file_content.page"
assert len(file_contents.content) > 0 assert len(file_contents.data) > 0
# Verify the content matches what we uploaded # Verify the content matches what we uploaded
content_text = ( content_text = (
file_contents.content[0].text file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
if hasattr(file_contents.content[0], "text")
else file_contents.content[0]["text"]
) )
assert file_data[i][1].decode("utf-8") in content_text assert file_data[i][1].decode("utf-8") in content_text

View file

@ -92,6 +92,99 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
await vector_io_adapter.shutdown() await vector_io_adapter.shutdown()
async def test_vector_store_lazy_loading_from_kvstore(vector_io_adapter):
"""
Test that vector stores can be lazy-loaded from KV store when not in cache.
Verifies that clearing the cache doesn't break vector store access - they
can be loaded on-demand from persistent storage.
"""
await vector_io_adapter.initialize()
vector_store_id = f"lazy_load_test_{np.random.randint(1e6)}"
vector_store = VectorStore(
identifier=vector_store_id,
provider_id="test_provider",
embedding_model="test_model",
embedding_dimension=128,
)
await vector_io_adapter.register_vector_store(vector_store)
assert vector_store_id in vector_io_adapter.cache
vector_io_adapter.cache.clear()
assert vector_store_id not in vector_io_adapter.cache
loaded_index = await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
assert loaded_index is not None
assert loaded_index.vector_store.identifier == vector_store_id
assert vector_store_id in vector_io_adapter.cache
cached_index = await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
assert cached_index is loaded_index
await vector_io_adapter.shutdown()
async def test_vector_store_preloading_on_initialization(vector_io_adapter):
"""
Test that vector stores are preloaded from KV store during initialization.
Verifies that after restart, all vector stores are automatically loaded into
cache and immediately accessible without requiring lazy loading.
"""
await vector_io_adapter.initialize()
vector_store_ids = [f"preload_test_{i}_{np.random.randint(1e6)}" for i in range(3)]
for vs_id in vector_store_ids:
vector_store = VectorStore(
identifier=vs_id,
provider_id="test_provider",
embedding_model="test_model",
embedding_dimension=128,
)
await vector_io_adapter.register_vector_store(vector_store)
for vs_id in vector_store_ids:
assert vs_id in vector_io_adapter.cache
await vector_io_adapter.shutdown()
await vector_io_adapter.initialize()
for vs_id in vector_store_ids:
assert vs_id in vector_io_adapter.cache
for vs_id in vector_store_ids:
loaded_index = await vector_io_adapter._get_and_cache_vector_store_index(vs_id)
assert loaded_index is not None
assert loaded_index.vector_store.identifier == vs_id
await vector_io_adapter.shutdown()
async def test_kvstore_none_raises_runtime_error(vector_io_adapter):
"""
Test that accessing vector stores with uninitialized kvstore raises RuntimeError.
Verifies proper RuntimeError is raised instead of assertions when kvstore is None.
"""
await vector_io_adapter.initialize()
vector_store_id = f"kvstore_none_test_{np.random.randint(1e6)}"
vector_store = VectorStore(
identifier=vector_store_id,
provider_id="test_provider",
embedding_model="test_model",
embedding_dimension=128,
)
await vector_io_adapter.register_vector_store(vector_store)
vector_io_adapter.cache.clear()
vector_io_adapter.kvstore = None
with pytest.raises(RuntimeError, match="KVStore not initialized"):
await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
async def test_register_and_unregister_vector_store(vector_io_adapter): async def test_register_and_unregister_vector_store(vector_io_adapter):
unique_id = f"foo_db_{np.random.randint(1e6)}" unique_id = f"foo_db_{np.random.randint(1e6)}"
dummy = VectorStore( dummy = VectorStore(