mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-21 16:07:16 +00:00
Compare commits
17 commits
Author | SHA1 | Date | |
---|---|---|---|
|
9191005ca1 | ||
|
0e96279bee | ||
|
5aaf1a8bca | ||
|
122de785c4 | ||
|
444f6c88f3 | ||
|
6a13a99e77 | ||
|
407bade359 | ||
|
ffeb86385c | ||
|
b215eb5944 | ||
|
d9274d199e | ||
|
ec364499f5 | ||
|
6a74894e22 | ||
|
5aafce4ff3 | ||
|
5678c25b9d | ||
|
7294385df3 | ||
|
8943335e0b | ||
|
e7f4ddcc86 |
120 changed files with 1045 additions and 5548 deletions
|
@ -86,10 +86,9 @@ runs:
|
|||
if: ${{ always() }}
|
||||
shell: bash
|
||||
run: |
|
||||
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
|
||||
distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
|
||||
stack_container_name="llama-stack-test-$distro_name"
|
||||
sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true
|
||||
# Ollama logs (if ollama container exists)
|
||||
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
|
||||
# Note: distro container logs are now dumped in integration-tests.sh before container is removed
|
||||
|
||||
- name: Upload logs
|
||||
if: ${{ always() }}
|
||||
|
|
2
.github/workflows/pre-commit.yml
vendored
2
.github/workflows/pre-commit.yml
vendored
|
@ -37,7 +37,7 @@ jobs:
|
|||
.pre-commit-config.yaml
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: 'npm'
|
||||
|
|
4
.github/workflows/precommit-trigger.yml
vendored
4
.github/workflows/precommit-trigger.yml
vendored
|
@ -99,7 +99,7 @@ jobs:
|
|||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
|
||||
body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
|
||||
});
|
||||
|
||||
- name: Checkout PR branch (same-repo)
|
||||
|
@ -141,7 +141,7 @@ jobs:
|
|||
|
||||
- name: Set up Node.js
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: 'npm'
|
||||
|
|
8
.github/workflows/providers-list-deps.yml
vendored
8
.github/workflows/providers-list-deps.yml
vendored
|
@ -36,7 +36,7 @@ jobs:
|
|||
distros: ${{ steps.set-matrix.outputs.distros }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Generate Distribution List
|
||||
id: set-matrix
|
||||
|
@ -55,7 +55,7 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
|
@ -79,7 +79,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
|
@ -92,7 +92,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
|
|
2
.github/workflows/python-build-test.yml
vendored
2
.github/workflows/python-build-test.yml
vendored
|
@ -24,7 +24,7 @@ jobs:
|
|||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@eb1897b8dc4b5d5bfe39a428a8f2304605e0983c # v7.0.0
|
||||
uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
activate-environment: true
|
||||
|
|
2
.github/workflows/ui-unit-tests.yml
vendored
2
.github/workflows/ui-unit-tests.yml
vendored
|
@ -29,7 +29,7 @@ jobs:
|
|||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
cache: 'npm'
|
||||
|
|
|
@ -208,19 +208,6 @@ resources:
|
|||
type: http
|
||||
endpoint: post /v1/conversations/{conversation_id}/items
|
||||
|
||||
datasets:
|
||||
models:
|
||||
list_datasets_response: ListDatasetsResponse
|
||||
methods:
|
||||
register: post /v1beta/datasets
|
||||
retrieve: get /v1beta/datasets/{dataset_id}
|
||||
list:
|
||||
endpoint: get /v1beta/datasets
|
||||
paginated: false
|
||||
unregister: delete /v1beta/datasets/{dataset_id}
|
||||
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
|
||||
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
|
||||
|
||||
inspect:
|
||||
models:
|
||||
healthInfo: HealthInfo
|
||||
|
@ -521,6 +508,21 @@ resources:
|
|||
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
|
||||
param_discriminator: stream
|
||||
|
||||
beta:
|
||||
subresources:
|
||||
datasets:
|
||||
models:
|
||||
list_datasets_response: ListDatasetsResponse
|
||||
methods:
|
||||
register: post /v1beta/datasets
|
||||
retrieve: get /v1beta/datasets/{dataset_id}
|
||||
list:
|
||||
endpoint: get /v1beta/datasets
|
||||
paginated: false
|
||||
unregister: delete /v1beta/datasets/{dataset_id}
|
||||
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
|
||||
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
|
||||
|
||||
|
||||
settings:
|
||||
license: MIT
|
||||
|
|
|
@ -2039,69 +2039,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/URL'
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/insert:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
description: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InsertRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/query:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
RAGQueryResult containing the retrieved content and metadata
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RAGQueryResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
description: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
@ -6440,7 +6377,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9132,7 +9069,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9440,7 +9377,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9921,274 +9858,6 @@ components:
|
|||
title: ListToolDefsResponse
|
||||
description: >-
|
||||
Response containing a list of tool definitions.
|
||||
RAGDocument:
|
||||
type: object
|
||||
properties:
|
||||
document_id:
|
||||
type: string
|
||||
description: The unique identifier for the document.
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
description: The content of the document.
|
||||
mime_type:
|
||||
type: string
|
||||
description: The MIME type of the document.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Additional metadata for the document.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- document_id
|
||||
- content
|
||||
- metadata
|
||||
title: RAGDocument
|
||||
description: >-
|
||||
A document to be used for document ingestion in the RAG Tool.
|
||||
InsertRequest:
|
||||
type: object
|
||||
properties:
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/RAGDocument'
|
||||
description: >-
|
||||
List of documents to index in the RAG system
|
||||
vector_db_id:
|
||||
type: string
|
||||
description: >-
|
||||
ID of the vector database to store the document embeddings
|
||||
chunk_size_in_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) Size in tokens for document chunking during indexing
|
||||
additionalProperties: false
|
||||
required:
|
||||
- documents
|
||||
- vector_db_id
|
||||
- chunk_size_in_tokens
|
||||
title: InsertRequest
|
||||
DefaultRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: default
|
||||
default: default
|
||||
description: >-
|
||||
Type of query generator, always 'default'
|
||||
separator:
|
||||
type: string
|
||||
default: ' '
|
||||
description: >-
|
||||
String separator used to join query terms
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- separator
|
||||
title: DefaultRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the default RAG query generator.
|
||||
LLMRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: llm
|
||||
default: llm
|
||||
description: Type of query generator, always 'llm'
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
Name of the language model to use for query generation
|
||||
template:
|
||||
type: string
|
||||
description: >-
|
||||
Template string for formatting the query generation prompt
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- model
|
||||
- template
|
||||
title: LLMRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the LLM-based RAG query generator.
|
||||
RAGQueryConfig:
|
||||
type: object
|
||||
properties:
|
||||
query_generator_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
description: Configuration for the query generator.
|
||||
max_tokens_in_context:
|
||||
type: integer
|
||||
default: 4096
|
||||
description: Maximum number of tokens in the context.
|
||||
max_chunks:
|
||||
type: integer
|
||||
default: 5
|
||||
description: Maximum number of chunks to retrieve.
|
||||
chunk_template:
|
||||
type: string
|
||||
default: >
|
||||
Result {index}
|
||||
|
||||
Content: {chunk.content}
|
||||
|
||||
Metadata: {metadata}
|
||||
description: >-
|
||||
Template for formatting each retrieved chunk in the context. Available
|
||||
placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
|
||||
content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
|
||||
{chunk.content}\nMetadata: {metadata}\n"
|
||||
mode:
|
||||
$ref: '#/components/schemas/RAGSearchMode'
|
||||
default: vector
|
||||
description: >-
|
||||
Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
|
||||
"vector".
|
||||
ranker:
|
||||
$ref: '#/components/schemas/Ranker'
|
||||
description: >-
|
||||
Configuration for the ranker to use in hybrid search. Defaults to RRF
|
||||
ranker.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- query_generator_config
|
||||
- max_tokens_in_context
|
||||
- max_chunks
|
||||
- chunk_template
|
||||
title: RAGQueryConfig
|
||||
description: >-
|
||||
Configuration for the RAG query generation.
|
||||
RAGSearchMode:
|
||||
type: string
|
||||
enum:
|
||||
- vector
|
||||
- keyword
|
||||
- hybrid
|
||||
title: RAGSearchMode
|
||||
description: >-
|
||||
Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
|
||||
for semantic matching - KEYWORD: Uses keyword-based search for exact matching
|
||||
- HYBRID: Combines both vector and keyword search for better results
|
||||
RRFRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rrf
|
||||
default: rrf
|
||||
description: The type of ranker, always "rrf"
|
||||
impact_factor:
|
||||
type: number
|
||||
default: 60.0
|
||||
description: >-
|
||||
The impact factor for RRF scoring. Higher values give more weight to higher-ranked
|
||||
results. Must be greater than 0
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- impact_factor
|
||||
title: RRFRanker
|
||||
description: >-
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
Ranker:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/RRFRanker'
|
||||
- $ref: '#/components/schemas/WeightedRanker'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
rrf: '#/components/schemas/RRFRanker'
|
||||
weighted: '#/components/schemas/WeightedRanker'
|
||||
WeightedRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: weighted
|
||||
default: weighted
|
||||
description: The type of ranker, always "weighted"
|
||||
alpha:
|
||||
type: number
|
||||
default: 0.5
|
||||
description: >-
|
||||
Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
|
||||
only use vector scores, values in between blend both scores.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- alpha
|
||||
title: WeightedRanker
|
||||
description: >-
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
QueryRequest:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The query content to search for in the indexed documents
|
||||
vector_db_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
List of vector database IDs to search within
|
||||
query_config:
|
||||
$ref: '#/components/schemas/RAGQueryConfig'
|
||||
description: >-
|
||||
(Optional) Configuration parameters for the query operation
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- vector_db_ids
|
||||
title: QueryRequest
|
||||
RAGQueryResult:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
(Optional) The retrieved content from the query
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Additional metadata about the query result
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metadata
|
||||
title: RAGQueryResult
|
||||
description: >-
|
||||
Result of a RAG query containing retrieved content and metadata.
|
||||
ToolGroup:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -10203,7 +9872,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -11325,7 +10994,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -12652,7 +12321,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
|
@ -21,7 +21,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
|
|||
| inference | `inline::meta-reference` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
|
|||
| post_training | `remote::nvidia` |
|
||||
| safety | `remote::nvidia` |
|
||||
| scoring | `inline::basic` |
|
||||
| tool_runtime | `inline::rag-runtime` |
|
||||
| tool_runtime | |
|
||||
| vector_io | `inline::faiss` |
|
||||
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ description: |
|
|||
#### Empirical Example
|
||||
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
|
||||
in batches of 100 into both Faiss and sqlite-vec.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
|
@ -233,7 +233,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i
|
|||
#### Empirical Example
|
||||
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
|
||||
in batches of 100 into both Faiss and sqlite-vec.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
|
|
|
@ -32,7 +32,6 @@ Commands:
|
|||
scoring_functions Manage scoring functions.
|
||||
shields Manage safety shield services.
|
||||
toolgroups Manage available tool groups.
|
||||
vector_dbs Manage vector databases.
|
||||
```
|
||||
|
||||
### `llama-stack-client configure`
|
||||
|
@ -211,53 +210,6 @@ Unregister a model from distribution endpoint
|
|||
llama-stack-client models unregister <model_id>
|
||||
```
|
||||
|
||||
## Vector DB Management
|
||||
Manage vector databases.
|
||||
|
||||
|
||||
### `llama-stack-client vector_dbs list`
|
||||
Show available vector dbs on distribution endpoint
|
||||
```bash
|
||||
llama-stack-client vector_dbs list
|
||||
```
|
||||
```
|
||||
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
||||
┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃
|
||||
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
|
||||
│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │
|
||||
│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │
|
||||
│ │ │ │ │ type: vector_db │
|
||||
│ │ │ │ │ │
|
||||
└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
### `llama-stack-client vector_dbs register`
|
||||
Create a new vector db
|
||||
```bash
|
||||
llama-stack-client vector_dbs register <vector-db-id> [--provider-id <provider-id>] [--provider-vector-db-id <provider-vector-db-id>] [--embedding-model <embedding-model>] [--embedding-dimension <embedding-dimension>]
|
||||
```
|
||||
|
||||
|
||||
Required arguments:
|
||||
- `VECTOR_DB_ID`: Vector DB ID
|
||||
|
||||
Optional arguments:
|
||||
- `--provider-id`: Provider ID for the vector db
|
||||
- `--provider-vector-db-id`: Provider's vector db ID
|
||||
- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5`
|
||||
- `--embedding-dimension`: Dimension of embeddings. Default: 768
|
||||
|
||||
### `llama-stack-client vector_dbs unregister`
|
||||
Delete a vector db
|
||||
```bash
|
||||
llama-stack-client vector_dbs unregister <vector-db-id>
|
||||
```
|
||||
|
||||
|
||||
Required arguments:
|
||||
- `VECTOR_DB_ID`: Vector DB ID
|
||||
|
||||
|
||||
## Shield Management
|
||||
Manage safety shield services.
|
||||
### `llama-stack-client shields list`
|
||||
|
|
|
@ -196,16 +196,10 @@ def _get_endpoint_functions(
|
|||
def _get_defining_class(member_fn: str, derived_cls: type) -> type:
|
||||
"Find the class in which a member function is first defined in a class inheritance hierarchy."
|
||||
|
||||
# This import must be dynamic here
|
||||
from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
|
||||
|
||||
# iterate in reverse member resolution order to find most specific class first
|
||||
for cls in reversed(inspect.getmro(derived_cls)):
|
||||
for name, _ in inspect.getmembers(cls, inspect.isfunction):
|
||||
if name == member_fn:
|
||||
# HACK ALERT
|
||||
if cls == RAGToolRuntime:
|
||||
return ToolRuntime
|
||||
return cls
|
||||
|
||||
raise ValidationError(
|
||||
|
|
4
docs/static/deprecated-llama-stack-spec.html
vendored
4
docs/static/deprecated-llama-stack-spec.html
vendored
|
@ -5547,7 +5547,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -5798,7 +5798,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
4
docs/static/deprecated-llama-stack-spec.yaml
vendored
4
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
@ -4114,7 +4114,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -4303,7 +4303,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
|
@ -1850,7 +1850,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -3983,7 +3983,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
|
@ -1320,7 +1320,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -2927,7 +2927,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
431
docs/static/llama-stack-spec.html
vendored
431
docs/static/llama-stack-spec.html
vendored
|
@ -2624,89 +2624,6 @@
|
|||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/tool-runtime/rag-tool/insert": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"ToolRuntime"
|
||||
],
|
||||
"summary": "Index documents so they can be used by the RAG system.",
|
||||
"description": "Index documents so they can be used by the RAG system.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/InsertRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/tool-runtime/rag-tool/query": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "RAGQueryResult containing the retrieved content and metadata",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RAGQueryResult"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"ToolRuntime"
|
||||
],
|
||||
"summary": "Query the RAG system for context; typically invoked by the agent.",
|
||||
"description": "Query the RAG system for context; typically invoked by the agent.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/toolgroups": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -6800,7 +6717,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -10205,7 +10122,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -10687,7 +10604,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -11383,346 +11300,6 @@
|
|||
"title": "ListToolDefsResponse",
|
||||
"description": "Response containing a list of tool definitions."
|
||||
},
|
||||
"RAGDocument": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"document_id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier for the document."
|
||||
},
|
||||
"content": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
}
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
],
|
||||
"description": "The content of the document."
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string",
|
||||
"description": "The MIME type of the document."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Additional metadata for the document."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"document_id",
|
||||
"content",
|
||||
"metadata"
|
||||
],
|
||||
"title": "RAGDocument",
|
||||
"description": "A document to be used for document ingestion in the RAG Tool."
|
||||
},
|
||||
"InsertRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"documents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/RAGDocument"
|
||||
},
|
||||
"description": "List of documents to index in the RAG system"
|
||||
},
|
||||
"vector_db_id": {
|
||||
"type": "string",
|
||||
"description": "ID of the vector database to store the document embeddings"
|
||||
},
|
||||
"chunk_size_in_tokens": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Size in tokens for document chunking during indexing"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"documents",
|
||||
"vector_db_id",
|
||||
"chunk_size_in_tokens"
|
||||
],
|
||||
"title": "InsertRequest"
|
||||
},
|
||||
"DefaultRAGQueryGeneratorConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "default",
|
||||
"default": "default",
|
||||
"description": "Type of query generator, always 'default'"
|
||||
},
|
||||
"separator": {
|
||||
"type": "string",
|
||||
"default": " ",
|
||||
"description": "String separator used to join query terms"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"separator"
|
||||
],
|
||||
"title": "DefaultRAGQueryGeneratorConfig",
|
||||
"description": "Configuration for the default RAG query generator."
|
||||
},
|
||||
"LLMRAGQueryGeneratorConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "llm",
|
||||
"default": "llm",
|
||||
"description": "Type of query generator, always 'llm'"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "Name of the language model to use for query generation"
|
||||
},
|
||||
"template": {
|
||||
"type": "string",
|
||||
"description": "Template string for formatting the query generation prompt"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"model",
|
||||
"template"
|
||||
],
|
||||
"title": "LLMRAGQueryGeneratorConfig",
|
||||
"description": "Configuration for the LLM-based RAG query generator."
|
||||
},
|
||||
"RAGQueryConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query_generator_config": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"default": "#/components/schemas/DefaultRAGQueryGeneratorConfig",
|
||||
"llm": "#/components/schemas/LLMRAGQueryGeneratorConfig"
|
||||
}
|
||||
},
|
||||
"description": "Configuration for the query generator."
|
||||
},
|
||||
"max_tokens_in_context": {
|
||||
"type": "integer",
|
||||
"default": 4096,
|
||||
"description": "Maximum number of tokens in the context."
|
||||
},
|
||||
"max_chunks": {
|
||||
"type": "integer",
|
||||
"default": 5,
|
||||
"description": "Maximum number of chunks to retrieve."
|
||||
},
|
||||
"chunk_template": {
|
||||
"type": "string",
|
||||
"default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
|
||||
"description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
|
||||
},
|
||||
"mode": {
|
||||
"$ref": "#/components/schemas/RAGSearchMode",
|
||||
"default": "vector",
|
||||
"description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
|
||||
},
|
||||
"ranker": {
|
||||
"$ref": "#/components/schemas/Ranker",
|
||||
"description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"query_generator_config",
|
||||
"max_tokens_in_context",
|
||||
"max_chunks",
|
||||
"chunk_template"
|
||||
],
|
||||
"title": "RAGQueryConfig",
|
||||
"description": "Configuration for the RAG query generation."
|
||||
},
|
||||
"RAGSearchMode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"vector",
|
||||
"keyword",
|
||||
"hybrid"
|
||||
],
|
||||
"title": "RAGSearchMode",
|
||||
"description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
|
||||
},
|
||||
"RRFRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "rrf",
|
||||
"default": "rrf",
|
||||
"description": "The type of ranker, always \"rrf\""
|
||||
},
|
||||
"impact_factor": {
|
||||
"type": "number",
|
||||
"default": 60.0,
|
||||
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"impact_factor"
|
||||
],
|
||||
"title": "RRFRanker",
|
||||
"description": "Reciprocal Rank Fusion (RRF) ranker configuration."
|
||||
},
|
||||
"Ranker": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/RRFRanker"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"rrf": "#/components/schemas/RRFRanker",
|
||||
"weighted": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
}
|
||||
},
|
||||
"WeightedRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "weighted",
|
||||
"default": "weighted",
|
||||
"description": "The type of ranker, always \"weighted\""
|
||||
},
|
||||
"alpha": {
|
||||
"type": "number",
|
||||
"default": 0.5,
|
||||
"description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"alpha"
|
||||
],
|
||||
"title": "WeightedRanker",
|
||||
"description": "Weighted ranker configuration that combines vector and keyword scores."
|
||||
},
|
||||
"QueryRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The query content to search for in the indexed documents"
|
||||
},
|
||||
"vector_db_ids": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "List of vector database IDs to search within"
|
||||
},
|
||||
"query_config": {
|
||||
"$ref": "#/components/schemas/RAGQueryConfig",
|
||||
"description": "(Optional) Configuration parameters for the query operation"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"vector_db_ids"
|
||||
],
|
||||
"title": "QueryRequest"
|
||||
},
|
||||
"RAGQueryResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "(Optional) The retrieved content from the query"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Additional metadata about the query result"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"metadata"
|
||||
],
|
||||
"title": "RAGQueryResult",
|
||||
"description": "Result of a RAG query containing retrieved content and metadata."
|
||||
},
|
||||
"ToolGroup": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -11740,7 +11317,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
339
docs/static/llama-stack-spec.yaml
vendored
339
docs/static/llama-stack-spec.yaml
vendored
|
@ -2036,69 +2036,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/URL'
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/insert:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
description: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InsertRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/query:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
RAGQueryResult containing the retrieved content and metadata
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RAGQueryResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
description: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
@ -5227,7 +5164,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -7919,7 +7856,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -8227,7 +8164,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -8708,274 +8645,6 @@ components:
|
|||
title: ListToolDefsResponse
|
||||
description: >-
|
||||
Response containing a list of tool definitions.
|
||||
RAGDocument:
|
||||
type: object
|
||||
properties:
|
||||
document_id:
|
||||
type: string
|
||||
description: The unique identifier for the document.
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
description: The content of the document.
|
||||
mime_type:
|
||||
type: string
|
||||
description: The MIME type of the document.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Additional metadata for the document.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- document_id
|
||||
- content
|
||||
- metadata
|
||||
title: RAGDocument
|
||||
description: >-
|
||||
A document to be used for document ingestion in the RAG Tool.
|
||||
InsertRequest:
|
||||
type: object
|
||||
properties:
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/RAGDocument'
|
||||
description: >-
|
||||
List of documents to index in the RAG system
|
||||
vector_db_id:
|
||||
type: string
|
||||
description: >-
|
||||
ID of the vector database to store the document embeddings
|
||||
chunk_size_in_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) Size in tokens for document chunking during indexing
|
||||
additionalProperties: false
|
||||
required:
|
||||
- documents
|
||||
- vector_db_id
|
||||
- chunk_size_in_tokens
|
||||
title: InsertRequest
|
||||
DefaultRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: default
|
||||
default: default
|
||||
description: >-
|
||||
Type of query generator, always 'default'
|
||||
separator:
|
||||
type: string
|
||||
default: ' '
|
||||
description: >-
|
||||
String separator used to join query terms
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- separator
|
||||
title: DefaultRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the default RAG query generator.
|
||||
LLMRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: llm
|
||||
default: llm
|
||||
description: Type of query generator, always 'llm'
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
Name of the language model to use for query generation
|
||||
template:
|
||||
type: string
|
||||
description: >-
|
||||
Template string for formatting the query generation prompt
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- model
|
||||
- template
|
||||
title: LLMRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the LLM-based RAG query generator.
|
||||
RAGQueryConfig:
|
||||
type: object
|
||||
properties:
|
||||
query_generator_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
description: Configuration for the query generator.
|
||||
max_tokens_in_context:
|
||||
type: integer
|
||||
default: 4096
|
||||
description: Maximum number of tokens in the context.
|
||||
max_chunks:
|
||||
type: integer
|
||||
default: 5
|
||||
description: Maximum number of chunks to retrieve.
|
||||
chunk_template:
|
||||
type: string
|
||||
default: >
|
||||
Result {index}
|
||||
|
||||
Content: {chunk.content}
|
||||
|
||||
Metadata: {metadata}
|
||||
description: >-
|
||||
Template for formatting each retrieved chunk in the context. Available
|
||||
placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
|
||||
content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
|
||||
{chunk.content}\nMetadata: {metadata}\n"
|
||||
mode:
|
||||
$ref: '#/components/schemas/RAGSearchMode'
|
||||
default: vector
|
||||
description: >-
|
||||
Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
|
||||
"vector".
|
||||
ranker:
|
||||
$ref: '#/components/schemas/Ranker'
|
||||
description: >-
|
||||
Configuration for the ranker to use in hybrid search. Defaults to RRF
|
||||
ranker.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- query_generator_config
|
||||
- max_tokens_in_context
|
||||
- max_chunks
|
||||
- chunk_template
|
||||
title: RAGQueryConfig
|
||||
description: >-
|
||||
Configuration for the RAG query generation.
|
||||
RAGSearchMode:
|
||||
type: string
|
||||
enum:
|
||||
- vector
|
||||
- keyword
|
||||
- hybrid
|
||||
title: RAGSearchMode
|
||||
description: >-
|
||||
Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
|
||||
for semantic matching - KEYWORD: Uses keyword-based search for exact matching
|
||||
- HYBRID: Combines both vector and keyword search for better results
|
||||
RRFRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rrf
|
||||
default: rrf
|
||||
description: The type of ranker, always "rrf"
|
||||
impact_factor:
|
||||
type: number
|
||||
default: 60.0
|
||||
description: >-
|
||||
The impact factor for RRF scoring. Higher values give more weight to higher-ranked
|
||||
results. Must be greater than 0
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- impact_factor
|
||||
title: RRFRanker
|
||||
description: >-
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
Ranker:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/RRFRanker'
|
||||
- $ref: '#/components/schemas/WeightedRanker'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
rrf: '#/components/schemas/RRFRanker'
|
||||
weighted: '#/components/schemas/WeightedRanker'
|
||||
WeightedRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: weighted
|
||||
default: weighted
|
||||
description: The type of ranker, always "weighted"
|
||||
alpha:
|
||||
type: number
|
||||
default: 0.5
|
||||
description: >-
|
||||
Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
|
||||
only use vector scores, values in between blend both scores.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- alpha
|
||||
title: WeightedRanker
|
||||
description: >-
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
QueryRequest:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The query content to search for in the indexed documents
|
||||
vector_db_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
List of vector database IDs to search within
|
||||
query_config:
|
||||
$ref: '#/components/schemas/RAGQueryConfig'
|
||||
description: >-
|
||||
(Optional) Configuration parameters for the query operation
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- vector_db_ids
|
||||
title: QueryRequest
|
||||
RAGQueryResult:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
(Optional) The retrieved content from the query
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Additional metadata about the query result
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metadata
|
||||
title: RAGQueryResult
|
||||
description: >-
|
||||
Result of a RAG query containing retrieved content and metadata.
|
||||
ToolGroup:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -8990,7 +8659,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
435
docs/static/stainless-llama-stack-spec.html
vendored
435
docs/static/stainless-llama-stack-spec.html
vendored
|
@ -2624,89 +2624,6 @@
|
|||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/tool-runtime/rag-tool/insert": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"ToolRuntime"
|
||||
],
|
||||
"summary": "Index documents so they can be used by the RAG system.",
|
||||
"description": "Index documents so they can be used by the RAG system.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/InsertRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/tool-runtime/rag-tool/query": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "RAGQueryResult containing the retrieved content and metadata",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RAGQueryResult"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"ToolRuntime"
|
||||
],
|
||||
"summary": "Query the RAG system for context; typically invoked by the agent.",
|
||||
"description": "Query the RAG system for context; typically invoked by the agent.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/toolgroups": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -8472,7 +8389,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -11877,7 +11794,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -12359,7 +12276,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -13055,346 +12972,6 @@
|
|||
"title": "ListToolDefsResponse",
|
||||
"description": "Response containing a list of tool definitions."
|
||||
},
|
||||
"RAGDocument": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"document_id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier for the document."
|
||||
},
|
||||
"content": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
}
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
],
|
||||
"description": "The content of the document."
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string",
|
||||
"description": "The MIME type of the document."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Additional metadata for the document."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"document_id",
|
||||
"content",
|
||||
"metadata"
|
||||
],
|
||||
"title": "RAGDocument",
|
||||
"description": "A document to be used for document ingestion in the RAG Tool."
|
||||
},
|
||||
"InsertRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"documents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/RAGDocument"
|
||||
},
|
||||
"description": "List of documents to index in the RAG system"
|
||||
},
|
||||
"vector_db_id": {
|
||||
"type": "string",
|
||||
"description": "ID of the vector database to store the document embeddings"
|
||||
},
|
||||
"chunk_size_in_tokens": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Size in tokens for document chunking during indexing"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"documents",
|
||||
"vector_db_id",
|
||||
"chunk_size_in_tokens"
|
||||
],
|
||||
"title": "InsertRequest"
|
||||
},
|
||||
"DefaultRAGQueryGeneratorConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "default",
|
||||
"default": "default",
|
||||
"description": "Type of query generator, always 'default'"
|
||||
},
|
||||
"separator": {
|
||||
"type": "string",
|
||||
"default": " ",
|
||||
"description": "String separator used to join query terms"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"separator"
|
||||
],
|
||||
"title": "DefaultRAGQueryGeneratorConfig",
|
||||
"description": "Configuration for the default RAG query generator."
|
||||
},
|
||||
"LLMRAGQueryGeneratorConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "llm",
|
||||
"default": "llm",
|
||||
"description": "Type of query generator, always 'llm'"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "Name of the language model to use for query generation"
|
||||
},
|
||||
"template": {
|
||||
"type": "string",
|
||||
"description": "Template string for formatting the query generation prompt"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"model",
|
||||
"template"
|
||||
],
|
||||
"title": "LLMRAGQueryGeneratorConfig",
|
||||
"description": "Configuration for the LLM-based RAG query generator."
|
||||
},
|
||||
"RAGQueryConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query_generator_config": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"default": "#/components/schemas/DefaultRAGQueryGeneratorConfig",
|
||||
"llm": "#/components/schemas/LLMRAGQueryGeneratorConfig"
|
||||
}
|
||||
},
|
||||
"description": "Configuration for the query generator."
|
||||
},
|
||||
"max_tokens_in_context": {
|
||||
"type": "integer",
|
||||
"default": 4096,
|
||||
"description": "Maximum number of tokens in the context."
|
||||
},
|
||||
"max_chunks": {
|
||||
"type": "integer",
|
||||
"default": 5,
|
||||
"description": "Maximum number of chunks to retrieve."
|
||||
},
|
||||
"chunk_template": {
|
||||
"type": "string",
|
||||
"default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
|
||||
"description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
|
||||
},
|
||||
"mode": {
|
||||
"$ref": "#/components/schemas/RAGSearchMode",
|
||||
"default": "vector",
|
||||
"description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
|
||||
},
|
||||
"ranker": {
|
||||
"$ref": "#/components/schemas/Ranker",
|
||||
"description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"query_generator_config",
|
||||
"max_tokens_in_context",
|
||||
"max_chunks",
|
||||
"chunk_template"
|
||||
],
|
||||
"title": "RAGQueryConfig",
|
||||
"description": "Configuration for the RAG query generation."
|
||||
},
|
||||
"RAGSearchMode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"vector",
|
||||
"keyword",
|
||||
"hybrid"
|
||||
],
|
||||
"title": "RAGSearchMode",
|
||||
"description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
|
||||
},
|
||||
"RRFRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "rrf",
|
||||
"default": "rrf",
|
||||
"description": "The type of ranker, always \"rrf\""
|
||||
},
|
||||
"impact_factor": {
|
||||
"type": "number",
|
||||
"default": 60.0,
|
||||
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"impact_factor"
|
||||
],
|
||||
"title": "RRFRanker",
|
||||
"description": "Reciprocal Rank Fusion (RRF) ranker configuration."
|
||||
},
|
||||
"Ranker": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/RRFRanker"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"rrf": "#/components/schemas/RRFRanker",
|
||||
"weighted": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
}
|
||||
},
|
||||
"WeightedRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "weighted",
|
||||
"default": "weighted",
|
||||
"description": "The type of ranker, always \"weighted\""
|
||||
},
|
||||
"alpha": {
|
||||
"type": "number",
|
||||
"default": 0.5,
|
||||
"description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"alpha"
|
||||
],
|
||||
"title": "WeightedRanker",
|
||||
"description": "Weighted ranker configuration that combines vector and keyword scores."
|
||||
},
|
||||
"QueryRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The query content to search for in the indexed documents"
|
||||
},
|
||||
"vector_db_ids": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "List of vector database IDs to search within"
|
||||
},
|
||||
"query_config": {
|
||||
"$ref": "#/components/schemas/RAGQueryConfig",
|
||||
"description": "(Optional) Configuration parameters for the query operation"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"vector_db_ids"
|
||||
],
|
||||
"title": "QueryRequest"
|
||||
},
|
||||
"RAGQueryResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "(Optional) The retrieved content from the query"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Additional metadata about the query result"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"metadata"
|
||||
],
|
||||
"title": "RAGQueryResult",
|
||||
"description": "Result of a RAG query containing retrieved content and metadata."
|
||||
},
|
||||
"ToolGroup": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -13412,7 +12989,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -14959,7 +14536,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
@ -16704,7 +16281,7 @@
|
|||
"enum": [
|
||||
"model",
|
||||
"shield",
|
||||
"vector_db",
|
||||
"vector_store",
|
||||
"dataset",
|
||||
"scoring_function",
|
||||
"benchmark",
|
||||
|
|
343
docs/static/stainless-llama-stack-spec.yaml
vendored
343
docs/static/stainless-llama-stack-spec.yaml
vendored
|
@ -2039,69 +2039,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/URL'
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/insert:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
description: >-
|
||||
Index documents so they can be used by the RAG system.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InsertRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/tool-runtime/rag-tool/query:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
RAGQueryResult containing the retrieved content and metadata
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RAGQueryResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
description: >-
|
||||
Query the RAG system for context; typically invoked by the agent.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
@ -6440,7 +6377,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9132,7 +9069,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9440,7 +9377,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -9921,274 +9858,6 @@ components:
|
|||
title: ListToolDefsResponse
|
||||
description: >-
|
||||
Response containing a list of tool definitions.
|
||||
RAGDocument:
|
||||
type: object
|
||||
properties:
|
||||
document_id:
|
||||
type: string
|
||||
description: The unique identifier for the document.
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
description: The content of the document.
|
||||
mime_type:
|
||||
type: string
|
||||
description: The MIME type of the document.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Additional metadata for the document.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- document_id
|
||||
- content
|
||||
- metadata
|
||||
title: RAGDocument
|
||||
description: >-
|
||||
A document to be used for document ingestion in the RAG Tool.
|
||||
InsertRequest:
|
||||
type: object
|
||||
properties:
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/RAGDocument'
|
||||
description: >-
|
||||
List of documents to index in the RAG system
|
||||
vector_db_id:
|
||||
type: string
|
||||
description: >-
|
||||
ID of the vector database to store the document embeddings
|
||||
chunk_size_in_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) Size in tokens for document chunking during indexing
|
||||
additionalProperties: false
|
||||
required:
|
||||
- documents
|
||||
- vector_db_id
|
||||
- chunk_size_in_tokens
|
||||
title: InsertRequest
|
||||
DefaultRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: default
|
||||
default: default
|
||||
description: >-
|
||||
Type of query generator, always 'default'
|
||||
separator:
|
||||
type: string
|
||||
default: ' '
|
||||
description: >-
|
||||
String separator used to join query terms
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- separator
|
||||
title: DefaultRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the default RAG query generator.
|
||||
LLMRAGQueryGeneratorConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: llm
|
||||
default: llm
|
||||
description: Type of query generator, always 'llm'
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
Name of the language model to use for query generation
|
||||
template:
|
||||
type: string
|
||||
description: >-
|
||||
Template string for formatting the query generation prompt
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- model
|
||||
- template
|
||||
title: LLMRAGQueryGeneratorConfig
|
||||
description: >-
|
||||
Configuration for the LLM-based RAG query generator.
|
||||
RAGQueryConfig:
|
||||
type: object
|
||||
properties:
|
||||
query_generator_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
description: Configuration for the query generator.
|
||||
max_tokens_in_context:
|
||||
type: integer
|
||||
default: 4096
|
||||
description: Maximum number of tokens in the context.
|
||||
max_chunks:
|
||||
type: integer
|
||||
default: 5
|
||||
description: Maximum number of chunks to retrieve.
|
||||
chunk_template:
|
||||
type: string
|
||||
default: >
|
||||
Result {index}
|
||||
|
||||
Content: {chunk.content}
|
||||
|
||||
Metadata: {metadata}
|
||||
description: >-
|
||||
Template for formatting each retrieved chunk in the context. Available
|
||||
placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
|
||||
content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
|
||||
{chunk.content}\nMetadata: {metadata}\n"
|
||||
mode:
|
||||
$ref: '#/components/schemas/RAGSearchMode'
|
||||
default: vector
|
||||
description: >-
|
||||
Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
|
||||
"vector".
|
||||
ranker:
|
||||
$ref: '#/components/schemas/Ranker'
|
||||
description: >-
|
||||
Configuration for the ranker to use in hybrid search. Defaults to RRF
|
||||
ranker.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- query_generator_config
|
||||
- max_tokens_in_context
|
||||
- max_chunks
|
||||
- chunk_template
|
||||
title: RAGQueryConfig
|
||||
description: >-
|
||||
Configuration for the RAG query generation.
|
||||
RAGSearchMode:
|
||||
type: string
|
||||
enum:
|
||||
- vector
|
||||
- keyword
|
||||
- hybrid
|
||||
title: RAGSearchMode
|
||||
description: >-
|
||||
Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
|
||||
for semantic matching - KEYWORD: Uses keyword-based search for exact matching
|
||||
- HYBRID: Combines both vector and keyword search for better results
|
||||
RRFRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rrf
|
||||
default: rrf
|
||||
description: The type of ranker, always "rrf"
|
||||
impact_factor:
|
||||
type: number
|
||||
default: 60.0
|
||||
description: >-
|
||||
The impact factor for RRF scoring. Higher values give more weight to higher-ranked
|
||||
results. Must be greater than 0
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- impact_factor
|
||||
title: RRFRanker
|
||||
description: >-
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
Ranker:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/RRFRanker'
|
||||
- $ref: '#/components/schemas/WeightedRanker'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
rrf: '#/components/schemas/RRFRanker'
|
||||
weighted: '#/components/schemas/WeightedRanker'
|
||||
WeightedRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: weighted
|
||||
default: weighted
|
||||
description: The type of ranker, always "weighted"
|
||||
alpha:
|
||||
type: number
|
||||
default: 0.5
|
||||
description: >-
|
||||
Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
|
||||
only use vector scores, values in between blend both scores.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- alpha
|
||||
title: WeightedRanker
|
||||
description: >-
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
QueryRequest:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The query content to search for in the indexed documents
|
||||
vector_db_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
List of vector database IDs to search within
|
||||
query_config:
|
||||
$ref: '#/components/schemas/RAGQueryConfig'
|
||||
description: >-
|
||||
(Optional) Configuration parameters for the query operation
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- vector_db_ids
|
||||
title: QueryRequest
|
||||
RAGQueryResult:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
(Optional) The retrieved content from the query
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Additional metadata about the query result
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metadata
|
||||
title: RAGQueryResult
|
||||
description: >-
|
||||
Result of a RAG query containing retrieved content and metadata.
|
||||
ToolGroup:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -10203,7 +9872,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -11325,7 +10994,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
@ -12652,7 +12321,7 @@ components:
|
|||
enum:
|
||||
- model
|
||||
- shield
|
||||
- vector_db
|
||||
- vector_store
|
||||
- dataset
|
||||
- scoring_function
|
||||
- benchmark
|
||||
|
|
|
@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
|
|||
|
||||
models = "models"
|
||||
shields = "shields"
|
||||
vector_dbs = "vector_dbs" # only used for routing
|
||||
vector_stores = "vector_stores" # only used for routing table
|
||||
datasets = "datasets"
|
||||
scoring_functions = "scoring_functions"
|
||||
benchmarks = "benchmarks"
|
||||
|
|
|
@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
|
|||
class ResourceType(StrEnum):
|
||||
model = "model"
|
||||
shield = "shield"
|
||||
vector_db = "vector_db"
|
||||
vector_store = "vector_store"
|
||||
dataset = "dataset"
|
||||
scoring_function = "scoring_function"
|
||||
benchmark = "benchmark"
|
||||
|
@ -34,4 +34,4 @@ class Resource(BaseModel):
|
|||
|
||||
provider_id: str = Field(description="ID of the provider that owns this resource")
|
||||
|
||||
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
|
||||
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
|
||||
|
|
|
@ -4,5 +4,4 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .rag_tool import *
|
||||
from .tools import *
|
||||
|
|
|
@ -1,218 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, StrEnum
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RRFRanker(BaseModel):
|
||||
"""
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
|
||||
:param type: The type of ranker, always "rrf"
|
||||
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
|
||||
Must be greater than 0
|
||||
"""
|
||||
|
||||
type: Literal["rrf"] = "rrf"
|
||||
impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class WeightedRanker(BaseModel):
|
||||
"""
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
|
||||
:param type: The type of ranker, always "weighted"
|
||||
:param alpha: Weight factor between 0 and 1.
|
||||
0 means only use keyword scores,
|
||||
1 means only use vector scores,
|
||||
values in between blend both scores.
|
||||
"""
|
||||
|
||||
type: Literal["weighted"] = "weighted"
|
||||
alpha: float = Field(
|
||||
default=0.5,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
|
||||
)
|
||||
|
||||
|
||||
Ranker = Annotated[
|
||||
RRFRanker | WeightedRanker,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(Ranker, name="Ranker")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGDocument(BaseModel):
|
||||
"""
|
||||
A document to be used for document ingestion in the RAG Tool.
|
||||
|
||||
:param document_id: The unique identifier for the document.
|
||||
:param content: The content of the document.
|
||||
:param mime_type: The MIME type of the document.
|
||||
:param metadata: Additional metadata for the document.
|
||||
"""
|
||||
|
||||
document_id: str
|
||||
content: InterleavedContent | URL
|
||||
mime_type: str | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGQueryResult(BaseModel):
|
||||
"""Result of a RAG query containing retrieved content and metadata.
|
||||
|
||||
:param content: (Optional) The retrieved content from the query
|
||||
:param metadata: Additional metadata about the query result
|
||||
"""
|
||||
|
||||
content: InterleavedContent | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGQueryGenerator(Enum):
|
||||
"""Types of query generators for RAG systems.
|
||||
|
||||
:cvar default: Default query generator using simple text processing
|
||||
:cvar llm: LLM-based query generator for enhanced query understanding
|
||||
:cvar custom: Custom query generator implementation
|
||||
"""
|
||||
|
||||
default = "default"
|
||||
llm = "llm"
|
||||
custom = "custom"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGSearchMode(StrEnum):
|
||||
"""
|
||||
Search modes for RAG query retrieval:
|
||||
- VECTOR: Uses vector similarity search for semantic matching
|
||||
- KEYWORD: Uses keyword-based search for exact matching
|
||||
- HYBRID: Combines both vector and keyword search for better results
|
||||
"""
|
||||
|
||||
VECTOR = "vector"
|
||||
KEYWORD = "keyword"
|
||||
HYBRID = "hybrid"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the default RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'default'
|
||||
:param separator: String separator used to join query terms
|
||||
"""
|
||||
|
||||
type: Literal["default"] = "default"
|
||||
separator: str = " "
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LLMRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the LLM-based RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'llm'
|
||||
:param model: Name of the language model to use for query generation
|
||||
:param template: Template string for formatting the query generation prompt
|
||||
"""
|
||||
|
||||
type: Literal["llm"] = "llm"
|
||||
model: str
|
||||
template: str
|
||||
|
||||
|
||||
RAGQueryGeneratorConfig = Annotated[
|
||||
DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RAGQueryConfig(BaseModel):
|
||||
"""
|
||||
Configuration for the RAG query generation.
|
||||
|
||||
:param query_generator_config: Configuration for the query generator.
|
||||
:param max_tokens_in_context: Maximum number of tokens in the context.
|
||||
:param max_chunks: Maximum number of chunks to retrieve.
|
||||
:param chunk_template: Template for formatting each retrieved chunk in the context.
|
||||
Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
|
||||
Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
|
||||
:param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector".
|
||||
:param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
|
||||
"""
|
||||
|
||||
# This config defines how a query is generated using the messages
|
||||
# for memory bank retrieval.
|
||||
query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
|
||||
max_tokens_in_context: int = 4096
|
||||
max_chunks: int = 5
|
||||
chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
|
||||
mode: RAGSearchMode | None = RAGSearchMode.VECTOR
|
||||
ranker: Ranker | None = Field(default=None) # Only used for hybrid mode
|
||||
|
||||
@field_validator("chunk_template")
|
||||
def validate_chunk_template(cls, v: str) -> str:
|
||||
if "{chunk.content}" not in v:
|
||||
raise ValueError("chunk_template must contain {chunk.content}")
|
||||
if "{index}" not in v:
|
||||
raise ValueError("chunk_template must contain {index}")
|
||||
if len(v) == 0:
|
||||
raise ValueError("chunk_template must not be empty")
|
||||
return v
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class RAGToolRuntime(Protocol):
|
||||
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert(
|
||||
self,
|
||||
documents: list[RAGDocument],
|
||||
vector_db_id: str,
|
||||
chunk_size_in_tokens: int = 512,
|
||||
) -> None:
|
||||
"""Index documents so they can be used by the RAG system.
|
||||
|
||||
:param documents: List of documents to index in the RAG system
|
||||
:param vector_db_id: ID of the vector database to store the document embeddings
|
||||
:param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query(
|
||||
self,
|
||||
content: InterleavedContent,
|
||||
vector_db_ids: list[str],
|
||||
query_config: RAGQueryConfig | None = None,
|
||||
) -> RAGQueryResult:
|
||||
"""Query the RAG system for context; typically invoked by the agent.
|
||||
|
||||
:param content: The query content to search for in the indexed documents
|
||||
:param vector_db_ids: List of vector database IDs to search within
|
||||
:param query_config: (Optional) Configuration parameters for the query operation
|
||||
:returns: RAGQueryResult containing the retrieved content and metadata
|
||||
"""
|
||||
...
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
@ -16,8 +15,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1
|
|||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
from .rag_tool import RAGToolRuntime
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolDef(BaseModel):
|
||||
|
@ -181,22 +178,11 @@ class ToolGroups(Protocol):
|
|||
...
|
||||
|
||||
|
||||
class SpecialToolGroup(Enum):
|
||||
"""Special tool groups with predefined functionality.
|
||||
|
||||
:cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval
|
||||
"""
|
||||
|
||||
rag_tool = "rag_tool"
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class ToolRuntime(Protocol):
|
||||
tool_store: ToolStore | None = None
|
||||
|
||||
rag_tool: RAGToolRuntime | None = None
|
||||
|
||||
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_runtime_tools(
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorDB(Resource):
|
||||
"""Vector database resource for storing and querying vector embeddings.
|
||||
|
||||
:param type: Type of resource, always 'vector_db' for vector databases
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.vector_db] = ResourceType.vector_db
|
||||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
vector_db_name: str | None = None
|
||||
|
||||
@property
|
||||
def vector_db_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_vector_db_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class VectorDBInput(BaseModel):
|
||||
"""Input parameters for creating or configuring a vector database.
|
||||
|
||||
:param vector_db_id: Unique identifier for the vector database
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
|
||||
"""
|
||||
|
||||
vector_db_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
provider_id: str | None = None
|
||||
provider_vector_db_id: str | None = None
|
||||
|
||||
|
||||
class ListVectorDBsResponse(BaseModel):
|
||||
"""Response from listing vector databases.
|
||||
|
||||
:param data: List of vector databases
|
||||
"""
|
||||
|
||||
data: list[VectorDB]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class VectorDBs(Protocol):
|
||||
"""Internal protocol for vector_dbs routing - no public API endpoints."""
|
||||
|
||||
async def list_vector_dbs(self) -> ListVectorDBsResponse:
|
||||
"""Internal method to list vector databases."""
|
||||
...
|
||||
|
||||
async def get_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
) -> VectorDB:
|
||||
"""Internal method to get a vector database by ID."""
|
||||
...
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorDB:
|
||||
"""Internal method to register a vector database."""
|
||||
...
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
"""Internal method to unregister a vector database."""
|
||||
...
|
|
@ -15,7 +15,7 @@ from fastapi import Body
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
||||
|
@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
|
|||
total: int
|
||||
|
||||
|
||||
# TODO: rename this as OpenAIVectorStore
|
||||
@json_schema_type
|
||||
class VectorStoreObject(BaseModel):
|
||||
"""OpenAI Vector Store object.
|
||||
|
@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all
|
|||
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
||||
|
||||
|
||||
class VectorDBStore(Protocol):
|
||||
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
|
||||
class VectorStoreTable(Protocol):
|
||||
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class VectorIO(Protocol):
|
||||
vector_db_store: VectorDBStore | None = None
|
||||
vector_store_table: VectorStoreTable | None = None
|
||||
|
||||
# this will just block now until chunks are inserted, but it should
|
||||
# probably return a Job instance which can be polled for completion
|
||||
# TODO: rename vector_db_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert_chunks(
|
||||
self,
|
||||
|
@ -546,6 +548,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
# TODO: rename vector_db_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query_chunks(
|
||||
self,
|
||||
|
|
|
@ -4,4 +4,4 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .vector_dbs import *
|
||||
from .vector_stores import *
|
51
llama_stack/apis/vector_stores/vector_stores.py
Normal file
51
llama_stack/apis/vector_stores/vector_stores.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
|
||||
|
||||
# Internal resource type for storing the vector store routing and other information
|
||||
class VectorStore(Resource):
|
||||
"""Vector database resource for storing and querying vector embeddings.
|
||||
|
||||
:param type: Type of resource, always 'vector_store' for vector stores
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
|
||||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
vector_store_name: str | None = None
|
||||
|
||||
@property
|
||||
def vector_store_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_vector_store_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class VectorStoreInput(BaseModel):
|
||||
"""Input parameters for creating or configuring a vector database.
|
||||
|
||||
:param vector_store_id: Unique identifier for the vector store
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
|
||||
"""
|
||||
|
||||
vector_store_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
provider_id: str | None = None
|
||||
provider_vector_store_id: str | None = None
|
|
@ -1,519 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import importlib.resources
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import textwrap
|
||||
from functools import lru_cache
|
||||
from importlib.abc import Traversable
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from prompt_toolkit import prompt
|
||||
from prompt_toolkit.completion import WordCompleter
|
||||
from prompt_toolkit.validation import Validator
|
||||
from termcolor import colored, cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.table import print_table
|
||||
from llama_stack.core.build import (
|
||||
SERVER_DEPENDENCIES,
|
||||
build_image,
|
||||
get_provider_dependencies,
|
||||
)
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
InferenceStoreReference,
|
||||
KVStoreReference,
|
||||
ServerStoresConfig,
|
||||
SqliteKVStoreConfig,
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreReference,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.exec import formulate_run_args, run_command
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_distros_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
distro_specs = {}
|
||||
for p in DISTRIBS_PATH.rglob("*build.yaml"):
|
||||
distro_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
distro_specs[distro_name] = build_config
|
||||
return distro_specs
|
||||
|
||||
|
||||
def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||
if args.list_distros:
|
||||
return _run_distro_list_cmd()
|
||||
|
||||
if args.image_type == ImageType.VENV.value:
|
||||
current_venv = os.environ.get("VIRTUAL_ENV")
|
||||
image_name = args.image_name or current_venv
|
||||
else:
|
||||
image_name = args.image_name
|
||||
|
||||
if args.template:
|
||||
cprint(
|
||||
"The --template argument is deprecated. Please use --distro instead.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
distro_name = args.template
|
||||
else:
|
||||
distro_name = args.distribution
|
||||
|
||||
if distro_name:
|
||||
available_distros = available_distros_specs()
|
||||
if distro_name not in available_distros:
|
||||
cprint(
|
||||
f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
build_config = available_distros[distro_name]
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
else:
|
||||
cprint(
|
||||
f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
if not args.image_type:
|
||||
cprint(
|
||||
f"Please specify a image-type (container | venv) for {args.template}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
|
||||
elif not args.config and not distro_name:
|
||||
name = prompt(
|
||||
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
|
||||
validator=Validator.from_callable(
|
||||
lambda x: len(x) > 0,
|
||||
error_message="Name cannot be empty, please enter a name",
|
||||
),
|
||||
)
|
||||
|
||||
image_type = prompt(
|
||||
"> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
|
||||
completer=WordCompleter([e.value for e in ImageType]),
|
||||
complete_while_typing=True,
|
||||
validator=Validator.from_callable(
|
||||
lambda x: x in [e.value for e in ImageType],
|
||||
error_message="Invalid image type. Use <TAB> to see options",
|
||||
),
|
||||
)
|
||||
|
||||
image_name = f"llamastack-{name}"
|
||||
|
||||
cprint(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
Llama Stack is composed of several APIs working together. Let's select
|
||||
the provider types (implementations) you want to use for these APIs.
|
||||
""",
|
||||
),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
|
||||
|
||||
providers: dict[str, list[BuildProvider]] = dict()
|
||||
for api, providers_for_api in get_provider_registry().items():
|
||||
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
|
||||
if not available_providers:
|
||||
continue
|
||||
api_provider = prompt(
|
||||
f"> Enter provider for API {api.value}: ",
|
||||
completer=WordCompleter(available_providers),
|
||||
complete_while_typing=True,
|
||||
validator=Validator.from_callable(
|
||||
lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
|
||||
error_message="Invalid provider, use <TAB> to see options",
|
||||
),
|
||||
)
|
||||
|
||||
string_providers = api_provider.split(" ")
|
||||
|
||||
for provider in string_providers:
|
||||
providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
|
||||
|
||||
description = prompt(
|
||||
"\n > (Optional) Enter a short description for your Llama Stack: ",
|
||||
default="",
|
||||
)
|
||||
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=providers,
|
||||
description=description,
|
||||
)
|
||||
|
||||
build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
|
||||
else:
|
||||
with open(args.config) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if args.print_deps_only:
|
||||
print(f"# Dependencies for {distro_name or args.config or image_name}")
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
print(f"uv pip install {' '.join(normal_deps)}")
|
||||
for special_dep in special_deps:
|
||||
print(f"uv pip install {special_dep}")
|
||||
for external_dep in external_provider_dependencies:
|
||||
print(f"uv pip install {external_dep}")
|
||||
return
|
||||
|
||||
try:
|
||||
run_config = _run_stack_build_command_from_build_config(
|
||||
build_config,
|
||||
image_name=image_name,
|
||||
config_path=args.config,
|
||||
distro_name=distro_name,
|
||||
)
|
||||
|
||||
except (Exception, RuntimeError) as exc:
|
||||
import traceback
|
||||
|
||||
cprint(
|
||||
f"Error building stack: {exc}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
cprint("Stack trace:", color="red", file=sys.stderr)
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
if run_config is None:
|
||||
cprint(
|
||||
"Run config path is empty",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if args.run:
|
||||
config_dict = yaml.safe_load(run_config.read_text())
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
if config.external_providers_dir and not config.external_providers_dir.exists():
|
||||
config.external_providers_dir.mkdir(exist_ok=True)
|
||||
run_args = formulate_run_args(args.image_type, image_name or config.image_name)
|
||||
run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
|
||||
run_command(run_args)
|
||||
|
||||
|
||||
def _generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
distro_dir = DISTRIBS_BASE_DIR / image_name
|
||||
storage = StorageConfig(
|
||||
backends={
|
||||
"kv_default": SqliteKVStoreConfig(
|
||||
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
|
||||
),
|
||||
"sql_default": SqliteSqlStoreConfig(
|
||||
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
|
||||
),
|
||||
},
|
||||
stores=ServerStoresConfig(
|
||||
metadata=KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="registry",
|
||||
),
|
||||
inference=InferenceStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="inference_store",
|
||||
),
|
||||
conversations=SqlStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="openai_conversations",
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
storage=storage,
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
|
||||
|
||||
|
||||
def _run_stack_build_command_from_build_config(
|
||||
build_config: BuildConfig,
|
||||
image_name: str | None = None,
|
||||
distro_name: str | None = None,
|
||||
config_path: str | None = None,
|
||||
) -> Path | Traversable:
|
||||
image_name = image_name or build_config.image_name
|
||||
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
|
||||
if distro_name:
|
||||
image_name = f"distribution-{distro_name}"
|
||||
else:
|
||||
if not image_name:
|
||||
raise ValueError("Please specify an image name when building a container image without a template")
|
||||
else:
|
||||
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
|
||||
image_name = "__system__"
|
||||
if not image_name:
|
||||
raise ValueError("Please specify an image name when building a venv image")
|
||||
|
||||
# At this point, image_name should be guaranteed to be a string
|
||||
if image_name is None:
|
||||
raise ValueError("image_name should not be None after validation")
|
||||
|
||||
if distro_name:
|
||||
build_dir = DISTRIBS_BASE_DIR / distro_name
|
||||
build_file_path = build_dir / f"{distro_name}-build.yaml"
|
||||
else:
|
||||
if image_name is None:
|
||||
raise ValueError("image_name cannot be None")
|
||||
build_dir = DISTRIBS_BASE_DIR / image_name
|
||||
build_file_path = build_dir / f"{image_name}-build.yaml"
|
||||
|
||||
os.makedirs(build_dir, exist_ok=True)
|
||||
run_config_file = None
|
||||
# Generate the run.yaml so it can be included in the container image with the proper entrypoint
|
||||
# Only do this if we're building a container image and we're not using a template
|
||||
if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path:
|
||||
cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
|
||||
run_config_file = _generate_run_config(build_config, build_dir, image_name)
|
||||
|
||||
with open(build_file_path, "w") as f:
|
||||
to_write = json.loads(build_config.model_dump_json(exclude_none=True))
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# We first install the external APIs so that the build process can use them and discover the
|
||||
# providers dependencies
|
||||
if build_config.external_apis_dir:
|
||||
cprint("Installing external APIs", color="yellow", file=sys.stderr)
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
# install the external APIs
|
||||
packages = []
|
||||
for _, api_spec in external_apis.items():
|
||||
if api_spec.pip_packages:
|
||||
packages.extend(api_spec.pip_packages)
|
||||
cprint(
|
||||
f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return_code = run_command(["uv", "pip", "install", *packages])
|
||||
if return_code != 0:
|
||||
packages_str = ", ".join(packages)
|
||||
raise RuntimeError(
|
||||
f"Failed to install external APIs packages: {packages_str} (return code: {return_code})"
|
||||
)
|
||||
|
||||
return_code = build_image(
|
||||
build_config,
|
||||
image_name,
|
||||
distro_or_config=distro_name or config_path or str(build_file_path),
|
||||
run_config=run_config_file.as_posix() if run_config_file else None,
|
||||
)
|
||||
if return_code != 0:
|
||||
raise RuntimeError(f"Failed to build image {image_name}")
|
||||
|
||||
if distro_name:
|
||||
# copy run.yaml from distribution to build_dir instead of generating it again
|
||||
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml"
|
||||
run_config_file = build_dir / f"{distro_name}-run.yaml"
|
||||
|
||||
with importlib.resources.as_file(distro_path) as path:
|
||||
shutil.copy(path, run_config_file)
|
||||
|
||||
cprint("Build Successful!", color="green", file=sys.stderr)
|
||||
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
|
||||
if build_config.image_type == LlamaStackImageType.VENV:
|
||||
cprint(
|
||||
"You can run the new Llama Stack distro (after activating "
|
||||
+ colored(image_name, "cyan")
|
||||
+ ") via: "
|
||||
+ colored(f"llama stack run {run_config_file}", "blue"),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif build_config.image_type == LlamaStackImageType.CONTAINER:
|
||||
cprint(
|
||||
"You can run the container with: "
|
||||
+ colored(
|
||||
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
|
||||
),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return distro_path
|
||||
else:
|
||||
return _generate_run_config(build_config, build_dir, image_name)
|
||||
|
||||
|
||||
def _run_distro_list_cmd() -> None:
|
||||
headers = [
|
||||
"Distribution Name",
|
||||
# "Providers",
|
||||
"Description",
|
||||
]
|
||||
|
||||
rows = []
|
||||
for distro_name, spec in available_distros_specs().items():
|
||||
rows.append(
|
||||
[
|
||||
distro_name,
|
||||
# json.dumps(spec.distribution_spec.providers, indent=2),
|
||||
spec.distribution_spec.description,
|
||||
]
|
||||
)
|
||||
print_table(
|
||||
rows,
|
||||
headers,
|
||||
separate_rows=True,
|
||||
)
|
|
@ -1,106 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import argparse
|
||||
import textwrap
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, category="cli")
|
||||
|
||||
|
||||
class StackBuild(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"build",
|
||||
prog="llama stack build",
|
||||
description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_build_command)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"--config",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--template",
|
||||
type=str,
|
||||
default=None,
|
||||
help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--distro",
|
||||
"--distribution",
|
||||
dest="distribution",
|
||||
type=str,
|
||||
default=None,
|
||||
help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--list-distros",
|
||||
"--list-distributions",
|
||||
action="store_true",
|
||||
dest="list_distros",
|
||||
default=False,
|
||||
help="Show the available distributions for building a Llama Stack distribution",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--image-type",
|
||||
type=str,
|
||||
help="Image Type to use for the build. If not specified, will use the image type from the template config.",
|
||||
choices=[e.value for e in ImageType],
|
||||
default=None, # no default so we can detect if a user specified --image-type and override image_type in the config
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--image-name",
|
||||
type=str,
|
||||
help=textwrap.dedent(
|
||||
f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for
|
||||
the build. If not specified, currently active environment will be used if found.
|
||||
"""
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--print-deps-only",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Print the dependencies for the stack only, without building the stack",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--run",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Run the stack after building using the same image type, name, and other applicable arguments",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
|
||||
)
|
||||
|
||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
logger.warning(
|
||||
"The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
|
||||
)
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._build import run_stack_build_command
|
||||
|
||||
return run_stack_build_command(args)
|
|
@ -11,7 +11,6 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds
|
|||
from llama_stack.cli.stack.utils import print_subcommand_description
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
from .build import StackBuild
|
||||
from .list_apis import StackListApis
|
||||
from .list_deps import StackListDeps
|
||||
from .list_providers import StackListProviders
|
||||
|
@ -41,7 +40,6 @@ class StackParser(Subcommand):
|
|||
|
||||
# Add sub-commands
|
||||
StackListDeps.create(subparsers)
|
||||
StackBuild.create(subparsers)
|
||||
StackListApis.create(subparsers)
|
||||
StackListProviders.create(subparsers)
|
||||
StackRun.create(subparsers)
|
||||
|
|
|
@ -41,7 +41,7 @@ class AccessRule(BaseModel):
|
|||
A rule defines a list of action either to permit or to forbid. It may specify a
|
||||
principal or a resource that must match for the rule to take effect. The resource
|
||||
to match should be specified in the form of a type qualified identifier, e.g.
|
||||
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
|
||||
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
|
||||
e.g. model::*. If the principal or resource are not specified, they will match all
|
||||
requests.
|
||||
|
||||
|
@ -79,9 +79,9 @@ class AccessRule(BaseModel):
|
|||
description: any user has read access to any resource created by a member of their team
|
||||
- forbid:
|
||||
actions: [create, read, delete]
|
||||
resource: vector_db::*
|
||||
resource: vector_store::*
|
||||
unless: user with admin in roles
|
||||
description: only user with admin role can use vector_db resources
|
||||
description: only user with admin role can use vector_store resources
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
@ -1,410 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
|
||||
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
PYPI_VERSION=${PYPI_VERSION:-}
|
||||
BUILD_PLATFORM=${BUILD_PLATFORM:-}
|
||||
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
|
||||
|
||||
# mounting is not supported by docker buildx, so we use COPY instead
|
||||
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
|
||||
# Path to the run.yaml file in the container
|
||||
RUN_CONFIG_PATH=/app/run.yaml
|
||||
|
||||
BUILD_CONTEXT_DIR=$(pwd)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Define color codes
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
|
||||
echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
image_name=""
|
||||
container_base=""
|
||||
normal_deps=""
|
||||
external_provider_deps=""
|
||||
optional_deps=""
|
||||
run_config=""
|
||||
distro_or_config=""
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case "$key" in
|
||||
--image-name)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --image-name requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
image_name="$2"
|
||||
shift 2
|
||||
;;
|
||||
--container-base)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --container-base requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
container_base="$2"
|
||||
shift 2
|
||||
;;
|
||||
--normal-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --normal-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
normal_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--external-provider-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --external-provider-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
external_provider_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--optional-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --optional-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
optional_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--run-config)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --run-config requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
run_config="$2"
|
||||
shift 2
|
||||
;;
|
||||
--distro-or-config)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --distro-or-config requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
distro_or_config="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Check required arguments
|
||||
if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
|
||||
echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
|
||||
usage
|
||||
fi
|
||||
|
||||
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
|
||||
CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
|
||||
TEMP_DIR=$(mktemp -d)
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
source "$SCRIPT_DIR/common.sh"
|
||||
|
||||
add_to_container() {
|
||||
output_file="$TEMP_DIR/Containerfile"
|
||||
if [ -t 0 ]; then
|
||||
printf '%s\n' "$1" >>"$output_file"
|
||||
else
|
||||
cat >>"$output_file"
|
||||
fi
|
||||
}
|
||||
|
||||
if ! is_command_available "$CONTAINER_BINARY"; then
|
||||
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
|
||||
add_to_container << EOF
|
||||
FROM $container_base
|
||||
WORKDIR /app
|
||||
|
||||
# We install the Python 3.12 dev headers and build tools so that any
|
||||
# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
|
||||
|
||||
RUN dnf -y update && dnf install -y iputils git net-tools wget \
|
||||
vim-minimal python3.12 python3.12-pip python3.12-wheel \
|
||||
python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
|
||||
ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
|
||||
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
RUN pip install uv
|
||||
EOF
|
||||
else
|
||||
add_to_container << EOF
|
||||
FROM $container_base
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
iputils-ping net-tools iproute2 dnsutils telnet \
|
||||
curl wget telnet git\
|
||||
procps psmisc lsof \
|
||||
traceroute \
|
||||
bubblewrap \
|
||||
gcc g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
RUN pip install uv
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Add pip dependencies first since llama-stack is what will change most often
|
||||
# so we can reuse layers.
|
||||
if [ -n "$normal_deps" ]; then
|
||||
read -ra pip_args <<< "$normal_deps"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
fi
|
||||
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
read -ra pip_args <<< "$part"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container <<EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
read -ra pip_args <<< "$part"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container <<EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
add_to_container <<EOF
|
||||
RUN python3 - <<PYTHON | uv pip install --no-cache -r -
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
try:
|
||||
package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
|
||||
module = importlib.import_module(f'{package_name}.provider')
|
||||
spec = module.get_provider_spec()
|
||||
if hasattr(spec, 'pip_packages') and spec.pip_packages:
|
||||
if isinstance(spec.pip_packages, (list, tuple)):
|
||||
print('\n'.join(spec.pip_packages))
|
||||
except Exception as e:
|
||||
print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
|
||||
PYTHON
|
||||
EOF
|
||||
done
|
||||
fi
|
||||
|
||||
get_python_cmd() {
|
||||
if is_command_available python; then
|
||||
echo "python"
|
||||
elif is_command_available python3; then
|
||||
echo "python3"
|
||||
else
|
||||
echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
if [ -n "$run_config" ]; then
|
||||
# Copy the run config to the build context since it's an absolute path
|
||||
cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
|
||||
# Parse the run.yaml configuration to identify external provider directories
|
||||
# If external providers are specified, copy their directory to the container
|
||||
# and update the configuration to reference the new container path
|
||||
python_cmd=$(get_python_cmd)
|
||||
external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
|
||||
external_providers_dir=$(eval echo "$external_providers_dir")
|
||||
if [ -n "$external_providers_dir" ]; then
|
||||
if [ -d "$external_providers_dir" ]; then
|
||||
echo "Copying external providers directory: $external_providers_dir"
|
||||
cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
|
||||
add_to_container << EOF
|
||||
COPY providers.d /.llama/providers.d
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
|
||||
else
|
||||
sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Copy run config into docker image
|
||||
add_to_container << EOF
|
||||
COPY run.yaml $RUN_CONFIG_PATH
|
||||
EOF
|
||||
fi
|
||||
|
||||
stack_mount="/app/llama-stack-source"
|
||||
client_mount="/app/llama-stack-client-source"
|
||||
|
||||
install_local_package() {
|
||||
local dir="$1"
|
||||
local mount_point="$2"
|
||||
local name="$3"
|
||||
|
||||
if [ ! -d "$dir" ]; then
|
||||
echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then
|
||||
add_to_container << EOF
|
||||
COPY $dir $mount_point
|
||||
EOF
|
||||
fi
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache -e $mount_point
|
||||
EOF
|
||||
}
|
||||
|
||||
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR"
|
||||
else
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
# these packages are damaged in test-pypi, so install them first
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache fastapi libcst
|
||||
EOF
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
|
||||
--index-strategy unsafe-best-match \
|
||||
llama-stack==$TEST_PYPI_VERSION
|
||||
|
||||
EOF
|
||||
else
|
||||
if [ -n "$PYPI_VERSION" ]; then
|
||||
SPEC_VERSION="llama-stack==${PYPI_VERSION}"
|
||||
else
|
||||
SPEC_VERSION="llama-stack"
|
||||
fi
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache $SPEC_VERSION
|
||||
EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
# remove uv after installation
|
||||
add_to_container << EOF
|
||||
RUN pip uninstall -y uv
|
||||
EOF
|
||||
|
||||
# If a run config is provided, we use the llama stack CLI
|
||||
if [[ -n "$run_config" ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
|
||||
EOF
|
||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Add other require item commands genearic to all containers
|
||||
add_to_container << EOF
|
||||
|
||||
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
|
||||
EOF
|
||||
|
||||
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
|
||||
cat "$TEMP_DIR"/Containerfile
|
||||
printf "\n"
|
||||
|
||||
# Start building the CLI arguments
|
||||
CLI_ARGS=()
|
||||
|
||||
# Read CONTAINER_OPTS and put it in an array
|
||||
read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
|
||||
|
||||
if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
|
||||
fi
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
|
||||
fi
|
||||
fi
|
||||
|
||||
if is_command_available selinuxenabled && selinuxenabled; then
|
||||
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
|
||||
CLI_ARGS+=("--security-opt" "label=disable")
|
||||
fi
|
||||
|
||||
# Set version tag based on PyPI version
|
||||
if [ -n "$PYPI_VERSION" ]; then
|
||||
version_tag="$PYPI_VERSION"
|
||||
elif [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
version_tag="test-$TEST_PYPI_VERSION"
|
||||
elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then
|
||||
version_tag="dev"
|
||||
else
|
||||
URL="https://pypi.org/pypi/llama-stack/json"
|
||||
version_tag=$(curl -s $URL | jq -r '.info.version')
|
||||
fi
|
||||
|
||||
# Add version tag to image name
|
||||
image_tag="$image_name:$version_tag"
|
||||
|
||||
# Detect platform architecture
|
||||
ARCH=$(uname -m)
|
||||
if [ -n "$BUILD_PLATFORM" ]; then
|
||||
CLI_ARGS+=("--platform" "$BUILD_PLATFORM")
|
||||
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
|
||||
CLI_ARGS+=("--platform" "linux/arm64")
|
||||
elif [ "$ARCH" = "x86_64" ]; then
|
||||
CLI_ARGS+=("--platform" "linux/amd64")
|
||||
else
|
||||
echo "Unsupported architecture: $ARCH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "PWD: $(pwd)"
|
||||
echo "Containerfile: $TEMP_DIR/Containerfile"
|
||||
set -x
|
||||
|
||||
$CONTAINER_BINARY build \
|
||||
"${CLI_ARGS[@]}" \
|
||||
-t "$image_tag" \
|
||||
-f "$TEMP_DIR/Containerfile" \
|
||||
"$BUILD_CONTEXT_DIR"
|
||||
|
||||
# clean up tmp/configs
|
||||
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
|
||||
set +x
|
||||
|
||||
echo "Success!"
|
|
@ -1,220 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
|
||||
UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
|
||||
VIRTUAL_ENV=${VIRTUAL_ENV:-}
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Define color codes
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
source "$SCRIPT_DIR/common.sh"
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
|
||||
echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
env_name=""
|
||||
normal_deps=""
|
||||
external_provider_deps=""
|
||||
optional_deps=""
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case "$key" in
|
||||
--env-name)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --env-name requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
env_name="$2"
|
||||
shift 2
|
||||
;;
|
||||
--normal-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --normal-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
normal_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--external-provider-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --external-provider-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
external_provider_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--optional-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --optional-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
optional_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Check required arguments
|
||||
if [[ -z "$env_name" || -z "$normal_deps" ]]; then
|
||||
echo "Error: --env-name and --normal-deps are required." >&2
|
||||
usage
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
|
||||
fi
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
ENVNAME=""
|
||||
|
||||
# pre-run checks to make sure we can proceed with the installation
|
||||
pre_run_checks() {
|
||||
local env_name="$1"
|
||||
|
||||
if ! is_command_available uv; then
|
||||
echo "uv is not installed, trying to install it."
|
||||
if ! is_command_available pip; then
|
||||
echo "pip is not installed, cannot automatically install 'uv'."
|
||||
echo "Follow this link to install it:"
|
||||
echo "https://docs.astral.sh/uv/getting-started/installation/"
|
||||
exit 1
|
||||
else
|
||||
pip install uv
|
||||
fi
|
||||
fi
|
||||
|
||||
# checking if an environment with the same name already exists
|
||||
if [ -d "$env_name" ]; then
|
||||
echo "Environment '$env_name' already exists, re-using it."
|
||||
fi
|
||||
}
|
||||
|
||||
run() {
|
||||
# Use only global variables set by flag parser
|
||||
if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then
|
||||
echo "Installing dependencies in system Python environment"
|
||||
export UV_SYSTEM_PYTHON=1
|
||||
elif [ "$VIRTUAL_ENV" == "$env_name" ]; then
|
||||
echo "Virtual environment $env_name is already active"
|
||||
else
|
||||
echo "Using virtual environment $env_name"
|
||||
uv venv "$env_name"
|
||||
source "$env_name/bin/activate"
|
||||
fi
|
||||
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
uv pip install fastapi libcst
|
||||
uv pip install --extra-index-url https://test.pypi.org/simple/ \
|
||||
--index-strategy unsafe-best-match \
|
||||
llama-stack=="$TEST_PYPI_VERSION" \
|
||||
$normal_deps
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
uv pip install $part
|
||||
done
|
||||
fi
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
uv pip install "$part"
|
||||
done
|
||||
fi
|
||||
else
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
# only warn if DIR does not start with "git+"
|
||||
if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then
|
||||
printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR"
|
||||
# editable only if LLAMA_STACK_DIR does not start with "git+"
|
||||
if [[ "$LLAMA_STACK_DIR" != git+* ]]; then
|
||||
EDITABLE="-e"
|
||||
else
|
||||
EDITABLE=""
|
||||
fi
|
||||
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR"
|
||||
else
|
||||
uv pip install --no-cache-dir llama-stack
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
# only warn if DIR does not start with "git+"
|
||||
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
|
||||
printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
|
||||
# editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+"
|
||||
if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
|
||||
EDITABLE="-e"
|
||||
else
|
||||
EDITABLE=""
|
||||
fi
|
||||
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
printf "Installing pip dependencies\n"
|
||||
uv pip install $normal_deps
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "Installing special provider module: $part"
|
||||
uv pip install $part
|
||||
done
|
||||
fi
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "Installing external provider module: $part"
|
||||
uv pip install "$part"
|
||||
echo "Getting provider spec for module: $part and installing dependencies"
|
||||
package_name=$(echo "$part" | sed 's/[<>=!].*//')
|
||||
python3 -c "
|
||||
import importlib
|
||||
import sys
|
||||
try:
|
||||
module = importlib.import_module(f'$package_name.provider')
|
||||
spec = module.get_provider_spec()
|
||||
if hasattr(spec, 'pip_packages') and spec.pip_packages:
|
||||
print('\\n'.join(spec.pip_packages))
|
||||
except Exception as e:
|
||||
print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
|
||||
" | uv pip install -r -
|
||||
done
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
pre_run_checks "$env_name"
|
||||
run
|
|
@ -23,8 +23,8 @@ from llama_stack.apis.scoring import Scoring
|
|||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
||||
from llama_stack.apis.shields import Shield, ShieldInput
|
||||
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
|
||||
from llama_stack.core.access_control.datatypes import AccessRule
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
KVStoreReference,
|
||||
|
@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
|
|||
pass
|
||||
|
||||
|
||||
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
|
||||
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
|
|||
pass
|
||||
|
||||
|
||||
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
|
||||
RoutableObjectWithProvider = Annotated[
|
||||
ModelWithOwner
|
||||
| ShieldWithOwner
|
||||
| VectorDBWithOwner
|
||||
| VectorStoreWithOwner
|
||||
| DatasetWithOwner
|
||||
| ScoringFnWithOwner
|
||||
| BenchmarkWithOwner
|
||||
|
@ -427,7 +427,7 @@ class RegisteredResources(BaseModel):
|
|||
|
||||
models: list[ModelInput] = Field(default_factory=list)
|
||||
shields: list[ShieldInput] = Field(default_factory=list)
|
||||
vector_dbs: list[VectorDBInput] = Field(default_factory=list)
|
||||
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
|
||||
datasets: list[DatasetInput] = Field(default_factory=list)
|
||||
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
|
||||
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
|
||||
|
|
|
@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
|
|||
router_api=Api.tool_runtime,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.vector_dbs,
|
||||
routing_table_api=Api.vector_stores,
|
||||
router_api=Api.vector_io,
|
||||
),
|
||||
]
|
||||
|
|
|
@ -278,7 +278,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
else:
|
||||
prefix = "!" if in_notebook() else ""
|
||||
cprint(
|
||||
f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n",
|
||||
f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n",
|
||||
"yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
|
|
@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
|
|||
from llama_stack.apis.shields import Shields
|
||||
from llama_stack.apis.telemetry import Telemetry
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDBs
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.core.client import get_client_impl
|
||||
from llama_stack.core.datatypes import (
|
||||
|
@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|||
Api.inspect: Inspect,
|
||||
Api.batches: Batches,
|
||||
Api.vector_io: VectorIO,
|
||||
Api.vector_dbs: VectorDBs,
|
||||
Api.vector_stores: VectorStore,
|
||||
Api.models: Models,
|
||||
Api.safety: Safety,
|
||||
Api.shields: Shields,
|
||||
|
|
|
@ -29,7 +29,7 @@ async def get_routing_table_impl(
|
|||
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
|
||||
from ..routing_tables.shields import ShieldsRoutingTable
|
||||
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
|
||||
from ..routing_tables.vector_dbs import VectorDBsRoutingTable
|
||||
from ..routing_tables.vector_stores import VectorStoresRoutingTable
|
||||
|
||||
api_to_tables = {
|
||||
"models": ModelsRoutingTable,
|
||||
|
@ -38,7 +38,7 @@ async def get_routing_table_impl(
|
|||
"scoring_functions": ScoringFunctionsRoutingTable,
|
||||
"benchmarks": BenchmarksRoutingTable,
|
||||
"tool_groups": ToolGroupsRoutingTable,
|
||||
"vector_dbs": VectorDBsRoutingTable,
|
||||
"vector_stores": VectorStoresRoutingTable,
|
||||
}
|
||||
|
||||
if api.value not in api_to_tables:
|
||||
|
|
|
@ -8,16 +8,8 @@ from typing import Any
|
|||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
URL,
|
||||
InterleavedContent,
|
||||
)
|
||||
from llama_stack.apis.tools import (
|
||||
ListToolDefsResponse,
|
||||
RAGDocument,
|
||||
RAGQueryConfig,
|
||||
RAGQueryResult,
|
||||
RAGToolRuntime,
|
||||
ToolRuntime,
|
||||
)
|
||||
from llama_stack.apis.tools import ListToolDefsResponse, ToolRuntime
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
|
||||
|
@ -26,36 +18,6 @@ logger = get_logger(name=__name__, category="core::routers")
|
|||
|
||||
|
||||
class ToolRuntimeRouter(ToolRuntime):
|
||||
class RagToolImpl(RAGToolRuntime):
|
||||
def __init__(
|
||||
self,
|
||||
routing_table: ToolGroupsRoutingTable,
|
||||
) -> None:
|
||||
logger.debug("Initializing ToolRuntimeRouter.RagToolImpl")
|
||||
self.routing_table = routing_table
|
||||
|
||||
async def query(
|
||||
self,
|
||||
content: InterleavedContent,
|
||||
vector_db_ids: list[str],
|
||||
query_config: RAGQueryConfig | None = None,
|
||||
) -> RAGQueryResult:
|
||||
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
|
||||
provider = await self.routing_table.get_provider_impl("knowledge_search")
|
||||
return await provider.query(content, vector_db_ids, query_config)
|
||||
|
||||
async def insert(
|
||||
self,
|
||||
documents: list[RAGDocument],
|
||||
vector_db_id: str,
|
||||
chunk_size_in_tokens: int = 512,
|
||||
) -> None:
|
||||
logger.debug(
|
||||
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
|
||||
)
|
||||
provider = await self.routing_table.get_provider_impl("insert_into_memory")
|
||||
return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
routing_table: ToolGroupsRoutingTable,
|
||||
|
@ -63,11 +25,6 @@ class ToolRuntimeRouter(ToolRuntime):
|
|||
logger.debug("Initializing ToolRuntimeRouter")
|
||||
self.routing_table = routing_table
|
||||
|
||||
# HACK ALERT this should be in sync with "get_all_api_endpoints()"
|
||||
self.rag_tool = self.RagToolImpl(routing_table)
|
||||
for method in ("query", "insert"):
|
||||
setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
|
||||
|
||||
async def initialize(self) -> None:
|
||||
logger.debug("ToolRuntimeRouter.initialize")
|
||||
pass
|
||||
|
|
|
@ -71,25 +71,6 @@ class VectorIORouter(VectorIO):
|
|||
|
||||
raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> None:
|
||||
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
|
||||
await self.routing_table.register_vector_db(
|
||||
vector_db_id,
|
||||
embedding_model,
|
||||
embedding_dimension,
|
||||
provider_id,
|
||||
vector_db_name,
|
||||
provider_vector_db_id,
|
||||
)
|
||||
|
||||
async def insert_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
|
@ -165,22 +146,22 @@ class VectorIORouter(VectorIO):
|
|||
else:
|
||||
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
|
||||
|
||||
vector_db_id = f"vs_{uuid.uuid4()}"
|
||||
registered_vector_db = await self.routing_table.register_vector_db(
|
||||
vector_db_id=vector_db_id,
|
||||
vector_store_id = f"vs_{uuid.uuid4()}"
|
||||
registered_vector_store = await self.routing_table.register_vector_store(
|
||||
vector_store_id=vector_store_id,
|
||||
embedding_model=embedding_model,
|
||||
embedding_dimension=embedding_dimension,
|
||||
provider_id=provider_id,
|
||||
provider_vector_db_id=vector_db_id,
|
||||
vector_db_name=params.name,
|
||||
provider_vector_store_id=vector_store_id,
|
||||
vector_store_name=params.name,
|
||||
)
|
||||
provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
|
||||
provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
|
||||
|
||||
# Update model_extra with registered values so provider uses the already-registered vector_db
|
||||
# Update model_extra with registered values so provider uses the already-registered vector_store
|
||||
if params.model_extra is None:
|
||||
params.model_extra = {}
|
||||
params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
|
||||
params.model_extra["provider_id"] = registered_vector_db.provider_id
|
||||
params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
|
||||
params.model_extra["provider_id"] = registered_vector_store.provider_id
|
||||
if embedding_model is not None:
|
||||
params.model_extra["embedding_model"] = embedding_model
|
||||
if embedding_dimension is not None:
|
||||
|
@ -198,15 +179,15 @@ class VectorIORouter(VectorIO):
|
|||
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
|
||||
# Route to default provider for now - could aggregate from all providers in the future
|
||||
# call retrieve on each vector dbs to get list of vector stores
|
||||
vector_dbs = await self.routing_table.get_all_with_type("vector_db")
|
||||
vector_stores = await self.routing_table.get_all_with_type("vector_store")
|
||||
all_stores = []
|
||||
for vector_db in vector_dbs:
|
||||
for vector_store in vector_stores:
|
||||
try:
|
||||
provider = await self.routing_table.get_provider_impl(vector_db.identifier)
|
||||
vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
|
||||
provider = await self.routing_table.get_provider_impl(vector_store.identifier)
|
||||
vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
|
||||
all_stores.append(vector_store)
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
|
||||
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
|
||||
continue
|
||||
|
||||
# Sort by created_at
|
||||
|
|
|
@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
|
|||
elif api == Api.safety:
|
||||
return await p.register_shield(obj)
|
||||
elif api == Api.vector_io:
|
||||
return await p.register_vector_db(obj)
|
||||
return await p.register_vector_store(obj)
|
||||
elif api == Api.datasetio:
|
||||
return await p.register_dataset(obj)
|
||||
elif api == Api.scoring:
|
||||
|
@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
|
|||
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
|
||||
api = get_impl_api(p)
|
||||
if api == Api.vector_io:
|
||||
return await p.unregister_vector_db(obj.identifier)
|
||||
return await p.unregister_vector_store(obj.identifier)
|
||||
elif api == Api.inference:
|
||||
return await p.unregister_model(obj.identifier)
|
||||
elif api == Api.safety:
|
||||
|
@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
|
|||
elif api == Api.safety:
|
||||
p.shield_store = self
|
||||
elif api == Api.vector_io:
|
||||
p.vector_db_store = self
|
||||
p.vector_store_store = self
|
||||
elif api == Api.datasetio:
|
||||
p.dataset_store = self
|
||||
elif api == Api.scoring:
|
||||
|
@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
|
|||
from .scoring_functions import ScoringFunctionsRoutingTable
|
||||
from .shields import ShieldsRoutingTable
|
||||
from .toolgroups import ToolGroupsRoutingTable
|
||||
from .vector_dbs import VectorDBsRoutingTable
|
||||
from .vector_stores import VectorStoresRoutingTable
|
||||
|
||||
def apiname_object():
|
||||
if isinstance(self, ModelsRoutingTable):
|
||||
return ("Inference", "model")
|
||||
elif isinstance(self, ShieldsRoutingTable):
|
||||
return ("Safety", "shield")
|
||||
elif isinstance(self, VectorDBsRoutingTable):
|
||||
return ("VectorIO", "vector_db")
|
||||
elif isinstance(self, VectorStoresRoutingTable):
|
||||
return ("VectorIO", "vector_store")
|
||||
elif isinstance(self, DatasetsRoutingTable):
|
||||
return ("DatasetIO", "dataset")
|
||||
elif isinstance(self, ScoringFunctionsRoutingTable):
|
||||
|
|
|
@ -6,15 +6,12 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
|
||||
# Removed VectorDBs import to avoid exposing public API
|
||||
# Removed VectorStores import to avoid exposing public API
|
||||
from llama_stack.apis.vector_io.vector_io import (
|
||||
OpenAICreateVectorStoreRequestWithExtraBody,
|
||||
SearchRankingOptions,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreDeleteResponse,
|
||||
|
@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
|
|||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
from llama_stack.core.datatypes import (
|
||||
VectorDBWithOwner,
|
||||
VectorStoreWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model
|
|||
logger = get_logger(name=__name__, category="core::routing_tables")
|
||||
|
||||
|
||||
class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
||||
"""Internal routing table for vector_db operations.
|
||||
class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
||||
"""Internal routing table for vector_store operations.
|
||||
|
||||
Does not inherit from VectorDBs to avoid exposing public API endpoints.
|
||||
Does not inherit from VectorStores to avoid exposing public API endpoints.
|
||||
Only provides internal routing functionality for VectorIORouter.
|
||||
"""
|
||||
|
||||
# Internal methods only - no public API exposure
|
||||
|
||||
async def register_vector_db(
|
||||
async def register_vector_store(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
vector_store_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_store_id: str | None = None,
|
||||
vector_store_name: str | None = None,
|
||||
) -> Any:
|
||||
if provider_id is None:
|
||||
if len(self.impls_by_provider_id) > 0:
|
||||
|
@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
raise ModelNotFoundError(embedding_model)
|
||||
if model.model_type != ModelType.embedding:
|
||||
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
|
||||
if "embedding_dimension" not in model.metadata:
|
||||
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
|
||||
|
||||
try:
|
||||
provider = self.impls_by_provider_id[provider_id]
|
||||
except KeyError:
|
||||
available_providers = list(self.impls_by_provider_id.keys())
|
||||
raise ValueError(
|
||||
f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
|
||||
) from None
|
||||
logger.warning(
|
||||
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
|
||||
)
|
||||
request = OpenAICreateVectorStoreRequestWithExtraBody(
|
||||
name=vector_db_name or vector_db_id,
|
||||
embedding_model=embedding_model,
|
||||
embedding_dimension=model.metadata["embedding_dimension"],
|
||||
vector_store = VectorStoreWithOwner(
|
||||
identifier=vector_store_id,
|
||||
type=ResourceType.vector_store.value,
|
||||
provider_id=provider_id,
|
||||
provider_vector_db_id=provider_vector_db_id,
|
||||
provider_resource_id=provider_vector_store_id,
|
||||
embedding_model=embedding_model,
|
||||
embedding_dimension=embedding_dimension,
|
||||
vector_store_name=vector_store_name,
|
||||
)
|
||||
vector_store = await provider.openai_create_vector_store(request)
|
||||
|
||||
vector_store_id = vector_store.id
|
||||
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
|
||||
logger.warning(
|
||||
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
|
||||
)
|
||||
|
||||
vector_db_data = {
|
||||
"identifier": vector_store_id,
|
||||
"type": ResourceType.vector_db.value,
|
||||
"provider_id": provider_id,
|
||||
"provider_resource_id": actual_provider_vector_db_id,
|
||||
"embedding_model": embedding_model,
|
||||
"embedding_dimension": model.metadata["embedding_dimension"],
|
||||
"vector_db_name": vector_store.name,
|
||||
}
|
||||
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
|
||||
await self.register_object(vector_db)
|
||||
return vector_db
|
||||
await self.register_object(vector_store)
|
||||
return vector_store
|
||||
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store(vector_store_id)
|
||||
|
||||
|
@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_update_vector_store(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
result = await provider.openai_delete_vector_store(vector_store_id)
|
||||
await self.unregister_vector_db(vector_store_id)
|
||||
await self.unregister_vector_store(vector_store_id)
|
||||
return result
|
||||
|
||||
async def unregister_vector_db(self, vector_store_id: str) -> None:
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
"""Remove the vector store from the routing table registry."""
|
||||
try:
|
||||
vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
|
||||
if vector_db_obj:
|
||||
await self.unregister_object(vector_db_obj)
|
||||
vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
|
||||
if vector_store_obj:
|
||||
await self.unregister_object(vector_store_obj)
|
||||
except Exception as e:
|
||||
# Log the error but don't fail the operation
|
||||
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
|
||||
|
@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
rewrite_query: bool | None = False,
|
||||
search_mode: str | None = "vector",
|
||||
) -> VectorStoreSearchResponsePage:
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_search_vector_store(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
||||
) -> VectorStoreFileObject:
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_attach_file_to_vector_store(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
before: str | None = None,
|
||||
filter: VectorStoreFileStatus | None = None,
|
||||
) -> list[VectorStoreFileObject]:
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_list_files_in_vector_store(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileObject:
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store_file(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileContentsResponse:
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store_file_contents(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
file_id: str,
|
||||
attributes: dict[str, Any],
|
||||
) -> VectorStoreFileObject:
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_update_vector_store_file(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileDeleteResponse:
|
||||
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_delete_vector_store_file(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: Any | None = None,
|
||||
):
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_create_vector_store_file_batch(
|
||||
vector_store_id=vector_store_id,
|
||||
|
@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
):
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
||||
|
@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
):
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_list_files_in_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
||||
|
@ -315,7 +284,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
|
|||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
):
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
await self.assert_action_allowed("update", "vector_store", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_cancel_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
|
@ -13,7 +13,6 @@ from aiohttp import hdrs
|
|||
from starlette.routing import Route
|
||||
|
||||
from llama_stack.apis.datatypes import Api, ExternalApiSpec
|
||||
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
|
||||
from llama_stack.core.resolver import api_protocol_map
|
||||
from llama_stack.schema_utils import WebMethod
|
||||
|
||||
|
@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl]
|
|||
RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]
|
||||
|
||||
|
||||
def toolgroup_protocol_map():
|
||||
return {
|
||||
SpecialToolGroup.rag_tool: RAGToolRuntime,
|
||||
}
|
||||
|
||||
|
||||
def get_all_api_routes(
|
||||
external_apis: dict[Api, ExternalApiSpec] | None = None,
|
||||
) -> dict[Api, list[tuple[Route, WebMethod]]]:
|
||||
apis = {}
|
||||
|
||||
protocols = api_protocol_map(external_apis)
|
||||
toolgroup_protocols = toolgroup_protocol_map()
|
||||
for api, protocol in protocols.items():
|
||||
routes = []
|
||||
protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
|
||||
|
||||
# HACK ALERT
|
||||
if api == Api.tool_runtime:
|
||||
for tool_group in SpecialToolGroup:
|
||||
sub_protocol = toolgroup_protocols[tool_group]
|
||||
sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
|
||||
for name, method in sub_protocol_methods:
|
||||
if not hasattr(method, "__webmethod__"):
|
||||
continue
|
||||
protocol_methods.append((f"{tool_group.value}.{name}", method))
|
||||
|
||||
for name, method in protocol_methods:
|
||||
# Get all webmethods for this method (supports multiple decorators)
|
||||
webmethods = getattr(method, "__webmethods__", [])
|
||||
|
|
|
@ -32,7 +32,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
|
|||
from llama_stack.apis.shields import Shields
|
||||
from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
|
||||
from llama_stack.apis.telemetry import Telemetry
|
||||
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
||||
from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
|
||||
|
@ -80,7 +80,6 @@ class LlamaStack(
|
|||
Inspect,
|
||||
ToolGroups,
|
||||
ToolRuntime,
|
||||
RAGToolRuntime,
|
||||
Files,
|
||||
Prompts,
|
||||
Conversations,
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.htmll).
|
||||
|
||||
```
|
||||
llama stack build --distro together --image-type venv
|
||||
llama stack list-deps together | xargs -L1 uv pip install
|
||||
|
||||
llama stack run together
|
||||
```
|
||||
|
|
|
@ -32,7 +32,7 @@ def tool_chat_page():
|
|||
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
|
||||
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
|
||||
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
|
||||
selected_vector_dbs = []
|
||||
selected_vector_stores = []
|
||||
|
||||
def reset_agent():
|
||||
st.session_state.clear()
|
||||
|
@ -55,13 +55,13 @@ def tool_chat_page():
|
|||
)
|
||||
|
||||
if "builtin::rag" in toolgroup_selection:
|
||||
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
|
||||
if not vector_dbs:
|
||||
vector_stores = llama_stack_api.client.vector_stores.list() or []
|
||||
if not vector_stores:
|
||||
st.info("No vector databases available for selection.")
|
||||
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
|
||||
selected_vector_dbs = st.multiselect(
|
||||
vector_stores = [vector_store.identifier for vector_store in vector_stores]
|
||||
selected_vector_stores = st.multiselect(
|
||||
label="Select Document Collections to use in RAG queries",
|
||||
options=vector_dbs,
|
||||
options=vector_stores,
|
||||
on_change=reset_agent,
|
||||
)
|
||||
|
||||
|
@ -119,7 +119,7 @@ def tool_chat_page():
|
|||
tool_dict = dict(
|
||||
name="builtin::rag",
|
||||
args={
|
||||
"vector_db_ids": list(selected_vector_dbs),
|
||||
"vector_store_ids": list(selected_vector_stores),
|
||||
},
|
||||
)
|
||||
toolgroup_selection[i] = tool_dict
|
||||
|
|
|
@ -48,7 +48,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
- provider_type: inline::reference
|
||||
|
|
|
@ -216,8 +216,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
|
@ -263,8 +261,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -26,7 +26,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
|
|
|
@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
],
|
||||
}
|
||||
name = "dell"
|
||||
|
@ -98,10 +97,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="brave-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
|
|
|
@ -157,7 +157,7 @@ docker run \
|
|||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --distro {{ name }} --image-type conda
|
||||
llama stack list-deps {{ name }} | xargs -L1 pip install
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
DEH_URL=$DEH_URL \
|
||||
CHROMA_URL=$CHROMA_URL \
|
||||
|
|
|
@ -87,8 +87,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
|
@ -133,8 +131,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -83,8 +83,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
|
@ -124,8 +122,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -24,7 +24,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
|
|
|
@ -47,7 +47,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
|
@ -92,10 +91,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
|
|
|
@ -98,8 +98,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
storage:
|
||||
|
@ -146,8 +144,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -88,8 +88,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
storage:
|
||||
|
@ -131,8 +129,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -19,8 +19,7 @@ distribution_spec:
|
|||
- provider_type: remote::nvidia
|
||||
scoring:
|
||||
- provider_type: inline::basic
|
||||
tool_runtime:
|
||||
- provider_type: inline::rag-runtime
|
||||
tool_runtime: []
|
||||
files:
|
||||
- provider_type: inline::localfs
|
||||
image_type: venv
|
||||
|
|
|
@ -28,7 +28,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
BuildProvider(provider_type="remote::nvidia"),
|
||||
],
|
||||
"scoring": [BuildProvider(provider_type="inline::basic")],
|
||||
"tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")],
|
||||
"tool_runtime": [],
|
||||
"files": [BuildProvider(provider_type="inline::localfs")],
|
||||
}
|
||||
|
||||
|
@ -66,12 +66,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
|||
provider_id="nvidia",
|
||||
)
|
||||
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
default_tool_groups: list[ToolGroupInput] = []
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
|
|
@ -80,9 +80,7 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
tool_runtime:
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
tool_runtime: []
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
|
@ -128,9 +126,7 @@ registered_resources:
|
|||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
tool_groups: []
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -69,9 +69,7 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
tool_runtime:
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
tool_runtime: []
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
|
@ -107,9 +105,7 @@ registered_resources:
|
|||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
tool_groups: []
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -28,7 +28,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
|
|
|
@ -118,7 +118,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
|
@ -154,10 +153,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
models, _ = get_model_registry(available_models)
|
||||
|
|
|
@ -118,8 +118,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
storage:
|
||||
|
@ -244,8 +242,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -14,7 +14,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
|
|
|
@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
|
@ -66,10 +65,6 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
default_models = [
|
||||
|
|
|
@ -54,8 +54,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
storage:
|
||||
|
@ -107,8 +105,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -49,7 +49,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
- provider_type: inline::reference
|
||||
|
|
|
@ -219,8 +219,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
|
@ -266,8 +264,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -49,7 +49,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
- provider_type: inline::reference
|
||||
|
|
|
@ -216,8 +216,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
|
@ -263,8 +261,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -140,7 +140,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
"batches": [
|
||||
|
@ -162,10 +161,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
default_shields = [
|
||||
# if the
|
||||
|
|
|
@ -23,7 +23,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
files:
|
||||
- provider_type: inline::localfs
|
||||
|
|
|
@ -83,8 +83,6 @@ providers:
|
|||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
files:
|
||||
|
@ -125,8 +123,6 @@ registered_resources:
|
|||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
|
|
|
@ -33,7 +33,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
"files": [BuildProvider(provider_type="inline::localfs")],
|
||||
|
@ -50,10 +49,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
files_provider = Provider(
|
||||
|
|
|
@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
|
|||
from llama_stack.apis.scoring_functions import ScoringFn
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.tools import ToolGroup
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
|
@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
|
|||
async def unregister_shield(self, identifier: str) -> None: ...
|
||||
|
||||
|
||||
class VectorDBsProtocolPrivate(Protocol):
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None: ...
|
||||
class VectorStoresProtocolPrivate(Protocol):
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None: ...
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
|
||||
|
||||
|
||||
class DatasetsProtocolPrivate(Protocol):
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from opentelemetry.sdk.trace import ReadableSpan
|
||||
from opentelemetry.sdk.trace.export import SpanProcessor
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name="console_span_processor", category="telemetry")
|
||||
|
||||
|
||||
class ConsoleSpanProcessor(SpanProcessor):
|
||||
def __init__(self, print_attributes: bool = False):
|
||||
self.print_attributes = print_attributes
|
||||
|
||||
def on_start(self, span: ReadableSpan, parent_context=None) -> None:
|
||||
if span.attributes and span.attributes.get("__autotraced__"):
|
||||
return
|
||||
|
||||
timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]")
|
||||
|
||||
def on_end(self, span: ReadableSpan) -> None:
|
||||
timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]"
|
||||
if span.status.status_code == StatusCode.ERROR:
|
||||
span_context += " [bold red][ERROR][/bold red]"
|
||||
elif span.status.status_code != StatusCode.UNSET:
|
||||
span_context += f" [{span.status.status_code}]"
|
||||
duration_ms = (span.end_time - span.start_time) / 1e6
|
||||
span_context += f" ({duration_ms:.2f}ms)"
|
||||
logger.info(span_context)
|
||||
|
||||
if self.print_attributes and span.attributes:
|
||||
for key, value in span.attributes.items():
|
||||
if key.startswith("__"):
|
||||
continue
|
||||
str_value = str(value)
|
||||
if len(str_value) > 1000:
|
||||
str_value = str_value[:997] + "..."
|
||||
logger.info(f" [dim]{key}[/dim]: {str_value}")
|
||||
|
||||
for event in span.events:
|
||||
event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
severity = event.attributes.get("severity", "info")
|
||||
message = event.attributes.get("message", event.name)
|
||||
if isinstance(message, dict) or isinstance(message, list):
|
||||
message = json.dumps(message, indent=2)
|
||||
severity_color = {
|
||||
"error": "red",
|
||||
"warn": "yellow",
|
||||
"info": "white",
|
||||
"debug": "dim",
|
||||
}.get(severity, "white")
|
||||
logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}")
|
||||
if event.attributes:
|
||||
for key, value in event.attributes.items():
|
||||
if key.startswith("__") or key in ["message", "severity"]:
|
||||
continue
|
||||
logger.info(f"[dim]{key}[/dim]: {value}")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Shutdown the processor."""
|
||||
pass
|
||||
|
||||
def force_flush(self, timeout_millis: float | None = None) -> bool:
|
||||
"""Force flush any pending spans."""
|
||||
return True
|
|
@ -79,8 +79,10 @@ class TelemetryAdapter(Telemetry):
|
|||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
metric_provider = MeterProvider(metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
self.is_otel_endpoint_set = True
|
||||
else:
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
|
||||
self.is_otel_endpoint_set = False
|
||||
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
self._lock = _global_lock
|
||||
|
@ -89,7 +91,8 @@ class TelemetryAdapter(Telemetry):
|
|||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
trace.get_tracer_provider().force_flush()
|
||||
if self.is_otel_endpoint_set:
|
||||
trace.get_tracer_provider().force_flush()
|
||||
|
||||
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
|
||||
if isinstance(event, UnstructuredLogEvent):
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
|
@ -1,19 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
from .config import RagToolRuntimeConfig
|
||||
|
||||
|
||||
async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]):
|
||||
from .memory import MemoryToolRuntimeImpl
|
||||
|
||||
impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files])
|
||||
await impl.initialize()
|
||||
return impl
|
|
@ -1,15 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class RagToolRuntimeConfig(BaseModel):
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {}
|
|
@ -1,77 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from jinja2 import Template
|
||||
|
||||
from llama_stack.apis.common.content_types import InterleavedContent
|
||||
from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
|
||||
from llama_stack.apis.tools.rag_tool import (
|
||||
DefaultRAGQueryGeneratorConfig,
|
||||
LLMRAGQueryGeneratorConfig,
|
||||
RAGQueryGenerator,
|
||||
RAGQueryGeneratorConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
interleaved_content_as_str,
|
||||
)
|
||||
|
||||
|
||||
async def generate_rag_query(
|
||||
config: RAGQueryGeneratorConfig,
|
||||
content: InterleavedContent,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Generates a query that will be used for
|
||||
retrieving relevant information from the memory bank.
|
||||
"""
|
||||
if config.type == RAGQueryGenerator.default.value:
|
||||
query = await default_rag_query_generator(config, content, **kwargs)
|
||||
elif config.type == RAGQueryGenerator.llm.value:
|
||||
query = await llm_rag_query_generator(config, content, **kwargs)
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported memory query generator {config.type}")
|
||||
return query
|
||||
|
||||
|
||||
async def default_rag_query_generator(
|
||||
config: DefaultRAGQueryGeneratorConfig,
|
||||
content: InterleavedContent,
|
||||
**kwargs,
|
||||
):
|
||||
return interleaved_content_as_str(content, sep=config.separator)
|
||||
|
||||
|
||||
async def llm_rag_query_generator(
|
||||
config: LLMRAGQueryGeneratorConfig,
|
||||
content: InterleavedContent,
|
||||
**kwargs,
|
||||
):
|
||||
assert "inference_api" in kwargs, "LLMRAGQueryGenerator needs inference_api"
|
||||
inference_api = kwargs["inference_api"]
|
||||
|
||||
messages = []
|
||||
if isinstance(content, list):
|
||||
messages = [interleaved_content_as_str(m) for m in content]
|
||||
else:
|
||||
messages = [interleaved_content_as_str(content)]
|
||||
|
||||
template = Template(config.template)
|
||||
rendered_content: str = template.render({"messages": messages})
|
||||
|
||||
model = config.model
|
||||
message = OpenAIUserMessageParam(content=rendered_content)
|
||||
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||
model=model,
|
||||
messages=[message],
|
||||
stream=False,
|
||||
)
|
||||
response = await inference_api.openai_chat_completion(params)
|
||||
|
||||
query = response.choices[0].message.content
|
||||
|
||||
return query
|
|
@ -1,332 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import mimetypes
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from fastapi import UploadFile
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
URL,
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.files import Files, OpenAIFilePurpose
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.tools import (
|
||||
ListToolDefsResponse,
|
||||
RAGDocument,
|
||||
RAGQueryConfig,
|
||||
RAGQueryResult,
|
||||
RAGToolRuntime,
|
||||
ToolDef,
|
||||
ToolGroup,
|
||||
ToolInvocationResult,
|
||||
ToolRuntime,
|
||||
)
|
||||
from llama_stack.apis.vector_io import (
|
||||
QueryChunksResponse,
|
||||
VectorIO,
|
||||
VectorStoreChunkingStrategyStatic,
|
||||
VectorStoreChunkingStrategyStaticConfig,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
||||
|
||||
from .config import RagToolRuntimeConfig
|
||||
from .context_retriever import generate_rag_query
|
||||
|
||||
log = get_logger(name=__name__, category="tool_runtime")
|
||||
|
||||
|
||||
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
|
||||
"""Get raw binary data and mime type from a RAGDocument for file upload."""
|
||||
if isinstance(doc.content, URL):
|
||||
if doc.content.uri.startswith("data:"):
|
||||
parts = parse_data_url(doc.content.uri)
|
||||
mime_type = parts["mimetype"]
|
||||
data = parts["data"]
|
||||
|
||||
if parts["is_base64"]:
|
||||
file_data = base64.b64decode(data)
|
||||
else:
|
||||
file_data = data.encode("utf-8")
|
||||
|
||||
return file_data, mime_type
|
||||
else:
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(doc.content.uri)
|
||||
r.raise_for_status()
|
||||
mime_type = r.headers.get("content-type", "application/octet-stream")
|
||||
return r.content, mime_type
|
||||
else:
|
||||
if isinstance(doc.content, str):
|
||||
content_str = doc.content
|
||||
else:
|
||||
content_str = interleaved_content_as_str(doc.content)
|
||||
|
||||
if content_str.startswith("data:"):
|
||||
parts = parse_data_url(content_str)
|
||||
mime_type = parts["mimetype"]
|
||||
data = parts["data"]
|
||||
|
||||
if parts["is_base64"]:
|
||||
file_data = base64.b64decode(data)
|
||||
else:
|
||||
file_data = data.encode("utf-8")
|
||||
|
||||
return file_data, mime_type
|
||||
else:
|
||||
return content_str.encode("utf-8"), "text/plain"
|
||||
|
||||
|
||||
class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
||||
def __init__(
|
||||
self,
|
||||
config: RagToolRuntimeConfig,
|
||||
vector_io_api: VectorIO,
|
||||
inference_api: Inference,
|
||||
files_api: Files,
|
||||
):
|
||||
self.config = config
|
||||
self.vector_io_api = vector_io_api
|
||||
self.inference_api = inference_api
|
||||
self.files_api = files_api
|
||||
|
||||
async def initialize(self):
|
||||
pass
|
||||
|
||||
async def shutdown(self):
|
||||
pass
|
||||
|
||||
async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
|
||||
pass
|
||||
|
||||
async def unregister_toolgroup(self, toolgroup_id: str) -> None:
|
||||
return
|
||||
|
||||
async def insert(
|
||||
self,
|
||||
documents: list[RAGDocument],
|
||||
vector_db_id: str,
|
||||
chunk_size_in_tokens: int = 512,
|
||||
) -> None:
|
||||
if not documents:
|
||||
return
|
||||
|
||||
for doc in documents:
|
||||
try:
|
||||
try:
|
||||
file_data, mime_type = await raw_data_from_doc(doc)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to extract content from document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
file_extension = mimetypes.guess_extension(mime_type) or ".txt"
|
||||
filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
|
||||
|
||||
file_obj = io.BytesIO(file_data)
|
||||
file_obj.name = filename
|
||||
|
||||
upload_file = UploadFile(file=file_obj, filename=filename)
|
||||
|
||||
try:
|
||||
created_file = await self.files_api.openai_upload_file(
|
||||
file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to upload file for document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
chunking_strategy = VectorStoreChunkingStrategyStatic(
|
||||
static=VectorStoreChunkingStrategyStaticConfig(
|
||||
max_chunk_size_tokens=chunk_size_in_tokens,
|
||||
chunk_overlap_tokens=chunk_size_in_tokens // 4,
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
await self.vector_io_api.openai_attach_file_to_vector_store(
|
||||
vector_store_id=vector_db_id,
|
||||
file_id=created_file.id,
|
||||
attributes=doc.metadata,
|
||||
chunking_strategy=chunking_strategy,
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"Unexpected error processing document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
async def query(
|
||||
self,
|
||||
content: InterleavedContent,
|
||||
vector_db_ids: list[str],
|
||||
query_config: RAGQueryConfig | None = None,
|
||||
) -> RAGQueryResult:
|
||||
if not vector_db_ids:
|
||||
raise ValueError(
|
||||
"No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
|
||||
)
|
||||
|
||||
query_config = query_config or RAGQueryConfig()
|
||||
query = await generate_rag_query(
|
||||
query_config.query_generator_config,
|
||||
content,
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
tasks = [
|
||||
self.vector_io_api.query_chunks(
|
||||
vector_db_id=vector_db_id,
|
||||
query=query,
|
||||
params={
|
||||
"mode": query_config.mode,
|
||||
"max_chunks": query_config.max_chunks,
|
||||
"score_threshold": 0.0,
|
||||
"ranker": query_config.ranker,
|
||||
},
|
||||
)
|
||||
for vector_db_id in vector_db_ids
|
||||
]
|
||||
results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
|
||||
|
||||
chunks = []
|
||||
scores = []
|
||||
|
||||
for vector_db_id, result in zip(vector_db_ids, results, strict=False):
|
||||
for chunk, score in zip(result.chunks, result.scores, strict=False):
|
||||
if not hasattr(chunk, "metadata") or chunk.metadata is None:
|
||||
chunk.metadata = {}
|
||||
chunk.metadata["vector_db_id"] = vector_db_id
|
||||
|
||||
chunks.append(chunk)
|
||||
scores.append(score)
|
||||
|
||||
if not chunks:
|
||||
return RAGQueryResult(content=None)
|
||||
|
||||
# sort by score
|
||||
chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False) # type: ignore
|
||||
chunks = chunks[: query_config.max_chunks]
|
||||
|
||||
tokens = 0
|
||||
picked: list[InterleavedContentItem] = [
|
||||
TextContentItem(
|
||||
text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
|
||||
)
|
||||
]
|
||||
for i, chunk in enumerate(chunks):
|
||||
metadata = chunk.metadata
|
||||
tokens += metadata.get("token_count", 0)
|
||||
tokens += metadata.get("metadata_token_count", 0)
|
||||
|
||||
if tokens > query_config.max_tokens_in_context:
|
||||
log.error(
|
||||
f"Using {len(picked)} chunks; reached max tokens in context: {tokens}",
|
||||
)
|
||||
break
|
||||
|
||||
# Add useful keys from chunk_metadata to metadata and remove some from metadata
|
||||
chunk_metadata_keys_to_include_from_context = [
|
||||
"chunk_id",
|
||||
"document_id",
|
||||
"source",
|
||||
]
|
||||
metadata_keys_to_exclude_from_context = [
|
||||
"token_count",
|
||||
"metadata_token_count",
|
||||
"vector_db_id",
|
||||
]
|
||||
metadata_for_context = {}
|
||||
for k in chunk_metadata_keys_to_include_from_context:
|
||||
metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
|
||||
for k in metadata:
|
||||
if k not in metadata_keys_to_exclude_from_context:
|
||||
metadata_for_context[k] = metadata[k]
|
||||
|
||||
text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
|
||||
picked.append(TextContentItem(text=text_content))
|
||||
|
||||
picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
|
||||
picked.append(
|
||||
TextContentItem(
|
||||
text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n',
|
||||
)
|
||||
)
|
||||
|
||||
return RAGQueryResult(
|
||||
content=picked,
|
||||
metadata={
|
||||
"document_ids": [c.document_id for c in chunks[: len(picked)]],
|
||||
"chunks": [c.content for c in chunks[: len(picked)]],
|
||||
"scores": scores[: len(picked)],
|
||||
"vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
|
||||
},
|
||||
)
|
||||
|
||||
async def list_runtime_tools(
|
||||
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
|
||||
) -> ListToolDefsResponse:
|
||||
# Parameters are not listed since these methods are not yet invoked automatically
|
||||
# by the LLM. The method is only implemented so things like /tools can list without
|
||||
# encountering fatals.
|
||||
return ListToolDefsResponse(
|
||||
data=[
|
||||
ToolDef(
|
||||
name="insert_into_memory",
|
||||
description="Insert documents into memory",
|
||||
),
|
||||
ToolDef(
|
||||
name="knowledge_search",
|
||||
description="Search for information in a database.",
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The query to search for. Can be a natural language sentence or keywords.",
|
||||
}
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
|
||||
vector_db_ids = kwargs.get("vector_db_ids", [])
|
||||
query_config = kwargs.get("query_config")
|
||||
if query_config:
|
||||
query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
|
||||
else:
|
||||
query_config = RAGQueryConfig()
|
||||
|
||||
query = kwargs["query"]
|
||||
result = await self.query(
|
||||
content=query,
|
||||
vector_db_ids=vector_db_ids,
|
||||
query_config=query_config,
|
||||
)
|
||||
|
||||
return ToolInvocationResult(
|
||||
content=result.content or [],
|
||||
metadata={
|
||||
**(result.metadata or {}),
|
||||
"citation_files": getattr(result, "citation_files", None),
|
||||
},
|
||||
)
|
|
@ -17,21 +17,21 @@ from numpy.typing import NDArray
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
||||
|
||||
from .config import FaissVectorIOConfig
|
||||
|
||||
logger = get_logger(name=__name__, category="vector_io")
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
|
||||
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
|
||||
|
@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex):
|
|||
)
|
||||
|
||||
|
||||
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
|
||||
super().__init__(files_api=files_api, kvstore=None)
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
self.cache: dict[str, VectorDBWithIndex] = {}
|
||||
self.cache: dict[str, VectorStoreWithIndex] = {}
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||
# Load existing banks from kvstore
|
||||
start_key = VECTOR_DBS_PREFIX
|
||||
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
|
||||
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||
|
||||
for vector_db_data in stored_vector_dbs:
|
||||
vector_db = VectorDB.model_validate_json(vector_db_data)
|
||||
index = VectorDBWithIndex(
|
||||
vector_db,
|
||||
await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
|
||||
for vector_store_data in stored_vector_stores:
|
||||
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store,
|
||||
await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
|
||||
self.inference_api,
|
||||
)
|
||||
self.cache[vector_db.identifier] = index
|
||||
self.cache[vector_store.identifier] = index
|
||||
|
||||
# Load existing OpenAI vector stores into the in-memory cache
|
||||
await self.initialize_openai_vector_stores()
|
||||
|
@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
|
|||
except Exception as e:
|
||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
assert self.kvstore is not None
|
||||
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||
|
||||
# Store in cache
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
|
||||
self.cache[vector_store.identifier] = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
|
||||
async def list_vector_dbs(self) -> list[VectorDB]:
|
||||
return [i.vector_db for i in self.cache.values()]
|
||||
async def list_vector_stores(self) -> list[VectorStore]:
|
||||
return [i.vector_store for i in self.cache.values()]
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
assert self.kvstore is not None
|
||||
|
||||
if vector_db_id not in self.cache:
|
||||
logger.warning(f"Vector DB {vector_db_id} not found")
|
||||
if vector_store_id not in self.cache:
|
||||
return
|
||||
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = self.cache.get(vector_db_id)
|
||||
|
|
|
@ -17,10 +17,10 @@ from numpy.typing import NDArray
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
|
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|||
RERANKER_TYPE_RRF,
|
||||
ChunkForDeletion,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
VectorStoreWithIndex,
|
||||
)
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
|
||||
|
||||
|
@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid"
|
|||
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
|
||||
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
|
||||
|
@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
await asyncio.to_thread(_delete_chunks)
|
||||
|
||||
|
||||
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
"""
|
||||
A VectorIO implementation using SQLite + sqlite_vec.
|
||||
This class handles vector database registration (with metadata stored in a table named `vector_dbs`)
|
||||
and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
|
||||
This class handles vector database registration (with metadata stored in a table named `vector_stores`)
|
||||
and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
|
||||
"""
|
||||
|
||||
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
|
||||
super().__init__(files_api=files_api, kvstore=None)
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
self.cache: dict[str, VectorDBWithIndex] = {}
|
||||
self.vector_db_store = None
|
||||
self.cache: dict[str, VectorStoreWithIndex] = {}
|
||||
self.vector_store_table = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||
|
||||
start_key = VECTOR_DBS_PREFIX
|
||||
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
|
||||
for db_json in stored_vector_dbs:
|
||||
vector_db = VectorDB.model_validate_json(db_json)
|
||||
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||
for db_json in stored_vector_stores:
|
||||
vector_store = VectorStore.model_validate_json(db_json)
|
||||
index = await SQLiteVecIndex.create(
|
||||
vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
|
||||
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
|
||||
)
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
|
||||
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
||||
|
||||
# Load existing OpenAI vector stores into the in-memory cache
|
||||
await self.initialize_openai_vector_stores()
|
||||
|
@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
|
|||
# Clean up mixin resources (file batch tasks)
|
||||
await super().shutdown()
|
||||
|
||||
async def list_vector_dbs(self) -> list[VectorDB]:
|
||||
return [v.vector_db for v in self.cache.values()]
|
||||
async def list_vector_stores(self) -> list[VectorStore]:
|
||||
return [v.vector_store for v in self.cache.values()]
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
index = await SQLiteVecIndex.create(
|
||||
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
|
||||
)
|
||||
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
||||
if vector_store_id in self.cache:
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
if self.vector_db_store is None:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
if self.vector_store_table is None:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
vector_db = self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
|
||||
if not vector_store:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=SQLiteVecIndex(
|
||||
dimension=vector_db.embedding_dimension,
|
||||
dimension=vector_store.embedding_dimension,
|
||||
db_path=self.config.db_path,
|
||||
bank_id=vector_db.identifier,
|
||||
bank_id=vector_store.identifier,
|
||||
kvstore=self.kvstore,
|
||||
),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[vector_db_id] = index
|
||||
self.cache[vector_store_id] = index
|
||||
return index
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
if vector_db_id not in self.cache:
|
||||
logger.warning(f"Vector DB {vector_db_id} not found")
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
if vector_store_id not in self.cache:
|
||||
return
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
# The VectorDBWithIndex helper is expected to compute embeddings via the inference_api
|
||||
# The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
|
||||
# and then call our index's add_chunks.
|
||||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_chunks(
|
||||
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
"""Delete chunks from a sqlite_vec index."""
|
||||
index = await self._get_and_cache_vector_db_index(store_id)
|
||||
index = await self._get_and_cache_vector_store_index(store_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(store_id)
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
# CrossEncoder depends on torchao.quantization
|
||||
pip_packages=[
|
||||
"torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
|
||||
"numpy tqdm transformers",
|
||||
"sentence-transformers --no-deps",
|
||||
# required by some SentenceTransformers architectures for tensor rearrange/merge ops
|
||||
"einops",
|
||||
|
|
|
@ -7,33 +7,13 @@
|
|||
|
||||
from llama_stack.providers.datatypes import (
|
||||
Api,
|
||||
InlineProviderSpec,
|
||||
ProviderSpec,
|
||||
RemoteProviderSpec,
|
||||
)
|
||||
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
|
||||
|
||||
|
||||
def available_providers() -> list[ProviderSpec]:
|
||||
return [
|
||||
InlineProviderSpec(
|
||||
api=Api.tool_runtime,
|
||||
provider_type="inline::rag-runtime",
|
||||
pip_packages=DEFAULT_VECTOR_IO_DEPS
|
||||
+ [
|
||||
"tqdm",
|
||||
"numpy",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"nltk",
|
||||
"sentencepiece",
|
||||
"transformers",
|
||||
],
|
||||
module="llama_stack.providers.inline.tool_runtime.rag",
|
||||
config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
|
||||
api_dependencies=[Api.vector_io, Api.inference, Api.files],
|
||||
description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
|
||||
),
|
||||
RemoteProviderSpec(
|
||||
api=Api.tool_runtime,
|
||||
adapter_type="brave-search",
|
||||
|
|
|
@ -119,7 +119,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i
|
|||
#### Empirical Example
|
||||
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
|
||||
in batches of 100 into both Faiss and sqlite-vec.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
|
|
|
@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
|
|||
Build the NVIDIA environment:
|
||||
|
||||
```bash
|
||||
llama stack build --distro nvidia --image-type venv
|
||||
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
```
|
||||
|
||||
### Basic Usage using the LlamaStack Python Client
|
||||
|
|
|
@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM.
|
|||
Build the NVIDIA environment:
|
||||
|
||||
```bash
|
||||
llama stack build --distro nvidia --image-type venv
|
||||
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
```
|
||||
|
||||
### Basic Usage using the LlamaStack Python Client
|
||||
|
|
|
@ -10,7 +10,7 @@ from .config import NVIDIAConfig
|
|||
|
||||
|
||||
async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:
|
||||
# import dynamically so `llama stack build` does not fail due to missing dependencies
|
||||
# import dynamically so `llama stack list-deps` does not fail due to missing dependencies
|
||||
from .nvidia import NVIDIAInferenceAdapter
|
||||
|
||||
if not isinstance(config, NVIDIAConfig):
|
||||
|
|
|
@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
|
|||
Build the NVIDIA environment:
|
||||
|
||||
```bash
|
||||
llama stack build --distro nvidia --image-type venv
|
||||
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
```
|
||||
|
||||
### Basic Usage using the LlamaStack Python Client
|
||||
|
|
|
@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
|
|||
Build the NVIDIA environment:
|
||||
|
||||
```bash
|
||||
llama stack build --distro nvidia --image-type venv
|
||||
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
```
|
||||
|
||||
### Basic Usage using the LlamaStack Python Client
|
||||
|
|
|
@ -13,15 +13,15 @@ from numpy.typing import NDArray
|
|||
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
||||
|
||||
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
|
||||
|
||||
|
@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma")
|
|||
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
|
||||
VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
|
||||
|
@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex):
|
|||
raise NotImplementedError("Hybrid search is not supported in Chroma")
|
||||
|
||||
|
||||
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(
|
||||
self,
|
||||
config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
|
||||
|
@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
self.inference_api = inference_api
|
||||
self.client = None
|
||||
self.cache = {}
|
||||
self.vector_db_store = None
|
||||
self.vector_store_table = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||
self.vector_db_store = self.kvstore
|
||||
self.vector_store_table = self.kvstore
|
||||
|
||||
if isinstance(self.config, RemoteChromaVectorIOConfig):
|
||||
log.info(f"Connecting to Chroma server at: {self.config.url}")
|
||||
|
@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
# Clean up mixin resources (file batch tasks)
|
||||
await super().shutdown()
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
collection = await maybe_await(
|
||||
self.client.get_or_create_collection(
|
||||
name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()}
|
||||
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
|
||||
)
|
||||
)
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db, ChromaIndex(self.client, collection), self.inference_api
|
||||
self.cache[vector_store.identifier] = VectorStoreWithIndex(
|
||||
vector_store, ChromaIndex(self.client, collection), self.inference_api
|
||||
)
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
if vector_db_id not in self.cache:
|
||||
log.warning(f"Vector DB {vector_db_id} not found")
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
if vector_store_id not in self.cache:
|
||||
log.warning(f"Vector DB {vector_store_id} not found")
|
||||
return
|
||||
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if index is None:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
|
||||
|
||||
|
@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
async def query_chunks(
|
||||
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
|
||||
if index is None:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
|
||||
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
|
||||
if vector_store_id in self.cache:
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
|
||||
collection = await maybe_await(self.client.get_collection(vector_db_id))
|
||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
||||
if not vector_store:
|
||||
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
|
||||
collection = await maybe_await(self.client.get_collection(vector_store_id))
|
||||
if not collection:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
|
||||
index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
|
||||
self.cache[vector_db_id] = index
|
||||
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
|
||||
index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
|
||||
self.cache[vector_store_id] = index
|
||||
return index
|
||||
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
"""Delete chunks from a Chroma vector store."""
|
||||
index = await self._get_and_cache_vector_db_index(store_id)
|
||||
index = await self._get_and_cache_vector_store_index(store_id)
|
||||
if not index:
|
||||
raise ValueError(f"Vector DB {store_id} not found")
|
||||
|
||||
|
|
|
@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
|
@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|||
RERANKER_TYPE_WEIGHTED,
|
||||
ChunkForDeletion,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
VectorStoreWithIndex,
|
||||
)
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
|
||||
|
||||
|
@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
|
|||
logger = get_logger(name=__name__, category="vector_io::milvus")
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
|
||||
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::"
|
||||
|
@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex):
|
|||
raise
|
||||
|
||||
|
||||
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(
|
||||
self,
|
||||
config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
|
||||
|
@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
self.cache = {}
|
||||
self.client = None
|
||||
self.inference_api = inference_api
|
||||
self.vector_db_store = None
|
||||
self.vector_store_table = None
|
||||
self.metadata_collection_name = "openai_vector_stores_metadata"
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||
start_key = VECTOR_DBS_PREFIX
|
||||
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
|
||||
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||
|
||||
for vector_db_data in stored_vector_dbs:
|
||||
vector_db = VectorDB.model_validate_json(vector_db_data)
|
||||
index = VectorDBWithIndex(
|
||||
vector_db,
|
||||
for vector_store_data in stored_vector_stores:
|
||||
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store,
|
||||
index=MilvusIndex(
|
||||
client=self.client,
|
||||
collection_name=vector_db.identifier,
|
||||
collection_name=vector_store.identifier,
|
||||
consistency_level=self.config.consistency_level,
|
||||
kvstore=self.kvstore,
|
||||
),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[vector_db.identifier] = index
|
||||
self.cache[vector_store.identifier] = index
|
||||
if isinstance(self.config, RemoteMilvusVectorIOConfig):
|
||||
logger.info(f"Connecting to Milvus server at {self.config.uri}")
|
||||
self.client = MilvusClient(**self.config.model_dump(exclude_none=True))
|
||||
|
@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
# Clean up mixin resources (file batch tasks)
|
||||
await super().shutdown()
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
if isinstance(self.config, RemoteMilvusVectorIOConfig):
|
||||
consistency_level = self.config.consistency_level
|
||||
else:
|
||||
consistency_level = "Strong"
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level),
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
|
||||
self.cache[vector_db.identifier] = index
|
||||
self.cache[vector_store.identifier] = index
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
||||
if vector_store_id in self.cache:
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
if self.vector_db_store is None:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
if self.vector_store_table is None:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
||||
if not vector_store:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore),
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[vector_db_id] = index
|
||||
self.cache[vector_store_id] = index
|
||||
return index
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
if vector_db_id in self.cache:
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
if vector_store_id in self.cache:
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
|
||||
|
@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
async def query_chunks(
|
||||
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
"""Delete a chunk from a milvus vector store."""
|
||||
index = await self._get_and_cache_vector_db_index(store_id)
|
||||
index = await self._get_and_cache_vector_store_index(store_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(store_id)
|
||||
|
||||
|
|
|
@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
|
||||
|
||||
from .config import PGVectorVectorIOConfig
|
||||
|
@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig
|
|||
log = get_logger(name=__name__, category="vector_io::pgvector")
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::"
|
||||
VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::"
|
||||
|
@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
|
||||
def __init__(
|
||||
self,
|
||||
vector_db: VectorDB,
|
||||
vector_store: VectorStore,
|
||||
dimension: int,
|
||||
conn: psycopg2.extensions.connection,
|
||||
kvstore: KVStore | None = None,
|
||||
distance_metric: str = "COSINE",
|
||||
):
|
||||
self.vector_db = vector_db
|
||||
self.vector_store = vector_store
|
||||
self.dimension = dimension
|
||||
self.conn = conn
|
||||
self.kvstore = kvstore
|
||||
|
@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
# Sanitize the table name by replacing hyphens with underscores
|
||||
# SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
|
||||
# SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens
|
||||
# when created with patterns like "test-vector-db-{uuid4()}"
|
||||
sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
|
||||
sanitized_identifier = sanitize_collection_name(self.vector_store.identifier)
|
||||
self.table_name = f"vs_{sanitized_identifier}"
|
||||
|
||||
cur.execute(
|
||||
|
@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
"""
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
|
||||
raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
|
||||
log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
|
||||
raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
|
||||
|
||||
async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
|
||||
assert len(chunks) == len(embeddings), (
|
||||
|
@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
)
|
||||
|
||||
|
||||
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(
|
||||
self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
|
||||
) -> None:
|
||||
|
@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
|
|||
self.inference_api = inference_api
|
||||
self.conn = None
|
||||
self.cache = {}
|
||||
self.vector_db_store = None
|
||||
self.vector_store_table = None
|
||||
self.metadata_collection_name = "openai_vector_stores_metadata"
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
|
|||
# Clean up mixin resources (file batch tasks)
|
||||
await super().shutdown()
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
# Persist vector DB metadata in the KV store
|
||||
assert self.kvstore is not None
|
||||
# Upsert model metadata in Postgres
|
||||
upsert_models(self.conn, [(vector_db.identifier, vector_db)])
|
||||
upsert_models(self.conn, [(vector_store.identifier, vector_store)])
|
||||
|
||||
# Create and cache the PGVector index table for the vector DB
|
||||
pgvector_index = PGVectorIndex(
|
||||
vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
|
||||
vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore
|
||||
)
|
||||
await pgvector_index.initialize()
|
||||
index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api)
|
||||
self.cache[vector_db.identifier] = index
|
||||
index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
|
||||
self.cache[vector_store.identifier] = index
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
# Remove provider index and cache
|
||||
if vector_db_id in self.cache:
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
if vector_store_id in self.cache:
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
|
||||
# Delete vector DB metadata from KV store
|
||||
assert self.kvstore is not None
|
||||
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}")
|
||||
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_chunks(
|
||||
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
|
||||
if vector_store_id in self.cache:
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
if self.vector_db_store is None:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
if self.vector_store_table is None:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
||||
if not vector_store:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
|
||||
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
|
||||
await index.initialize()
|
||||
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
|
||||
return self.cache[vector_db_id]
|
||||
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
"""Delete a chunk from a PostgreSQL vector store."""
|
||||
index = await self._get_and_cache_vector_db_index(store_id)
|
||||
index = await self._get_and_cache_vector_store_index(store_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(store_id)
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
|
@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import (
|
|||
VectorStoreChunkingStrategy,
|
||||
VectorStoreFileObject,
|
||||
)
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
||||
|
||||
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
|
||||
|
||||
|
@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id"
|
|||
|
||||
# KV store prefixes for vector databases
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::"
|
||||
|
||||
|
||||
def convert_id(_id: str) -> str:
|
||||
|
@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex):
|
|||
await self.client.delete_collection(collection_name=self.collection_name)
|
||||
|
||||
|
||||
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(
|
||||
self,
|
||||
config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig,
|
||||
|
@ -157,7 +157,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
self.client: AsyncQdrantClient = None
|
||||
self.cache = {}
|
||||
self.inference_api = inference_api
|
||||
self.vector_db_store = None
|
||||
self.vector_store_table = None
|
||||
self._qdrant_lock = asyncio.Lock()
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
|
||||
start_key = VECTOR_DBS_PREFIX
|
||||
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
|
||||
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||
|
||||
for vector_db_data in stored_vector_dbs:
|
||||
vector_db = VectorDB.model_validate_json(vector_db_data)
|
||||
index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api)
|
||||
self.cache[vector_db.identifier] = index
|
||||
for vector_store_data in stored_vector_stores:
|
||||
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api
|
||||
)
|
||||
self.cache[vector_store.identifier] = index
|
||||
self.openai_vector_stores = await self._load_openai_vector_stores()
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
|
@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
# Clean up mixin resources (file batch tasks)
|
||||
await super().shutdown()
|
||||
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
assert self.kvstore is not None
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api
|
||||
)
|
||||
|
||||
self.cache[vector_db.identifier] = index
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
if vector_db_id in self.cache:
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
|
||||
assert self.kvstore is not None
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
|
||||
|
||||
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
||||
if self.vector_db_store is None:
|
||||
raise ValueError(f"Vector DB not found {vector_db_id}")
|
||||
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=QdrantIndex(client=self.client, collection_name=vector_db.identifier),
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=QdrantIndex(self.client, vector_store.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[vector_db_id] = index
|
||||
|
||||
self.cache[vector_store.identifier] = index
|
||||
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
if vector_store_id in self.cache:
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
|
||||
assert self.kvstore is not None
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||
|
||||
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
||||
if vector_store_id in self.cache:
|
||||
return self.cache[vector_store_id]
|
||||
|
||||
if self.vector_store_table is None:
|
||||
raise ValueError(f"Vector DB not found {vector_store_id}")
|
||||
|
||||
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
|
||||
if not vector_store:
|
||||
raise VectorStoreNotFoundError(vector_store_id)
|
||||
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[vector_store_id] = index
|
||||
return index
|
||||
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
|
||||
|
@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
async def query_chunks(
|
||||
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
index = await self._get_and_cache_vector_store_index(vector_db_id)
|
||||
if not index:
|
||||
raise VectorStoreNotFoundError(vector_db_id)
|
||||
|
||||
|
@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
"""Delete chunks from a Qdrant vector store."""
|
||||
index = await self._get_and_cache_vector_db_index(store_id)
|
||||
index = await self._get_and_cache_vector_store_index(store_id)
|
||||
if not index:
|
||||
raise ValueError(f"Vector DB {store_id} not found")
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue