Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-04 02:03:44 +00:00

commit e9a8967ed5: Merge 9aef325934 into sapling-pr-archive-ehhuang
41 changed files with 1280 additions and 197 deletions
@@ -72,14 +72,14 @@ description: |
   Example with hybrid search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
   )

   # Using RRF ranker
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={
           "mode": "hybrid",
@@ -91,7 +91,7 @@ description: |

   # Using weighted ranker
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={
           "mode": "hybrid",
@@ -105,7 +105,7 @@ description: |
   Example with explicit vector search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
   )
@@ -114,7 +114,7 @@ description: |
   Example with keyword search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
   )
@@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
   Example with hybrid search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
   )

   # Using RRF ranker
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={
           "mode": "hybrid",
@@ -296,7 +296,7 @@ response = await vector_io.query_chunks(

   # Using weighted ranker
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={
           "mode": "hybrid",
@@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
   Example with explicit vector search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
   )
@@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
   Example with keyword search:
   ```python
   response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
       query="your query here",
       params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
   )
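The hybrid-search examples above are truncated right after `"mode": "hybrid",`. For orientation, a minimal sketch of how such a params dict plausibly continues; the `ranker` shape is an assumption, not part of this diff — only the renamed `vector_store_id` argument comes from the hunks above:

```python
# Sketch only: the "ranker" keys are assumed, not shown in this diff.
response = await vector_io.query_chunks(
    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
        "max_chunks": 3,
        "score_threshold": 0.7,
        "ranker": {"type": "rrf", "impact_factor": 60.0},  # assumed RRF config
    },
)
```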
docs/static/deprecated-llama-stack-spec.html (vendored, 4 changes)

@@ -4390,7 +4390,7 @@
                     "const": "memory_retrieval",
                     "default": "memory_retrieval"
                 },
-                "vector_db_ids": {
+                "vector_store_ids": {
                     "type": "string",
                     "description": "The IDs of the vector databases to retrieve context from."
                 },
@@ -4404,7 +4404,7 @@
                 "turn_id",
                 "step_id",
                 "step_type",
-                "vector_db_ids",
+                "vector_store_ids",
                 "inserted_context"
             ],
             "title": "MemoryRetrievalStep",
docs/static/deprecated-llama-stack-spec.yaml (vendored, 4 changes)

@@ -3252,7 +3252,7 @@ components:
        description: Type of the step in an agent turn.
        const: memory_retrieval
        default: memory_retrieval
-      vector_db_ids:
+      vector_store_ids:
        type: string
        description: >-
          The IDs of the vector databases to retrieve context from.
@@ -3265,7 +3265,7 @@ components:
        - turn_id
        - step_id
        - step_type
-      - vector_db_ids
+      - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@@ -2865,7 +2865,7 @@
                     "const": "memory_retrieval",
                     "default": "memory_retrieval"
                 },
-                "vector_db_ids": {
+                "vector_store_ids": {
                     "type": "string",
                     "description": "The IDs of the vector databases to retrieve context from."
                 },
@@ -2879,7 +2879,7 @@
                 "turn_id",
                 "step_id",
                 "step_type",
-                "vector_db_ids",
+                "vector_store_ids",
                 "inserted_context"
             ],
             "title": "MemoryRetrievalStep",
@@ -2085,7 +2085,7 @@ components:
        description: Type of the step in an agent turn.
        const: memory_retrieval
        default: memory_retrieval
-      vector_db_ids:
+      vector_store_ids:
        type: string
        description: >-
          The IDs of the vector databases to retrieve context from.
@@ -2098,7 +2098,7 @@ components:
        - turn_id
        - step_id
        - step_type
-      - vector_db_ids
+      - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
docs/static/llama-stack-spec.html (vendored, 16 changes)

@@ -11412,7 +11412,7 @@
                 },
                 "description": "List of documents to index in the RAG system"
             },
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "ID of the vector database to store the document embeddings"
             },
@@ -11424,7 +11424,7 @@
         "additionalProperties": false,
         "required": [
             "documents",
-            "vector_db_id",
+            "vector_store_id",
             "chunk_size_in_tokens"
         ],
         "title": "InsertRequest"
@@ -11615,7 +11615,7 @@
                 "$ref": "#/components/schemas/InterleavedContent",
                 "description": "The query content to search for in the indexed documents"
             },
-            "vector_db_ids": {
+            "vector_store_ids": {
                 "type": "array",
                 "items": {
                     "type": "string"
@@ -11630,7 +11630,7 @@
         "additionalProperties": false,
         "required": [
             "content",
-            "vector_db_ids"
+            "vector_store_ids"
         ],
         "title": "QueryRequest"
     },
@@ -11923,7 +11923,7 @@
     "InsertChunksRequest": {
         "type": "object",
         "properties": {
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "The identifier of the vector database to insert the chunks into."
             },
@@ -11941,7 +11941,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "vector_db_id",
+            "vector_store_id",
             "chunks"
         ],
         "title": "InsertChunksRequest"
@@ -11949,7 +11949,7 @@
     "QueryChunksRequest": {
         "type": "object",
         "properties": {
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "The identifier of the vector database to query."
             },
@@ -11986,7 +11986,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "vector_db_id",
+            "vector_store_id",
             "query"
         ],
         "title": "QueryChunksRequest"
docs/static/llama-stack-spec.yaml (vendored, 16 changes)

@@ -8649,7 +8649,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
@@ -8660,7 +8660,7 @@ components:
      additionalProperties: false
      required:
        - documents
-       - vector_db_id
+       - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:
@@ -8831,7 +8831,7 @@ components:
            $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
          type: array
          items:
            type: string
@@ -8844,7 +8844,7 @@ components:
      additionalProperties: false
      required:
        - content
-       - vector_db_ids
+       - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object
@@ -9068,7 +9068,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
-       vector_db_id:
+       vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@@ -9087,13 +9087,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
-       - vector_db_id
+       - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
-       vector_db_id:
+       vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@@ -9113,7 +9113,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
-       - vector_db_id
+       - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
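Read together with the `/vector-io/insert` route later in this diff, the renamed schema implies request bodies like the sketch below; the host, port, and chunk payload are illustrative assumptions, not part of the spec change:

```python
import requests

# Hypothetical request against a local server; field names follow the renamed
# InsertChunksRequest schema above ("vector_db_id" -> "vector_store_id").
body = {
    "vector_store_id": "my_db",
    "chunks": [{"content": "hello world", "metadata": {"document_id": "doc-1"}}],
    "ttl_seconds": 3600,
}
# Base URL and port are assumptions for illustration.
requests.post("http://localhost:8321/v1/vector-io/insert", json=body)
```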
docs/static/stainless-llama-stack-spec.html (vendored, 20 changes)

@@ -13084,7 +13084,7 @@
                 },
                 "description": "List of documents to index in the RAG system"
             },
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "ID of the vector database to store the document embeddings"
             },
@@ -13096,7 +13096,7 @@
         "additionalProperties": false,
         "required": [
             "documents",
-            "vector_db_id",
+            "vector_store_id",
             "chunk_size_in_tokens"
         ],
         "title": "InsertRequest"
@@ -13287,7 +13287,7 @@
                 "$ref": "#/components/schemas/InterleavedContent",
                 "description": "The query content to search for in the indexed documents"
             },
-            "vector_db_ids": {
+            "vector_store_ids": {
                 "type": "array",
                 "items": {
                     "type": "string"
@@ -13302,7 +13302,7 @@
         "additionalProperties": false,
         "required": [
             "content",
-            "vector_db_ids"
+            "vector_store_ids"
         ],
         "title": "QueryRequest"
     },
@@ -13595,7 +13595,7 @@
     "InsertChunksRequest": {
         "type": "object",
         "properties": {
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "The identifier of the vector database to insert the chunks into."
             },
@@ -13613,7 +13613,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "vector_db_id",
+            "vector_store_id",
             "chunks"
         ],
         "title": "InsertChunksRequest"
@@ -13621,7 +13621,7 @@
     "QueryChunksRequest": {
         "type": "object",
         "properties": {
-            "vector_db_id": {
+            "vector_store_id": {
                 "type": "string",
                 "description": "The identifier of the vector database to query."
             },
@@ -13658,7 +13658,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "vector_db_id",
+            "vector_store_id",
             "query"
         ],
         "title": "QueryChunksRequest"
@@ -15719,7 +15719,7 @@
             "const": "memory_retrieval",
             "default": "memory_retrieval"
         },
-        "vector_db_ids": {
+        "vector_store_ids": {
             "type": "string",
             "description": "The IDs of the vector databases to retrieve context from."
         },
@@ -15733,7 +15733,7 @@
         "turn_id",
         "step_id",
         "step_type",
-        "vector_db_ids",
+        "vector_store_ids",
         "inserted_context"
     ],
     "title": "MemoryRetrievalStep",
docs/static/stainless-llama-stack-spec.yaml (vendored, 20 changes)

@@ -9862,7 +9862,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
@@ -9873,7 +9873,7 @@ components:
      additionalProperties: false
      required:
        - documents
-       - vector_db_id
+       - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:
@@ -10044,7 +10044,7 @@ components:
            $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
          type: array
          items:
            type: string
@@ -10057,7 +10057,7 @@ components:
      additionalProperties: false
      required:
        - content
-       - vector_db_ids
+       - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object
@@ -10281,7 +10281,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
-       vector_db_id:
+       vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@@ -10300,13 +10300,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
-       - vector_db_id
+       - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
-       vector_db_id:
+       vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@@ -10326,7 +10326,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
-       - vector_db_id
+       - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
@@ -11844,7 +11844,7 @@ components:
        description: Type of the step in an agent turn.
        const: memory_retrieval
        default: memory_retrieval
-      vector_db_ids:
+      vector_store_ids:
        type: string
        description: >-
          The IDs of the vector databases to retrieve context from.
@@ -11857,7 +11857,7 @@ components:
        - turn_id
        - step_id
        - step_type
-      - vector_db_ids
+      - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@@ -30,8 +30,10 @@ materialize_telemetry_configs() {
   local otel_cfg="${dest}/otel-collector-config.yaml"
   local prom_cfg="${dest}/prometheus.yml"
   local graf_cfg="${dest}/grafana-datasources.yaml"
+  local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
+  local dash_json="${dest}/llama-stack-dashboard.json"

-  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
+  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
     if [ -e "$asset" ]; then
       die "Telemetry asset ${asset} already exists; refusing to overwrite"
     fi
@@ -103,6 +105,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
@@ -112,6 +115,224 @@ datasources:
     url: http://jaeger:16686
     editable: true
 EOF
+
+  cat <<'EOF' > "$graf_dash_cfg"
+apiVersion: 1
+
+providers:
+  - name: 'Llama Stack'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
+EOF
+
+  # Copy the dashboard JSON inline to avoid line-length issues
+  cat > "$dash_json" <<'DASHBOARD_JSON'
+{
+  "annotations": {"list": []},
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {"type": "prometheus", "uid": "prometheus"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+      "id": 1,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_completion_tokens_total", "legendFormat": "{{model_id}} ({{provider_id}})", "refId": "A"}
+      ],
+      "title": "Completion Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+      "id": 2,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
+      ],
+      "title": "Prompt & Total Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "ms"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+      "id": 3,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
+      ],
+      "title": "HTTP Request Duration (p95, p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
+        "textMode": "auto"
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
+      ],
+      "title": "Total Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
+      "id": 5,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
+        "textMode": "auto"
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
+      ],
+      "title": "Active Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
+      "id": 6,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
+      ],
+      "title": "Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "Bps"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
+      "id": 7,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
+      ],
+      "title": "Request/Response Sizes",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 38,
+  "tags": ["llama-stack"],
+  "templating": {"list": []},
+  "time": {"from": "now-15m", "to": "now"},
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Llama Stack Metrics",
+  "uid": "llama-stack-metrics",
+  "version": 0,
+  "weekStart": ""
+}
+DASHBOARD_JSON
 }

 # Cleanup function to remove temporary files
@@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
       -e GF_SECURITY_ADMIN_PASSWORD=admin \
       -e GF_USERS_ALLOW_SIGN_UP=false \
       -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+      -v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+      -v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
       docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
     die "Grafana startup failed"
   fi
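A quick way to sanity-check this wiring is to run one of the dashboard's PromQL expressions against Prometheus directly. A minimal sketch, assuming the Prometheus port is published on localhost:9090:

```python
import requests

# Metric name and query come from the dashboard JSON above.
resp = requests.get(
    "http://localhost:9090/api/v1/query",
    params={"query": "sum(llama_stack_http_server_active_requests)"},
)
for result in resp.json()["data"]["result"]:
    print(result["metric"], result["value"])
```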
scripts/telemetry/grafana-dashboards.yaml (new file, +12 lines)

@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: 'Llama Stack'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards
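Grafana's file provisioner loads every dashboard JSON it finds under the `path` above, so the mounted `llama-stack-dashboard.json` should parse cleanly. A minimal sketch of that check, run from the repo root:

```python
import json

with open("scripts/telemetry/llama-stack-dashboard.json") as f:
    dash = json.load(f)

# uid and panel count taken from the dashboard JSON in this diff
assert dash["uid"] == "llama-stack-metrics"
assert len(dash["panels"]) == 7
```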
@@ -5,6 +5,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
scripts/telemetry/llama-stack-dashboard.json (new file, +457 lines)

@@ -0,0 +1,457 @@
{
  "annotations": {"list": []},
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
      "id": 1,
      "options": {
        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_completion_tokens_total", "legendFormat": "{{model_id}} ({{provider_id}})", "refId": "A"}
      ],
      "title": "Completion Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
      "id": 2,
      "options": {
        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
      ],
      "title": "Prompt & Total Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "ms"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
      "id": 3,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
      ],
      "title": "HTTP Request Duration (p95, p99)",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
      "id": 4,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
        "textMode": "auto"
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
      ],
      "title": "Total Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
      "id": 5,
      "options": {
        "colorMode": "value",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
        "textMode": "auto"
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
      ],
      "title": "Active Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "reqps"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
      "id": 6,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
      ],
      "title": "Request Rate",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "Bps"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
      "id": 7,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
      ],
      "title": "Request/Response Sizes",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "tags": ["llama-stack"],
  "templating": {"list": []},
  "time": {"from": "now-15m", "to": "now"},
  "timepicker": {},
  "timezone": "browser",
  "title": "Llama Stack Metrics",
  "uid": "llama-stack-metrics",
  "version": 0,
  "weekStart": ""
}
@@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
   -e GF_SECURITY_ADMIN_PASSWORD=admin \
   -e GF_USERS_ALLOW_SIGN_UP=false \
   -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+  -v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+  -v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
   docker.io/grafana/grafana:11.0.0

 # Wait for services to start
@@ -149,13 +149,13 @@ class ShieldCallStep(StepCommon):
 class MemoryRetrievalStep(StepCommon):
     """A memory retrieval step in an agent turn.

-    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
+    :param vector_store_ids: The IDs of the vector databases to retrieve context from.
     :param inserted_context: The context retrieved from the vector databases.
     """

     step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
     # TODO: should this be List[str]?
-    vector_db_ids: str
+    vector_store_ids: str
     inserted_context: InterleavedContent
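A sketch of constructing the renamed step; the ID values and plain-string context are illustrative, and note the field is still a single `str` (see the TODO above):

```python
step = MemoryRetrievalStep(
    turn_id="turn-123",        # illustrative IDs
    step_id="step-456",
    vector_store_ids="my_db",  # renamed from vector_db_ids
    inserted_context="retrieved context goes here",
)
```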
@@ -21,8 +21,8 @@ from typing_extensions import TypedDict
 from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.core.telemetry.telemetry import MetricResponseMixin
 from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
@@ -190,13 +190,13 @@ class RAGToolRuntime(Protocol):
     async def insert(
         self,
         documents: list[RAGDocument],
-        vector_db_id: str,
+        vector_store_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
         """Index documents so they can be used by the RAG system.

         :param documents: List of documents to index in the RAG system
-        :param vector_db_id: ID of the vector database to store the document embeddings
+        :param vector_store_id: ID of the vector database to store the document embeddings
         :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
         """
         ...
@@ -205,13 +205,13 @@ class RAGToolRuntime(Protocol):
     async def query(
         self,
         content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
         query_config: RAGQueryConfig | None = None,
     ) -> RAGQueryResult:
         """Query the RAG system for context; typically invoked by the agent.

         :param content: The query content to search for in the indexed documents
-        :param vector_db_ids: List of vector database IDs to search within
+        :param vector_store_ids: List of vector database IDs to search within
         :param query_config: (Optional) Configuration parameters for the query operation
         :returns: RAGQueryResult containing the retrieved content and metadata
         """
|
|
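For readers tracking the rename at the call site, here is a minimal sketch of the updated protocol in use. It assumes an implementation bound to `rag_tool` and an existing store `vs_123`; both names are illustrative, not part of this change.

```python
# Hedged usage sketch for the renamed RAGToolRuntime parameters.
# `rag_tool` and the store ID "vs_123" are illustrative assumptions.
from llama_stack.apis.tools import RAGDocument

docs = [RAGDocument(document_id="doc-1", content="Paris is the capital of France.")]

# Index documents into a store (parameter was previously `vector_db_id`).
await rag_tool.insert(documents=docs, vector_store_id="vs_123", chunk_size_in_tokens=512)

# Query across one or more stores (previously `vector_db_ids`).
result = await rag_tool.query(content="What is the capital of France?", vector_store_ids=["vs_123"])
print(result.content)
```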
@@ -529,17 +529,16 @@ class VectorIO(Protocol):

     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         chunks: list[Chunk],
         ttl_seconds: int | None = None,
     ) -> None:
         """Insert chunks into a vector database.

-        :param vector_db_id: The identifier of the vector database to insert the chunks into.
+        :param vector_store_id: The identifier of the vector database to insert the chunks into.
         :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
             `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
             If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.

@@ -548,17 +548,16 @@ class VectorIO(Protocol):
         """
         ...

-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         query: InterleavedContent,
         params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
         """Query chunks from a vector database.

-        :param vector_db_id: The identifier of the vector database to query.
+        :param vector_store_id: The identifier of the vector database to query.
         :param query: The query to search for.
         :param params: The parameters of the query.
         :returns: A QueryChunksResponse.

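The renamed VectorIO surface looks like this from a caller's perspective; a minimal sketch, assuming a `vector_io` implementation and an illustrative store ID (the routes themselves are unchanged).

```python
# Hedged sketch of the renamed VectorIO methods; `vector_io` and "vs_123"
# are illustrative assumptions, not part of this change.
from llama_stack.apis.vector_io import Chunk

chunks = [Chunk(content="Llama Stack exposes a VectorIO API.", metadata={"document_id": "doc-1"})]

# POST /vector-io/insert now takes `vector_store_id` instead of `vector_db_id`.
await vector_io.insert_chunks(vector_store_id="vs_123", chunks=chunks)

# POST /vector-io/query is renamed the same way.
response = await vector_io.query_chunks(
    vector_store_id="vs_123",
    query="Which API does Llama Stack expose?",
    params={"max_chunks": 3},
)
for chunk, score in zip(response.chunks, response.scores):
    print(score, chunk.content)
```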
@@ -312,3 +312,6 @@ class ConversationServiceImpl(Conversations):

         logger.debug(f"Deleted item {item_id} from conversation {conversation_id}")
         return ConversationItemDeletedResource(id=item_id)
+
+    async def shutdown(self) -> None:
+        pass

@@ -230,3 +230,6 @@ class PromptServiceImpl(Prompts):
         await self.kvstore.set(default_key, str(version))

         return self._deserialize_prompt(data)
+
+    async def shutdown(self) -> None:
+        pass

@@ -53,7 +53,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletionContentPartTextParam,
 )
 from llama_stack.apis.models import Model, ModelType
-from llama_stack.apis.telemetry import MetricEvent, MetricInResponse
+from llama_stack.core.telemetry.telemetry import MetricEvent, MetricInResponse
 from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat

@@ -73,27 +73,27 @@ class VectorIORouter(VectorIO):

     async def insert_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         chunks: list[Chunk],
         ttl_seconds: int | None = None,
     ) -> None:
         doc_ids = [chunk.document_id for chunk in chunks[:3]]
         logger.debug(
-            f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
+            f"VectorIORouter.insert_chunks: {vector_store_id}, {len(chunks)} chunks, "
             f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
         )
-        provider = await self.routing_table.get_provider_impl(vector_db_id)
-        return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.insert_chunks(vector_store_id, chunks, ttl_seconds)

     async def query_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         query: InterleavedContent,
         params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
-        provider = await self.routing_table.get_provider_impl(vector_db_id)
-        return await provider.query_chunks(vector_db_id, query, params)
+        logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}")
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.query_chunks(vector_store_id, query, params)

     # OpenAI Vector Stores API endpoints
     async def openai_create_vector_store(

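The router change is mechanical, but note that dispatch is keyed entirely by the store ID. A hedged sketch of the lookup pattern the router relies on (the table contents and provider names are made up for the example):

```python
# Illustrative ID-keyed dispatch in the style of VectorIORouter; the
# mapping below is a stand-in for the real routing table.
class ToyRoutingTable:
    def __init__(self) -> None:
        self._impls = {"vs_123": "faiss-provider", "vs_456": "qdrant-provider"}

    async def get_provider_impl(self, vector_store_id: str) -> str:
        # After the rename, lookups use vector_store_id end to end, so a
        # stale vector_db_id key fails here instead of downstream.
        return self._impls[vector_store_id]
```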
@@ -31,7 +31,6 @@ from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
-from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl

@@ -67,7 +66,6 @@ class LlamaStack(
     Safety,
     SyntheticDataGeneration,
     Datasets,
-    Telemetry,
     PostTraining,
     VectorIO,
     Eval,

@@ -6,7 +6,13 @@

 import os
 import threading
-from typing import Any
+from datetime import datetime
+from enum import Enum
+from typing import (
+    Annotated,
+    Any,
+    Literal,
+)

 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter

@@ -16,21 +22,399 @@ from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
+from pydantic import BaseModel, Field

-from llama_stack.apis.telemetry import (
-    Event,
-    MetricEvent,
-    SpanEndPayload,
-    SpanStartPayload,
-    SpanStatus,
-    StructuredLogEvent,
-    UnstructuredLogEvent,
-)
-from llama_stack.apis.telemetry import (
-    Telemetry as TelemetryBase,
-)
-from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
 from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import Primitive
+from llama_stack.schema_utils import json_schema_type, register_schema
+
+ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
+
+
+@json_schema_type
+class SpanStatus(Enum):
+    """The status of a span indicating whether it completed successfully or with an error.
+    :cvar OK: Span completed successfully without errors
+    :cvar ERROR: Span completed with an error or failure
+    """
+
+    OK = "ok"
+    ERROR = "error"
+
+
+@json_schema_type
+class Span(BaseModel):
+    """A span representing a single operation within a trace.
+    :param span_id: Unique identifier for the span
+    :param trace_id: Unique identifier for the trace this span belongs to
+    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
+    :param name: Human-readable name describing the operation this span represents
+    :param start_time: Timestamp when the operation began
+    :param end_time: (Optional) Timestamp when the operation finished, if completed
+    :param attributes: (Optional) Key-value pairs containing additional metadata about the span
+    """
+
+    span_id: str
+    trace_id: str
+    parent_span_id: str | None = None
+    name: str
+    start_time: datetime
+    end_time: datetime | None = None
+    attributes: dict[str, Any] | None = Field(default_factory=lambda: {})
+
+    def set_attribute(self, key: str, value: Any):
+        if self.attributes is None:
+            self.attributes = {}
+        self.attributes[key] = value
+
+
+@json_schema_type
+class Trace(BaseModel):
+    """A trace representing the complete execution path of a request across multiple operations.
+    :param trace_id: Unique identifier for the trace
+    :param root_span_id: Unique identifier for the root span that started this trace
+    :param start_time: Timestamp when the trace began
+    :param end_time: (Optional) Timestamp when the trace finished, if completed
+    """
+
+    trace_id: str
+    root_span_id: str
+    start_time: datetime
+    end_time: datetime | None = None
+
+
+@json_schema_type
+class EventType(Enum):
+    """The type of telemetry event being logged.
+    :cvar UNSTRUCTURED_LOG: A simple log message with severity level
+    :cvar STRUCTURED_LOG: A structured log event with typed payload data
+    :cvar METRIC: A metric measurement with value and unit
+    """
+
+    UNSTRUCTURED_LOG = "unstructured_log"
+    STRUCTURED_LOG = "structured_log"
+    METRIC = "metric"
+
+
+@json_schema_type
+class LogSeverity(Enum):
+    """The severity level of a log message.
+    :cvar VERBOSE: Detailed diagnostic information for troubleshooting
+    :cvar DEBUG: Debug information useful during development
+    :cvar INFO: General informational messages about normal operation
+    :cvar WARN: Warning messages about potentially problematic situations
+    :cvar ERROR: Error messages indicating failures that don't stop execution
+    :cvar CRITICAL: Critical error messages indicating severe failures
+    """
+
+    VERBOSE = "verbose"
+    DEBUG = "debug"
+    INFO = "info"
+    WARN = "warn"
+    ERROR = "error"
+    CRITICAL = "critical"
+
+
+class EventCommon(BaseModel):
+    """Common fields shared by all telemetry events.
+    :param trace_id: Unique identifier for the trace this event belongs to
+    :param span_id: Unique identifier for the span this event belongs to
+    :param timestamp: Timestamp when the event occurred
+    :param attributes: (Optional) Key-value pairs containing additional metadata about the event
+    """
+
+    trace_id: str
+    span_id: str
+    timestamp: datetime
+    attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})
+
+
+@json_schema_type
+class UnstructuredLogEvent(EventCommon):
+    """An unstructured log event containing a simple text message.
+    :param type: Event type identifier set to UNSTRUCTURED_LOG
+    :param message: The log message text
+    :param severity: The severity level of the log message
+    """
+
+    type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
+    message: str
+    severity: LogSeverity
+
+
+@json_schema_type
+class MetricEvent(EventCommon):
+    """A metric event containing a measured value.
+    :param type: Event type identifier set to METRIC
+    :param metric: The name of the metric being measured
+    :param value: The numeric value of the metric measurement
+    :param unit: The unit of measurement for the metric value
+    """
+
+    type: Literal[EventType.METRIC] = EventType.METRIC
+    metric: str  # this would be an enum
+    value: int | float
+    unit: str
+
+
+@json_schema_type
+class MetricInResponse(BaseModel):
+    """A metric value included in API responses.
+    :param metric: The name of the metric
+    :param value: The numeric value of the metric
+    :param unit: (Optional) The unit of measurement for the metric value
+    """
+
+    metric: str
+    value: int | float
+    unit: str | None = None
+
+
+# This is a short term solution to allow inference API to return metrics
+# The ideal way to do this is to have a way for all response types to include metrics
+# and all metric events logged to the telemetry API to be included with the response
+# To do this, we will need to augment all response types with a metrics field.
+# We have hit a blocker from stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment the response types that have a data field
+# in them like so
+# class ListModelsResponse(BaseModel):
+#     metrics: Optional[List[MetricEvent]] = None
+#     data: List[Models]
+#     ...
+# The client SDK will need to access the data by using a .data field, which is not
+# ergonomic. Stainless SDK does support unwrapping the response type, but it
+# requires that the response type to only have a single field.
+
+# We will need a way in the client SDK to signal that the metrics are needed
+# and if they are needed, the client SDK has to return the full response type
+# without unwrapping it.
+
+
+class MetricResponseMixin(BaseModel):
+    """Mixin class for API responses that can include metrics.
+    :param metrics: (Optional) List of metrics associated with the API response
+    """
+
+    metrics: list[MetricInResponse] | None = None
+
+
+@json_schema_type
+class StructuredLogType(Enum):
+    """The type of structured log event payload.
+    :cvar SPAN_START: Event indicating the start of a new span
+    :cvar SPAN_END: Event indicating the completion of a span
+    """
+
+    SPAN_START = "span_start"
+    SPAN_END = "span_end"
+
+
+@json_schema_type
+class SpanStartPayload(BaseModel):
+    """Payload for a span start event.
+    :param type: Payload type identifier set to SPAN_START
+    :param name: Human-readable name describing the operation this span represents
+    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
+    """
+
+    type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
+    name: str
+    parent_span_id: str | None = None
+
+
+@json_schema_type
+class SpanEndPayload(BaseModel):
+    """Payload for a span end event.
+    :param type: Payload type identifier set to SPAN_END
+    :param status: The final status of the span indicating success or failure
+    """
+
+    type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
+    status: SpanStatus
+
+
+StructuredLogPayload = Annotated[
+    SpanStartPayload | SpanEndPayload,
+    Field(discriminator="type"),
+]
+register_schema(StructuredLogPayload, name="StructuredLogPayload")
+
+
+@json_schema_type
+class StructuredLogEvent(EventCommon):
+    """A structured log event containing typed payload data.
+    :param type: Event type identifier set to STRUCTURED_LOG
+    :param payload: The structured payload data for the log event
+    """
+
+    type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
+    payload: StructuredLogPayload
+
+
+Event = Annotated[
+    UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
+    Field(discriminator="type"),
+]
+register_schema(Event, name="Event")
+
+
+@json_schema_type
+class EvalTrace(BaseModel):
+    """A trace record for evaluation purposes.
+    :param session_id: Unique identifier for the evaluation session
+    :param step: The evaluation step or phase identifier
+    :param input: The input data for the evaluation
+    :param output: The actual output produced during evaluation
+    :param expected_output: The expected output for comparison during evaluation
+    """
+
+    session_id: str
+    step: str
+    input: str
+    output: str
+    expected_output: str
+
+
+@json_schema_type
+class SpanWithStatus(Span):
+    """A span that includes status information.
+    :param status: (Optional) The current status of the span
+    """
+
+    status: SpanStatus | None = None
+
+
+@json_schema_type
+class QueryConditionOp(Enum):
+    """Comparison operators for query conditions.
+    :cvar EQ: Equal to comparison
+    :cvar NE: Not equal to comparison
+    :cvar GT: Greater than comparison
+    :cvar LT: Less than comparison
+    """
+
+    EQ = "eq"
+    NE = "ne"
+    GT = "gt"
+    LT = "lt"
+
+
+@json_schema_type
+class QueryCondition(BaseModel):
+    """A condition for filtering query results.
+    :param key: The attribute key to filter on
+    :param op: The comparison operator to apply
+    :param value: The value to compare against
+    """
+
+    key: str
+    op: QueryConditionOp
+    value: Any
+
+
+class QueryTracesResponse(BaseModel):
+    """Response containing a list of traces.
+    :param data: List of traces matching the query criteria
+    """
+
+    data: list[Trace]
+
+
+class QuerySpansResponse(BaseModel):
+    """Response containing a list of spans.
+    :param data: List of spans matching the query criteria
+    """
+
+    data: list[Span]
+
+
+class QuerySpanTreeResponse(BaseModel):
+    """Response containing a tree structure of spans.
+    :param data: Dictionary mapping span IDs to spans with status information
+    """
+
+    data: dict[str, SpanWithStatus]
+
+
+class MetricQueryType(Enum):
+    """The type of metric query to perform.
+    :cvar RANGE: Query metrics over a time range
+    :cvar INSTANT: Query metrics at a specific point in time
+    """
+
+    RANGE = "range"
+    INSTANT = "instant"
+
+
+class MetricLabelOperator(Enum):
+    """Operators for matching metric labels.
+    :cvar EQUALS: Label value must equal the specified value
+    :cvar NOT_EQUALS: Label value must not equal the specified value
+    :cvar REGEX_MATCH: Label value must match the specified regular expression
+    :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
+    """
+
+    EQUALS = "="
+    NOT_EQUALS = "!="
+    REGEX_MATCH = "=~"
+    REGEX_NOT_MATCH = "!~"
+
+
+class MetricLabelMatcher(BaseModel):
+    """A matcher for filtering metrics by label values.
+    :param name: The name of the label to match
+    :param value: The value to match against
+    :param operator: The comparison operator to use for matching
+    """
+
+    name: str
+    value: str
+    operator: MetricLabelOperator = MetricLabelOperator.EQUALS
+
+
+@json_schema_type
+class MetricLabel(BaseModel):
+    """A label associated with a metric.
+    :param name: The name of the label
+    :param value: The value of the label
+    """
+
+    name: str
+    value: str
+
+
+@json_schema_type
+class MetricDataPoint(BaseModel):
+    """A single data point in a metric time series.
+    :param timestamp: Unix timestamp when the metric value was recorded
+    :param value: The numeric value of the metric at this timestamp
+    """
+
+    timestamp: int
+    value: float
+    unit: str
+
+
+@json_schema_type
+class MetricSeries(BaseModel):
+    """A time series of metric data points.
+    :param metric: The name of the metric
+    :param labels: List of labels associated with this metric series
+    :param values: List of data points in chronological order
+    """
+
+    metric: str
+    labels: list[MetricLabel]
+    values: list[MetricDataPoint]
+
+
+class QueryMetricsResponse(BaseModel):
+    """Response containing metric time series data.
+    :param data: List of metric series matching the query criteria
+    """
+
+    data: list[MetricSeries]
+

 _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
     "active_spans": {},

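With the datatypes now defined in `llama_stack.core.telemetry.telemetry`, constructing events is a plain pydantic exercise. A small sketch follows; the IDs and values are illustrative, and the import path assumes the module layout introduced by this change.

```python
# Hedged example of the moved telemetry datatypes; the discriminated
# `Event` union dispatches on the `type` field.
from datetime import UTC, datetime

from llama_stack.core.telemetry.telemetry import MetricEvent, SpanStatus

event = MetricEvent(
    trace_id="trace-1",
    span_id="span-1",
    timestamp=datetime.now(UTC),
    metric="prompt_tokens",
    value=42,
    unit="tokens",
)
assert event.type.value == "metric"  # the union's discriminator value
assert SpanStatus.OK.value == "ok"
```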
@@ -49,7 +433,7 @@ def is_tracing_enabled(tracer):
         return span.is_recording()


-class Telemetry(TelemetryBase):
+class Telemetry:
    def __init__(self) -> None:
         self.meter = None

@@ -17,7 +17,8 @@ from datetime import UTC, datetime
 from functools import wraps
 from typing import Any, Self

-from llama_stack.apis.telemetry import (
+from llama_stack.core.telemetry.telemetry import (
+    ROOT_SPAN_MARKERS,
     Event,
     LogSeverity,
     Span,

@@ -47,7 +48,6 @@ if not _fallback_logger.handlers:
 INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000

-ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
 # The logical root span may not be visible to this process if a parent context
 # is passed in. The local root span is the first local span in a trace.
 LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"

@@ -488,13 +488,13 @@ class ChatAgent(ShieldRunnerMixin):

         session_info = await self.storage.get_session_info(session_id)
         # if the session has a memory bank id, let the memory tool use it
-        if session_info and session_info.vector_db_id:
+        if session_info and session_info.vector_store_id:
             for tool_name in self.tool_name_to_args.keys():
                 if tool_name == MEMORY_QUERY_TOOL:
-                    if "vector_db_ids" not in self.tool_name_to_args[tool_name]:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"] = [session_info.vector_db_id]
+                    if "vector_store_ids" not in self.tool_name_to_args[tool_name]:
+                        self.tool_name_to_args[tool_name]["vector_store_ids"] = [session_info.vector_store_id]
                     else:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"].append(session_info.vector_db_id)
+                        self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)

         output_attachments = []

@@ -22,7 +22,7 @@ log = get_logger(name=__name__, category="agents::meta_reference")

 class AgentSessionInfo(Session):
     # TODO: is this used anywhere?
-    vector_db_id: str | None = None
+    vector_store_id: str | None = None
     started_at: datetime
     owner: User | None = None
     identifier: str | None = None

@@ -93,12 +93,12 @@ class AgentPersistence:

         return session_info

-    async def add_vector_db_to_session(self, session_id: str, vector_db_id: str):
+    async def add_vector_db_to_session(self, session_id: str, vector_store_id: str):
         session_info = await self.get_session_if_accessible(session_id)
         if session_info is None:
             raise SessionNotFoundError(session_id)

-        session_info.vector_db_id = vector_db_id
+        session_info.vector_store_id = vector_store_id
         await self.kvstore.set(
             key=f"session:{self.agent_id}:{session_id}",
             value=session_info.model_dump_json(),

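A short sketch of how these pieces connect, with hypothetical IDs. Note the method is still named `add_vector_db_to_session` even though its parameter and the persisted field are now `vector_store_id`:

```python
# Hypothetical flow: attach a store to a session, then read it back.
# `persistence` is an AgentPersistence instance; the IDs are made up.
await persistence.add_vector_db_to_session(session_id="sess-1", vector_store_id="vs_123")

session_info = await persistence.get_session_if_accessible("sess-1")
if session_info and session_info.vector_store_id:
    # ChatAgent forwards this ID to the memory tool as `vector_store_ids`.
    print(session_info.vector_store_id)
```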
@@ -119,7 +119,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
     async def insert(
         self,
         documents: list[RAGDocument],
-        vector_db_id: str,
+        vector_store_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
         if not documents:

@@ -158,14 +158,14 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti

             try:
                 await self.vector_io_api.openai_attach_file_to_vector_store(
-                    vector_store_id=vector_db_id,
+                    vector_store_id=vector_store_id,
                     file_id=created_file.id,
                     attributes=doc.metadata,
                     chunking_strategy=chunking_strategy,
                 )
             except Exception as e:
                 log.error(
-                    f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                    f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
                 )
                 continue

@@ -176,10 +176,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
     async def query(
         self,
         content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
         query_config: RAGQueryConfig | None = None,
     ) -> RAGQueryResult:
-        if not vector_db_ids:
+        if not vector_store_ids:
             raise ValueError(
                 "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
             )

@@ -192,7 +192,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         )
         tasks = [
             self.vector_io_api.query_chunks(
-                vector_db_id=vector_db_id,
+                vector_store_id=vector_store_id,
                 query=query,
                 params={
                     "mode": query_config.mode,

@@ -201,18 +201,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                     "ranker": query_config.ranker,
                 },
             )
-            for vector_db_id in vector_db_ids
+            for vector_store_id in vector_store_ids
         ]
         results: list[QueryChunksResponse] = await asyncio.gather(*tasks)

         chunks = []
         scores = []

-        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
+        for vector_store_id, result in zip(vector_store_ids, results, strict=False):
             for chunk, score in zip(result.chunks, result.scores, strict=False):
                 if not hasattr(chunk, "metadata") or chunk.metadata is None:
                     chunk.metadata = {}
-                chunk.metadata["vector_db_id"] = vector_db_id
+                chunk.metadata["vector_store_id"] = vector_store_id

                 chunks.append(chunk)
                 scores.append(score)

@@ -250,7 +250,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         metadata_keys_to_exclude_from_context = [
             "token_count",
             "metadata_token_count",
-            "vector_db_id",
+            "vector_store_id",
         ]
         metadata_for_context = {}
         for k in chunk_metadata_keys_to_include_from_context:

@@ -275,7 +275,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                 "document_ids": [c.document_id for c in chunks[: len(picked)]],
                 "chunks": [c.content for c in chunks[: len(picked)]],
                 "scores": scores[: len(picked)],
-                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
+                "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
             },
         )

@@ -309,7 +309,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         )

     async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
-        vector_db_ids = kwargs.get("vector_db_ids", [])
+        vector_store_ids = kwargs.get("vector_store_ids", [])
         query_config = kwargs.get("query_config")
         if query_config:
             query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)

@@ -319,7 +319,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         query = kwargs["query"]
         result = await self.query(
             content=query,
-            vector_db_ids=vector_db_ids,
+            vector_store_ids=vector_store_ids,
             query_config=query_config,
         )

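Callers that pass tool arguments by name must follow the kwarg rename as well. A hedged sketch with illustrative values (`runtime` is a MemoryToolRuntimeImpl; the tool name and config keys here are assumptions for the example):

```python
# Hypothetical invocation of the knowledge-search tool after the rename.
result = await runtime.invoke_tool(
    tool_name="knowledge_search",
    kwargs={
        "query": "How do I configure hybrid search?",
        "vector_store_ids": ["vs_123"],  # previously "vector_db_ids"
        "query_config": {"max_chunks": 5},
    },
)
print(result.content)
```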
@@ -248,19 +248,19 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
         del self.cache[vector_store_id]
         await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = self.cache.get(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
+            raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")

         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = self.cache.get(vector_db_id)
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         return await index.query_chunks(query, params)

@@ -447,20 +447,20 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
         # and then call our index's add_chunks.
         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         return await index.query_chunks(query, params)

     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

@@ -163,14 +163,14 @@ The SQLite-vec provider supports three search modes:
 Example with hybrid search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
     query="your query here",
     params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
 )

 # Using RRF ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
     query="your query here",
     params={
         "mode": "hybrid",

@@ -182,7 +182,7 @@ response = await vector_io.query_chunks(

 # Using weighted ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
     query="your query here",
     params={
         "mode": "hybrid",

@@ -196,7 +196,7 @@ response = await vector_io.query_chunks(
 Example with explicit vector search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
     query="your query here",
     params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
 )

@@ -205,7 +205,7 @@ response = await vector_io.query_chunks(
 Example with keyword search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
     query="your query here",
     params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
 )

@@ -169,20 +169,20 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")

         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)

         if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")

         return await index.query_chunks(query, params)

@@ -348,19 +348,19 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         return await index.query_chunks(query, params)

     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

@@ -399,14 +399,14 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
         assert self.kvstore is not None
         await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         return await index.query_chunks(query, params)

     async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:

@@ -222,19 +222,19 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
         self.cache[vector_store_id] = index
         return index

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         return await index.query_chunks(query, params)

@@ -366,19 +366,19 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
         self.cache[vector_store_id] = index
         return index

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         await index.insert_chunks(chunks)

     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

         return await index.query_chunks(query, params)

@@ -333,7 +333,7 @@ class OpenAIVectorStoreMixin(ABC):
     @abstractmethod
     async def insert_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         chunks: list[Chunk],
         ttl_seconds: int | None = None,
     ) -> None:

@@ -342,7 +342,7 @@ class OpenAIVectorStoreMixin(ABC):

     @abstractmethod
     async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
         """Query chunks from a vector database (provider-specific implementation)."""
         pass

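Provider authors overriding these abstract methods need the matching signatures. A minimal, hypothetical in-memory adapter for illustration only (no real scoring or persistence):

```python
# Hedged sketch of the post-rename abstract signatures; the naive
# in-memory "index" below is an assumption made for the example.
from typing import Any

from llama_stack.apis.vector_io import Chunk, QueryChunksResponse


class InMemoryVectorIOAdapter:
    def __init__(self) -> None:
        self.store: dict[str, list[Chunk]] = {}

    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
        self.store.setdefault(vector_store_id, []).extend(chunks)

    async def query_chunks(
        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
        chunks = self.store.get(vector_store_id, [])
        # Constant scores keep the sketch self-contained.
        return QueryChunksResponse(chunks=chunks, scores=[1.0] * len(chunks))
```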
@@ -609,7 +609,7 @@ class OpenAIVectorStoreMixin(ABC):
         # TODO: Add support for ranking_options.ranker

         response = await self.query_chunks(
-            vector_db_id=vector_store_id,
+            vector_store_id=vector_store_id,
             query=search_query,
             params=params,
         )

@@ -803,7 +803,7 @@ class OpenAIVectorStoreMixin(ABC):
             )
         else:
             await self.insert_chunks(
-                vector_db_id=vector_store_id,
+                vector_store_id=vector_store_id,
                 chunks=chunks,
             )
         vector_store_file_object.status = "completed"

@@ -367,7 +367,7 @@ def test_openai_vector_store_with_chunks(

     # Insert chunks using the native LlamaStack API (since OpenAI API doesn't have direct chunk insertion)
     llama_client.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )


@@ -434,7 +434,7 @@ def test_openai_vector_store_search_relevance(

     # Insert chunks using native API
     llama_client.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )


@@ -484,7 +484,7 @@ def test_openai_vector_store_search_with_ranking_options(

     # Insert chunks
     llama_client.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )


@@ -544,7 +544,7 @@ def test_openai_vector_store_search_with_high_score_filter(

     # Insert chunks
     llama_client.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )


@@ -610,7 +610,7 @@ def test_openai_vector_store_search_with_max_num_results(

     # Insert chunks
     llama_client.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )


@@ -1175,7 +1175,7 @@ def test_openai_vector_store_search_modes(
     )

     client_with_models.vector_io.insert(
-        vector_db_id=vector_store.id,
+        vector_store_id=vector_store.id,
         chunks=sample_chunks,
     )
     query = "Python programming language"

@ -123,12 +123,12 @@ def test_insert_chunks(
|
||||||
actual_vector_store_id = create_response.id
|
actual_vector_store_id = create_response.id
|
||||||
|
|
||||||
client_with_empty_registry.vector_io.insert(
|
client_with_empty_registry.vector_io.insert(
|
||||||
vector_db_id=actual_vector_store_id,
|
vector_store_id=actual_vector_store_id,
|
||||||
chunks=sample_chunks,
|
chunks=sample_chunks,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client_with_empty_registry.vector_io.query(
|
response = client_with_empty_registry.vector_io.query(
|
||||||
vector_db_id=actual_vector_store_id,
|
vector_store_id=actual_vector_store_id,
|
||||||
query="What is the capital of France?",
|
query="What is the capital of France?",
|
||||||
)
|
)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
|
|
@@ -137,7 +137,7 @@ def test_insert_chunks(

     query, expected_doc_id = test_case
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_store_id,
+        vector_store_id=actual_vector_store_id,
         query=query,
     )
     assert response is not None
@@ -174,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings(
     ]

     client_with_empty_registry.vector_io.insert(
-        vector_db_id=actual_vector_store_id,
+        vector_store_id=actual_vector_store_id,
         chunks=chunks_with_embeddings,
     )

     provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_store_id,
+        vector_store_id=actual_vector_store_id,
         query="precomputed embedding test",
         params=vector_io_provider_params_dict.get(provider, None),
     )
@@ -224,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     ]

     client_with_empty_registry.vector_io.insert(
-        vector_db_id=actual_vector_store_id,
+        vector_store_id=actual_vector_store_id,
         chunks=chunks_with_embeddings,
     )

     provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_store_id,
+        vector_store_id=actual_vector_store_id,
         query="duplicate",
         params=vector_io_provider_params_dict.get(provider, None),
     )
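The two hunks above exercise precomputed embeddings. A hedged sketch of how such a chunk might be constructed; the Chunk import path and the 384-dimension dummy vector are assumptions, not taken from this diff:

    from llama_stack.apis.vector_io import Chunk  # assumed import path

    chunks_with_embeddings = [
        Chunk(
            content="duplicate",
            metadata={"document_id": "doc1"},
            embedding=[0.1] * 384,  # assumed; must match the store's embedding dimension
        )
    ]
    client_with_empty_registry.vector_io.insert(
        vector_store_id=actual_vector_store_id,  # renamed keyword
        chunks=chunks_with_embeddings,
    )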
@@ -23,14 +23,14 @@ class TestRagQuery:
             config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
         )
         with pytest.raises(ValueError):
-            await rag_tool.query(content=MagicMock(), vector_db_ids=[])
+            await rag_tool.query(content=MagicMock(), vector_store_ids=[])

     async def test_query_chunk_metadata_handling(self):
         rag_tool = MemoryToolRuntimeImpl(
             config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
         )
         content = "test query content"
-        vector_db_ids = ["db1"]
+        vector_store_ids = ["db1"]

         chunk_metadata = ChunkMetadata(
             document_id="doc1",
@@ -55,7 +55,7 @@ class TestRagQuery:
         query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0])

         rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response)
-        result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids)
+        result = await rag_tool.query(content=content, vector_store_ids=vector_store_ids)

         assert result is not None
         expected_metadata_string = (
@@ -90,7 +90,7 @@ class TestRagQuery:
             files_api=MagicMock(),
         )

-        vector_db_ids = ["db1", "db2"]
+        vector_store_ids = ["db1", "db2"]

         # Fake chunks from each DB
         chunk_metadata1 = ChunkMetadata(
@@ -101,7 +101,7 @@ class TestRagQuery:
         )
         chunk1 = Chunk(
             content="chunk from db1",
-            metadata={"vector_db_id": "db1", "document_id": "doc1"},
+            metadata={"vector_store_id": "db1", "document_id": "doc1"},
             stored_chunk_id="c1",
             chunk_metadata=chunk_metadata1,
         )
@@ -114,7 +114,7 @@ class TestRagQuery:
         )
         chunk2 = Chunk(
             content="chunk from db2",
-            metadata={"vector_db_id": "db2", "document_id": "doc2"},
+            metadata={"vector_store_id": "db2", "document_id": "doc2"},
             stored_chunk_id="c2",
             chunk_metadata=chunk_metadata2,
         )
@@ -126,13 +126,13 @@ class TestRagQuery:
             ]
         )

-        result = await rag_tool.query(content="test", vector_db_ids=vector_db_ids)
+        result = await rag_tool.query(content="test", vector_store_ids=vector_store_ids)
         returned_chunks = result.metadata["chunks"]
         returned_scores = result.metadata["scores"]
         returned_doc_ids = result.metadata["document_ids"]
-        returned_vector_db_ids = result.metadata["vector_db_ids"]
+        returned_vector_store_ids = result.metadata["vector_store_ids"]

         assert returned_chunks == ["chunk from db1", "chunk from db2"]
         assert returned_scores == (0.9, 0.8)
         assert returned_doc_ids == ["doc1", "doc2"]
-        assert returned_vector_db_ids == ["db1", "db2"]
+        assert returned_vector_store_ids == ["db1", "db2"]
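On the RAG tool side, the rename flows through both the query keyword and the result metadata keys, as the assertions above show. In sketch form:

    # Both the keyword argument and the metadata keys follow the rename.
    result = await rag_tool.query(content="test", vector_store_ids=["db1", "db2"])
    stores = result.metadata["vector_store_ids"]  # was result.metadata["vector_db_ids"]
    doc_ids = result.metadata["document_ids"]     # key unchanged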