Merge branch 'llamastack:main' into crewai

Kai Wu 2025-09-02 09:35:26 -07:00 committed by GitHub
commit ad9b12a950
81 changed files with 12332 additions and 2029 deletions


@@ -4129,7 +4129,7 @@
        "tags": [
          "Files"
        ],
        "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).",
        "parameters": [],
        "requestBody": {
          "content": {
@@ -4143,11 +4143,33 @@
                },
                "purpose": {
                  "$ref": "#/components/schemas/OpenAIFilePurpose"
                },
                "expires_after_anchor": {
                  "oneOf": [
                    {
                      "type": "string"
                    },
                    {
                      "type": "null"
                    }
                  ]
                },
                "expires_after_seconds": {
                  "oneOf": [
                    {
                      "type": "integer"
                    },
                    {
                      "type": "null"
                    }
                  ]
                }
              },
              "required": [
                "file",
                "purpose",
                "expires_after_anchor",
                "expires_after_seconds"
              ]
            }
          }
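
The spec above describes the upload as a multipart form request with bracketed expiration fields. Here is a minimal client-side sketch of such a request; the base URL, port, route, and file contents are assumptions for a locally running server, not part of the spec, while the form field names follow the description above:

```python
# Hedged sketch: base URL and port are assumptions for a local server;
# the multipart form field names mirror the spec above.
import requests

resp = requests.post(
    "http://localhost:8321/v1/openai/v1/files",
    files={"file": ("notes.txt", b"hello world")},
    data={
        "purpose": "assistants",
        "expires_after[anchor]": "created_at",
        "expires_after[seconds]": "86400",  # 1 day; must be within [3600, 2592000]
    },
)
print(resp.json())
```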


@@ -2933,6 +2933,10 @@ paths:
        - file: The File object (not file name) to be uploaded.
        - purpose: The intended purpose of the uploaded file.
        - expires_after: Optional form values describing expiration for the file.
          Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>.
          Seconds must be between 3600 and 2592000 (1 hour to 30 days).
      parameters: []
      requestBody:
        content:
@@ -2945,9 +2949,19 @@ paths:
                  format: binary
                purpose:
                  $ref: '#/components/schemas/OpenAIFilePurpose'
                expires_after_anchor:
                  oneOf:
                    - type: string
                    - type: 'null'
                expires_after_seconds:
                  oneOf:
                    - type: integer
                    - type: 'null'
              required:
                - file
                - purpose
                - expires_after_anchor
                - expires_after_seconds
        required: true
  /v1/openai/v1/models:
    get:


@@ -33,7 +33,7 @@ The list of open-benchmarks we currently support:
- [MMMU](https://arxiv.org/abs/2311.16502) (A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI): Benchmark designed to evaluate multimodal models.

You can follow this [contributing guide](../references/evals_reference/index.md#open-benchmark-contributing-guide) to add more open-benchmarks to Llama Stack

#### Run evaluation on open-benchmarks via CLI


@@ -88,7 +88,7 @@ Interactive pages for users to play with and explore Llama Stack API capabilitie
- **API Resources**: Inspect Llama Stack API resources
  - This page allows you to inspect Llama Stack API resources (`models`, `datasets`, `memory_banks`, `benchmarks`, `shields`).
  - Under the hood, it uses Llama Stack's `/<resources>/list` API to get information about each resource.
  - Please visit [Core Concepts](../../concepts/index.md) for more details about the resources.

### Starting the Llama Stack Playground


@@ -3,7 +3,7 @@
Llama Stack (LLS) provides two different APIs for building AI applications with tool calling capabilities: the **Agents API** and the **OpenAI Responses API**. While both enable AI systems to use tools and maintain full conversation history, they serve different use cases and have distinct characteristics.

```{note}
**Note:** For simple and basic inferencing, you may want to use the [Chat Completions API](../providers/openai.md#chat-completions) directly, before progressing to Agents or Responses API.
```

## Overview
@@ -173,7 +173,7 @@ Both APIs demonstrate distinct strengths that make them valuable on their own fo
## For More Information

- **LLS Agents API**: For detailed information on creating and managing agents, see the [Agents documentation](agent.md)
- **OpenAI Responses API**: For information on using the OpenAI-compatible responses API, see the [OpenAI API documentation](https://platform.openai.com/docs/api-reference/responses)
- **Chat Completions API**: For the default backend API used by Agents, see the [Chat Completions providers documentation](../providers/openai.md#chat-completions)
- **Agent Execution Loop**: For understanding how agents process turns and steps in their execution, see the [Agent Execution Loop documentation](agent_execution_loop.md)


@@ -6,4 +6,4 @@ While there is a lot of flexibility to mix-and-match providers, often users will
**Locally Hosted Distro**: You may want to run Llama Stack on your own hardware. Typically though, you still need to use Inference via an external service. You can use providers like HuggingFace TGI, Fireworks, Together, etc. for this purpose. Or you may have access to GPUs and can run a [vLLM](https://github.com/vllm-project/vllm) or [NVIDIA NIM](https://build.nvidia.com/nim?filters=nimType%3Anim_type_run_anywhere&q=llama) instance. If you "just" have a regular desktop machine, you can use [Ollama](https://ollama.com/) for inference. To provide convenient quick access to these options, we provide a number of such pre-configured locally-hosted Distros.

**On-device Distro**: To run Llama Stack directly on an edge device (mobile phone or a tablet), we provide Distros for [iOS](../distributions/ondevice_distro/ios_sdk.md) and [Android](../distributions/ondevice_distro/android_sdk.md)


@@ -14,6 +14,13 @@ Here are some example PRs to help you get started:
- [Nvidia Inference Implementation](https://github.com/meta-llama/llama-stack/pull/355)
- [Model context protocol Tool Runtime](https://github.com/meta-llama/llama-stack/pull/665)

## Guidelines for creating Internal or External Providers

| **Type** | Internal (In-tree) | External (out-of-tree) |
|----------|--------------------|------------------------|
| **Description** | A provider that is directly in the Llama Stack code | A provider that is outside of the Llama Stack core codebase but is still accessible and usable by Llama Stack. |
| **Benefits** | Ability to interact with the provider with minimal additional configurations or installations | Contributors do not have to add directly to the code to create providers accessible on Llama Stack. Keeps provider-specific code separate from the core Llama Stack code. |

## Inference Provider Patterns

When implementing Inference providers for OpenAI-compatible APIs, Llama Stack provides several mixin classes to simplify development and ensure consistent behavior across providers.
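
As a rough illustration of why such mixins help, here is a generic sketch of the pattern; every class, method, and URL below is hypothetical, not Llama Stack's actual mixin API:

```python
# Hypothetical sketch of the mixin pattern, not Llama Stack's actual classes.
# Shared OpenAI-compatible plumbing lives in the mixin; a provider subclass
# only supplies its own endpoint and credentials.
import httpx


class OpenAICompatMixin:
    base_url: str
    api_key: str

    async def chat_completion(self, model: str, messages: list[dict]) -> dict:
        # One shared implementation of the OpenAI-style chat endpoint.
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                f"{self.base_url}/v1/chat/completions",
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={"model": model, "messages": messages},
            )
            resp.raise_for_status()
            return resp.json()


class ExampleInferenceProvider(OpenAICompatMixin):
    # Provider-specific configuration is all that remains in the subclass.
    base_url = "https://api.example.com"
    api_key = "sk-example"
```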


@@ -27,7 +27,7 @@ Then, you can access the APIs like `models` and `inference` on the client and ca
response = client.models.list()
```

If you've created a [custom distribution](building_distro.md), you can also use the run.yaml configuration file directly:

```python
client = LlamaStackAsLibraryClient(config_path)


@@ -22,17 +22,17 @@ else
fi

if [ -z "${GITHUB_CLIENT_ID:-}" ]; then
  echo "ERROR: GITHUB_CLIENT_ID not set. You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
  exit 1
fi

if [ -z "${GITHUB_CLIENT_SECRET:-}" ]; then
  echo "ERROR: GITHUB_CLIENT_SECRET not set. You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
  exit 1
fi

if [ -z "${LLAMA_STACK_UI_URL:-}" ]; then
  echo "ERROR: LLAMA_STACK_UI_URL not set. Should be set to the external URL of the UI (excluding port). You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
  exit 1
fi


@@ -66,7 +66,7 @@ llama stack run starter --port 5050
Ensure the Llama Stack server version is the same as the Kotlin SDK Library for maximum compatibility.

Other inference providers: [Table](../../index.md#supported-llama-stack-implementations)

How to set remote localhost in Demo App: [Settings](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/examples/android_app#settings)


@@ -2,7 +2,7 @@
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->

# Meta Reference GPU Distribution

```{toctree}
:maxdepth: 2
@@ -41,7 +41,7 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models

Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.

```
$ llama model list --downloaded


@@ -50,6 +50,7 @@ The following models are available by default:
- `meta/llama-3.2-11b-vision-instruct `
- `meta/llama-3.2-90b-vision-instruct `
- `meta/llama-3.3-70b-instruct `
- `nvidia/vila `
- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
- `nvidia/nv-embedqa-e5-v5 `
- `nvidia/nv-embedqa-mistral-7b-v2 `


@@ -12,6 +12,60 @@ That means you'll get fast and efficient vector retrieval.
- Easy to use
- Fully integrated with Llama Stack

There are three implementations of search for PGVectorIndex available:

1. Vector Search:
   - How it works:
     - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
     - Compares query embeddings against stored embeddings using cosine distance or other distance metrics
     - e.g. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
   - Characteristics:
     - Semantic understanding - finds documents similar in meaning even if they don't share keywords
     - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
   - Best for: Finding conceptually related content, handling synonyms, cross-language search

2. Keyword Search:
   - How it works:
     - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
     - Converts text to searchable tokens using to_tsvector('english', text); the default language is English
     - e.g. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
   - Characteristics:
     - Lexical matching - finds exact keyword matches and variations
     - Uses GIN (Generalized Inverted Index) for fast text search performance
     - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
   - Best for: Exact term matching, proper names, technical terms, Boolean-style queries

3. Hybrid Search:
   - How it works:
     - Combines both vector and keyword search results
     - Runs both searches independently, then merges results using configurable reranking (see the sketch after the schema below)
     - Two reranking strategies available:
       - Reciprocal Rank Fusion (RRF), with a default impact factor of 60.0
       - Weighted Average, with a default weight of 0.5
   - Characteristics:
     - Best of both worlds: semantic understanding + exact matching
     - Documents appearing in both searches get boosted scores
     - Configurable balance between semantic and lexical matching
   - Best for: General-purpose search where you want both precision and recall

The PGVector implementation stores data in a schema optimized for all three search types:

```sql
CREATE TABLE vector_store_xxx (
    id TEXT PRIMARY KEY,
    document JSONB,               -- Original document
    embedding vector(dimension),  -- For vector search
    content_text TEXT,            -- Raw text content
    tokenized_content TSVECTOR    -- For keyword search
);

-- Indexes for performance
CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
-- Vector index created automatically by pgvector
```
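
To make the hybrid reranking concrete, here is a minimal, self-contained sketch of Reciprocal Rank Fusion; the function name and inputs are illustrative rather than the provider's actual API, and the impact factor of 60.0 mirrors the default noted above:

```python
# Illustrative RRF merge, not the provider's actual code. Each document's
# fused score is the sum of 1 / (k + rank) over the result lists it appears
# in, so documents ranked well by both searches are boosted.
def rrf_merge(vector_hits: list[str], keyword_hits: list[str], k: float = 60.0) -> list[str]:
    scores: dict[str, float] = {}
    for hits in (vector_hits, keyword_hits):
        for rank, doc_id in enumerate(hits, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)


# "doc-b" ranks well in both lists, so it surfaces first.
print(rrf_merge(["doc-a", "doc-b", "doc-c"], ["doc-b", "doc-d"]))
```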
## Usage

To use PGVector in your Llama Stack project, follow these steps:
@@ -20,6 +74,25 @@ To use PGVector in your Llama Stack project, follow these steps:
2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
3. Start storing and querying vectors.

## This is an example of how you can set up your environment for using PGVector
1. Export env vars:
```bash
export ENABLE_PGVECTOR=true
export PGVECTOR_HOST=localhost
export PGVECTOR_PORT=5432
export PGVECTOR_DB=llamastack
export PGVECTOR_USER=llamastack
export PGVECTOR_PASSWORD=llamastack
```
2. Create DB:
```bash
psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
```
## Installation

You can install PGVector using docker:


@@ -17,6 +17,7 @@ Weaviate supports:
- Metadata filtering
- Multi-modal retrieval

## Usage

To use Weaviate in your Llama Stack project, follow these steps:


@@ -202,7 +202,7 @@ pprint(response)
Llama Stack offers a library of scoring functions and the `/scoring` API, allowing you to run evaluations on your pre-annotated AI application datasets.

In this example, we will work with an example RAG dataset you have built previously, label it with an annotation, and use LLM-As-Judge with a custom judge prompt for scoring. Please check out our [Llama Stack Playground](../../building_applications/playground/index.md) for an interactive interface to upload datasets and run scoring.

```python
judge_model_id = "meta-llama/Llama-3.1-405B-Instruct-FP8"


@@ -478,7 +478,6 @@ llama-stack-client scoring_functions list
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃ identifier                                 ┃ provider_id  ┃ description                                                     ┃ type             ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ basic::bfcl                                │ basic        │ BFCL complex scoring                                            │ scoring_function │
│ basic::docvqa                              │ basic        │ DocVQA Visual Question & Answer scoring function                │ scoring_function │
│ basic::equality                            │ basic        │ Returns 1.0 if the input is equal to the target, 0.0            │ scoring_function │
│                                            │              │ otherwise.                                                      │                  │


@@ -5,10 +5,10 @@
# the root directory of this source tree.

from enum import StrEnum
from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable

from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel, Field

from llama_stack.apis.common.responses import Order
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -49,6 +49,23 @@ class OpenAIFileObject(BaseModel):
    purpose: OpenAIFilePurpose


@json_schema_type
class ExpiresAfter(BaseModel):
    """
    Control expiration of uploaded files.

    Params:
     - anchor, must be "created_at"
     - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
    """

    MIN: ClassVar[int] = 3600  # 1 hour
    MAX: ClassVar[int] = 2592000  # 30 days

    anchor: Literal["created_at"]
    seconds: int = Field(..., ge=3600, le=2592000)


@json_schema_type
class ListOpenAIFileResponse(BaseModel):
    """
@@ -92,6 +109,9 @@ class Files(Protocol):
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
        # TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null
    ) -> OpenAIFileObject:
        """
        Upload a file that can be used across various endpoints.
@@ -99,6 +119,7 @@ class Files(Protocol):
        The file upload should be a multipart form request with:
        - file: The File object (not file name) to be uploaded.
        - purpose: The intended purpose of the uploaded file.
        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).

        :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
        :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
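
A quick sanity check of the bounds that `ExpiresAfter` enforces; this is a standalone re-sketch of the model above, so it runs without the llama_stack package installed:

```python
# Standalone re-sketch of ExpiresAfter (so this runs without llama_stack);
# bounds match the spec above: 3600s (1 hour) to 2592000s (30 days).
from typing import ClassVar, Literal

from pydantic import BaseModel, Field, ValidationError


class ExpiresAfter(BaseModel):
    MIN: ClassVar[int] = 3600
    MAX: ClassVar[int] = 2592000

    anchor: Literal["created_at"]
    seconds: int = Field(..., ge=3600, le=2592000)


print(ExpiresAfter(anchor="created_at", seconds=86400))  # accepted: 1 day

try:
    ExpiresAfter(anchor="created_at", seconds=60)  # rejected: below the 1-hour floor
except ValidationError as err:
    print(err.errors()[0]["type"])  # greater_than_equal
```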


@@ -105,12 +105,12 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
        method = getattr(impls[api], register_method)
        for obj in objects:
            if hasattr(obj, "provider_id"):
                # Do not register models on disabled providers
                if not obj.provider_id or obj.provider_id == "__disabled__":
                    logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
                    continue
            logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")

            # we want to maintain the type information in arguments to method.
            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
@@ -225,7 +225,10 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
        try:
            result = re.sub(pattern, get_env_var, config)
            # Only apply type conversion if substitution actually happened
            if result != config:
                return _convert_string_to_proper_type(result)
            return result
        except EnvVarError as e:
            raise EnvVarError(e.var_name, e.path) from None
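
The guard matters because a literal such as "true" that contains no ${env.*} reference should keep its string type. A toy version of the behavior, where the pattern and converter are simplified stand-ins for the real helpers:

```python
# Toy stand-ins for the real pattern and _convert_string_to_proper_type.
import re

pattern = r"\$\{env\.(\w+)\}"


def convert(s: str):
    # Naive coercion: booleans and ints, else leave the string alone.
    if s in ("true", "false"):
        return s == "true"
    return int(s) if s.isdigit() else s


def replace_env_vars(config: str, env: dict[str, str]):
    result = re.sub(pattern, lambda m: env[m.group(1)], config)
    if result != config:  # only coerce when a substitution happened
        return convert(result)
    return result  # untouched literals keep their original string type


env = {"PORT": "8321"}
print(replace_env_vars("${env.PORT}", env))  # -> 8321 (int, substituted)
print(replace_env_vars("true", env))         # -> 'true' (string preserved)
```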


@@ -1,7 +1,7 @@
---
orphan: true
---

# Meta Reference GPU Distribution

```{toctree}
:maxdepth: 2
@@ -29,7 +29,7 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models

Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.

```
$ llama model list --downloaded


@@ -134,6 +134,11 @@ models:
  provider_id: nvidia
  provider_model_id: meta/llama-3.3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: nvidia/vila
  provider_id: nvidia
  provider_model_id: nvidia/vila
  model_type: llm
- metadata:
    embedding_dimension: 2048
    context_length: 8192


@@ -43,7 +43,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
        "openai",
        [
            ProviderModelEntry(
                provider_model_id="gpt-4o",
                model_type=ModelType.llm,
            )
        ],
@@ -53,7 +53,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
        "anthropic",
        [
            ProviderModelEntry(
                provider_model_id="claude-3-5-sonnet-latest",
                model_type=ModelType.llm,
            )
        ],
@@ -206,13 +206,6 @@ def get_distribution_template() -> DistributionTemplate:
                uri="huggingface://datasets/llamastack/math_500?split=test",
            ),
        ),
        DatasetInput(
            dataset_id="bfcl",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
            ),
        ),
        DatasetInput(
            dataset_id="ifeval",
            purpose=DatasetPurpose.eval_messages_answer,
@@ -250,11 +243,6 @@ def get_distribution_template() -> DistributionTemplate:
            dataset_id="math_500",
            scoring_functions=["basic::regex_parser_math_response"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-bfcl",
            dataset_id="bfcl",
            scoring_functions=["basic::bfcl"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-ifeval",
            dataset_id="ifeval",


@@ -136,14 +136,14 @@ inference_store:
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db
models:
- metadata: {}
  model_id: gpt-4o
  provider_id: openai
  provider_model_id: gpt-4o
  model_type: llm
- metadata: {}
  model_id: claude-3-5-sonnet-latest
  provider_id: anthropic
  provider_model_id: claude-3-5-sonnet-latest
  model_type: llm
- metadata: {}
  model_id: gemini/gemini-1.5-flash
@@ -188,12 +188,6 @@ datasets:
    uri: huggingface://datasets/llamastack/math_500?split=test
  metadata: {}
  dataset_id: math_500
- purpose: eval/messages-answer
  source:
    type: uri
    uri: huggingface://datasets/llamastack/bfcl_v3?split=train
  metadata: {}
  dataset_id: bfcl
- purpose: eval/messages-answer
  source:
    type: uri
@@ -228,11 +222,6 @@ benchmarks:
  - basic::regex_parser_math_response
  metadata: {}
  benchmark_id: meta-reference-math-500
- dataset_id: bfcl
  scoring_functions:
  - basic::bfcl
  metadata: {}
  benchmark_id: meta-reference-bfcl
- dataset_id: ifeval
  scoring_functions:
  - basic::ifeval


@@ -86,11 +86,16 @@ class LocalfsFilesImpl(Files):
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
    ) -> OpenAIFileObject:
        """Upload a file that can be used across various endpoints."""
        if not self.sql_store:
            raise RuntimeError("Files provider not initialized")

        if expires_after_anchor is not None or expires_after_seconds is not None:
            raise NotImplementedError("File expiration is not supported by this provider")

        file_id = self._generate_file_id()
        file_path = self._get_file_path(file_id)


@@ -22,7 +22,6 @@ from llama_stack.providers.utils.common.data_schema_validator import (
)

from .config import BasicScoringConfig
from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
from .scoring_fn.equality_scoring_fn import EqualityScoringFn
from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn
@@ -37,7 +36,6 @@ FIXED_FNS = [
    SubsetOfScoringFn,
    RegexParserScoringFn,
    RegexParserMathResponseScoringFn,
    BFCLScoringFn,
    IfEvalScoringFn,
    DocVQAScoringFn,
]


@@ -1,93 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
import re
from typing import Any

from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from ..utils.bfcl.ast_parser import decode_ast
from ..utils.bfcl.checker import ast_checker, is_empty_output
from .fn_defs.bfcl import bfcl


def postprocess(x: dict[str, Any], test_category: str) -> dict[str, Any]:
    contain_func_call = False
    error = None
    error_type = None
    checker_result = {}
    try:
        prediction = decode_ast(x["generated_answer"], x["language"]) or ""
        contain_func_call = True
        # if not is_function_calling_format_output(prediction):
        if is_empty_output(prediction):
            contain_func_call = False
            error = "Did not output in the specified format. Note: the model_result is wrapped in a string to ensure json serializability."
            error_type = "ast_decoder:decoder_wrong_output_format"
        else:
            checker_result = ast_checker(
                json.loads(x["function"]),
                prediction,
                json.loads(x["ground_truth"]),
                x["language"],
                test_category=test_category,
                model_name="",
            )
    except Exception as e:
        prediction = ""
        error = f"Invalid syntax. Failed to decode AST. {str(e)}"
        error_type = "ast_decoder:decoder_failed"
    return {
        "prediction": prediction,
        "contain_func_call": contain_func_call,
        "valid": checker_result.get("valid", False),
        "error": error or checker_result.get("error", ""),
        "error_type": error_type or checker_result.get("error_type", ""),
    }


def gen_valid(x: dict[str, Any]) -> dict[str, float]:
    return {"valid": x["valid"]}


def gen_relevance_acc(x: dict[str, Any]) -> dict[str, float]:
    # This function serves for both relevance and irrelevance tests, which share the exact opposite logic.
    # If `test_category` is "irrelevance", the model is expected to output no function call.
    # No function call means either the AST decoding fails (an error message is generated) or the decoded AST does not contain any function call (such as an empty list, `[]`).
    # If `test_category` is "relevance", the model is expected to output a function call, and an empty list doesn't count as a function call.
    acc = not x["contain_func_call"] if "irrelevance" in x["id"] else x["contain_func_call"]
    return {"valid": float(acc)}


class BFCLScoringFn(RegisteredBaseScoringFn):
    """
    A scoring_fn for BFCL
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.supported_fn_defs_registry = {
            bfcl.identifier: bfcl,
        }

    async def score_row(
        self,
        input_row: dict[str, Any],
        scoring_fn_identifier: str | None = "bfcl",
        scoring_params: ScoringFnParams | None = None,
    ) -> ScoringResultRow:
        test_category = re.sub(r"_[0-9_-]+$", "", input_row["id"])
        score_result = postprocess(input_row, test_category)
        if test_category in {"irrelevance", "live_relevance", "live_irrelevance"}:
            score = gen_relevance_acc(score_result)["valid"]
        else:
            score = gen_valid(score_result)["valid"]
        return {
            "score": float(score),
        }


@@ -1,21 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

bfcl = ScoringFn(
    identifier="basic::bfcl",
    description="BFCL complex scoring",
    return_type=NumberType(),
    provider_id="basic",
    provider_resource_id="bfcl",
    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),
)


@@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@@ -1,296 +0,0 @@
# ruff: noqa
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import ast

from .tree_sitter import get_parser


def parse_java_function_call(source_code):
    if not source_code.endswith(";"):
        source_code += ";"  # Necessary for the parser not to register an error
    parser = get_parser("java")
    tree = parser.parse(bytes(source_code, "utf8"))
    root_node = tree.root_node

    if root_node.has_error:
        raise Exception("Error parsing java the source code.")

    def get_text(node):
        """Returns the text represented by the node."""
        return source_code[node.start_byte : node.end_byte]

    def traverse_node(node, nested=False):
        if node.type == "string_literal":
            if nested:
                return get_text(node)
            # Strip surrounding quotes from string literals
            return get_text(node)[1:-1]
        elif node.type == "character_literal":
            if nested:
                return get_text(node)
            # Strip surrounding single quotes from character literals
            return get_text(node)[1:-1]
        """Traverse the node to collect texts for complex structures."""
        if node.type in [
            "identifier",
            "class_literal",
            "type_identifier",
            "method_invocation",
        ]:
            return get_text(node)
        elif node.type == "array_creation_expression":
            # Handle array creation expression specifically
            type_node = node.child_by_field_name("type")
            value_node = node.child_by_field_name("value")
            type_text = traverse_node(type_node, True)
            value_text = traverse_node(value_node, True)
            return f"new {type_text}[]{value_text}"
        elif node.type == "object_creation_expression":
            # Handle object creation expression specifically
            type_node = node.child_by_field_name("type")
            arguments_node = node.child_by_field_name("arguments")
            type_text = traverse_node(type_node, True)
            if arguments_node:
                # Process each argument carefully, avoiding unnecessary punctuation
                argument_texts = []
                for child in arguments_node.children:
                    if child.type not in [
                        ",",
                        "(",
                        ")",
                    ]:  # Exclude commas and parentheses
                        argument_text = traverse_node(child, True)
                        argument_texts.append(argument_text)
                arguments_text = ", ".join(argument_texts)
                return f"new {type_text}({arguments_text})"
            else:
                return f"new {type_text}()"
        elif node.type == "set":
            # Handling sets specifically
            items = [traverse_node(n, True) for n in node.children if n.type not in [",", "set"]]
            return "{" + ", ".join(items) + "}"
        elif node.child_count > 0:
            return "".join(traverse_node(child, True) for child in node.children)
        else:
            return get_text(node)

    def extract_arguments(args_node):
        arguments = {}
        for child in args_node.children:
            if child.type == "assignment_expression":
                # For named parameters
                name_node, value_node = child.children[0], child.children[2]
                name = get_text(name_node)
                value = traverse_node(value_node)
                if name in arguments:
                    if not isinstance(arguments[name], list):
                        arguments[name] = [arguments[name]]
                    arguments[name].append(value)
                else:
                    arguments[name] = value
                # arguments.append({'name': name, 'value': value})
            elif child.type in ["identifier", "class_literal", "set"]:
                # For unnamed parameters and handling sets
                value = traverse_node(child)
                if None in arguments:
                    if not isinstance(arguments[None], list):
                        arguments[None] = [arguments[None]]
                    arguments[None].append(value)
                else:
                    arguments[None] = value
        return arguments

    def traverse(node):
        if node.type == "method_invocation":
            # Extract the function name and its arguments
            method_name = get_text(node.child_by_field_name("name"))
            class_name_node = node.child_by_field_name("object")
            if class_name_node:
                class_name = get_text(class_name_node)
                function_name = f"{class_name}.{method_name}"
            else:
                function_name = method_name
            arguments_node = node.child_by_field_name("arguments")
            if arguments_node:
                arguments = extract_arguments(arguments_node)
                for key, value in arguments.items():
                    if isinstance(value, list):
                        raise Exception("Error: Multiple arguments with the same name are not supported.")
                return [{function_name: arguments}]
        else:
            for child in node.children:
                result = traverse(child)
                if result:
                    return result

    result = traverse(root_node)
    return result if result else {}


def parse_javascript_function_call(source_code):
    if not source_code.endswith(";"):
        source_code += ";"  # Necessary for the parser not to register an error
    parser = get_parser("javascript")
    # Parse the source code
    tree = parser.parse(bytes(source_code, "utf8"))
    root_node = tree.root_node
    if root_node.has_error:
        raise Exception("Error js parsing the source code.")

    # Function to recursively extract argument details
    def extract_arguments(node):
        args = {}
        for child in node.children:
            if child.type == "assignment_expression":
                # Extract left (name) and right (value) parts of the assignment
                name = child.children[0].text.decode("utf-8")
                value = child.children[2].text.decode("utf-8")
                if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
                    value = value[1:-1]  # Trim the quotation marks
                if name in args:
                    if not isinstance(args[name], list):
                        args[name] = [args[name]]
                    args[name].append(value)
                else:
                    args[name] = value
            elif child.type == "identifier" or child.type == "true":
                # Handle non-named arguments and boolean values
                value = child.text.decode("utf-8")
                if None in args:
                    if not isinstance(args[None], list):
                        args[None] = [args[None]]
                    args[None].append(value)
                else:
                    args[None] = value
        return args

    # Find the function call and extract its name and arguments
    if root_node.type == "program":
        for child in root_node.children:
            if child.type == "expression_statement":
                for sub_child in child.children:
                    if sub_child.type == "call_expression":
                        function_name = sub_child.children[0].text.decode("utf8")
                        arguments_node = sub_child.children[1]
                        parameters = extract_arguments(arguments_node)
                        for key, value in parameters.items():
                            if isinstance(value, list):
                                raise Exception("Error: Multiple arguments with the same name are not supported.")
                        result = [{function_name: parameters}]
                        return result


def ast_parse(input_str, language="Python"):
    if language == "Python":
        cleaned_input = input_str.strip("[]'")
        parsed = ast.parse(cleaned_input, mode="eval")
        extracted = []
        if isinstance(parsed.body, ast.Call):
            extracted.append(resolve_ast_call(parsed.body))
        else:
            for elem in parsed.body.elts:
                extracted.append(resolve_ast_call(elem))
        return extracted
    elif language == "Java":
        return parse_java_function_call(input_str[1:-1])  # Remove the [ and ] from the string
    elif language == "JavaScript":
        return parse_javascript_function_call(input_str[1:-1])
    else:
        raise NotImplementedError(f"Unsupported language: {language}")


def resolve_ast_call(elem):
    # Handle nested attributes for deeply nested module paths
    func_parts = []
    func_part = elem.func
    while isinstance(func_part, ast.Attribute):
        func_parts.append(func_part.attr)
        func_part = func_part.value
    if isinstance(func_part, ast.Name):
        func_parts.append(func_part.id)
    func_name = ".".join(reversed(func_parts))
    args_dict = {}
    # Parse when args are simply passed as an unnamed dictionary arg
    for arg in elem.args:
        if isinstance(arg, ast.Dict):
            for key, value in zip(arg.keys, arg.values):
                if isinstance(key, ast.Constant):
                    arg_name = key.value
                output = resolve_ast_by_type(value)
                args_dict[arg_name] = output
    for arg in elem.keywords:
        output = resolve_ast_by_type(arg.value)
        args_dict[arg.arg] = output
    return {func_name: args_dict}


def resolve_ast_by_type(value):
    if isinstance(value, ast.Constant):
        if value.value is Ellipsis:
            output = "..."
        else:
            output = value.value
    elif isinstance(value, ast.UnaryOp):
        output = -value.operand.value
    elif isinstance(value, ast.List):
        output = [resolve_ast_by_type(v) for v in value.elts]
    elif isinstance(value, ast.Dict):
        output = {resolve_ast_by_type(k): resolve_ast_by_type(v) for k, v in zip(value.keys, value.values)}
    elif isinstance(value, ast.NameConstant):  # Added this condition to handle boolean values
        output = value.value
    elif isinstance(value, ast.BinOp):  # Added this condition to handle function calls as arguments
        output = eval(ast.unparse(value))
    elif isinstance(value, ast.Name):
        output = value.id
    elif isinstance(value, ast.Call):
        if len(value.keywords) == 0:
            output = ast.unparse(value)
        else:
            output = resolve_ast_call(value)
    elif isinstance(value, ast.Tuple):
        output = tuple(resolve_ast_by_type(v) for v in value.elts)
    elif isinstance(value, ast.Lambda):
        output = eval(ast.unparse(value.body[0].value))
    elif isinstance(value, ast.Ellipsis):
        output = "..."
    elif isinstance(value, ast.Subscript):
        try:
            output = ast.unparse(value.body[0].value)
        except:
            output = ast.unparse(value.value) + "[" + ast.unparse(value.slice) + "]"
    else:
        raise Exception(f"Unsupported AST type: {type(value)}")
    return output


def decode_ast(result, language="Python"):
    func = result
    func = func.replace("\n", "")  # remove new line characters
    if not func.startswith("["):
        func = "[" + func
    if not func.endswith("]"):
        func = func + "]"
    decoded_output = ast_parse(func, language)
    return decoded_output


def decode_execute(result):
    func = result
    func = func.replace("\n", "")  # remove new line characters
    if not func.startswith("["):
        func = "[" + func
    if not func.endswith("]"):
        func = func + "]"
    decode_output = ast_parse(func)
    execution_list = []
    for function_call in decode_output:
        for key, value in function_call.items():
            execution_list.append(f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})")
    return execution_list


@@ -1,989 +0,0 @@
# ruff: noqa
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
import re
import time
from typing import Any

# Comment out for now until we actually use the rest checker in evals
# import requests  # Do not remove this import even though it seems to be unused. It's used in the executable_checker_rest function.


class NoAPIKeyError(Exception):
    def __init__(self):
        self.message = "Please fill in the API keys in the function_credential_config.json file. If you do not provide the API keys, the executable test category results will be inaccurate."
        super().__init__(self.message)


REAL_TIME_MATCH_ALLOWED_DIFFERENCE = 0.2

JAVA_TYPE_CONVERSION = {
    "byte": int,
    "short": int,
    "integer": int,
    "float": float,
    "double": float,
    "long": int,
    "boolean": bool,
    "char": str,
    "Array": list,
    "ArrayList": list,
    "Set": set,
    "HashMap": dict,
    "Hashtable": dict,
    "Queue": list,  # this can be `queue.Queue` as well, for simplicity we check with list
    "Stack": list,
    "String": str,
    "any": str,
}

JS_TYPE_CONVERSION = {
    "String": str,
    "integer": int,
    "float": float,
    "Bigint": int,
    "Boolean": bool,
    "dict": dict,
    "array": list,
    "any": str,
}

# We switch to conditional import for the following two imports to avoid unnecessary installations.
# User doesn't need to setup the tree-sitter packages if they are not running the test for that language.
# from js_type_converter import js_type_converter
# from java_type_converter import java_type_converter

PYTHON_TYPE_MAPPING = {
    "string": str,
    "integer": int,
    "float": float,
    "boolean": bool,
    "array": list,
    "tuple": list,
    "dict": dict,
    "any": str,
}

# This is the list of types that we need to recursively check its values
PYTHON_NESTED_TYPE_CHECK_LIST = ["array", "tuple"]

NESTED_CONVERSION_TYPE_LIST = ["Array", "ArrayList", "array"]


#### Helper functions for AST ####
def find_description(func_descriptions, name):
    if type(func_descriptions) == list:
        for func_description in func_descriptions:
            if func_description["name"] == name:
                return func_description
        return None
    else:
        # it is a dict, there is only one function
        return func_descriptions


def get_possible_answer_type(possible_answer: list):
    for answer in possible_answer:
        if answer != "":  # Optional parameter
            return type(answer)
    return None


def type_checker(
    param: str,
    value,
    possible_answer: list,
    expected_type_description: str,
    expected_type_converted,
    nested_type_converted,
):
    # NOTE: This type checker only supports nested type checking for one level deep.
    # We didn't implement recursive type checking for nested types, as it's not needed for the current use case and it's very complex.

    result: Any = {
        "valid": True,
        "error": [],
        "is_variable": False,
        "error_type": "type_error:simple",
    }

    is_variable = False
    # check for the case where a variable is used instead of a actual value.
    # use the type in possible_answer as the expected type
    possible_answer_type = get_possible_answer_type(possible_answer)
    # if possible_answer only contains optional parameters, we can't determine the type
    if possible_answer_type != None:
        # we are being precise here.
        # in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
        if possible_answer_type != expected_type_converted:
            is_variable = True

    # value is the same type as in function description
    if type(value) == expected_type_converted:
        # We don't need to do recursive check for simple types
        if nested_type_converted == None:
            result["is_variable"] = is_variable
            return result
        else:
            for possible_answer_item in possible_answer:
                flag = True  # Each parameter should match to at least one possible answer type.
                # Here, we assume that each item should be the same type. We could also relax it.
                if type(possible_answer_item) == list:
                    for value_item in value:
                        checker_result = type_checker(
                            param,
                            value_item,
                            possible_answer_item,
                            str(nested_type_converted),
                            nested_type_converted,
                            None,
                        )
                        if not checker_result["valid"]:
                            flag = False
                            break
                if flag:
                    return {"valid": True, "error": [], "is_variable": is_variable}
            result["valid"] = False
            result["error"] = [
                f"Nested type checking failed for parameter {repr(param)}. Expected outer type {expected_type_description} with inner type {str(nested_type_converted)}. Parameter value: {repr(value)}."
            ]
            result["error_type"] = "type_error:nested"

    # value is not as expected, check for the case where a variable is used instead of a actual value
    # use the type in possible_answer as the expected type
    possible_answer_type = get_possible_answer_type(possible_answer)
    # if possible_answer only contains optional parameters, we can't determine the type
    if possible_answer_type != None:
        # we are being precise here.
        # in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
        if type(value) == possible_answer_type:
            result["is_variable"] = True
            return result

    result["valid"] = False
    result["error"].append(
        f"Incorrect type for parameter {repr(param)}. Expected type {expected_type_description}, got {type(value).__name__}. Parameter value: {repr(value)}."
    )
    result["error_type"] = "type_error:simple"
    return result


def standardize_string(input_string: str):
    # This function standardizes the string by removing all the spaces, ",./-_*^" punctuation, and converting it to lowercase
    # It will also convert all the single quotes to double quotes
    # This is used to compare the model output with the possible answers
    # We don't want to punish model for answer like April 1, 2024 vs April 1,2024, vs April 1 2024
    regex_string = r"[ \,\.\/\-\_\*\^]"
    return re.sub(regex_string, "", input_string).lower().replace("'", '"')


def string_checker(param: str, model_output: str, possible_answer: list):
    standardize_possible_answer = []
    standardize_model_output = standardize_string(model_output)
    for i in range(len(possible_answer)):
        if type(possible_answer[i]) == str:
            standardize_possible_answer.append(standardize_string(possible_answer[i]))

    if standardize_model_output not in standardize_possible_answer:
        return {
            "valid": False,
            "error": [
                f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}. Case insensitive."
            ],
            "error_type": "value_error:string",
        }

    return {"valid": True, "error": []}


def list_checker(param: str, model_output: list, possible_answer: list):
    # Convert the tuple to a list
    standardize_model_output = list(model_output)

    # If the element in the list is a string, we need to standardize it
    for i in range(len(standardize_model_output)):
        if type(standardize_model_output[i]) == str:
            standardize_model_output[i] = standardize_string(model_output[i])

    standardize_possible_answer: Any = []
    # We also need to standardize the possible answers
    for i in range(len(possible_answer)):
        standardize_possible_answer.append([])
        for j in range(len(possible_answer[i])):
            if type(possible_answer[i][j]) == str:
                standardize_possible_answer[i].append(standardize_string(possible_answer[i][j]))
            else:
                standardize_possible_answer[i].append(possible_answer[i][j])

    if standardize_model_output not in standardize_possible_answer:
        return {
            "valid": False,
            "error": [
                f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}."
            ],
            "error_type": "value_error:list/tuple",
        }

    return {"valid": True, "error": []}


def dict_checker(param: str, model_output: dict, possible_answers: list):
    # This function works for simple dictionaries, but not dictionaries with nested dictionaries.
    # The current dataset only contains simple dictionaries, so this is sufficient.
    result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
    for i in range(len(possible_answers)):
        if possible_answers[i] == "":
            continue

        result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}

        flag = True

        possible_answer = possible_answers[i]
        # possible_anwer is a single dictionary
        for key, value in model_output.items():
            if key not in possible_answer:
                result["valid"] = False
                result["error"].append(f"Unexpected dict key parameter: '{key}'.")  # type: ignore[attr-defined]
                result["error_type"] = "value_error:dict_key"
                flag = False
                break

            standardize_value = value
            # If the value is a string, we need to standardize it
            if type(value) == str:
                standardize_value = standardize_string(value)

            # We also need to standardize the possible answers if they are string
            standardize_possible_answer = []
            for i in range(len(possible_answer[key])):
                if type(possible_answer[key][i]) == str:
                    standardize_possible_answer.append(standardize_string(possible_answer[key][i]))
                else:
                    standardize_possible_answer.append(possible_answer[key][i])

            if standardize_value not in standardize_possible_answer:
                result["valid"] = False
                result["error"].append(  # type: ignore[attr-defined]
                    f"Invalid value for parameter {repr(key)}: {repr(value)}. Expected one of {standardize_possible_answer}."
                )
                result["error_type"] = "value_error:dict_value"
                flag = False
                break

        for key, value in possible_answer.items():
            if key not in model_output and "" not in value:
                result["valid"] = False
                result["error"].append(f"Missing dict key parameter: '{key}'.")  # type: ignore[attr-defined]
                result["error_type"] = "value_error:dict_key"
                flag = False
                break

        if flag:
            return {"valid": True, "error": []}

    return result


def list_dict_checker(param: str, model_output: list, possible_answers: list):
    # This function takes in a list of dictionaries and checks if each dictionary is valid
    # The order of the dictionaries in the list must match the order of the possible answers
    result = {"valid": False, "error": [], "error_type": "list_dict_checker:unclear"}

    for answer_index in range(len(possible_answers)):
        flag = True  # True means so far, all dictionaries are valid

        # Only proceed if the number of dictionaries in the list matches the number of dictionaries in the possible answers
        if len(model_output) != len(possible_answers[answer_index]):
            result["valid"] = False
            result["error"] = ["Wrong number of dictionaries in the list."]
            result["error_type"] = "value_error:list_dict_count"
            flag = False
            continue

        for dict_index in range(len(model_output)):
            result = dict_checker(
                param,
                model_output[dict_index],
                [possible_answers[answer_index][dict_index]],
            )
            if not result["valid"]:
                flag = False
                break
        if flag:
            return {"valid": True, "error": []}

    return result


def simple_function_checker(
    func_description: dict,
    model_output: dict,
    possible_answer: dict,
    language: str,
    model_name: str,
):
    possible_answer = list(possible_answer.values())[0]
    # Extract function name and parameters details
    func_name = func_description["name"]
    param_details = func_description["parameters"]["properties"]
    required_params = func_description["parameters"]["required"]

    # Initialize a result dictionary
    result = {
        "valid": True,
        "error": [],
        "error_type": "simple_function_checker:unclear",
    }

    # Check if function name matches
    if func_name not in model_output:
        result["valid"] = False
        result["error"].append(  # type: ignore[attr-defined]
            f"Function name {repr(func_name)} not found in model output."
        )
        result["error_type"] = "simple_function_checker:wrong_func_name"
        return result

    model_params = model_output[func_name]

    # Check for required parameters in model output
    for param in required_params:
        if param not in model_params:
            result["valid"] = False
            result["error"].append(f"Missing required parameter: {repr(param)}.")  # type: ignore[attr-defined]
            result["error_type"] = "simple_function_checker:missing_required"
            return result

    # Validate types and values for each parameter in model output
    for param, value in model_params.items():
        if param not in param_details or param not in possible_answer:
            result["valid"] = False
            result["error"].append(f"Unexpected parameter: {repr(param)}.")  # type: ignore[attr-defined]
            result["error_type"] = "simple_function_checker:unexpected_param"
            return result

        full_param_details = param_details[param]
        expected_type_description = full_param_details["type"]  # This is a string
        is_variable = False
        nested_type_converted = None

        if language == "Java":
            from evals.utils.bfcl.java_type_converter import java_type_converter

            expected_type_converted = JAVA_TYPE_CONVERSION[expected_type_description]

            if expected_type_description in JAVA_TYPE_CONVERSION:
                if type(value) != str:
                    result["valid"] = False
                    result["error"].append(  # type: ignore[attr-defined]
                        f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
                    )
                    result["error_type"] = "type_error:java"
                    return result

                if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
                    nested_type = param_details[param]["items"]["type"]
                    nested_type_converted = JAVA_TYPE_CONVERSION[nested_type]
                    value = java_type_converter(value, expected_type_description, nested_type)
                else:
                    value = java_type_converter(value, expected_type_description)
        elif language == "JavaScript":
            from evals.utils.bfcl.js_type_converter import js_type_converter

            expected_type_converted = JS_TYPE_CONVERSION[expected_type_description]

            if expected_type_description in JS_TYPE_CONVERSION:
                if type(value) != str:
                    result["valid"] = False
                    result["error"].append(  # type: ignore[attr-defined]
                        f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
                    )
                    result["error_type"] = "type_error:js"
                    return result

                if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
                    nested_type = param_details[param]["items"]["type"]
                    nested_type_converted = JS_TYPE_CONVERSION[nested_type]
value = js_type_converter(value, expected_type_description, nested_type)
else:
value = js_type_converter(value, expected_type_description)
elif language == "Python":
expected_type_converted = PYTHON_TYPE_MAPPING[expected_type_description]
if expected_type_description in PYTHON_NESTED_TYPE_CHECK_LIST:
nested_type = param_details[param]["items"]["type"]
nested_type_converted = PYTHON_TYPE_MAPPING[nested_type]
# We convert all tuple values to lists when the expected type is tuple.
# The conversion is necessary because any tuple in the possible answer would become a list after being processed through json.dump() and json.load().
# This does introduce some false positives (e.g., when the model provides a list value instead of a tuple). We hope to find a better solution in the future.
if expected_type_description == "tuple" and type(value) == tuple:
value = list(value)
# Allow python auto conversion from int to float
if language == "Python" and expected_type_description == "float" and type(value) == int:
value = float(value)
# Type checking
# In fact, we only check for Python here.
# Type checks for other languages are handled by the type converter, so their values (after conversion) are always correct.
type_check_result = type_checker(
param,
value,
possible_answer[param],
expected_type_description,
expected_type_converted,
nested_type_converted,
)
is_variable = type_check_result["is_variable"]
if not type_check_result["valid"]:
return type_check_result
# It doesn't make sense to specially handle dictionaries and lists of dictionaries if the value is a variable.
# We can just treat the variable as a string and use the normal flow.
if not is_variable:
# Special handling for dictionaries
if expected_type_converted == dict:
result = dict_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Special handling for lists of dictionaries
elif expected_type_converted == list and nested_type_converted == dict:
result = list_dict_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Special handling for strings
elif expected_type_converted == str:
# We don't check for case sensitivity for string, as long as it's not a variable
result = string_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
elif expected_type_converted == list:
result = list_checker(param, value, possible_answer[param])
if not result["valid"]:
return result
continue
# Check if the value is within the possible answers
if value not in possible_answer[param]:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Invalid value for parameter {repr(param)}: {repr(value)}. Expected one of {possible_answer[param]}."
)
result["error_type"] = "value_error:others"
return result
# Check for optional parameters not provided but allowed
for param in possible_answer:
if param not in model_params and "" not in possible_answer[param]:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Optional parameter {repr(param)} not provided and not marked as optional."
)
result["error_type"] = "simple_function_checker:missing_optional"
return result
return result
def parallel_function_checker_enforce_order(
func_descriptions: list,
model_output: list,
possible_answers: dict,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "parallel_function_checker_enforce_order:wrong_count",
}
func_name_list = list(possible_answers.keys())
possible_answers_list = []
for key, value in possible_answers.items():
possible_answers_list.append({key: value})
for i in range(len(possible_answers_list)):
func_description = find_description(func_descriptions, func_name_list[i])
result = simple_function_checker(
func_description,
model_output[i],
possible_answers_list[i],
language,
model_name,
)
if not result["valid"]:
return result
return {"valid": True, "error": []}
def parallel_function_checker_no_order(
func_descriptions: list,
model_output: list,
possible_answers: list,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "parallel_function_checker_no_order:wrong_count",
}
matched_indices = []
# We go through the possible answers one by one, and eliminate the model output that matches the possible answer
# It must be this way because we need ground truth to fetch the correct function description
for i in range(len(possible_answers)):
# possible_answers[i] is a dictionary with only one key
func_name_expected = list(possible_answers[i].keys())[0]
func_description = find_description(func_descriptions, func_name_expected)
all_errors = []
for index in range(len(model_output)):
if index in matched_indices:
continue
result = simple_function_checker(
func_description,
model_output[index],
possible_answers[i],
language,
model_name,
)
if result["valid"]:
matched_indices.append(index)
break
else:
all_errors.append(
{
f"Model Result Index {index}": {
"sub_error": result["error"],
"sub_error_type": result["error_type"],
"model_output_item": model_output[index],
"possible_answer_item": possible_answers[i],
}
}
)
if not result["valid"]:
considered_indices = [i for i in range(len(model_output)) if i not in matched_indices]
all_errors.insert(
0,
f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.", # type: ignore[arg-type]
)
return {
"valid": False,
"error": all_errors,
"error_type": "parallel_function_checker_no_order:cannot_find_match",
}
return {"valid": True, "error": []}
def multiple_function_checker(
func_descriptions: list,
model_output: list,
possible_answers: list,
language: str,
model_name: str,
):
if len(model_output) != len(possible_answers):
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "multiple_function_checker:wrong_count",
}
# possible_answers is a list of only one dictionary with only one key
func_name_expected = list(possible_answers[0].keys())[0]
func_description = find_description(func_descriptions, func_name_expected)
return simple_function_checker(
func_description,
model_output[0],
possible_answers[0],
language,
model_name,
)
def pattern_matcher(exec_output, expected_result, function_call, is_sanity_check):
result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
if type(exec_output) != type(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result type for {repr(function_call)}. Expected type: {type(expected_result)}, but got: {type(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type",
"model_executed_output": exec_output,
}
if type(exec_output) == dict:
# We loosen the requirement for the sanity check as the expected result used in the sanity check might not be the most up-to-date one.
# This happens when the key is a timestamp or a random number.
if is_sanity_check:
if len(exec_output) != len(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type:dict_length",
"model_executed_output": exec_output,
}
else:
return result
for key, value in expected_result.items():
if key not in exec_output:
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not found in the model output."
],
"error_type": "executable_checker:wrong_result_type:dict_key_not_found",
"model_executed_output": exec_output,
}
for key, value in exec_output.items():
if key not in expected_result:
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not expected in the model output."
],
"error_type": "executable_checker:wrong_result_type:dict_extra_key",
"model_executed_output": exec_output,
}
if type(exec_output) == list:
if len(exec_output) != len(expected_result):
return {
"valid": False,
"error": [
f"Wrong execution result pattern for {repr(function_call)}. Expect type list, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
],
"error_type": "executable_checker:wrong_result_type:list_length",
"model_executed_output": exec_output,
}
return result
#### Helper functions for Exec ####
def executable_checker_simple(
function_call: str,
expected_result,
expected_result_type: str,
is_sanity_check=False,
):
result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
exec_dict: Any = {}
try:
exec(
"from executable_python_function import *" + "\nresult=" + function_call,
exec_dict,
)
exec_output = exec_dict["result"]
except NoAPIKeyError as e:
raise e
except Exception as e:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Error in execution: {repr(function_call)}. Error: {str(e)}"
)
result["error_type"] = "executable_checker:execution_error"
return result
# We need to specially handle the case where the execution result is a tuple and convert it to a list,
# because when stored as JSON the tuple becomes a list, and so the expected result is a list when loaded from JSON
if isinstance(exec_output, tuple):
exec_output = list(exec_output)
if expected_result_type == "exact_match":
if exec_output != expected_result:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}."
)
result["error_type"] = "executable_checker:wrong_result"
result["model_executed_output"] = exec_output
return result
elif expected_result_type == "real_time_match":
# Allow a small relative difference (REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
if (type(expected_result) == float or type(expected_result) == int) and (
type(exec_output) == float or type(exec_output) == int
):
if not (
expected_result * (1 - REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
<= exec_output
<= expected_result * (1 + REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
):
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. {REAL_TIME_MATCH_ALLOWED_DIFFERENCE * 100}% difference allowed."
)
result["error_type"] = "executable_checker:wrong_result_real_time"
result["model_executed_output"] = exec_output
return result
else:
result["valid"] = False
result["error"].append( # type: ignore[attr-defined]
f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. Type needs to be float or int for real time match criteria."
)
result["error_type"] = "executable_checker:wrong_result_real_time"
result["model_executed_output"] = exec_output
return result
else:
# structural match
pattern_match_result = pattern_matcher(exec_output, expected_result, function_call, is_sanity_check)
if not pattern_match_result["valid"]:
return pattern_match_result
return result
def executable_checker_parallel_no_order(
decoded_result: list, expected_exec_result: list, expected_exec_result_type: list
):
if len(decoded_result) != len(expected_exec_result):
return {
"valid": False,
"error": [
f"Wrong number of functions provided. Expected {len(expected_exec_result)}, but got {len(decoded_result)}."
],
"error_type": "value_error:exec_result_count",
}
matched_indices = []
for i in range(len(expected_exec_result)):
all_errors = []
for index in range(len(decoded_result)):
if index in matched_indices:
continue
result = executable_checker_simple(
decoded_result[index],
expected_exec_result[i],
expected_exec_result_type[i],
False,
)
if result["valid"]:
matched_indices.append(index)
break
else:
all_errors.append(
{
f"Model Result Index {index}": {
"sub_error": result["error"],
"sub_error_type": result["error_type"],
"model_executed_output": (
result["model_executed_output"] if "model_executed_output" in result else None
),
}
}
)
if not result["valid"]:
considered_indices = [i for i in range(len(decoded_result)) if i not in matched_indices]
all_errors.insert(
0,
f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.", # type: ignore[arg-type]
)
return {
"valid": False,
"error": all_errors,
"error_type": "executable_checker:cannot_find_match",
}
return {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
#### Main function ####
def executable_checker_rest(func_call, idx):
# Move this here for now to avoid needing to read this file / fix paths to be relative to dataset_dir. Fix when it's actually needed / used.
EVAL_GROUND_TRUTH_PATH = "/mnt/wsfuse/fair_llm_v2/datasets/eval/bfcl/rest-eval-response_v5.jsonl" # Ground truth file for v5 for rest execution
with open(EVAL_GROUND_TRUTH_PATH, "r") as f:
EVAL_GROUND_TRUTH = f.readlines()
if "https://geocode.maps.co" in func_call:
time.sleep(2)
if "requests_get" in func_call:
func_call = func_call.replace("requests_get", "requests.get")
try:
response = eval(func_call)
except Exception as e:
return {
"valid": False,
"error": [f"Execution failed. {str(e)}"],
"error_type": "executable_checker_rest:execution_error",
}
try:
if response.status_code == 200:
eval_GT_json = json.loads(EVAL_GROUND_TRUTH[idx])
try:
if isinstance(eval_GT_json, dict):
if isinstance(response.json(), dict):
if set(eval_GT_json.keys()) == set(response.json().keys()):
return {"valid": True, "error": [], "error_type": ""}
return {
"valid": False,
"error": ["Key inconsistency"],
"error_type": "executable_checker_rest:wrong_key",
}
return {
"valid": False,
"error": [f"Expected dictionary, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
elif isinstance(eval_GT_json, list):
if isinstance(response.json(), list):
if len(eval_GT_json) != len(response.json()):
return {
"valid": False,
"error": [f"Response list length inconsistency."],
"error_type": "value_error:exec_result_rest_count",
}
else:
for i in range(len(eval_GT_json)):
if set(eval_GT_json[i].keys()) != set(response.json()[i].keys()):
return {
"valid": False,
"error": [f"Key inconsistency"],
"error_type": "executable_checker_rest:wrong_key",
}
return {"valid": True, "error": []}
else:
return {
"valid": False,
"error": [f"Expected list, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
return {
"valid": False,
"error": [f"Expected dict or list, but got {type(response.json())}"],
"error_type": "executable_checker_rest:wrong_type",
}
except Exception as e:
return {
"valid": False,
"error": [
f"Error in execution and type checking. Status code: {response.status_code}. Error: {str(e)}"
],
"error_type": "executable_checker_rest:response_format_error",
}
else:
return {
"valid": False,
"error": [f"Execution result status code is not 200, got {response.status_code}"],
"error_type": "executable_checker_rest:wrong_status_code",
}
except Exception as e:
return {
"valid": False,
"error": [f"Cannot get status code of the response. Error: {str(e)}"],
"error_type": "executable_checker_rest:cannot_get_status_code",
}
def ast_checker(func_description, model_output, possible_answer, language, test_category, model_name):
if "parallel" in test_category:
return parallel_function_checker_no_order(func_description, model_output, possible_answer, language, model_name)
elif "multiple" in test_category:
return multiple_function_checker(func_description, model_output, possible_answer, language, model_name)
else:
if len(model_output) != 1:
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "simple_function_checker:wrong_count",
}
return simple_function_checker(
func_description[0],
model_output[0],
possible_answer[0],
language,
model_name,
)
def exec_checker(decoded_result: list, func_description: dict, test_category: str):
if "multiple" in test_category or "parallel" in test_category:
return executable_checker_parallel_no_order(
decoded_result,
func_description["execution_result"],
func_description["execution_result_type"],
)
else:
if len(decoded_result) != 1:
return {
"valid": False,
"error": ["Wrong number of functions."],
"error_type": "simple_exec_checker:wrong_count",
}
return executable_checker_simple(
decoded_result[0],
func_description["execution_result"][0],
func_description["execution_result_type"][0],
False,
)
def is_empty_output(decoded_output):
# This function is a patch to the ast decoder for relevance detection
# Sometimes the ast decoder will parse successfully, but the input doesn't really have a function call
# [], [{}], and anything that is not in function calling format is considered empty (and thus should be marked as correct)
if not is_function_calling_format_output(decoded_output):
return True
if len(decoded_output) == 0:
return True
if len(decoded_output) == 1 and len(decoded_output[0]) == 0:
return True
return False
def is_function_calling_format_output(decoded_output):
# Ensure the output is a list of dictionaries
if type(decoded_output) == list:
for item in decoded_output:
if type(item) != dict:
return False
return True
return False
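# A hypothetical end-to-end sketch (not part of the original module): running
# ast_checker on a single "simple" AST test case. Function and parameter
# values are made up, and it assumes PYTHON_TYPE_MAPPING (defined earlier in
# this file) maps "string" to str.
if __name__ == "__main__":
    demo_func_description = [
        {
            "name": "get_weather",
            "parameters": {
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }
    ]
    demo_model_output = [{"get_weather": {"city": "Paris"}}]
    demo_possible_answer = [{"get_weather": {"city": ["Paris"]}}]
    print(
        ast_checker(
            demo_func_description,
            demo_model_output,
            demo_possible_answer,
            language="Python",
            test_category="simple",
            model_name="demo-model",
        )
    )  # expected: {"valid": True, "error": []}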

View file

@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Tree-sitter changes its API with unfortunate frequency. Modules that need it should
import it from here so that we can centrally manage things as necessary.
"""
# These currently work with tree-sitter 0.23.0
# NOTE: Don't import tree-sitter or any of the language modules in the main module
# because not all environments have them. Import lazily inside functions where needed.
import importlib
import typing
if typing.TYPE_CHECKING:
import tree_sitter
def get_language(language: str) -> "tree_sitter.Language":
import tree_sitter
language_module_name = f"tree_sitter_{language}"
try:
language_module = importlib.import_module(language_module_name)
except ModuleNotFoundError as exc:
raise ValueError(
f"Language {language} is not found. Please install the tree-sitter-{language} package."
) from exc
return tree_sitter.Language(language_module.language())
def get_parser(language: str, **kwargs) -> "tree_sitter.Parser":
import tree_sitter
lang = get_language(language)
return tree_sitter.Parser(lang, **kwargs)
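# A hypothetical usage sketch (assumes tree-sitter 0.23 and the
# tree-sitter-python package are installed):
#   parser = get_parser("python")
#   tree = parser.parse(b"def f():\n    return 1\n")
#   print(tree.root_node.type)  # -> "module"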

View file

@ -116,7 +116,7 @@ def available_providers() -> list[ProviderSpec]:
adapter=AdapterSpec( adapter=AdapterSpec(
adapter_type="fireworks", adapter_type="fireworks",
pip_packages=[ pip_packages=[
"fireworks-ai", "fireworks-ai<=0.18.0",
], ],
module="llama_stack.providers.remote.inference.fireworks", module="llama_stack.providers.remote.inference.fireworks",
config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",

View file

@ -404,6 +404,60 @@ That means you'll get fast and efficient vector retrieval.
- Easy to use - Easy to use
- Fully integrated with Llama Stack - Fully integrated with Llama Stack
There are three implementations of search available for PGVectorIndex:
1. Vector Search:
- How it works:
- Uses PostgreSQL's vector extension (pgvector) to perform similarity search
- Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
- Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
- Characteristics:
- Semantic understanding - finds documents similar in meaning even if they don't share keywords
- Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
- Best for: Finding conceptually related content, handling synonyms, cross-language search
2. Keyword Search:
- How it works:
- Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
- Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
- Eg. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
- Characteristics:
- Lexical matching - finds exact keyword matches and variations
- Uses GIN (Generalized Inverted Index) for fast text search performance
- Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
- Best for: Exact term matching, proper names, technical terms, Boolean-style queries
3. Hybrid Search:
- How it works:
- Combines both vector and keyword search results
- Runs both searches independently, then merges results using configurable reranking
- Two reranking strategies are available (see the Python sketch after the schema below):
- Reciprocal Rank Fusion (RRF) - (default impact factor: 60.0)
- Weighted Average - (default weight: 0.5)
- Characteristics:
- Best of both worlds: semantic understanding + exact matching
- Documents appearing in both searches get boosted scores
- Configurable balance between semantic and lexical matching
- Best for: General-purpose search where you want both precision and recall
4. Database Schema
The PGVector implementation stores data optimized for all three search types:
CREATE TABLE vector_store_xxx (
id TEXT PRIMARY KEY,
document JSONB, -- Original document
embedding vector(dimension), -- For vector search
content_text TEXT, -- Raw text content
tokenized_content TSVECTOR -- For keyword search
);
-- Indexes for performance
CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
-- Vector index created automatically by pgvector
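To make the hybrid boosting concrete, here is a minimal Python sketch of Reciprocal Rank Fusion under the default impact factor of 60.0 noted above. The helper name and inputs are made up for illustration; in Llama Stack the actual combination is performed by `WeightedInMemoryAggregator.combine_search_results`.
```python
# Minimal RRF sketch: documents found by both searches receive two
# 1 / (k + rank) contributions, which is why they rank higher after merging.
def rrf_combine(vector_ranks: dict, keyword_ranks: dict, k: float = 60.0) -> dict:
    scores: dict = {}
    for ranks in (vector_ranks, keyword_ranks):
        for doc_id, rank in ranks.items():
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return dict(sorted(scores.items(), key=lambda kv: kv[1], reverse=True))

# "doc-a" appears in both result lists, so it is boosted above "doc-b".
print(rrf_combine({"doc-a": 1, "doc-b": 2}, {"doc-a": 3}))
```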
## Usage ## Usage
To use PGVector in your Llama Stack project, follow these steps: To use PGVector in your Llama Stack project, follow these steps:
@ -412,6 +466,25 @@ To use PGVector in your Llama Stack project, follow these steps:
2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector). 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
3. Start storing and querying vectors. 3. Start storing and querying vectors.
## Example: setting up your environment to use PGVector
1. Export env vars:
```bash
export ENABLE_PGVECTOR=true
export PGVECTOR_HOST=localhost
export PGVECTOR_PORT=5432
export PGVECTOR_DB=llamastack
export PGVECTOR_USER=llamastack
export PGVECTOR_PASSWORD=llamastack
```
2. Create DB:
```bash
psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
```
## Installation ## Installation
You can install PGVector using docker: You can install PGVector using docker:
@ -449,6 +522,7 @@ Weaviate supports:
- Metadata filtering - Metadata filtering
- Multi-modal retrieval - Multi-modal retrieval
## Usage ## Usage
To use Weaviate in your Llama Stack project, follow these steps: To use Weaviate in your Llama Stack project, follow these steps:

View file

@ -6,15 +6,14 @@
from typing import Any from typing import Any
from llama_stack.core.datatypes import Api from llama_stack.core.datatypes import AccessRule, Api
from .config import S3FilesImplConfig from .config import S3FilesImplConfig
async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any]): async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
from .files import S3FilesImpl from .files import S3FilesImpl
# TODO: authorization policies and user separation impl = S3FilesImpl(config, policy or [])
impl = S3FilesImpl(config)
await impl.initialize() await impl.initialize()
return impl return impl

View file

@ -4,9 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import time
import uuid import uuid
from typing import Annotated from datetime import UTC, datetime
from typing import Annotated, Any
import boto3 import boto3
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
@ -15,14 +15,17 @@ from fastapi import File, Form, Response, UploadFile
from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import ( from llama_stack.apis.files import (
ExpiresAfter,
Files, Files,
ListOpenAIFileResponse, ListOpenAIFileResponse,
OpenAIFileDeleteResponse, OpenAIFileDeleteResponse,
OpenAIFileObject, OpenAIFileObject,
OpenAIFilePurpose, OpenAIFilePurpose,
) )
from llama_stack.core.datatypes import AccessRule
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
from .config import S3FilesImplConfig from .config import S3FilesImplConfig
@ -83,22 +86,85 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e
def _make_file_object(
*,
id: str,
filename: str,
purpose: str,
bytes: int,
created_at: int,
expires_at: int,
**kwargs: Any, # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore
) -> OpenAIFileObject:
"""
Construct an OpenAIFileObject and normalize expires_at.
If expires_at is greater than the max, we treat it as no expiration and
return None for expires_at.
The OpenAI spec says expires_at type is Integer, but the implementation
will return None for no expiration.
"""
obj = OpenAIFileObject(
id=id,
filename=filename,
purpose=OpenAIFilePurpose(purpose),
bytes=bytes,
created_at=created_at,
expires_at=expires_at,
)
if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX):
obj.expires_at = None # type: ignore
return obj
class S3FilesImpl(Files): class S3FilesImpl(Files):
"""S3-based implementation of the Files API.""" """S3-based implementation of the Files API."""
# TODO: implement expiration, for now a silly offset def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
_SILLY_EXPIRATION_OFFSET = 100 * 365 * 24 * 60 * 60
def __init__(self, config: S3FilesImplConfig) -> None:
self._config = config self._config = config
self.policy = policy
self._client: boto3.client | None = None self._client: boto3.client | None = None
self._sql_store: SqlStore | None = None self._sql_store: AuthorizedSqlStore | None = None
def _now(self) -> int:
"""Return current UTC timestamp as int seconds."""
return int(datetime.now(UTC).timestamp())
async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]:
where: dict[str, str | dict] = {"id": file_id}
if not return_expired:
where["expires_at"] = {">": self._now()}
if not (row := await self.sql_store.fetch_one("openai_files", policy=self.policy, where=where)):
raise ResourceNotFoundError(file_id, "File", "files.list()")
return row
async def _delete_file(self, file_id: str) -> None:
"""Delete a file from S3 and the database."""
try:
self.client.delete_object(
Bucket=self._config.bucket_name,
Key=file_id,
)
except ClientError as e:
if e.response["Error"]["Code"] != "NoSuchKey":
raise RuntimeError(f"Failed to delete file from S3: {e}") from e
await self.sql_store.delete("openai_files", where={"id": file_id})
async def _delete_if_expired(self, file_id: str) -> None:
"""If the file exists and is expired, delete it."""
if row := await self._get_file(file_id, return_expired=True):
if (expires_at := row.get("expires_at")) and expires_at <= self._now():
await self._delete_file(file_id)
async def initialize(self) -> None: async def initialize(self) -> None:
self._client = _create_s3_client(self._config) self._client = _create_s3_client(self._config)
await _create_bucket_if_not_exists(self._client, self._config) await _create_bucket_if_not_exists(self._client, self._config)
self._sql_store = sqlstore_impl(self._config.metadata_store) self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store))
await self._sql_store.create_table( await self._sql_store.create_table(
"openai_files", "openai_files",
{ {
@ -121,7 +187,7 @@ class S3FilesImpl(Files):
return self._client return self._client
@property @property
def sql_store(self) -> SqlStore: def sql_store(self) -> AuthorizedSqlStore:
assert self._sql_store is not None, "Provider not initialized" assert self._sql_store is not None, "Provider not initialized"
return self._sql_store return self._sql_store
@ -129,27 +195,47 @@ class S3FilesImpl(Files):
self, self,
file: Annotated[UploadFile, File()], file: Annotated[UploadFile, File()],
purpose: Annotated[OpenAIFilePurpose, Form()], purpose: Annotated[OpenAIFilePurpose, Form()],
expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
) -> OpenAIFileObject: ) -> OpenAIFileObject:
file_id = f"file-{uuid.uuid4().hex}" file_id = f"file-{uuid.uuid4().hex}"
filename = getattr(file, "filename", None) or "uploaded_file" filename = getattr(file, "filename", None) or "uploaded_file"
created_at = int(time.time()) created_at = self._now()
expires_at = created_at + self._SILLY_EXPIRATION_OFFSET
expires_after = None
if expires_after_anchor is not None or expires_after_seconds is not None:
# we use ExpiresAfter to validate input
expires_after = ExpiresAfter(
anchor=expires_after_anchor, # type: ignore[arg-type]
seconds=expires_after_seconds, # type: ignore[arg-type]
)
# the default is no expiration.
# to implement no expiration we set an expiration beyond the max.
# we'll hide this fact from users when returning the file object.
expires_at = created_at + ExpiresAfter.MAX * 42
# the default for BATCH files is 30 days, which happens to be the expiration max.
if purpose == OpenAIFilePurpose.BATCH:
expires_at = created_at + ExpiresAfter.MAX
if expires_after is not None:
expires_at = created_at + expires_after.seconds
content = await file.read() content = await file.read()
file_size = len(content) file_size = len(content)
await self.sql_store.insert( entry: dict[str, Any] = {
"openai_files", "id": file_id,
{ "filename": filename,
"id": file_id, "purpose": purpose.value,
"filename": filename, "bytes": file_size,
"purpose": purpose.value, "created_at": created_at,
"bytes": file_size, "expires_at": expires_at,
"created_at": created_at, }
"expires_at": expires_at,
}, await self.sql_store.insert("openai_files", entry)
)
try: try:
self.client.put_object( self.client.put_object(
@ -163,14 +249,7 @@ class S3FilesImpl(Files):
raise RuntimeError(f"Failed to upload file to S3: {e}") from e raise RuntimeError(f"Failed to upload file to S3: {e}") from e
return OpenAIFileObject( return _make_file_object(**entry)
id=file_id,
filename=filename,
purpose=purpose,
bytes=file_size,
created_at=created_at,
expires_at=expires_at,
)
async def openai_list_files( async def openai_list_files(
self, self,
@ -183,29 +262,20 @@ class S3FilesImpl(Files):
if not order: if not order:
order = Order.desc order = Order.desc
where_conditions = {} where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
if purpose: if purpose:
where_conditions["purpose"] = purpose.value where_conditions["purpose"] = purpose.value
paginated_result = await self.sql_store.fetch_all( paginated_result = await self.sql_store.fetch_all(
table="openai_files", table="openai_files",
where=where_conditions if where_conditions else None, policy=self.policy,
where=where_conditions,
order_by=[("created_at", order.value)], order_by=[("created_at", order.value)],
cursor=("id", after) if after else None, cursor=("id", after) if after else None,
limit=limit, limit=limit,
) )
files = [ files = [_make_file_object(**row) for row in paginated_result.data]
OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
for row in paginated_result.data
]
return ListOpenAIFileResponse( return ListOpenAIFileResponse(
data=files, data=files,
@ -216,41 +286,20 @@ class S3FilesImpl(Files):
) )
async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) await self._delete_if_expired(file_id)
if not row: row = await self._get_file(file_id)
raise ResourceNotFoundError(file_id, "File", "files.list()") return _make_file_object(**row)
return OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) await self._delete_if_expired(file_id)
if not row: _ = await self._get_file(file_id) # raises if not found
raise ResourceNotFoundError(file_id, "File", "files.list()") await self._delete_file(file_id)
try:
self.client.delete_object(
Bucket=self._config.bucket_name,
Key=row["id"],
)
except ClientError as e:
if e.response["Error"]["Code"] != "NoSuchKey":
raise RuntimeError(f"Failed to delete file from S3: {e}") from e
await self.sql_store.delete("openai_files", where={"id": file_id})
return OpenAIFileDeleteResponse(id=file_id, deleted=True) return OpenAIFileDeleteResponse(id=file_id, deleted=True)
async def openai_retrieve_file_content(self, file_id: str) -> Response: async def openai_retrieve_file_content(self, file_id: str) -> Response:
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) await self._delete_if_expired(file_id)
if not row:
raise ResourceNotFoundError(file_id, "File", "files.list()") row = await self._get_file(file_id)
try: try:
response = self.client.get_object( response = self.client.get_object(
@ -261,7 +310,7 @@ class S3FilesImpl(Files):
content = response["Body"].read() content = response["Body"].read()
except ClientError as e: except ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey": if e.response["Error"]["Code"] == "NoSuchKey":
await self.sql_store.delete("openai_files", where={"id": file_id}) await self._delete_file(file_id)
raise ResourceNotFoundError(file_id, "File", "files.list()") from e raise ResourceNotFoundError(file_id, "File", "files.list()") from e
raise RuntimeError(f"Failed to download file from S3: {e}") from e raise RuntimeError(f"Failed to download file from S3: {e}") from e

View file

@ -41,10 +41,10 @@ client.initialize()
### Create Completion ### Create Completion
> Note on Completion API The following example shows how to create a completion for an NVIDIA NIM.
>
> The hosted NVIDIA Llama NIMs (e.g., `meta-llama/Llama-3.1-8B-Instruct`) with ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` does not support the ```completion``` method, while the locally deployed NIM does.
> [!NOTE]
> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do.
```python ```python
response = client.inference.completion( response = client.inference.completion(
@ -60,6 +60,8 @@ print(f"Response: {response.content}")
### Create Chat Completion ### Create Chat Completion
The following example shows how to create a chat completion for an NVIDIA NIM.
```python ```python
response = client.inference.chat_completion( response = client.inference.chat_completion(
model_id="meta-llama/Llama-3.1-8B-Instruct", model_id="meta-llama/Llama-3.1-8B-Instruct",
@ -82,6 +84,9 @@ print(f"Response: {response.completion_message.content}")
``` ```
### Tool Calling Example ### ### Tool Calling Example ###
The following example shows how to do tool calling for an NVIDIA NIM.
```python ```python
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
@ -117,6 +122,9 @@ if tool_response.completion_message.tool_calls:
``` ```
### Structured Output Example ### Structured Output Example
The following example shows how to do structured output for an NVIDIA NIM.
```python ```python
from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
@ -149,8 +157,10 @@ print(f"Structured Response: {structured_response.completion_message.content}")
``` ```
### Create Embeddings ### Create Embeddings
> Note on OpenAI embeddings compatibility
> The following example shows how to create embeddings for an NVIDIA NIM.
> [!NOTE]
> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`. > NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
```python ```python
@ -160,4 +170,42 @@ response = client.inference.embeddings(
task_type="query", task_type="query",
) )
print(f"Embeddings: {response.embeddings}") print(f"Embeddings: {response.embeddings}")
``` ```
### Vision Language Models Example
The following example shows how to run vision inference using an NVIDIA NIM.
```python
import base64

def load_image_as_base64(image_path):
with open(image_path, "rb") as image_file:
img_bytes = image_file.read()
return base64.b64encode(img_bytes).decode("utf-8")
image_path = {path_to_the_image}
demo_image_b64 = load_image_as_base64(image_path)
vlm_response = client.inference.chat_completion(
model_id="nvidia/vila",
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": demo_image_b64,
},
},
{
"type": "text",
"text": "Please describe what you see in this image in detail.",
},
],
}
],
)
print(f"VLM Response: {vlm_response.completion_message.content}")
```

View file

@ -55,6 +55,10 @@ MODEL_ENTRIES = [
"meta/llama-3.3-70b-instruct", "meta/llama-3.3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
ProviderModelEntry(
provider_model_id="nvidia/vila",
model_type=ModelType.llm,
),
# NeMo Retriever Text Embedding models - # NeMo Retriever Text Embedding models -
# #
# https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import heapq
from typing import Any from typing import Any
import psycopg2 import psycopg2
@ -23,6 +24,9 @@ from llama_stack.apis.vector_io import (
) )
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -31,6 +35,7 @@ from llama_stack.providers.utils.memory.vector_store import (
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
from .config import PGVectorVectorIOConfig from .config import PGVectorVectorIOConfig
@ -72,25 +77,63 @@ def load_models(cur, cls):
class PGVectorIndex(EmbeddingIndex): class PGVectorIndex(EmbeddingIndex):
def __init__(self, vector_db: VectorDB, dimension: int, conn, kvstore: KVStore | None = None): # reference: https://github.com/pgvector/pgvector?tab=readme-ov-file#querying
self.conn = conn PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION: dict[str, str] = {
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: "L2": "<->",
# Sanitize the table name by replacing hyphens with underscores "L1": "<+>",
# SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens "COSINE": "<=>",
# when created with patterns like "test-vector-db-{uuid4()}" "INNER_PRODUCT": "<#>",
sanitized_identifier = vector_db.identifier.replace("-", "_") "HAMMING": "<~>",
self.table_name = f"vector_store_{sanitized_identifier}" "JACCARD": "<%>",
self.kvstore = kvstore }
cur.execute( def __init__(
f""" self,
CREATE TABLE IF NOT EXISTS {self.table_name} ( vector_db: VectorDB,
id TEXT PRIMARY KEY, dimension: int,
document JSONB, conn: psycopg2.extensions.connection,
embedding vector({dimension}) kvstore: KVStore | None = None,
distance_metric: str = "COSINE",
):
self.vector_db = vector_db
self.dimension = dimension
self.conn = conn
self.kvstore = kvstore
self.check_distance_metric_availability(distance_metric)
self.distance_metric = distance_metric
self.table_name = None
async def initialize(self) -> None:
try:
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
# Sanitize the table name by replacing hyphens with underscores
# SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
# when created with patterns like "test-vector-db-{uuid4()}"
sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
self.table_name = f"vs_{sanitized_identifier}"
cur.execute(
f"""
CREATE TABLE IF NOT EXISTS {self.table_name} (
id TEXT PRIMARY KEY,
document JSONB,
embedding vector({self.dimension}),
content_text TEXT,
tokenized_content TSVECTOR
)
"""
) )
"""
) # Create GIN index for full-text search performance
cur.execute(
f"""
CREATE INDEX IF NOT EXISTS {self.table_name}_content_gin_idx
ON {self.table_name} USING GIN(tokenized_content)
"""
)
except Exception as e:
log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
assert len(chunks) == len(embeddings), ( assert len(chunks) == len(embeddings), (
@ -99,29 +142,49 @@ class PGVectorIndex(EmbeddingIndex):
values = [] values = []
for i, chunk in enumerate(chunks): for i, chunk in enumerate(chunks):
content_text = interleaved_content_as_str(chunk.content)
values.append( values.append(
( (
f"{chunk.chunk_id}", f"{chunk.chunk_id}",
Json(chunk.model_dump()), Json(chunk.model_dump()),
embeddings[i].tolist(), embeddings[i].tolist(),
content_text,
content_text, # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
) )
) )
query = sql.SQL( query = sql.SQL(
f""" f"""
INSERT INTO {self.table_name} (id, document, embedding) INSERT INTO {self.table_name} (id, document, embedding, content_text, tokenized_content)
VALUES %s VALUES %s
ON CONFLICT (id) DO UPDATE SET embedding = EXCLUDED.embedding, document = EXCLUDED.document ON CONFLICT (id) DO UPDATE SET
embedding = EXCLUDED.embedding,
document = EXCLUDED.document,
content_text = EXCLUDED.content_text,
tokenized_content = EXCLUDED.tokenized_content
""" """
) )
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
execute_values(cur, query, values, template="(%s, %s, %s::vector)") execute_values(cur, query, values, template="(%s, %s, %s::vector, %s, to_tsvector('english', %s))")
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
"""
Performs vector similarity search using PostgreSQL's search function. Default distance metric is COSINE.
Args:
embedding: The query embedding vector
k: Number of results to return
score_threshold: Minimum similarity score threshold
Returns:
QueryChunksResponse with the top-k most similar chunks
"""
pgvector_search_function = self.get_pgvector_search_function()
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute( cur.execute(
f""" f"""
SELECT document, embedding <-> %s::vector AS distance SELECT document, embedding {pgvector_search_function} %s::vector AS distance
FROM {self.table_name} FROM {self.table_name}
ORDER BY distance ORDER BY distance
LIMIT %s LIMIT %s
@ -147,7 +210,40 @@ class PGVectorIndex(EmbeddingIndex):
k: int, k: int,
score_threshold: float, score_threshold: float,
) -> QueryChunksResponse: ) -> QueryChunksResponse:
raise NotImplementedError("Keyword search is not supported in PGVector") """
Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring.
Args:
query_string: The text query for keyword search
k: Number of results to return
score_threshold: Minimum similarity score threshold
Returns:
QueryChunksResponse with keyword-matched chunks and relevance scores
"""
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
# Use plainto_tsquery to handle user input safely and ts_rank for relevance scoring
cur.execute(
f"""
SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
FROM {self.table_name}
WHERE tokenized_content @@ plainto_tsquery('english', %s)
ORDER BY score DESC
LIMIT %s
""",
(query_string, query_string, k),
)
results = cur.fetchall()
chunks = []
scores = []
for doc, score in results:
if score < score_threshold:
continue
chunks.append(Chunk(**doc))
scores.append(float(score))
return QueryChunksResponse(chunks=chunks, scores=scores)
async def query_hybrid( async def query_hybrid(
self, self,
@ -158,7 +254,59 @@ class PGVectorIndex(EmbeddingIndex):
reranker_type: str, reranker_type: str,
reranker_params: dict[str, Any] | None = None, reranker_params: dict[str, Any] | None = None,
) -> QueryChunksResponse: ) -> QueryChunksResponse:
raise NotImplementedError("Hybrid search is not supported in PGVector") """
Hybrid search combining vector similarity and keyword search using configurable reranking.
Args:
embedding: The query embedding vector
query_string: The text query for keyword search
k: Number of results to return
score_threshold: Minimum similarity score threshold
reranker_type: Type of reranker to use ("rrf" or "weighted")
reranker_params: Parameters for the reranker
Returns:
QueryChunksResponse with combined results
"""
if reranker_params is None:
reranker_params = {}
# Get results from both search methods
vector_response = await self.query_vector(embedding, k, score_threshold)
keyword_response = await self.query_keyword(query_string, k, score_threshold)
# Convert responses to score dictionaries using chunk_id
vector_scores = {
chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
}
keyword_scores = {
chunk.chunk_id: score
for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
}
# Combine scores using the reranking utility
combined_scores = WeightedInMemoryAggregator.combine_search_results(
vector_scores, keyword_scores, reranker_type, reranker_params
)
# heapq.nlargest gives efficient top-k selection because it only tracks the k best candidates seen so far
top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
# Filter by score threshold
filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
# Create a map of chunk_id to chunk for both responses
chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
# Use the map to look up chunks by their IDs
chunks = []
scores = []
for doc_id, score in filtered_items:
if doc_id in chunk_map:
chunks.append(chunk_map[doc_id])
scores.append(score)
return QueryChunksResponse(chunks=chunks, scores=scores)
async def delete(self): async def delete(self):
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
@ -170,6 +318,25 @@ class PGVectorIndex(EmbeddingIndex):
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,)) cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))
def get_pgvector_search_function(self) -> str:
return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
def check_distance_metric_availability(self, distance_metric: str) -> None:
"""Check if the distance metric is supported by PGVector.
Args:
distance_metric: The distance metric to check
Raises:
ValueError: If the distance metric is not supported
"""
if distance_metric not in self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION:
supported_metrics = list(self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION.keys())
raise ValueError(
f"Distance metric '{distance_metric}' is not supported by PGVector. "
f"Supported metrics are: {', '.join(supported_metrics)}"
)
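# A hypothetical construction sketch (connection setup elided): choose a
# supported metric up front; an unsupported one raises ValueError above.
#   index = PGVectorIndex(vector_db, dimension=768, conn=conn, distance_metric="L2")
#   await index.initialize()  # creates the table and the GIN index if missing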
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
def __init__( def __init__(
@ -185,8 +352,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
self.files_api = files_api self.files_api = files_api
self.kvstore: KVStore | None = None self.kvstore: KVStore | None = None
self.vector_db_store = None self.vector_db_store = None
self.openai_vector_store: dict[str, dict[str, Any]] = {} self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.metadatadata_collection_name = "openai_vector_stores_metadata" self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None: async def initialize(self) -> None:
log.info(f"Initializing PGVector memory adapter with config: {self.config}") log.info(f"Initializing PGVector memory adapter with config: {self.config}")
@ -233,9 +400,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
upsert_models(self.conn, [(vector_db.identifier, vector_db)]) upsert_models(self.conn, [(vector_db.identifier, vector_db)])
# Create and cache the PGVector index table for the vector DB # Create and cache the PGVector index table for the vector DB
pgvector_index = PGVectorIndex(
vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
)
await pgvector_index.initialize()
index = VectorDBWithIndex( index = VectorDBWithIndex(
vector_db, vector_db,
index=PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn, kvstore=self.kvstore), index=pgvector_index,
inference_api=self.inference_api, inference_api=self.inference_api,
) )
self.cache[vector_db.identifier] = index self.cache[vector_db.identifier] = index
@ -272,8 +443,15 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
if vector_db_id in self.cache: if vector_db_id in self.cache:
return self.cache[vector_db_id] return self.cache[vector_db_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id) vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn) index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
await index.initialize()
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
return self.cache[vector_db_id] return self.cache[vector_db_id]

View file

@ -294,12 +294,12 @@ class VectorDBWithIndex:
_validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
if chunks_to_embed: if chunks_to_embed:
resp = await self.inference_api.embeddings( resp = await self.inference_api.openai_embeddings(
self.vector_db.embedding_model, self.vector_db.embedding_model,
[c.content for c in chunks_to_embed], [c.content for c in chunks_to_embed],
) )
for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False): for c, data in zip(chunks_to_embed, resp.data, strict=False):
c.embedding = embedding c.embedding = data.embedding
embeddings = np.array([c.embedding for c in chunks], dtype=np.float32) embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
await self.index.add_chunks(chunks, embeddings) await self.index.add_chunks(chunks, embeddings)
@ -334,8 +334,8 @@ class VectorDBWithIndex:
if mode == "keyword": if mode == "keyword":
return await self.index.query_keyword(query_string, k, score_threshold) return await self.index.query_keyword(query_string, k, score_threshold)
embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string]) embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32) query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
if mode == "hybrid": if mode == "hybrid":
return await self.index.query_hybrid( return await self.index.query_hybrid(
query_vector, query_string, k, score_threshold, reranker_type, reranker_params query_vector, query_string, k, score_threshold, reranker_type, reranker_params
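The call-site changes above follow from the response shape: the legacy embeddings endpoint returned a bare list of vectors under .embeddings, while the OpenAI-compatible endpoint returns typed entries under .data, each carrying an .embedding field. A minimal sketch with stand-in dataclasses (the real llama-stack response types may differ):

from dataclasses import dataclass

@dataclass
class OpenAIEmbeddingData:
    embedding: list[float]
    index: int

@dataclass
class OpenAIEmbeddingsResponse:
    data: list[OpenAIEmbeddingData]

resp = OpenAIEmbeddingsResponse(data=[OpenAIEmbeddingData(embedding=[0.1, 0.2], index=0)])
# old: resp.embeddings[0]  ->  new: resp.data[0].embedding
query_vector = resp.data[0].embedding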

View file

@ -23,6 +23,7 @@ from sqlalchemy import (
) )
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.ext.asyncio.engine import AsyncEngine
from sqlalchemy.sql.elements import ColumnElement
from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -43,6 +44,30 @@ TYPE_MAPPING: dict[ColumnType, Any] = {
} }
def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
"""Return a SQLAlchemy expression for a where condition.
`value` may be a simple scalar (equality) or a mapping like {">": 123}.
The returned expression is a SQLAlchemy ColumnElement usable in query.where(...).
"""
if isinstance(value, Mapping):
if len(value) != 1:
raise ValueError(f"Operator mapping must have a single operator, got: {value}")
op, operand = next(iter(value.items()))
if op == "==" or op == "=":
return column == operand
if op == ">":
return column > operand
if op == "<":
return column < operand
if op == ">=":
return column >= operand
if op == "<=":
return column <= operand
raise ValueError(f"Unsupported operator '{op}' in where mapping")
return column == value
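A short usage sketch for _build_where_expr, assuming Mapping is imported from collections.abc alongside the SQLAlchemy imports above; the table and column names are illustrative only (e.g. a file-expiration cleanup query):

from sqlalchemy import Column, Integer, MetaData, String, Table, select

metadata = MetaData()
files = Table(
    "openai_files",
    metadata,
    Column("id", String, primary_key=True),
    Column("expires_at", Integer),
)

# Scalar values keep the original equality behavior...
expr_eq = _build_where_expr(files.c.id, "file-123")  # files.id = 'file-123'
# ...while single-operator mappings enable range filters.
expr_le = _build_where_expr(files.c.expires_at, {"<=": 1_700_000_000})
stmt = select(files).where(expr_le)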
class SqlAlchemySqlStoreImpl(SqlStore): class SqlAlchemySqlStoreImpl(SqlStore):
def __init__(self, config: SqlAlchemySqlStoreConfig): def __init__(self, config: SqlAlchemySqlStoreConfig):
self.config = config self.config = config
@ -111,7 +136,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
if where: if where:
for key, value in where.items(): for key, value in where.items():
query = query.where(table_obj.c[key] == value) query = query.where(_build_where_expr(table_obj.c[key], value))
if where_sql: if where_sql:
query = query.where(text(where_sql)) query = query.where(text(where_sql))
@ -222,7 +247,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
async with self.async_session() as session: async with self.async_session() as session:
stmt = self.metadata.tables[table].update() stmt = self.metadata.tables[table].update()
for key, value in where.items(): for key, value in where.items():
stmt = stmt.where(self.metadata.tables[table].c[key] == value) stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
await session.execute(stmt, data) await session.execute(stmt, data)
await session.commit() await session.commit()
@ -233,7 +258,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
async with self.async_session() as session: async with self.async_session() as session:
stmt = self.metadata.tables[table].delete() stmt = self.metadata.tables[table].delete()
for key, value in where.items(): for key, value in where.items():
stmt = stmt.where(self.metadata.tables[table].c[key] == value) stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
await session.execute(stmt) await session.execute(stmt)
await session.commit() await session.commit()

View file

@ -37,3 +37,122 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
else: else:
s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name)) s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name))
return s return s
class WeightedInMemoryAggregator:
@staticmethod
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
"""
Normalize scores to 0-1 range using min-max normalization.
Args:
scores: dictionary of scores with document IDs as keys and scores as values
Returns:
Normalized scores with document IDs as keys and normalized scores as values
"""
if not scores:
return {}
min_score, max_score = min(scores.values()), max(scores.values())
score_range = max_score - min_score
if score_range > 0:
return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
return dict.fromkeys(scores, 1.0)
@staticmethod
def weighted_rerank(
vector_scores: dict[str, float],
keyword_scores: dict[str, float],
alpha: float = 0.5,
) -> dict[str, float]:
"""
Rerank via weighted average of scores.
Args:
vector_scores: scores from vector search
keyword_scores: scores from keyword search
alpha: weight factor between 0 and 1 (default: 0.5)
0 = keyword only, 1 = vector only, 0.5 = equal weight
Returns:
All unique document IDs with weighted combined scores
"""
all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores)
normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores)
# Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score
# alpha=0 means keyword only, alpha=1 means vector only
return {
doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0))
+ (alpha * normalized_vector_scores.get(doc_id, 0.0))
for doc_id in all_ids
}
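A worked example with made-up scores, to make the normalization plus weighting concrete:

vector_scores = {"doc1": 0.9, "doc2": 0.1}
keyword_scores = {"doc2": 2.0, "doc3": 1.0}

# Min-max normalization gives vector -> {doc1: 1.0, doc2: 0.0} and
# keyword -> {doc2: 1.0, doc3: 0.0}; documents missing from a side score 0.0.
combined = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=0.5)
# combined == {"doc1": 0.5, "doc2": 0.5, "doc3": 0.0}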
@staticmethod
def rrf_rerank(
vector_scores: dict[str, float],
keyword_scores: dict[str, float],
impact_factor: float = 60.0,
) -> dict[str, float]:
"""
Rerank via Reciprocal Rank Fusion.
Args:
vector_scores: scores from vector search
keyword_scores: scores from keyword search
impact_factor: impact factor for RRF (default: 60.0)
Returns:
All unique document IDs with RRF combined scores
"""
# Convert scores to ranks
vector_ranks = {
doc_id: i + 1
for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
}
keyword_ranks = {
doc_id: i + 1
for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
}
all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
rrf_scores = {}
for doc_id in all_ids:
vector_rank = vector_ranks.get(doc_id, float("inf"))
keyword_rank = keyword_ranks.get(doc_id, float("inf"))
# RRF formula: score = 1/(k + r) where k is impact_factor (default: 60.0) and r is the rank
rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
return rrf_scores
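Continuing the same made-up scores, RRF rewards documents that rank well in both lists:

vector_scores = {"doc1": 0.9, "doc2": 0.1}
keyword_scores = {"doc2": 2.0, "doc3": 1.0}

rrf = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=60.0)
# Ranks: vector doc1=1, doc2=2; keyword doc2=1, doc3=2; absent docs rank inf.
# doc2: 1/(60+2) + 1/(60+1) ~= 0.0325   (highest, present in both lists)
# doc1: 1/(60+1) + 0.0      ~= 0.0164
# doc3: 1/(60+2) + 0.0      ~= 0.0161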
@staticmethod
def combine_search_results(
vector_scores: dict[str, float],
keyword_scores: dict[str, float],
reranker_type: str = "rrf",
reranker_params: dict[str, float] | None = None,
) -> dict[str, float]:
"""
Combine vector and keyword search results using specified reranking strategy.
Args:
vector_scores: scores from vector search
keyword_scores: scores from keyword search
reranker_type: type of reranker to use (default: "rrf")
reranker_params: parameters for the reranker
Returns:
All unique document IDs with combined scores
"""
if reranker_params is None:
reranker_params = {}
if reranker_type == "weighted":
alpha = reranker_params.get("alpha", 0.5)
return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha)
else:
# Default to RRF for None, RRF, or any unknown types
impact_factor = reranker_params.get("impact_factor", 60.0)
return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
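And a dispatch example through the combined entry point; unknown reranker types deliberately fall back to RRF:

vector_scores = {"docA": 0.2, "docB": 0.8}
keyword_scores = {"docB": 1.5, "docC": 0.5}

combined = WeightedInMemoryAggregator.combine_search_results(
    vector_scores,
    keyword_scores,
    reranker_type="weighted",
    reranker_params={"alpha": 0.7},
)
top_docs = sorted(combined, key=combined.get, reverse=True)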

View file

@ -0,0 +1,610 @@
import { describe, test, expect } from "@jest/globals";
// Extract the exact processChunk function implementation for testing
function createProcessChunk() {
return (chunk: unknown): { text: string | null; isToolCall: boolean } => {
const chunkObj = chunk as Record<string, unknown>;
// Helper function to check if content contains function call JSON
const containsToolCall = (content: string): boolean => {
return (
content.includes('"type": "function"') ||
content.includes('"name": "knowledge_search"') ||
content.includes('"parameters":') ||
!!content.match(/\{"type":\s*"function".*?\}/)
);
};
// Check if this chunk contains a tool call (function call)
let isToolCall = false;
// Check direct chunk content if it's a string
if (typeof chunk === "string") {
isToolCall = containsToolCall(chunk);
}
// Check delta structures
if (
chunkObj?.delta &&
typeof chunkObj.delta === "object" &&
chunkObj.delta !== null
) {
const delta = chunkObj.delta as Record<string, unknown>;
if ("tool_calls" in delta) {
isToolCall = true;
}
if (typeof delta.text === "string") {
if (containsToolCall(delta.text)) {
isToolCall = true;
}
}
}
// Check event structures
if (
chunkObj?.event &&
typeof chunkObj.event === "object" &&
chunkObj.event !== null
) {
const event = chunkObj.event as Record<string, unknown>;
// Check event payload
if (
event?.payload &&
typeof event.payload === "object" &&
event.payload !== null
) {
const payload = event.payload as Record<string, unknown>;
if (typeof payload.content === "string") {
if (containsToolCall(payload.content)) {
isToolCall = true;
}
}
// Check payload delta
if (
payload?.delta &&
typeof payload.delta === "object" &&
payload.delta !== null
) {
const delta = payload.delta as Record<string, unknown>;
if (typeof delta.text === "string") {
if (containsToolCall(delta.text)) {
isToolCall = true;
}
}
}
}
// Check event delta
if (
event?.delta &&
typeof event.delta === "object" &&
event.delta !== null
) {
const delta = event.delta as Record<string, unknown>;
if (typeof delta.text === "string") {
if (containsToolCall(delta.text)) {
isToolCall = true;
}
}
if (typeof delta.content === "string") {
if (containsToolCall(delta.content)) {
isToolCall = true;
}
}
}
}
// if it's a tool call, skip it (don't display in chat)
if (isToolCall) {
return { text: null, isToolCall: true };
}
// Extract text content from various chunk formats
let text: string | null = null;
// Helper function to extract clean text content, filtering out function calls
const extractCleanText = (content: string): string | null => {
if (containsToolCall(content)) {
try {
// Try to parse and extract non-function call parts
const jsonMatch = content.match(
/\{"type":\s*"function"[^}]*\}[^}]*\}/
);
if (jsonMatch) {
const jsonPart = jsonMatch[0];
const parsedJson = JSON.parse(jsonPart);
// If it's a function call, extract text after JSON
if (parsedJson.type === "function") {
const textAfterJson = content
.substring(content.indexOf(jsonPart) + jsonPart.length)
.trim();
return textAfterJson || null;
}
}
// If we can't parse it properly, skip the whole thing
return null;
} catch {
return null;
}
}
return content;
};
// Try direct delta text
if (
chunkObj?.delta &&
typeof chunkObj.delta === "object" &&
chunkObj.delta !== null
) {
const delta = chunkObj.delta as Record<string, unknown>;
if (typeof delta.text === "string") {
text = extractCleanText(delta.text);
}
}
// Try event structures
if (
!text &&
chunkObj?.event &&
typeof chunkObj.event === "object" &&
chunkObj.event !== null
) {
const event = chunkObj.event as Record<string, unknown>;
// Try event payload content
if (
event?.payload &&
typeof event.payload === "object" &&
event.payload !== null
) {
const payload = event.payload as Record<string, unknown>;
// Try direct payload content
if (typeof payload.content === "string") {
text = extractCleanText(payload.content);
}
// Try turn_complete event structure: payload.turn.output_message.content
if (
!text &&
payload?.turn &&
typeof payload.turn === "object" &&
payload.turn !== null
) {
const turn = payload.turn as Record<string, unknown>;
if (
turn?.output_message &&
typeof turn.output_message === "object" &&
turn.output_message !== null
) {
const outputMessage = turn.output_message as Record<
string,
unknown
>;
if (typeof outputMessage.content === "string") {
text = extractCleanText(outputMessage.content);
}
}
// Fallback to model_response in steps if no output_message
if (
!text &&
turn?.steps &&
Array.isArray(turn.steps) &&
turn.steps.length > 0
) {
for (const step of turn.steps) {
if (step && typeof step === "object" && step !== null) {
const stepObj = step as Record<string, unknown>;
if (
stepObj?.model_response &&
typeof stepObj.model_response === "object" &&
stepObj.model_response !== null
) {
const modelResponse = stepObj.model_response as Record<
string,
unknown
>;
if (typeof modelResponse.content === "string") {
text = extractCleanText(modelResponse.content);
break;
}
}
}
}
}
}
// Try payload delta
if (
!text &&
payload?.delta &&
typeof payload.delta === "object" &&
payload.delta !== null
) {
const delta = payload.delta as Record<string, unknown>;
if (typeof delta.text === "string") {
text = extractCleanText(delta.text);
}
}
}
// Try event delta
if (
!text &&
event?.delta &&
typeof event.delta === "object" &&
event.delta !== null
) {
const delta = event.delta as Record<string, unknown>;
if (typeof delta.text === "string") {
text = extractCleanText(delta.text);
}
if (!text && typeof delta.content === "string") {
text = extractCleanText(delta.content);
}
}
}
// Try choices structure (ChatML format)
if (
!text &&
chunkObj?.choices &&
Array.isArray(chunkObj.choices) &&
chunkObj.choices.length > 0
) {
const choice = chunkObj.choices[0] as Record<string, unknown>;
if (
choice?.delta &&
typeof choice.delta === "object" &&
choice.delta !== null
) {
const delta = choice.delta as Record<string, unknown>;
if (typeof delta.content === "string") {
text = extractCleanText(delta.content);
}
}
}
// Try direct string content
if (!text && typeof chunk === "string") {
text = extractCleanText(chunk);
}
return { text, isToolCall: false };
};
}
describe("Chunk Processor", () => {
const processChunk = createProcessChunk();
describe("Real Event Structures", () => {
test("handles turn_complete event with cancellation policy response", () => {
const chunk = {
event: {
payload: {
event_type: "turn_complete",
turn: {
turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
input_messages: [
{
role: "user",
content: "nice, what's the cancellation policy?",
context: null,
},
],
steps: [
{
turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
step_id: "54074310-af42-414c-9ffe-fba5b2ead0ad",
started_at: "2025-08-27T18:15:25.870703Z",
completed_at: "2025-08-27T18:15:51.288993Z",
step_type: "inference",
model_response: {
role: "assistant",
content:
"According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
stop_reason: "end_of_turn",
tool_calls: [],
},
},
],
output_message: {
role: "assistant",
content:
"According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
stop_reason: "end_of_turn",
tool_calls: [],
},
output_attachments: [],
started_at: "2025-08-27T18:15:25.868548Z",
completed_at: "2025-08-27T18:15:51.289262Z",
},
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toContain(
"According to the search results, the cancellation policy for Red Hat Summit is as follows:"
);
expect(result.text).toContain("5 PM EDT on April 18, 2025");
});
test("handles turn_complete event with address response", () => {
const chunk = {
event: {
payload: {
event_type: "turn_complete",
turn: {
turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
input_messages: [
{
role: "user",
content: "what's francisco's address",
context: null,
},
],
steps: [
{
turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
step_id: "c13dd277-1acb-4419-8fbf-d5e2f45392ea",
started_at: "2025-08-27T18:14:52.558761Z",
completed_at: "2025-08-27T18:15:11.306032Z",
step_type: "inference",
model_response: {
role: "assistant",
content:
"Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
stop_reason: "end_of_turn",
tool_calls: [],
},
},
],
output_message: {
role: "assistant",
content:
"Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
stop_reason: "end_of_turn",
tool_calls: [],
},
output_attachments: [],
started_at: "2025-08-27T18:14:52.553707Z",
completed_at: "2025-08-27T18:15:11.306729Z",
},
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toContain("Francisco Arceo's address is:");
expect(result.text).toContain("17 Primrose Ln");
expect(result.text).toContain("Basking Ridge New Jersey 07920");
});
test("handles turn_complete event with ticket cost response", () => {
const chunk = {
event: {
payload: {
event_type: "turn_complete",
turn: {
turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
input_messages: [
{
role: "user",
content: "what was the ticket cost for summit?",
context: null,
},
],
steps: [
{
turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
step_id: "7651dda0-315a-472d-b1c1-3c2725f55bc5",
started_at: "2025-08-27T18:14:21.710611Z",
completed_at: "2025-08-27T18:14:39.706452Z",
step_type: "inference",
model_response: {
role: "assistant",
content:
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
stop_reason: "end_of_turn",
tool_calls: [],
},
},
],
output_message: {
role: "assistant",
content:
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
stop_reason: "end_of_turn",
tool_calls: [],
},
output_attachments: [],
started_at: "2025-08-27T18:14:21.705289Z",
completed_at: "2025-08-27T18:14:39.706752Z",
},
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe(
"The ticket cost for the Red Hat Summit was $999.00 for a conference pass."
);
});
});
describe("Function Call Detection", () => {
test("detects function calls in direct string chunks", () => {
const chunk =
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}';
const result = processChunk(chunk);
expect(result.isToolCall).toBe(true);
expect(result.text).toBe(null);
});
test("detects function calls in event payload content", () => {
const chunk = {
event: {
payload: {
content:
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}',
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(true);
expect(result.text).toBe(null);
});
test("detects tool_calls in delta structure", () => {
const chunk = {
delta: {
tool_calls: [{ function: { name: "knowledge_search" } }],
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(true);
expect(result.text).toBe(null);
});
test("detects function call in mixed content but skips it", () => {
const chunk =
'{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}} Based on the search results, here is your answer.';
const result = processChunk(chunk);
// This is detected as a tool call and skipped entirely - the implementation prioritizes safety
expect(result.isToolCall).toBe(true);
expect(result.text).toBe(null);
});
});
describe("Text Extraction", () => {
test("extracts text from direct string chunks", () => {
const chunk = "Hello, this is a normal response.";
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("Hello, this is a normal response.");
});
test("extracts text from delta structure", () => {
const chunk = {
delta: {
text: "Hello, this is a normal response.",
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("Hello, this is a normal response.");
});
test("extracts text from choices structure", () => {
const chunk = {
choices: [
{
delta: {
content: "Hello, this is a normal response.",
},
},
],
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("Hello, this is a normal response.");
});
test("prioritizes output_message over model_response in turn structure", () => {
const chunk = {
event: {
payload: {
turn: {
steps: [
{
model_response: {
content: "Model response content.",
},
},
],
output_message: {
content: "Final output message content.",
},
},
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("Final output message content.");
});
test("falls back to model_response when no output_message", () => {
const chunk = {
event: {
payload: {
turn: {
steps: [
{
model_response: {
content: "This is from the model response.",
},
},
],
},
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("This is from the model response.");
});
});
describe("Edge Cases", () => {
test("handles empty chunks", () => {
const result = processChunk("");
expect(result.isToolCall).toBe(false);
expect(result.text).toBe("");
});
test("handles null chunks", () => {
const result = processChunk(null);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe(null);
});
test("handles undefined chunks", () => {
const result = processChunk(undefined);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe(null);
});
test("handles chunks with no text content", () => {
const chunk = {
event: {
metadata: {
timestamp: "2024-01-01",
},
},
};
const result = processChunk(chunk);
expect(result.isToolCall).toBe(false);
expect(result.text).toBe(null);
});
test("handles malformed JSON in function calls gracefully", () => {
const chunk =
'{"type": "function", "name": "knowledge_search"} incomplete json';
const result = processChunk(chunk);
expect(result.isToolCall).toBe(true);
expect(result.text).toBe(null);
});
});
});

View file

@ -31,6 +31,9 @@ const mockClient = {
toolgroups: { toolgroups: {
list: jest.fn(), list: jest.fn(),
}, },
vectorDBs: {
list: jest.fn(),
},
}; };
jest.mock("@/hooks/use-auth-client", () => ({ jest.mock("@/hooks/use-auth-client", () => ({
@ -164,7 +167,7 @@ describe("ChatPlaygroundPage", () => {
session_name: "Test Session", session_name: "Test Session",
started_at: new Date().toISOString(), started_at: new Date().toISOString(),
turns: [], turns: [],
}); // No turns by default });
mockClient.agents.retrieve.mockResolvedValue({ mockClient.agents.retrieve.mockResolvedValue({
agent_id: "test-agent", agent_id: "test-agent",
agent_config: { agent_config: {
@ -417,7 +420,6 @@ describe("ChatPlaygroundPage", () => {
}); });
await waitFor(() => { await waitFor(() => {
// first agent should be auto-selected
expect(mockClient.agents.session.create).toHaveBeenCalledWith( expect(mockClient.agents.session.create).toHaveBeenCalledWith(
"agent_123", "agent_123",
{ session_name: "Default Session" } { session_name: "Default Session" }
@ -464,7 +466,7 @@ describe("ChatPlaygroundPage", () => {
}); });
}); });
test("hides delete button when only one agent exists", async () => { test("shows delete button even when only one agent exists", async () => {
mockClient.agents.list.mockResolvedValue({ mockClient.agents.list.mockResolvedValue({
data: [mockAgents[0]], data: [mockAgents[0]],
}); });
@ -474,9 +476,7 @@ describe("ChatPlaygroundPage", () => {
}); });
await waitFor(() => { await waitFor(() => {
expect( expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
screen.queryByTitle("Delete current agent")
).not.toBeInTheDocument();
}); });
}); });
@ -505,7 +505,7 @@ describe("ChatPlaygroundPage", () => {
await waitFor(() => { await waitFor(() => {
expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123"); expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123");
expect(global.confirm).toHaveBeenCalledWith( expect(global.confirm).toHaveBeenCalledWith(
"Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions." "Are you sure you want to delete this agent? This action cannot be undone and will delete the agent and all its sessions."
); );
}); });
@ -584,4 +584,207 @@ describe("ChatPlaygroundPage", () => {
consoleSpy.mockRestore(); consoleSpy.mockRestore();
}); });
}); });
describe("RAG File Upload", () => {
let mockFileReader: {
readAsDataURL: jest.Mock;
readAsText: jest.Mock;
result: string | null;
onload: (() => void) | null;
onerror: (() => void) | null;
};
let mockRAGTool: {
insert: jest.Mock;
};
beforeEach(() => {
mockFileReader = {
readAsDataURL: jest.fn(),
readAsText: jest.fn(),
result: null,
onload: null,
onerror: null,
};
global.FileReader = jest.fn(() => mockFileReader);
mockRAGTool = {
insert: jest.fn().mockResolvedValue({}),
};
mockClient.toolRuntime = {
ragTool: mockRAGTool,
};
});
afterEach(() => {
jest.clearAllMocks();
});
test("handles text file upload", async () => {
new File(["Hello, world!"], "test.txt", {
type: "text/plain",
});
mockClient.agents.retrieve.mockResolvedValue({
agent_id: "test-agent",
agent_config: {
toolgroups: [
{
name: "builtin::rag/knowledge_search",
args: { vector_db_ids: ["test-vector-db"] },
},
],
},
});
await act(async () => {
render(<ChatPlaygroundPage />);
});
await waitFor(() => {
expect(screen.getByTestId("chat-component")).toBeInTheDocument();
});
const chatComponent = screen.getByTestId("chat-component");
chatComponent.getAttribute("data-onragfileupload");
// this is a simplified test
expect(mockRAGTool.insert).not.toHaveBeenCalled();
});
test("handles PDF file upload with FileReader", async () => {
new File([new ArrayBuffer(1000)], "test.pdf", {
type: "application/pdf",
});
const mockDataURL = "data:application/pdf;base64,JVBERi0xLjQK";
mockFileReader.result = mockDataURL;
mockClient.agents.retrieve.mockResolvedValue({
agent_id: "test-agent",
agent_config: {
toolgroups: [
{
name: "builtin::rag/knowledge_search",
args: { vector_db_ids: ["test-vector-db"] },
},
],
},
});
await act(async () => {
render(<ChatPlaygroundPage />);
});
await waitFor(() => {
expect(screen.getByTestId("chat-component")).toBeInTheDocument();
});
expect(global.FileReader).toBeDefined();
});
test("handles different file types correctly", () => {
const getContentType = (filename: string): string => {
const ext = filename.toLowerCase().split(".").pop();
switch (ext) {
case "pdf":
return "application/pdf";
case "txt":
return "text/plain";
case "md":
return "text/markdown";
case "html":
return "text/html";
case "csv":
return "text/csv";
case "json":
return "application/json";
case "docx":
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
case "doc":
return "application/msword";
default:
return "application/octet-stream";
}
};
expect(getContentType("test.pdf")).toBe("application/pdf");
expect(getContentType("test.txt")).toBe("text/plain");
expect(getContentType("test.md")).toBe("text/markdown");
expect(getContentType("test.html")).toBe("text/html");
expect(getContentType("test.csv")).toBe("text/csv");
expect(getContentType("test.json")).toBe("application/json");
expect(getContentType("test.docx")).toBe(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
);
expect(getContentType("test.doc")).toBe("application/msword");
expect(getContentType("test.unknown")).toBe("application/octet-stream");
});
test("determines text vs binary file types correctly", () => {
const isTextFile = (mimeType: string): boolean => {
return (
mimeType.startsWith("text/") ||
mimeType === "application/json" ||
mimeType === "text/markdown" ||
mimeType === "text/html" ||
mimeType === "text/csv"
);
};
expect(isTextFile("text/plain")).toBe(true);
expect(isTextFile("text/markdown")).toBe(true);
expect(isTextFile("text/html")).toBe(true);
expect(isTextFile("text/csv")).toBe(true);
expect(isTextFile("application/json")).toBe(true);
expect(isTextFile("application/pdf")).toBe(false);
expect(isTextFile("application/msword")).toBe(false);
expect(
isTextFile(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
).toBe(false);
expect(isTextFile("application/octet-stream")).toBe(false);
});
test("handles FileReader error gracefully", async () => {
const pdfFile = new File([new ArrayBuffer(1000)], "test.pdf", {
type: "application/pdf",
});
mockFileReader.onerror = jest.fn();
const mockError = new Error("FileReader failed");
const fileReaderPromise = new Promise<string>((resolve, reject) => {
const reader = new FileReader();
reader.onload = () => resolve(reader.result as string);
reader.onerror = () => reject(reader.error || mockError);
reader.readAsDataURL(pdfFile);
setTimeout(() => {
reader.onerror?.(new ProgressEvent("error"));
}, 0);
});
await expect(fileReaderPromise).rejects.toBeDefined();
});
test("handles large file upload with FileReader approach", () => {
// create a large file
const largeFile = new File(
[new ArrayBuffer(10 * 1024 * 1024)],
"large.pdf",
{
type: "application/pdf",
}
);
expect(largeFile.size).toBe(10 * 1024 * 1024); // 10MB
expect(global.FileReader).toBeDefined();
const reader = new FileReader();
expect(reader.readAsDataURL).toBeDefined();
});
});
}); });

File diff suppressed because it is too large

View file

@ -35,6 +35,7 @@ interface ChatPropsBase {
) => void; ) => void;
setMessages?: (messages: Message[]) => void; setMessages?: (messages: Message[]) => void;
transcribeAudio?: (blob: Blob) => Promise<string>; transcribeAudio?: (blob: Blob) => Promise<string>;
onRAGFileUpload?: (file: File) => Promise<void>;
} }
interface ChatPropsWithoutSuggestions extends ChatPropsBase { interface ChatPropsWithoutSuggestions extends ChatPropsBase {
@ -62,6 +63,7 @@ export function Chat({
onRateResponse, onRateResponse,
setMessages, setMessages,
transcribeAudio, transcribeAudio,
onRAGFileUpload,
}: ChatProps) { }: ChatProps) {
const lastMessage = messages.at(-1); const lastMessage = messages.at(-1);
const isEmpty = messages.length === 0; const isEmpty = messages.length === 0;
@ -226,16 +228,17 @@ export function Chat({
isPending={isGenerating || isTyping} isPending={isGenerating || isTyping}
handleSubmit={handleSubmit} handleSubmit={handleSubmit}
> >
{({ files, setFiles }) => ( {() => (
<MessageInput <MessageInput
value={input} value={input}
onChange={handleInputChange} onChange={handleInputChange}
allowAttachments allowAttachments={true}
files={files} files={null}
setFiles={setFiles} setFiles={() => {}}
stop={handleStop} stop={handleStop}
isGenerating={isGenerating} isGenerating={isGenerating}
transcribeAudio={transcribeAudio} transcribeAudio={transcribeAudio}
onRAGFileUpload={onRAGFileUpload}
/> />
)} )}
</ChatForm> </ChatForm>

View file

@ -14,6 +14,7 @@ import { Card } from "@/components/ui/card";
import { Trash2 } from "lucide-react"; import { Trash2 } from "lucide-react";
import type { Message } from "@/components/chat-playground/chat-message"; import type { Message } from "@/components/chat-playground/chat-message";
import { useAuthClient } from "@/hooks/use-auth-client"; import { useAuthClient } from "@/hooks/use-auth-client";
import { cleanMessageContent } from "@/lib/message-content-utils";
import type { import type {
Session, Session,
SessionCreateParams, SessionCreateParams,
@ -219,10 +220,7 @@ export function Conversations({
messages.push({ messages.push({
id: `${turn.turn_id}-assistant-${messages.length}`, id: `${turn.turn_id}-assistant-${messages.length}`,
role: "assistant", role: "assistant",
content: content: cleanMessageContent(turn.output_message.content),
typeof turn.output_message.content === "string"
? turn.output_message.content
: JSON.stringify(turn.output_message.content),
createdAt: new Date( createdAt: new Date(
turn.completed_at || turn.started_at || Date.now() turn.completed_at || turn.started_at || Date.now()
), ),
@ -271,7 +269,7 @@ export function Conversations({
); );
const deleteSession = async (sessionId: string) => { const deleteSession = async (sessionId: string) => {
if (sessions.length <= 1 || !selectedAgentId) { if (!selectedAgentId) {
return; return;
} }
@ -324,7 +322,6 @@ export function Conversations({
} }
}, [currentSession]); }, [currentSession]);
// Don't render if no agent is selected
if (!selectedAgentId) { if (!selectedAgentId) {
return null; return null;
} }
@ -357,7 +354,7 @@ export function Conversations({
+ New + New
</Button> </Button>
{currentSession && sessions.length > 1 && ( {currentSession && (
<Button <Button
onClick={() => deleteSession(currentSession.id)} onClick={() => deleteSession(currentSession.id)}
variant="outline" variant="outline"

View file

@ -21,6 +21,7 @@ interface MessageInputBaseProps
isGenerating: boolean; isGenerating: boolean;
enableInterrupt?: boolean; enableInterrupt?: boolean;
transcribeAudio?: (blob: Blob) => Promise<string>; transcribeAudio?: (blob: Blob) => Promise<string>;
onRAGFileUpload?: (file: File) => Promise<void>;
} }
interface MessageInputWithoutAttachmentProps extends MessageInputBaseProps { interface MessageInputWithoutAttachmentProps extends MessageInputBaseProps {
@ -213,8 +214,13 @@ export function MessageInput({
className className
)} )}
{...(props.allowAttachments {...(props.allowAttachments
? omit(props, ["allowAttachments", "files", "setFiles"]) ? omit(props, [
: omit(props, ["allowAttachments"]))} "allowAttachments",
"files",
"setFiles",
"onRAGFileUpload",
])
: omit(props, ["allowAttachments", "onRAGFileUpload"]))}
/> />
{props.allowAttachments && ( {props.allowAttachments && (
@ -254,11 +260,19 @@ export function MessageInput({
size="icon" size="icon"
variant="outline" variant="outline"
className="h-8 w-8" className="h-8 w-8"
aria-label="Attach a file" aria-label="Upload file to RAG"
disabled={true} disabled={false}
onClick={async () => { onClick={async () => {
const files = await showFileUploadDialog(); const input = document.createElement("input");
addFiles(files); input.type = "file";
input.accept = ".pdf,.txt,.md,.html,.csv,.json";
input.onchange = async e => {
const file = (e.target as HTMLInputElement).files?.[0];
if (file && props.onRAGFileUpload) {
await props.onRAGFileUpload(file);
}
};
input.click();
}} }}
> >
<Paperclip className="h-4 w-4" /> <Paperclip className="h-4 w-4" />
@ -337,28 +351,6 @@ function FileUploadOverlay({ isDragging }: FileUploadOverlayProps) {
); );
} }
function showFileUploadDialog() {
const input = document.createElement("input");
input.type = "file";
input.multiple = true;
input.accept = "*/*";
input.click();
return new Promise<File[] | null>(resolve => {
input.onchange = e => {
const files = (e.currentTarget as HTMLInputElement).files;
if (files) {
resolve(Array.from(files));
return;
}
resolve(null);
};
});
}
function TranscribingOverlay() { function TranscribingOverlay() {
return ( return (
<motion.div <motion.div

View file

@ -0,0 +1,243 @@
"use client";
import { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Card } from "@/components/ui/card";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { useAuthClient } from "@/hooks/use-auth-client";
import type { Model } from "llama-stack-client/resources/models";
interface VectorDBCreatorProps {
models: Model[];
onVectorDBCreated?: (vectorDbId: string) => void;
onCancel?: () => void;
}
interface VectorDBProvider {
api: string;
provider_id: string;
provider_type: string;
}
export function VectorDBCreator({
models,
onVectorDBCreated,
onCancel,
}: VectorDBCreatorProps) {
const [vectorDbName, setVectorDbName] = useState("");
const [selectedEmbeddingModel, setSelectedEmbeddingModel] = useState("");
const [selectedProvider, setSelectedProvider] = useState("faiss");
const [availableProviders, setAvailableProviders] = useState<
VectorDBProvider[]
>([]);
const [isCreating, setIsCreating] = useState(false);
const [isLoadingProviders, setIsLoadingProviders] = useState(false);
const [error, setError] = useState<string | null>(null);
const client = useAuthClient();
const embeddingModels = models.filter(
model => model.model_type === "embedding"
);
useEffect(() => {
const fetchProviders = async () => {
setIsLoadingProviders(true);
try {
const providersResponse = await client.providers.list();
const vectorIoProviders = providersResponse.filter(
(provider: VectorDBProvider) => provider.api === "vector_io"
);
setAvailableProviders(vectorIoProviders);
if (vectorIoProviders.length > 0) {
const faissProvider = vectorIoProviders.find(
(p: VectorDBProvider) => p.provider_id === "faiss"
);
setSelectedProvider(
faissProvider?.provider_id || vectorIoProviders[0].provider_id
);
}
} catch (err) {
console.error("Error fetching providers:", err);
setAvailableProviders([
{
api: "vector_io",
provider_id: "faiss",
provider_type: "inline::faiss",
},
]);
} finally {
setIsLoadingProviders(false);
}
};
fetchProviders();
}, [client]);
const handleCreate = async () => {
if (!vectorDbName.trim() || !selectedEmbeddingModel) {
setError("Please provide a name and select an embedding model");
return;
}
setIsCreating(true);
setError(null);
try {
const embeddingModel = embeddingModels.find(
m => m.identifier === selectedEmbeddingModel
);
if (!embeddingModel) {
throw new Error("Selected embedding model not found");
}
const embeddingDimension = embeddingModel.metadata
?.embedding_dimension as number;
if (!embeddingDimension) {
throw new Error("Embedding dimension not available for selected model");
}
const vectorDbId = vectorDbName.trim() || `vector_db_${Date.now()}`;
const response = await client.vectorDBs.register({
vector_db_id: vectorDbId,
embedding_model: selectedEmbeddingModel,
embedding_dimension: embeddingDimension,
provider_id: selectedProvider,
});
onVectorDBCreated?.(response.identifier || vectorDbId);
} catch (err) {
console.error("Error creating vector DB:", err);
setError(
err instanceof Error ? err.message : "Failed to create vector DB"
);
} finally {
setIsCreating(false);
}
};
return (
<Card className="p-6 space-y-4">
<h3 className="text-lg font-semibold">Create Vector Database</h3>
<div className="space-y-4">
<div>
<label className="text-sm font-medium block mb-2">
Vector DB Name
</label>
<Input
value={vectorDbName}
onChange={e => setVectorDbName(e.target.value)}
placeholder="My Vector Database"
/>
</div>
<div>
<label className="text-sm font-medium block mb-2">
Embedding Model
</label>
<Select
value={selectedEmbeddingModel}
onValueChange={setSelectedEmbeddingModel}
>
<SelectTrigger>
<SelectValue placeholder="Select Embedding Model" />
</SelectTrigger>
<SelectContent>
{embeddingModels.map(model => (
<SelectItem key={model.identifier} value={model.identifier}>
{model.identifier}
</SelectItem>
))}
</SelectContent>
</Select>
{selectedEmbeddingModel && (
<p className="text-xs text-muted-foreground mt-1">
Dimension:{" "}
{embeddingModels.find(
m => m.identifier === selectedEmbeddingModel
)?.metadata?.embedding_dimension || "Unknown"}
</p>
)}
</div>
<div>
<label className="text-sm font-medium block mb-2">
Vector Database Provider
</label>
<Select
value={selectedProvider}
onValueChange={setSelectedProvider}
disabled={isLoadingProviders}
>
<SelectTrigger>
<SelectValue
placeholder={
isLoadingProviders
? "Loading providers..."
: "Select Provider"
}
/>
</SelectTrigger>
<SelectContent>
{availableProviders.map(provider => (
<SelectItem
key={provider.provider_id}
value={provider.provider_id}
>
{provider.provider_id}
</SelectItem>
))}
</SelectContent>
</Select>
{selectedProvider && (
<p className="text-xs text-muted-foreground mt-1">
Selected provider: {selectedProvider}
</p>
)}
</div>
{error && (
<div className="text-destructive text-sm bg-destructive/10 p-2 rounded">
{error}
</div>
)}
<div className="flex gap-2 pt-2">
<Button
onClick={handleCreate}
disabled={
isCreating || !vectorDbName.trim() || !selectedEmbeddingModel
}
className="flex-1"
>
{isCreating ? "Creating..." : "Create Vector DB"}
</Button>
{onCancel && (
<Button variant="outline" onClick={onCancel} className="flex-1">
Cancel
</Button>
)}
</div>
</div>
<div className="text-xs text-muted-foreground bg-muted/50 p-3 rounded">
<strong>Note:</strong> This will create a new vector database that can
be used with RAG tools. After creation, you&apos;ll be able to upload
documents and use it for knowledge search in your agent conversations.
</div>
</Card>
);
}
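The registration this component performs can also be scripted against the server directly; a minimal sketch using the Python client, where the base URL, model identifier, and dimension are assumptions for illustration:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Mirrors the component's client.vectorDBs.register(...) call.
vector_db = client.vector_dbs.register(
    vector_db_id="my_vector_db",
    embedding_model="all-MiniLM-L6-v2",  # must be a registered embedding model
    embedding_dimension=384,             # must match the model's embedding_dimension metadata
    provider_id="faiss",
)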

View file

@ -0,0 +1,51 @@
// check if content contains function call JSON
export const containsToolCall = (content: string): boolean => {
return (
content.includes('"type": "function"') ||
content.includes('"name": "knowledge_search"') ||
content.includes('"parameters":') ||
!!content.match(/\{"type":\s*"function".*?\}/)
);
};
export const extractCleanText = (content: string): string | null => {
if (containsToolCall(content)) {
try {
// parse and extract non-function call parts
const jsonMatch = content.match(/\{"type":\s*"function"[^}]*\}[^}]*\}/);
if (jsonMatch) {
const jsonPart = jsonMatch[0];
const parsedJson = JSON.parse(jsonPart);
// if function call, extract text after JSON
if (parsedJson.type === "function") {
const textAfterJson = content
.substring(content.indexOf(jsonPart) + jsonPart.length)
.trim();
return textAfterJson || null;
}
}
return null;
} catch {
return null;
}
}
return content;
};
// removes function-call JSON, handling different content types
export const cleanMessageContent = (
content: string | unknown[] | unknown
): string => {
if (typeof content === "string") {
const cleaned = extractCleanText(content);
return cleaned || "";
} else if (Array.isArray(content)) {
return content
.filter((item: { type: string }) => item.type === "text")
.map((item: { text: string }) => item.text)
.join("");
} else {
return JSON.stringify(content);
}
};

View file

@ -14,11 +14,11 @@
"@radix-ui/react-select": "^2.2.5", "@radix-ui/react-select": "^2.2.5",
"@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-tooltip": "^1.2.6", "@radix-ui/react-tooltip": "^1.2.8",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"framer-motion": "^11.18.2", "framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.19", "llama-stack-client": "^0.2.20",
"lucide-react": "^0.510.0", "lucide-react": "^0.510.0",
"next": "15.3.3", "next": "15.3.3",
"next-auth": "^4.24.11", "next-auth": "^4.24.11",
@ -39,16 +39,16 @@
"@testing-library/jest-dom": "^6.8.0", "@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0", "@testing-library/react": "^16.3.0",
"@types/jest": "^29.5.14", "@types/jest": "^29.5.14",
"@types/node": "^20", "@types/node": "^24",
"@types/react": "^19", "@types/react": "^19",
"@types/react-dom": "^19", "@types/react-dom": "^19",
"eslint": "^9", "eslint": "^9",
"eslint-config-next": "15.3.2", "eslint-config-next": "15.5.2",
"eslint-config-prettier": "^10.1.8", "eslint-config-prettier": "^10.1.8",
"eslint-plugin-prettier": "^5.5.4", "eslint-plugin-prettier": "^5.5.4",
"jest": "^29.7.0", "jest": "^29.7.0",
"jest-environment-jsdom": "^29.7.0", "jest-environment-jsdom": "^29.7.0",
"prettier": "3.5.3", "prettier": "3.6.2",
"tailwindcss": "^4", "tailwindcss": "^4",
"ts-node": "^10.9.2", "ts-node": "^10.9.2",
"tw-animate-css": "^1.2.9", "tw-animate-css": "^1.2.9",
@ -1854,9 +1854,9 @@
"integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw==" "integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw=="
}, },
"node_modules/@next/eslint-plugin-next": { "node_modules/@next/eslint-plugin-next": {
"version": "15.3.2", "version": "15.5.2",
"resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.3.2.tgz", "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.5.2.tgz",
"integrity": "sha512-ijVRTXBgnHT33aWnDtmlG+LJD+5vhc9AKTJPquGG5NKXjpKNjc62woIhFtrAcWdBobt8kqjCoaJ0q6sDQoX7aQ==", "integrity": "sha512-lkLrRVxcftuOsJNhWatf1P2hNVfh98k/omQHrCEPPriUypR6RcS13IvLdIrEvkm9AH2Nu2YpR5vLqBuy6twH3Q==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
@ -2861,29 +2861,6 @@
} }
} }
}, },
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-visually-hidden": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
"integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-primitive": "2.1.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-separator": { "node_modules/@radix-ui/react-separator": {
"version": "1.1.7", "version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz",
@ -2949,23 +2926,23 @@
} }
}, },
"node_modules/@radix-ui/react-tooltip": { "node_modules/@radix-ui/react-tooltip": {
"version": "1.2.6", "version": "1.2.8",
"resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.6.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
"integrity": "sha512-zYb+9dc9tkoN2JjBDIIPLQtk3gGyz8FMKoqYTb8EMVQ5a5hBcdHPECrsZVI4NpPAUOixhkoqg7Hj5ry5USowfA==", "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@radix-ui/primitive": "1.1.2", "@radix-ui/primitive": "1.1.3",
"@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2", "@radix-ui/react-context": "1.1.2",
"@radix-ui/react-dismissable-layer": "1.1.9", "@radix-ui/react-dismissable-layer": "1.1.11",
"@radix-ui/react-id": "1.1.1", "@radix-ui/react-id": "1.1.1",
"@radix-ui/react-popper": "1.2.6", "@radix-ui/react-popper": "1.2.8",
"@radix-ui/react-portal": "1.1.8", "@radix-ui/react-portal": "1.1.9",
"@radix-ui/react-presence": "1.1.4", "@radix-ui/react-presence": "1.1.5",
"@radix-ui/react-primitive": "2.1.2", "@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-slot": "1.2.2", "@radix-ui/react-slot": "1.2.3",
"@radix-ui/react-use-controllable-state": "1.2.2", "@radix-ui/react-use-controllable-state": "1.2.2",
"@radix-ui/react-visually-hidden": "1.2.2" "@radix-ui/react-visually-hidden": "1.2.3"
}, },
"peerDependencies": { "peerDependencies": {
"@types/react": "*", "@types/react": "*",
@ -2982,21 +2959,162 @@
} }
} }
}, },
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": { "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/primitive": {
"version": "1.2.2", "version": "1.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
"integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==", "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
"license": "MIT"
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-arrow": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@radix-ui/react-compose-refs": "1.1.2" "@radix-ui/react-primitive": "2.1.3"
}, },
"peerDependencies": { "peerDependencies": {
"@types/react": "*", "@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" "@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
}, },
"peerDependenciesMeta": { "peerDependenciesMeta": {
"@types/react": { "@types/react": {
"optional": true "optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
"integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"@radix-ui/react-use-escape-keydown": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-popper": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
"integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
"license": "MIT",
"dependencies": {
"@floating-ui/react-dom": "^2.0.0",
"@radix-ui/react-arrow": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
"@radix-ui/react-use-layout-effect": "1.1.1",
"@radix-ui/react-use-rect": "1.1.1",
"@radix-ui/react-use-size": "1.1.1",
"@radix-ui/rect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": {
"version": "1.1.9",
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
"integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-presence": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
"integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-primitive": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
"integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-slot": "1.2.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
} }
} }
}, },
@ -3137,12 +3255,35 @@
} }
}, },
"node_modules/@radix-ui/react-visually-hidden": { "node_modules/@radix-ui/react-visually-hidden": {
"version": "1.2.2", "version": "1.2.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.2.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
"integrity": "sha512-ORCmRUbNiZIv6uV5mhFrhsIKw4UX/N3syZtyqvry61tbGm4JlgQuSn0hk5TwCARsCjkcnuRkSdCE3xfb+ADHew==", "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@radix-ui/react-primitive": "2.1.2" "@radix-ui/react-primitive": "2.1.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-visually-hidden/node_modules/@radix-ui/react-primitive": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
"integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@@ -3910,12 +4051,12 @@
      "license": "MIT"
    },
    "node_modules/@types/node": {
-      "version": "20.17.47",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.47.tgz",
-      "integrity": "sha512-3dLX0Upo1v7RvUimvxLeXqwrfyKxUINk0EAM83swP2mlSUcwV73sZy8XhNz8bcZ3VbsfQyC/y6jRdL5tgCNpDQ==",
+      "version": "24.3.0",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
+      "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==",
      "license": "MIT",
      "dependencies": {
-        "undici-types": "~6.19.2"
+        "undici-types": "~7.10.0"
      }
    },
    "node_modules/@types/node-fetch": {
@@ -6433,13 +6574,13 @@
      }
    },
    "node_modules/eslint-config-next": {
-      "version": "15.3.2",
-      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.3.2.tgz",
-      "integrity": "sha512-FerU4DYccO4FgeYFFglz0SnaKRe1ejXQrDb8kWUkTAg036YWi+jUsgg4sIGNCDhAsDITsZaL4MzBWKB6f4G1Dg==",
+      "version": "15.5.2",
+      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.5.2.tgz",
+      "integrity": "sha512-3hPZghsLupMxxZ2ggjIIrat/bPniM2yRpsVPVM40rp8ZMzKWOJp2CGWn7+EzoV2ddkUr5fxNfHpF+wU1hGt/3g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@next/eslint-plugin-next": "15.3.2",
+        "@next/eslint-plugin-next": "15.5.2",
        "@rushstack/eslint-patch": "^1.10.3",
        "@typescript-eslint/eslint-plugin": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
        "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
@@ -7268,13 +7409,13 @@
      }
    },
    "node_modules/framer-motion": {
-      "version": "11.18.2",
-      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz",
-      "integrity": "sha512-5F5Och7wrvtLVElIpclDT0CBzMVg3dL22B64aZwHtsIY8RB4mXICLrkajK4G9R+ieSAGcgrLeae2SeUTg2pr6w==",
+      "version": "12.23.12",
+      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.23.12.tgz",
+      "integrity": "sha512-6e78rdVtnBvlEVgu6eFEAgG9v3wLnYEboM8I5O5EXvfKC8gxGQB8wXJdhkMy10iVcn05jl6CNw7/HTsTCfwcWg==",
      "license": "MIT",
      "dependencies": {
-        "motion-dom": "^11.18.1",
-        "motion-utils": "^11.18.1",
+        "motion-dom": "^12.23.12",
+        "motion-utils": "^12.23.6",
        "tslib": "^2.4.0"
      },
      "peerDependencies": {
@@ -10006,9 +10147,9 @@
      "license": "MIT"
    },
    "node_modules/llama-stack-client": {
-      "version": "0.2.19",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
-      "integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
+      "version": "0.2.20",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
+      "integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
      "license": "MIT",
      "dependencies": {
        "@types/node": "^18.11.18",
@@ -11184,18 +11325,18 @@
      }
    },
    "node_modules/motion-dom": {
-      "version": "11.18.1",
-      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-11.18.1.tgz",
-      "integrity": "sha512-g76KvA001z+atjfxczdRtw/RXOM3OMSdd1f4DL77qCTF/+avrRJiawSG4yDibEQ215sr9kpinSlX2pCTJ9zbhw==",
+      "version": "12.23.12",
+      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.12.tgz",
+      "integrity": "sha512-RcR4fvMCTESQBD/uKQe49D5RUeDOokkGRmz4ceaJKDBgHYtZtntC/s2vLvY38gqGaytinij/yi3hMcWVcEF5Kw==",
      "license": "MIT",
      "dependencies": {
-        "motion-utils": "^11.18.1"
+        "motion-utils": "^12.23.6"
      }
    },
    "node_modules/motion-utils": {
-      "version": "11.18.1",
-      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-11.18.1.tgz",
-      "integrity": "sha512-49Kt+HKjtbJKLtgO/LKj9Ld+6vw9BjH5d9sc40R/kVyH8GLAXgT42M2NnuPcJNuA3s9ZfZBUcwIgpmZWGEE+hA==",
+      "version": "12.23.6",
+      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.23.6.tgz",
+      "integrity": "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ==",
      "license": "MIT"
    },
    "node_modules/ms": {
@@ -12083,9 +12224,9 @@
      }
    },
    "node_modules/prettier": {
-      "version": "3.5.3",
-      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.5.3.tgz",
-      "integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==",
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz",
+      "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==",
      "dev": true,
      "license": "MIT",
      "bin": {
@@ -13986,9 +14127,9 @@
      }
    },
    "node_modules/undici-types": {
-      "version": "6.19.8",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
-      "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
+      "version": "7.10.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
+      "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
      "license": "MIT"
    },
    "node_modules/unified": {

View file

@@ -19,11 +19,11 @@
    "@radix-ui/react-select": "^2.2.5",
    "@radix-ui/react-separator": "^1.1.7",
    "@radix-ui/react-slot": "^1.2.3",
-    "@radix-ui/react-tooltip": "^1.2.6",
+    "@radix-ui/react-tooltip": "^1.2.8",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
-    "framer-motion": "^11.18.2",
-    "llama-stack-client": "^0.2.19",
+    "framer-motion": "^12.23.12",
+    "llama-stack-client": "^0.2.20",
    "lucide-react": "^0.510.0",
    "next": "15.3.3",
    "next-auth": "^4.24.11",
@@ -44,16 +44,16 @@
    "@testing-library/jest-dom": "^6.8.0",
    "@testing-library/react": "^16.3.0",
    "@types/jest": "^29.5.14",
-    "@types/node": "^20",
+    "@types/node": "^24",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "eslint": "^9",
-    "eslint-config-next": "15.3.2",
+    "eslint-config-next": "15.5.2",
    "eslint-config-prettier": "^10.1.8",
    "eslint-plugin-prettier": "^5.5.4",
    "jest": "^29.7.0",
    "jest-environment-jsdom": "^29.7.0",
-    "prettier": "3.5.3",
+    "prettier": "3.6.2",
    "tailwindcss": "^4",
    "ts-node": "^10.9.2",
    "tw-animate-css": "^1.2.9",

View file

@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
-version = "0.2.19"
+version = "0.2.20"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -31,9 +31,8 @@ dependencies = [
    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
-    "llama-stack-client>=0.2.19",
-    "llama-api-client>=0.1.2",
-    "openai>=1.99.6,<1.100.0",
+    "llama-stack-client>=0.2.20",
+    "openai>=1.99.6",
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@@ -56,7 +55,7 @@ dependencies = [
ui = [
    "streamlit",
    "pandas",
-    "llama-stack-client>=0.2.19",
+    "llama-stack-client>=0.2.20",
    "streamlit-option-menu",
]
@@ -84,6 +83,7 @@ unit = [
    "openai",
    "aiosqlite",
    "aiohttp",
+    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "chardet",
@@ -92,7 +92,7 @@ unit = [
    "sqlalchemy[asyncio]>=2.0.41",
    "blobfile",
    "faiss-cpu",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "litellm",
    "together",
@@ -105,12 +105,13 @@
# separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
# dependencies.
test = [
-    "openai",
+    "openai>=1.100.0",  # for expires_after support
    "aiosqlite",
    "aiohttp",
    "torch>=2.6.0",
    "torchvision>=0.21.0",
    "chardet",
+    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "datasets",
@@ -119,7 +120,7 @@ test = [
    "sqlalchemy",
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "weaviate-client>=4.16.4",
]
@@ -144,7 +145,7 @@ docs = [
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
benchmark = [
-    "locust>=2.37.14",
+    "locust>=2.39.1",
]
[project.urls]

View file

@@ -8,6 +8,7 @@ from io import BytesIO
from unittest.mock import patch

import pytest
+import requests

from llama_stack.core.datatypes import User

@@ -79,6 +80,88 @@ def test_openai_client_basic_operations(openai_client):
        pass  # ignore 404

@pytest.mark.xfail(message="expires_after not available on all providers")
def test_expires_after(openai_client):
"""Test uploading a file with expires_after parameter."""
client = openai_client
uploaded_file = None
try:
with BytesIO(b"expires_after test") as file_buffer:
file_buffer.name = "expires_after.txt"
uploaded_file = client.files.create(
file=file_buffer,
purpose="assistants",
expires_after={"anchor": "created_at", "seconds": 4545},
)
assert uploaded_file.expires_at is not None
assert uploaded_file.expires_at == uploaded_file.created_at + 4545
listed = client.files.list()
ids = [f.id for f in listed.data]
assert uploaded_file.id in ids
retrieved = client.files.retrieve(uploaded_file.id)
assert retrieved.id == uploaded_file.id
finally:
if uploaded_file is not None:
try:
client.files.delete(uploaded_file.id)
except Exception:
pass
@pytest.mark.xfail(message="expires_after not available on all providers")
def test_expires_after_requests(openai_client):
"""Upload a file using requests multipart/form-data and bracketed expires_after fields.
This ensures clients that send form fields like `expires_after[anchor]` and
`expires_after[seconds]` are handled by the server.
"""
base_url = f"{openai_client.base_url}files"
uploaded_id = None
try:
files = {"file": ("expires_after_with_requests.txt", BytesIO(b"expires_after via requests"))}
data = {
"purpose": "assistants",
"expires_after[anchor]": "created_at",
"expires_after[seconds]": "4545",
}
session = requests.Session()
request = requests.Request("POST", base_url, files=files, data=data)
prepared = session.prepare_request(request)
resp = session.send(prepared, timeout=30)
resp.raise_for_status()
result = resp.json()
assert result.get("id", "").startswith("file-")
uploaded_id = result["id"]
assert result.get("created_at") is not None
assert result.get("expires_at") == result["created_at"] + 4545
list_resp = requests.get(base_url, timeout=30)
list_resp.raise_for_status()
listed = list_resp.json()
ids = [f["id"] for f in listed.get("data", [])]
assert uploaded_id in ids
retrieve_resp = requests.get(f"{base_url}/{uploaded_id}", timeout=30)
retrieve_resp.raise_for_status()
retrieved = retrieve_resp.json()
assert retrieved["id"] == uploaded_id
finally:
if uploaded_id:
try:
requests.delete(f"{base_url}/{uploaded_id}", timeout=30)
except Exception:
pass
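

# A hedged sketch, not part of the commit above: one way a server can accept
# the bracketed multipart fields exercised by test_expires_after_requests.
# FastAPI, the route path, and the alias strings are assumptions made for
# illustration; only the expires_after[anchor] / expires_after[seconds] wire
# format and the created_at + seconds arithmetic come from the tests.
import time

from fastapi import FastAPI, File, Form, HTTPException, UploadFile

sketch_app = FastAPI()


@sketch_app.post("/v1/openai/v1/files")
async def sketch_upload_file(
    file: UploadFile = File(...),
    purpose: str = Form(...),
    # Bracketed keys arrive verbatim in the multipart body, so they are
    # mapped onto Python identifiers via aliases rather than renamed.
    expires_after_anchor: str | None = Form(None, alias="expires_after[anchor]"),
    expires_after_seconds: int | None = Form(None, alias="expires_after[seconds]"),
):
    created_at = int(time.time())
    expires_at = None
    if expires_after_seconds is not None:
        if expires_after_anchor != "created_at":
            raise HTTPException(status_code=400, detail="unsupported expires_after anchor")
        # Matches what the tests assert: expires_at == created_at + seconds.
        expires_at = created_at + int(expires_after_seconds)
    return {"id": "file-sketch", "created_at": created_at, "expires_at": expires_at}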
@pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.") @pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.")
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client): def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client):

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"How do systems learn automatically?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.042499725,
-0.061890375,
-0.07846951,
0.006408736,
0.031287834,
0.008066364,
0.058032244,
0.025457833,
0.016401615,
0.04601607,
-0.028947692,
0.04452766,
0.056886304,
-0.0153307365,
-0.070184045,
-0.057157565,
-0.0768682,
0.0067744707,
0.0043326365,
-0.1236485,
0.0031424984,
-0.032562014,
-0.029376298,
0.024144078,
-0.028531333,
0.102257624,
0.0021518522,
-0.0069792354,
0.02530627,
-0.055496883,
0.031227645,
-0.0070384145,
0.08432449,
-0.028390806,
-0.083012834,
0.009549195,
-0.020060178,
-0.00240923,
-0.007700305,
-0.023067193,
-0.092922784,
-0.04261493,
-0.019990565,
0.008238936,
0.060982026,
0.05032288,
-0.051029027,
-0.008544468,
-0.030194579,
-0.035787255,
-0.17837463,
-0.047271743,
0.033892605,
0.031609993,
-0.0088130655,
0.10480617,
0.03355418,
0.09033605,
-0.01574583,
-0.012574861,
-0.08468548,
-0.114774585,
-0.13755703,
0.021649128,
0.047812033,
0.043242246,
0.008644588,
0.03873661,
0.046728984,
-0.07743038,
-0.0488837,
0.031276364,
0.022359744,
0.00040771137,
0.05229871,
-0.012229048,
-0.035172377,
-0.008257451,
-0.0088830395,
-0.034264818,
-0.045780584,
0.0024807125,
-0.040849846,
0.080489986,
0.09471281,
0.041345056,
0.005824089,
0.04501066,
0.025380718,
0.006616412,
0.010480027,
-0.07959875,
-0.03109039,
-0.035281006,
0.018305738,
0.053488795,
0.06565703,
-0.07258639,
0.025227,
0.10518925,
0.035734728,
0.02812301,
0.0116889635,
0.04420422,
0.012585445,
0.0018629873,
0.03925016,
0.043145437,
0.097845145,
-0.08803666,
-0.060626414,
0.026821595,
0.0041026343,
0.033468857,
0.011819169,
0.009573708,
-0.009524407,
-0.021213718,
-0.008906247,
0.029348776,
-0.012694493,
-0.019262077,
0.009897482,
-0.008127538,
0.018616533,
-0.00074092194,
-0.056122895,
-3.8021082e-33,
0.020863937,
0.0047333767,
0.019744372,
0.060233314,
-0.06857584,
-0.07498767,
0.007997102,
-0.04733539,
0.05782872,
0.049535874,
0.018785646,
0.032732572,
0.017672436,
0.074836925,
0.024971113,
-0.011844539,
-0.11211646,
0.007026034,
0.028080462,
-0.017474122,
0.0817653,
-0.007904061,
0.03210623,
-0.122978985,
0.03375521,
0.02587286,
-0.004479943,
0.07948923,
0.004065995,
0.033063736,
0.008058094,
0.013444748,
-0.032908894,
0.031558145,
0.040147394,
0.001501024,
0.030767068,
0.029500617,
0.041341957,
-0.047430623,
0.039448265,
-0.075250365,
0.037944954,
-0.026018769,
0.016939783,
0.013666865,
0.007116529,
-0.053848118,
-0.074419044,
-0.006100011,
0.024430456,
-0.03985037,
-0.02065548,
-0.033364378,
0.008992889,
0.12111313,
-0.028268464,
-0.03619572,
-0.021325285,
0.05334936,
0.051584847,
-0.01202104,
0.03557552,
0.054104213,
0.06071252,
0.071583234,
0.042997945,
0.008561662,
0.07422672,
0.008418425,
-0.036365964,
-0.008559546,
-0.08816671,
-0.04907638,
0.00028750877,
-0.051279917,
0.035895903,
-0.030404305,
-0.012635731,
0.018795075,
0.017144373,
-0.06645754,
0.023793342,
0.000993731,
-0.01938052,
-0.05343233,
-0.017068349,
-0.06219081,
-0.059607625,
-0.012196407,
-0.0131753115,
-0.03705957,
0.0008210978,
0.09808552,
0.024671523,
2.1774687e-33,
-0.010076338,
-0.016777446,
-0.042147383,
0.08836867,
-0.028899672,
-0.0048874663,
-0.08209485,
0.029246984,
-0.04308444,
-0.014178017,
-0.028403133,
0.025991142,
-0.017637307,
0.04654231,
-0.0057748524,
0.029987331,
0.011357778,
0.017457604,
0.055051018,
0.03222884,
-0.07999247,
0.032465667,
-0.060007077,
-0.011553406,
0.010223051,
0.04651086,
0.0011846055,
0.07870393,
-0.044612467,
0.032810863,
0.0023138348,
-0.03884047,
-0.017668914,
0.079135194,
-0.004594527,
0.043508377,
-0.031625524,
0.008872064,
-0.050121736,
0.06896808,
0.043688085,
0.019938715,
-0.08469436,
-0.046897292,
-0.006832939,
-0.026140738,
-0.05106749,
0.054356705,
0.030691773,
-0.010932293,
0.047189884,
-0.01740432,
-0.020789616,
-0.08175918,
-0.027700473,
0.035974283,
0.05395729,
0.04489479,
0.059698317,
0.041220855,
-0.066653565,
-0.09200203,
0.008937433,
0.02581428,
-0.03863856,
-0.0043950165,
-0.05208163,
0.02743701,
0.012093444,
0.048299577,
0.059836566,
0.09734695,
-0.053629622,
-0.07637932,
0.015765766,
-0.044513486,
-0.13213192,
-0.07024786,
-0.10133136,
-0.11906537,
-0.027716314,
0.0068639666,
-0.0053682425,
0.054165307,
-0.11115557,
0.07837099,
0.03506696,
0.016077982,
0.021501223,
-0.061516896,
0.007429458,
0.048352152,
-0.013604487,
0.012456823,
-0.12730241,
-1.40081795e-08,
-0.040906876,
-0.015950777,
0.060046297,
0.038068157,
0.066364,
0.04727011,
-0.01611309,
0.09689113,
-0.044232138,
-0.028793652,
-0.012945379,
0.01303288,
0.022385143,
0.047113802,
0.06399741,
0.12131601,
0.060635034,
0.102205545,
-0.07575499,
-0.02380431,
0.12489149,
-0.045490686,
0.09547224,
0.021274548,
0.0373141,
-0.07523771,
-0.0026329542,
0.047245234,
0.048495702,
0.12357625,
0.018002188,
0.013794,
-0.03588812,
-0.05179344,
0.061835315,
0.051598098,
0.008910207,
-0.12502904,
0.016457288,
-0.08591687,
-0.07110172,
0.06984138,
-0.036050156,
-0.005367899,
-0.048767615,
0.0008031624,
-0.021520091,
-0.061076768,
0.002495028,
-0.032736864,
0.045757275,
0.0389445,
-0.024670867,
0.025894105,
0.10298855,
-0.01300183,
0.04781103,
-0.071152866,
0.04602928,
0.08051811,
-0.10304887,
0.0844638,
0.028001137,
-0.036985613
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is a test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.055977955,
0.075997174,
-0.09249559,
0.014318654,
0.05876127,
-0.032458965,
0.020946832,
0.028819378,
-0.06590933,
0.013517223,
0.13000485,
0.0045786807,
-0.0069082035,
-0.055431433,
-0.04756826,
-0.02912152,
-0.12239366,
-0.05359766,
-0.014712379,
0.059826344,
0.034466766,
0.02072927,
-0.048724595,
0.013531463,
0.05862551,
-0.0030636105,
-0.031532496,
0.08256397,
-0.031230088,
-0.12059464,
0.03833127,
0.06573049,
0.064165965,
0.03838281,
0.12570563,
0.031128457,
0.10817016,
-0.001977333,
-0.024726717,
0.028785817,
0.012688804,
-0.039854225,
0.043296516,
-0.015909227,
-0.013514834,
-0.005097704,
-0.007898244,
0.0397803,
0.0037018042,
-0.03366439,
-0.058511946,
0.0048645996,
-0.08961216,
-0.010436317,
0.05919557,
-0.020386472,
0.014281465,
0.013961121,
-0.0045877,
0.03835435,
0.004833604,
0.029750798,
-0.02082645,
0.018628312,
0.124215424,
-0.023262355,
-0.0403046,
-0.023597443,
-0.0074503124,
-0.09082856,
-0.16860788,
0.010149646,
-0.03580583,
0.0105862,
-0.02046927,
0.0021231866,
-0.109239034,
0.007925489,
0.048885852,
-0.11390797,
-0.060719617,
-0.13435687,
0.006331373,
-0.008848544,
-0.031521764,
0.09917924,
0.055304468,
0.0068802955,
-0.023466706,
-0.0031231036,
0.036759574,
0.014334804,
0.022158744,
0.04709372,
0.007092632,
0.06810656,
0.018511463,
0.040857043,
0.05504883,
0.09488118,
-0.01585433,
-0.000100159355,
0.01078331,
0.09177411,
-0.07465409,
-0.064712845,
0.070150875,
-0.044969488,
0.057672877,
-0.026067073,
0.0063218353,
-0.094980195,
-0.010527798,
-0.07887331,
0.039760627,
-0.041514914,
-0.055244483,
0.07536157,
-0.046700213,
0.03613181,
0.08028084,
-0.03635332,
-0.034757905,
0.0169972,
-0.04701302,
-0.06517364,
0.06215512,
-4.2211668e-33,
-0.001730556,
-0.09387539,
-0.029811831,
0.12576838,
0.03797533,
-0.036525473,
0.0060974187,
0.059078563,
-0.110772625,
0.005687099,
-0.025972685,
-0.074838035,
0.0083624,
0.0274395,
-0.052505072,
0.023982009,
-0.004383019,
0.03933067,
-0.0421536,
-0.0273022,
0.05469264,
0.027077684,
-0.033308104,
-0.060588703,
-0.050718505,
0.017972048,
-0.003501518,
-0.046666663,
0.073935315,
0.01332508,
-0.003336597,
-0.04653879,
-0.060137972,
0.034129404,
0.0015396234,
0.03913038,
0.039914686,
-0.012313295,
-0.03049878,
-0.001898293,
-0.014593095,
-0.013025945,
0.019526742,
-0.022328524,
0.07434842,
-0.05336983,
-0.02397039,
0.029210743,
0.027515827,
0.015095782,
-0.020450259,
0.043337505,
0.019659057,
0.01736381,
-0.0035567854,
0.019467248,
-0.0003600355,
0.0004236338,
-0.0051459596,
0.06621258,
0.027880289,
0.04102983,
-0.06717971,
0.028754033,
-0.03474935,
-0.055536743,
-0.032726888,
-0.08101375,
0.092146546,
0.06396539,
-0.04917468,
-0.039915428,
0.036926597,
-0.0015941713,
0.00030078198,
-0.026029347,
-0.006002226,
0.0547852,
-0.0956802,
-0.05187664,
-0.048835263,
-0.08641023,
-0.033999704,
-0.033261146,
-0.05655725,
-0.051167108,
0.008072844,
-0.08582387,
0.06508922,
-0.08545701,
0.027998457,
0.029824113,
-0.031671796,
-0.08560477,
0.101766,
2.1853336e-33,
0.011631667,
0.07766936,
-0.017357787,
0.00522221,
0.0009766584,
0.06540673,
0.07256414,
-0.044297714,
-0.04751489,
0.14031266,
-0.02573919,
0.005799934,
0.040961996,
-0.054869186,
0.074385494,
-0.023611594,
0.018366067,
-0.06055796,
-0.04411962,
0.0027609242,
-0.0457808,
0.11723751,
0.10269976,
0.079064004,
-0.046609085,
0.018625101,
0.02980095,
0.037249736,
0.022749124,
-0.002641677,
0.04173634,
0.06440922,
-0.08910874,
0.018179348,
0.024035122,
-0.09641835,
0.086450025,
-0.053884093,
0.01923183,
0.045059275,
0.045154754,
0.096540354,
0.014918263,
0.05959024,
0.03068157,
0.05884942,
0.11149687,
0.01664536,
0.011553633,
-0.023707153,
-0.008613074,
-0.055065807,
0.047565654,
-0.014617207,
-0.01412784,
0.06996046,
0.032047763,
0.04266437,
-0.053910665,
0.031057829,
0.009195878,
0.032976385,
-0.018986467,
0.00552569,
-0.014989692,
-0.09192638,
-0.032122552,
0.015356909,
0.02916829,
0.012490537,
-0.00481679,
0.02338388,
-0.028228622,
-0.0845363,
0.051079277,
-0.013396008,
-0.029029451,
-0.022589581,
0.010921808,
-0.009802942,
0.049751375,
-0.0032863966,
-0.038782034,
0.027910566,
0.017915333,
0.005342976,
0.058715835,
0.0958275,
-0.014351606,
0.006968306,
-0.027336437,
0.06917409,
0.057280898,
0.032035258,
0.004253816,
-1.6765805e-08,
-0.03635166,
-0.091484524,
-0.026345165,
-0.007943707,
-0.024149738,
0.09897989,
-0.04723456,
-0.037648056,
-0.029387534,
-0.022535043,
0.041274313,
-0.001120282,
-0.05565933,
0.020671127,
-0.03811821,
-0.052506164,
-0.026291005,
-0.053353462,
-0.040578876,
-0.0073704817,
-0.0014502247,
0.027114222,
0.02715861,
0.009327082,
-0.0002262999,
0.038208842,
0.037102137,
0.08402326,
-0.063428074,
-0.014857683,
0.0503535,
0.06702617,
0.027663387,
-0.04361141,
-0.012074137,
0.08499847,
0.11162084,
0.10458964,
0.019746903,
-0.0002763885,
-0.041129645,
0.009574697,
-0.05287082,
-0.0026483443,
-0.031138659,
-0.08863464,
-0.06762413,
-0.074503295,
-0.053003356,
-0.09557731,
-0.052699838,
0.013066509,
0.0029109598,
0.041860294,
-0.045234714,
0.01671661,
0.017218111,
0.021572877,
-0.037175495,
0.023540929,
0.051999625,
0.064441204,
0.023920247,
-0.025235547
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"The secret string is foobazbar."
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.060643002,
0.063731536,
-0.059394535,
-0.010293381,
-0.119798504,
0.033409704,
0.056838214,
-0.006487789,
0.029893834,
-0.05035498,
0.015207984,
-0.0634482,
0.015118864,
-0.08356639,
0.009297568,
0.04425259,
-0.02442732,
-0.050995167,
-0.028106945,
-0.07392448,
0.070876844,
0.08103935,
0.006026678,
-0.043081142,
0.010737864,
-0.01581646,
0.035146058,
0.06534572,
0.036411658,
-0.056240093,
0.073675275,
0.047330413,
0.06715632,
-0.012079616,
-0.018175518,
0.0042696777,
0.029169064,
0.006755428,
0.037944797,
0.002459526,
0.014023556,
0.022665394,
-0.09053435,
0.041958958,
-0.0793576,
0.032003723,
-0.03836551,
0.037002493,
-0.0036971096,
-0.017005432,
0.036977224,
-0.077020966,
-0.020112924,
0.07730264,
0.04523538,
-0.007810078,
-0.005882345,
0.009965143,
0.033477366,
0.08996437,
0.016154636,
0.03699466,
-0.03920663,
-0.010970169,
0.023925098,
-0.036968958,
-0.008223206,
0.018760787,
-0.000688964,
-0.061974872,
-0.030354673,
-0.03764463,
-0.046544887,
0.03845807,
-0.010353121,
-0.032976467,
0.013553099,
-0.059050683,
0.06307999,
0.015977552,
-0.048430033,
-0.06991109,
-0.022508044,
0.04406567,
0.036172677,
0.060487013,
-0.04315455,
0.028775847,
0.006216682,
0.01028539,
-0.07873024,
-0.091566674,
0.043936655,
0.013187522,
-0.0037702306,
0.010252617,
0.020211454,
0.056324948,
-0.09704479,
0.06579238,
0.047095913,
0.018813917,
0.124447405,
-0.064461194,
-0.012602576,
0.016044088,
0.0860477,
0.02487444,
0.106261514,
-0.043173406,
-0.04631391,
-0.031489294,
-0.0018045203,
-0.0234808,
-0.050789703,
0.0046832566,
0.04323459,
0.057140227,
-0.065862894,
0.032980002,
-0.028766194,
0.03784897,
0.0002090952,
0.04331736,
-0.13265643,
0.026365368,
-0.042440306,
-3.335036e-33,
-0.0022078454,
0.050638728,
0.028040074,
-0.0339003,
-0.004550283,
-0.034626767,
-0.086259365,
0.04313123,
0.010241412,
0.04403283,
-0.030186933,
-0.0935834,
-0.06522679,
-0.059730206,
0.037564293,
-0.025941465,
-0.06653215,
0.004382199,
0.018841932,
-0.03557901,
0.022377534,
0.0894181,
0.033572253,
-0.11379638,
0.038214155,
-0.0444022,
0.10258949,
-0.07330576,
0.089417316,
0.05668133,
-0.009440494,
-0.06464684,
0.016628003,
0.0073475256,
0.00518807,
0.0051437207,
-0.013597164,
-0.04918519,
-0.06671375,
0.010821772,
0.04635121,
-0.11489337,
-0.055055846,
0.040418062,
-0.0327241,
0.034979116,
-0.02358068,
-0.012229059,
0.048057053,
0.011607797,
0.00786425,
0.038057882,
-0.027768329,
0.0033014645,
-0.0033301115,
0.006048222,
0.031986434,
0.04835162,
0.013795478,
0.03616475,
-0.022675272,
0.09197521,
0.029851481,
0.08111755,
-0.086777106,
-0.028026069,
0.055648096,
-0.030405777,
-0.016515536,
0.031827636,
-0.07586154,
-0.009904298,
0.028109884,
0.0022400685,
-0.104984276,
-0.023682386,
-0.02420211,
-0.00031999213,
0.0016354885,
-0.037583202,
0.02554201,
-0.052216183,
0.021622796,
0.099114954,
-0.06895898,
-0.018579148,
0.072459795,
-0.10584089,
-0.08503219,
-0.030006522,
-0.01574946,
-0.056850888,
-0.02701468,
-0.06409775,
0.0057065156,
1.2905196e-33,
0.054916188,
-0.036421828,
-0.0023367621,
-0.03591332,
0.10682448,
-0.049314465,
0.037890658,
0.05061744,
-0.08387186,
-0.018746993,
0.0036053627,
0.029014338,
-0.0028278087,
-0.036458995,
0.11148448,
0.050991904,
0.040261153,
0.092449345,
-0.013685468,
-0.07097927,
-0.043229934,
-0.060135942,
-0.030182164,
0.009103864,
-0.04419895,
0.04841717,
0.1172092,
-0.009820357,
0.0024167346,
0.0933731,
-0.059857536,
0.010170529,
-0.03779587,
-0.043445412,
-0.14679031,
-0.022706114,
-0.008936355,
-0.021539144,
-0.021903422,
-0.06614074,
0.016270082,
0.062619805,
0.010576195,
0.04721768,
-0.08721729,
0.009404518,
-0.017676886,
-0.03845903,
0.01042728,
0.022961272,
0.099522196,
-0.021459235,
0.0017192952,
-0.039389413,
0.01643467,
0.03967745,
-0.11970654,
0.009909872,
0.0038936618,
0.018281214,
-0.045416683,
0.002060889,
0.024235422,
0.016998425,
0.06879841,
-0.027463643,
-0.018185377,
0.053853985,
-0.02881535,
-0.04521435,
0.114714146,
0.01980149,
-0.057876598,
0.01657406,
-0.073635235,
0.040253133,
-0.015108487,
0.0066914097,
-0.049663424,
0.04593752,
0.077961996,
-0.042919736,
0.021851214,
0.06381258,
0.08111257,
-0.07067202,
-0.032432877,
0.09261935,
-0.020485587,
0.070126526,
-0.020741673,
0.09339737,
-0.05117133,
0.039423097,
0.025603252,
-1.676899e-08,
0.0015320816,
0.008086889,
-0.017632706,
-0.0340569,
0.068081565,
0.07389828,
-0.07586309,
-0.1137352,
-0.02203125,
0.00911275,
0.031093195,
-0.005707322,
-0.046190932,
0.0037106895,
0.013285116,
-0.03215832,
-0.05558973,
-0.010595662,
0.0067340815,
-0.025494263,
-0.08369286,
0.08884646,
0.0051370384,
-0.051632546,
-0.051877208,
0.039703675,
-0.042113848,
0.05714819,
0.088881046,
0.049764536,
0.04144229,
0.09467376,
-0.037112173,
-0.06844063,
-0.061656013,
0.09893085,
-0.059514027,
-0.033182237,
-0.026037138,
0.07761722,
0.05612508,
0.010711438,
0.018973859,
0.056075387,
-0.04172223,
-0.02732456,
0.101854175,
-0.036197703,
-0.029915968,
-0.043326378,
0.043677974,
0.018775862,
-0.0042756326,
0.055917986,
-0.0034246107,
0.0602753,
-0.13372745,
0.008189692,
-0.031539913,
0.022382092,
0.037938736,
0.024559673,
0.068045974,
0.07020884
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 9,
"total_tokens": 9
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"Python programming language"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.06384743,
0.013436034,
-0.054533605,
0.011913119,
-0.074255615,
-0.13346045,
0.04293264,
0.045415178,
-0.069499195,
-0.03594047,
0.012013141,
0.0068701585,
0.088894635,
0.0025958198,
0.03248322,
-0.00781389,
-0.05045716,
0.0066499636,
0.02780642,
-0.1278895,
0.00061722804,
0.04524771,
-0.036062278,
0.044238217,
0.012931149,
-0.009267752,
0.011908537,
0.026050908,
0.020050693,
-0.033657826,
-0.028060015,
0.08754526,
0.059001748,
0.053905424,
0.020296838,
0.06843132,
-0.031828973,
-0.08757766,
-0.11278083,
0.022646705,
-0.09042749,
-0.0033280335,
-0.04013833,
-0.03408772,
-0.032974605,
0.029246835,
-0.03902113,
0.045517426,
-0.0331051,
-0.006541718,
-0.09631428,
-0.011705091,
-0.052590065,
-0.064790964,
0.03107029,
-0.012614695,
0.0973954,
0.0052277497,
-0.035061166,
-0.14041117,
-0.06678556,
0.03656035,
-0.039271023,
0.070130296,
-0.001007227,
-0.026842492,
-0.017554138,
0.030476976,
0.0640168,
-0.03162716,
-0.1459817,
-0.04540497,
-0.018482737,
0.06690258,
0.030561155,
-0.12253459,
0.06106281,
-0.05676725,
-0.005102081,
-0.008781471,
0.0065009934,
-0.016409436,
-0.033660814,
0.084904715,
-0.000299427,
-0.073421866,
0.038623117,
0.15695204,
0.010100481,
0.025317656,
-0.0021393092,
-0.046127863,
0.062426485,
-0.019896954,
-0.054696236,
0.097949564,
0.038487267,
-0.072427474,
-0.038710196,
0.07158003,
0.0073204385,
-0.051196836,
0.031370413,
-0.032227658,
0.03930787,
-0.009667071,
0.06993779,
-0.052014988,
0.049430363,
-0.04273174,
-0.003752437,
-0.041564792,
-0.056199003,
-0.033390746,
0.05104195,
0.038621522,
-0.002969481,
0.08187672,
-0.0035807535,
0.045314044,
0.0068791825,
0.016496154,
0.016330697,
0.007280202,
-0.021685049,
-0.004648767,
-0.007916633,
-4.153803e-33,
-0.045814347,
-0.050876923,
-0.038647644,
0.010091659,
0.0700144,
-0.025181346,
0.10506424,
-0.0049788426,
-0.0641887,
-0.047635607,
0.012736192,
0.051960304,
-0.0160108,
0.08172301,
0.023975011,
-0.02088898,
0.04570414,
0.09154945,
0.025109906,
0.019044904,
0.048153024,
0.097932264,
0.034160685,
0.035437047,
0.0114016645,
-0.043437798,
-0.0041986653,
-0.055648174,
0.011477498,
0.0071031414,
-0.06427046,
-0.02060021,
-0.004527582,
-0.012953201,
0.026594209,
-0.012370914,
0.008425176,
-0.06823755,
0.046840925,
-0.041645527,
-0.025629306,
-0.0038959885,
0.050076205,
-0.008090696,
-0.023280276,
0.023890443,
0.0015592615,
0.04615769,
-0.06899702,
0.041591667,
0.0045278594,
-0.047615696,
0.054234404,
0.06972373,
-0.016879166,
0.04805917,
0.012710964,
0.0022028312,
-0.00632154,
-0.03153454,
0.02372792,
0.06859583,
0.07721348,
-0.012276763,
0.039006572,
0.03434665,
0.030310014,
0.058712285,
0.08029841,
0.06976497,
-0.09046315,
0.02376487,
-0.008737595,
0.038339745,
-0.027534455,
0.02316122,
0.027078442,
-0.081344925,
-0.010344974,
0.04727033,
-0.020315375,
-0.025998361,
-0.017408848,
-0.0035885328,
-0.018698875,
-0.0374002,
0.041077297,
0.05317115,
-0.00557377,
-0.058558866,
-0.07202089,
-0.0750218,
0.04825297,
0.011333554,
-0.022591913,
1.3509705e-33,
0.006217277,
0.03161211,
-0.036121942,
-0.0016698099,
-0.08257381,
-0.060688194,
0.059951965,
0.014476651,
0.05951137,
0.027058002,
-0.0116078025,
-0.05761336,
0.103633516,
-0.0028178988,
0.07695233,
0.019430202,
-0.052228313,
0.015157555,
-0.001314194,
0.027793957,
-0.11528974,
0.047293015,
-0.075984485,
-0.07435121,
-0.029174728,
-0.020066952,
-0.03471861,
-0.057671476,
-0.030140208,
0.047475602,
0.0122009255,
0.011492795,
-0.051974766,
0.059714273,
0.03282909,
0.0013831124,
0.0577218,
-0.04120374,
-0.021517176,
-0.0067665633,
0.14197157,
0.057943344,
0.010075872,
0.096026145,
0.014512136,
0.021362338,
-0.07552857,
0.07883896,
-0.042723794,
-0.06604244,
-0.03871113,
-0.008144072,
0.014999539,
-0.049409784,
-0.037078433,
-0.023772687,
0.03742616,
0.008203275,
-0.08696922,
-0.05963844,
-0.07733288,
-0.056535304,
0.029040048,
0.007370859,
-0.07786975,
0.0025485628,
-0.10403352,
-0.04738507,
-0.015877869,
-0.11589796,
0.09726567,
0.0049555353,
-0.010271941,
0.0066397907,
-0.060328998,
0.025491165,
-0.052938554,
-0.0038485127,
-0.050254337,
0.07681007,
0.046079025,
0.0074015437,
0.0047005047,
0.07386609,
-0.077935226,
0.001350664,
0.01371514,
0.056624677,
0.021921877,
0.0072018835,
0.0076770596,
0.1022247,
0.06007294,
0.036791492,
-0.03775615,
-1.1873974e-08,
-0.008835198,
0.017599683,
0.0622159,
0.03203167,
-0.011572803,
0.051924217,
-0.011727461,
-0.06392444,
-0.029854134,
0.03257704,
0.005516639,
-0.012049206,
-0.054406274,
-0.056717165,
-0.030638915,
0.14277336,
0.028553458,
-0.028731374,
0.019938445,
0.025647435,
0.07379124,
-0.006680472,
0.0061455644,
0.09610866,
-0.0880125,
-0.00892061,
0.038242683,
0.04831363,
0.018802335,
-0.10537713,
0.048258167,
-0.022250284,
0.020506755,
0.014618206,
0.03079222,
-0.029113656,
0.008291428,
-0.045047753,
0.002552782,
0.02174108,
-0.0081180185,
0.009036818,
-0.013369313,
-0.014042713,
0.06843612,
0.045168996,
-0.034600396,
-0.07275618,
-0.0041681295,
-0.05823282,
-0.03303698,
0.0040505864,
-0.020017866,
-0.020105122,
0.05537091,
0.102509096,
-0.10799596,
-0.013787153,
-0.009659191,
0.015613784,
-0.031229256,
0.13294649,
0.15243623,
-0.022428894
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"What is the secret string?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.07471535,
0.08136051,
-0.0646403,
0.011820692,
-0.074530184,
0.02182932,
0.077565186,
0.012791591,
0.05854512,
-0.014144753,
0.054007743,
-0.026551379,
-0.018058892,
-0.060439672,
-0.019246193,
-0.0065063615,
-0.047261372,
-0.048988443,
-0.0904866,
-0.066554815,
0.09284568,
0.021294983,
-0.013393054,
-0.0066470345,
0.008009612,
0.016829057,
0.039714802,
0.021865955,
0.014889775,
-0.039430078,
0.025233349,
-0.036833033,
0.016638417,
0.008795953,
-0.05348616,
0.0361554,
-0.034618407,
-0.009877053,
0.064839765,
-0.015148702,
0.020900138,
-0.07136567,
-0.008516019,
0.051174764,
-0.06211658,
0.059481908,
-0.047928233,
0.07046077,
-0.024866259,
-0.010772497,
0.06539378,
-0.03691645,
-0.08241172,
0.081707805,
0.017110538,
0.0129555175,
-0.047113538,
0.0025686903,
0.008714549,
0.09987858,
0.0496949,
-0.025898866,
-0.017353507,
0.03393223,
0.038376898,
-0.054239143,
0.00860024,
-0.040809266,
0.02656175,
-0.071856335,
-0.019946808,
-0.041174017,
-0.07246157,
0.00040759498,
0.018743936,
0.023058625,
0.0166551,
-0.063356385,
0.034956083,
0.05005474,
0.00041865162,
-0.06177827,
0.006278017,
0.11141626,
0.0040813377,
0.08571246,
0.023260446,
0.057005797,
-0.03149278,
-0.013331491,
-0.04513824,
-0.11731193,
0.0160608,
-0.016902346,
-0.028950376,
0.03577902,
-0.051558092,
0.03297068,
-0.11266136,
0.06640369,
0.037849367,
0.022930682,
0.05809001,
-0.03963197,
-0.03245654,
0.01767903,
-0.005010206,
0.019044327,
0.07743703,
-0.020407042,
-0.020311069,
-0.00953332,
0.003143125,
-0.00456264,
-0.02911311,
0.03384037,
0.00048523775,
0.06419016,
0.01071009,
0.124172516,
-0.0053817774,
0.004929672,
-0.059669737,
0.029508028,
-0.13410243,
0.016187606,
-0.048119176,
-6.608228e-33,
0.012317927,
0.060396116,
0.036468223,
-0.035990786,
-0.041977834,
0.01232469,
-0.08480998,
0.012524896,
0.027948672,
0.086107045,
-0.030785998,
-0.06136775,
-0.0009515558,
-0.025208496,
0.045449734,
-0.027582139,
-0.0095786555,
0.0067018326,
0.043680843,
-0.021498295,
0.003277214,
0.11862199,
0.047027264,
-0.13488089,
0.025457613,
-0.010294456,
0.0022531834,
-0.061856117,
0.10388324,
0.01866347,
-0.0017658875,
-0.051914714,
0.04644036,
0.037606996,
0.03376949,
0.006641087,
0.022004316,
-0.07835444,
-0.008207682,
0.027414316,
0.0173955,
-0.075223684,
0.006482484,
0.02727821,
0.00059299107,
-0.010945533,
-0.020044776,
-0.000120837554,
0.013701114,
0.004716937,
0.02277811,
0.015490094,
-0.0142633,
-0.013935009,
0.015847908,
-0.02308094,
0.033789054,
-0.039197993,
-0.043216396,
0.029982513,
-0.016503252,
0.0698185,
0.046076864,
0.053330805,
-0.055297256,
0.025112566,
0.014026739,
-0.09400958,
0.035901215,
0.029467817,
-0.1319919,
-0.0050726864,
-0.037837584,
-0.0318086,
-0.09549526,
-0.027866103,
0.002436243,
-0.007881375,
0.058288272,
-0.031986125,
-0.0607737,
-0.023380116,
-0.00047972053,
0.13766052,
-0.060590804,
-0.008125084,
-0.03488867,
-0.102469996,
-0.009079019,
-0.018955158,
-0.0016528872,
-0.07709843,
-0.043352164,
-0.03619871,
0.039568264,
3.0214064e-33,
0.0050480226,
0.00017108663,
-0.063063554,
0.012236582,
0.10636841,
0.015972469,
0.0066562137,
0.018790383,
-0.047090903,
0.04585031,
0.007611995,
0.032441676,
0.03210589,
-0.02090312,
0.106981054,
0.0075532557,
0.036063127,
0.14623925,
0.037788242,
-0.043172225,
-0.02176524,
-0.009350843,
-0.06982138,
0.015577218,
0.02114412,
0.030659605,
0.084352896,
-0.09288308,
0.00815284,
0.07806744,
-0.0816394,
0.011901701,
0.017101644,
0.0040163086,
-0.14144793,
0.0040214215,
0.04631442,
0.008958798,
-0.0056624487,
-0.055584785,
0.028006915,
0.055925272,
0.062281866,
0.0860523,
-0.12157215,
0.021931145,
-0.0050777225,
0.029814675,
-0.012117963,
0.048798613,
0.06408485,
-0.041422654,
0.018091682,
-0.028209666,
-0.021357967,
0.055625696,
-0.15479031,
0.027474454,
0.018845506,
0.04327976,
0.011504344,
0.017370872,
-0.023188887,
0.050985955,
0.029468553,
0.012529372,
-0.045431048,
-0.00222149,
-0.05612193,
-0.07891998,
0.0796125,
-0.02043551,
-0.076230876,
0.011581566,
-0.035624538,
-0.0480372,
-0.066065714,
-0.057384264,
-0.040163297,
0.071754575,
0.031339016,
0.023032097,
-0.023996511,
0.023609873,
0.09607155,
-0.06843605,
0.014263025,
0.088031664,
-0.037747264,
0.029464351,
-0.028663024,
0.10216597,
-0.06609628,
0.0228385,
0.04214049,
-1.4813483e-08,
0.030838875,
0.043892786,
-0.024579313,
-0.09817689,
0.0566737,
0.09298153,
-0.010350536,
-0.09840461,
0.018022444,
-0.0131554445,
0.026413994,
0.00880124,
-0.052855253,
-0.04217533,
0.030118503,
0.017092122,
-0.06243192,
-0.018758481,
-0.015982535,
-0.018381983,
-0.026471734,
0.010303105,
-0.03048123,
-0.08456848,
-0.054054197,
0.0100427205,
0.029534454,
0.1355571,
0.033424437,
0.12097715,
0.04077808,
0.0081999,
-0.018245617,
-0.056846414,
-0.12899645,
0.12415884,
-0.053460255,
-0.038143307,
0.030224878,
0.019799955,
0.047839224,
0.029400205,
0.0015434423,
0.06115486,
-0.055583358,
-0.030215869,
0.10799345,
-0.07073566,
-0.08214588,
0.0045075943,
-0.0155852465,
-0.013693905,
-0.00234985,
0.026380839,
-0.015793327,
0.016262477,
-0.040624544,
-0.013973127,
-0.08311349,
0.03198475,
0.05000169,
-0.0038599824,
0.07030323,
0.0049196184
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"Python is a high-level programming language with code readability and fewer lines than C++ or Java"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.07649938,
0.021244217,
-0.036287725,
-0.0011695292,
-0.048568938,
-0.13184524,
-0.08424354,
0.059378363,
-0.06171173,
-0.009400254,
-0.08092405,
0.05547966,
0.05243954,
0.026002606,
0.06304219,
-0.062263194,
-0.06520713,
-0.022376515,
0.017407224,
-0.11619268,
-0.03641897,
0.04050772,
-0.032505907,
-0.017739171,
0.057254575,
0.012360873,
-0.018550506,
-0.029990712,
0.00235547,
0.0067841834,
-0.088615544,
0.07800687,
0.037015557,
0.029492933,
-0.019656634,
0.054334868,
-0.0006793985,
-0.08961444,
-0.05305694,
-0.012659472,
-0.0860912,
0.07697376,
-0.038515005,
-0.011632789,
-0.032334387,
-0.0075316867,
-0.024749892,
-0.068094365,
-0.030428912,
-0.02603917,
-0.09692951,
0.009892155,
-0.05358676,
-0.09094546,
-0.009154104,
-0.008819028,
0.048186116,
-0.0033502842,
-0.005917261,
-0.13302499,
-0.09727019,
0.013533918,
0.047219984,
0.062738694,
-0.01572617,
-0.037660386,
-0.016604222,
0.029844316,
0.093244925,
-0.06728843,
-0.13382566,
-0.020838322,
-0.025856238,
0.11628718,
0.0306645,
-0.10493003,
0.038982447,
-0.010721579,
-0.0013596424,
0.020682583,
0.0018240656,
0.027716527,
-0.078466296,
0.10784201,
0.029109064,
-0.05404029,
0.030583676,
0.07008342,
-0.03429503,
0.009839805,
0.03469849,
-0.042428855,
0.06508966,
0.026623009,
-0.032148074,
0.07619082,
0.020044614,
-0.030803965,
-0.071872465,
0.027219178,
-0.018790914,
-0.0541197,
0.07494771,
0.01770988,
0.03380063,
0.024214497,
0.09087066,
-0.052000217,
0.04061227,
-0.018418813,
-0.012485012,
-0.06401856,
-0.023183277,
-0.06190061,
0.053444423,
0.047886662,
-0.010557972,
0.078470305,
0.03581419,
0.02720849,
0.022449464,
-0.004947443,
-0.024473231,
0.003690138,
0.00033914045,
-0.00892056,
0.00927688,
2.0050864e-34,
-0.03232352,
-0.0242469,
0.02715213,
0.021707827,
0.06515407,
-0.019538436,
0.0531206,
0.007928102,
-0.039223887,
-0.020031622,
0.007848442,
0.02391591,
0.014990736,
0.11268782,
0.06107525,
-0.011977935,
0.016781967,
0.045509085,
0.0013573953,
0.009146736,
0.013215661,
-0.01195797,
0.02703829,
0.007053157,
0.022530165,
-0.013689941,
-0.004301088,
-0.0007768117,
0.033448935,
0.011239952,
-0.05143586,
-0.07399211,
-0.031036023,
0.019600574,
-0.0103345895,
-0.0029444918,
-0.0047988347,
-0.10445514,
0.034700666,
-0.024362778,
-0.0471351,
0.03554556,
0.037065983,
-0.016996143,
0.005622871,
0.050610665,
-0.008597168,
0.0059816362,
-0.12275667,
0.03674253,
-0.022365745,
-0.00964108,
0.07596107,
0.08905326,
0.016492268,
0.044219263,
0.06803503,
0.06454952,
-0.050047003,
-0.0017108961,
-0.00074994087,
0.09930796,
0.09251372,
-0.011378917,
0.050366722,
0.07712465,
0.009745006,
0.1009996,
0.03286012,
0.064262226,
-0.044561703,
0.038564857,
-0.019407123,
0.03742708,
-0.0017875227,
0.011954917,
0.01135132,
-0.10406638,
0.06980167,
0.019202363,
-0.028420014,
-0.0136866,
0.048647687,
-0.015362756,
-0.034191117,
-0.055556074,
0.0050155777,
0.025966194,
-0.0009168385,
-0.0042535486,
-0.06399157,
-0.059880342,
0.081461415,
0.014113321,
-0.038159303,
-2.1536519e-33,
-0.027272146,
-0.034751415,
-0.024606032,
0.026892362,
-0.09076156,
-0.045825478,
0.01362092,
0.0023044816,
0.054052215,
0.032981824,
-0.029818065,
-0.058822677,
0.09836217,
0.032525893,
0.110115595,
0.020737587,
-0.09583008,
0.0005333771,
0.0019376605,
0.017484892,
-0.06849545,
0.064435944,
-0.050152197,
-0.048923954,
-0.027651085,
-0.014845199,
-0.12104595,
-0.04417338,
-0.011146107,
0.058580566,
-0.007487375,
0.038694676,
-0.07034722,
0.030289542,
0.055677116,
-0.0011476888,
0.017125413,
-0.042026866,
-0.016522061,
-0.025752945,
0.11801853,
0.042021915,
0.06467938,
0.046182197,
0.015046265,
0.029888034,
-0.039066464,
0.087210484,
-0.012382869,
-0.035691217,
-0.0481768,
0.041446336,
0.03895,
-0.025257591,
-0.028859945,
-0.029144095,
0.029815607,
0.051508367,
-0.08636757,
-0.06916314,
-0.07273463,
-0.059568703,
0.00502403,
0.025671752,
-0.022013027,
0.024832714,
-0.09721394,
0.0063272356,
-0.04942868,
-0.13045275,
0.1247814,
-0.013577642,
-0.022800498,
0.03898444,
-0.07545284,
0.04942631,
0.00082998566,
0.004718136,
-0.04070612,
0.063641116,
0.11005218,
0.020110086,
-0.048857097,
0.05847898,
-0.066304415,
0.026930936,
-0.06279101,
-0.014113123,
0.023336235,
0.023582496,
-0.0020861977,
0.07764345,
0.03095139,
0.020153554,
-0.020101866,
-2.4304368e-08,
0.020170629,
-0.008566916,
0.06203045,
-0.0083030015,
0.02522894,
0.08902528,
-0.008051052,
-0.01893583,
-0.0355399,
0.06187224,
-0.017073143,
-0.030130422,
-0.10230193,
-0.06516148,
-0.004159112,
0.10910979,
-0.021820752,
-0.05356566,
0.011770625,
0.052257556,
0.058287114,
0.0053074392,
-0.05998588,
0.0871507,
-0.082790464,
-0.040782016,
0.06573996,
0.028298022,
-0.012104256,
-0.07195988,
0.014542897,
-0.032275774,
0.0027686171,
0.038691588,
0.05546941,
-0.015204906,
0.054877073,
-0.025119307,
-0.0337842,
0.0030478975,
-0.037556846,
0.015074203,
0.022833891,
0.012173256,
0.035718966,
0.0068811844,
-0.040539283,
-0.04956289,
-0.054521065,
-0.07317816,
-0.024969948,
-0.0021052386,
-0.013215133,
-0.06650142,
0.02316441,
0.046906833,
-0.13285862,
-0.010965043,
-0.024110796,
0.043096602,
0.024323147,
0.069191284,
0.15650614,
0.0177121
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 21,
"total_tokens": 21
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"What inspires neural networks?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.08570448,
-0.095600754,
0.04398704,
-0.016002586,
0.02937856,
0.07229825,
-0.0108823925,
-0.023841137,
0.073795915,
-0.057006016,
-0.033788595,
0.051158767,
0.0050739567,
0.014298775,
-0.07881352,
-0.012878745,
-0.041616067,
0.06878784,
-0.10782497,
-0.040376976,
0.026258128,
-0.001976873,
-0.011027494,
-0.0019720662,
0.0040587694,
0.088816345,
0.014071338,
-0.018417818,
0.032645598,
-0.034702033,
0.076144606,
-0.014125607,
-0.02493309,
0.03755479,
-0.10195466,
0.05470191,
-0.022550134,
0.024206808,
0.011727895,
-0.008955921,
-0.050100796,
0.0026504535,
0.05590394,
0.009941025,
0.12794785,
-0.025010481,
0.02435104,
-0.024520388,
-0.0022285185,
-0.024684334,
-0.104818396,
-0.059973124,
-0.055206526,
0.015273937,
0.034947917,
0.05265324,
-0.00064814935,
0.06637618,
-0.031795718,
-0.0072964546,
-0.0050489027,
-0.042481057,
-0.04087265,
0.02008772,
0.03870467,
0.022511596,
-0.028690359,
0.053362943,
0.022450354,
0.019296993,
0.12269906,
0.023923857,
-0.03728355,
0.005889267,
0.052346867,
0.054002233,
0.08020592,
-0.010999822,
0.029368848,
-0.06721461,
-0.0002297595,
-0.050588466,
-0.0095366035,
0.046173498,
0.07868036,
0.014159739,
-0.03324329,
0.0018601778,
-0.066629566,
-0.020975014,
-0.017125193,
-0.043948952,
-0.059707303,
-0.073459946,
-0.039868142,
-0.030861603,
-0.019913651,
-0.10752571,
-0.02664692,
0.0689932,
-0.0049655125,
0.026640149,
0.018917048,
0.022118697,
0.06419974,
-0.053135265,
0.061616186,
0.014025234,
0.11771526,
-0.05178239,
-0.07634793,
0.030905172,
-0.03857174,
-0.025236985,
0.039299082,
-0.06143655,
0.008370295,
0.016200868,
0.03228489,
0.066803135,
-0.06503229,
0.014640972,
-0.038513865,
0.018730285,
-0.03011228,
-0.028523602,
-0.14709216,
-3.454768e-33,
-0.04858036,
-0.024983805,
0.071692064,
0.03562587,
0.07928956,
-0.07811275,
0.02311943,
-0.047469147,
0.08866776,
-0.0009905098,
-0.11322911,
0.09129462,
0.023959681,
0.11371455,
0.042178337,
-0.057762112,
-0.07452438,
-0.0021433395,
-0.051525325,
-0.05095998,
-0.0016218564,
0.030707737,
0.04509054,
-0.039753992,
-0.058684282,
-0.03064905,
0.0017237811,
0.009109253,
-0.013751708,
0.023424868,
0.0017645947,
0.046604484,
-0.07229431,
-0.027867278,
0.016140861,
0.04446358,
-0.004325922,
-0.06178838,
0.06979857,
0.031267133,
-0.013667371,
-0.0074066212,
0.031622607,
-0.0236915,
0.07152246,
0.023948636,
0.009776826,
0.0071919537,
-0.03232169,
-0.049612403,
-0.050260104,
0.02150285,
0.015312771,
-0.06745535,
0.06546945,
-0.025536334,
0.03208605,
0.020402592,
0.011268207,
0.00021468061,
-0.02349139,
-0.004954465,
-0.014090667,
0.0014277936,
0.059316903,
0.039940886,
-0.032523617,
-0.023729,
0.05446682,
0.06422314,
-0.034017127,
0.08744712,
-0.08048706,
-0.090565994,
-0.06538303,
-0.00010127551,
-0.021434912,
-0.068461135,
-0.029138267,
0.03413734,
-0.07802728,
-0.05389643,
-0.035581492,
0.044851534,
-0.040098358,
0.07973631,
0.026042009,
-0.081827834,
0.0017979769,
-0.02764713,
-0.04310408,
-0.04207307,
0.08336723,
-0.0494554,
-0.09028882,
2.6716478e-33,
-0.091917306,
0.026388643,
-0.07020338,
0.075572066,
0.039003927,
0.027942013,
-0.054444574,
-0.036634557,
-0.048207656,
0.07556485,
0.046478804,
0.025872312,
0.05219267,
-0.00020983674,
0.010589843,
-0.040604923,
-0.028473163,
-0.02054734,
0.08885036,
-0.067588866,
0.04945189,
0.13227695,
-0.06998917,
-0.040121764,
0.044024557,
0.03420703,
-0.08647228,
0.057482626,
-0.007488546,
0.04904739,
-0.014908641,
-0.018117905,
-0.020271562,
0.03883485,
0.022270914,
0.13485505,
0.06897264,
-0.0026128246,
-0.016425159,
0.0033841128,
0.017271666,
0.013608802,
0.044169303,
0.049203753,
-0.008237051,
-0.04662037,
-0.04390372,
0.041557033,
-0.0354663,
0.04278537,
0.031310573,
0.017929101,
-0.02624033,
-0.0545814,
-0.042623743,
-0.004118359,
0.029068246,
0.001052956,
0.09042771,
0.014050165,
-0.06879308,
-0.071003124,
0.020317351,
0.004283492,
-0.046952303,
0.016503377,
-0.028376328,
0.1043668,
0.0028236075,
-0.08338905,
0.03736013,
0.058911674,
0.037606813,
0.09578536,
-0.12376857,
-0.054084644,
-0.014489054,
0.0013207535,
-0.04531095,
-0.089944325,
0.0017439555,
-0.05519527,
0.00056134106,
0.0005587594,
0.07862233,
0.104556754,
0.0035775604,
0.008373316,
0.04291439,
0.010107487,
0.025184723,
0.057374246,
-0.023012979,
0.054407477,
-0.049804952,
-1.32878e-08,
-0.053895604,
0.08075507,
0.03399497,
0.024384415,
0.090608515,
-0.07165007,
0.07552621,
0.017241832,
-0.061231323,
-0.03297735,
0.07829615,
0.0396499,
-0.03669638,
0.026653878,
0.10006404,
-0.014379535,
0.02066834,
-0.039198436,
0.008517119,
-0.0012403574,
0.06739532,
0.014030484,
-0.054005865,
-0.016788486,
0.076489784,
-0.035523314,
-0.050076444,
0.083784595,
-0.00999262,
0.081417,
0.019268963,
0.049931277,
0.0022461978,
-0.07805938,
0.01945713,
0.11157225,
-0.012694483,
-0.064655006,
-0.09344128,
-0.04999159,
-0.042193726,
0.059935458,
0.034836538,
-0.014958905,
0.014489057,
-0.022633748,
0.06917315,
-0.08858699,
0.02150387,
0.013796807,
-0.007545836,
0.027875464,
0.015522231,
0.0052421056,
0.01061417,
-0.022906043,
-0.025388915,
-0.04141604,
-0.08376164,
0.09259756,
0.051795125,
0.09296195,
0.0111989025,
-0.01673378
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"artificial intelligence"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.024362812,
0.016713308,
0.03763492,
-0.009156733,
-0.030551745,
-0.017125947,
0.07426094,
0.045657348,
-0.0093097305,
0.009920903,
-0.005690781,
0.0076895193,
0.039548296,
0.015248784,
-0.083151944,
0.019454934,
-0.02207085,
-0.033246633,
-0.1810784,
-0.1302997,
-0.0022484967,
0.013480844,
-0.024304103,
-0.03698983,
0.001961629,
0.08568096,
0.004767316,
-0.0034146819,
-0.0060834372,
-0.11571087,
0.06683183,
-0.01873301,
0.08783993,
-0.0074664783,
-0.09357002,
0.061450087,
-0.0810802,
0.012219781,
0.039706405,
-0.002647126,
-0.046620198,
-0.081851535,
0.039566126,
0.015464555,
0.043695353,
0.10368333,
-0.058397062,
0.03668824,
-0.052697357,
0.04057381,
-0.12580334,
0.0065060873,
-0.035828654,
-0.010048116,
-0.023825277,
0.045975305,
0.014622974,
0.019410197,
0.028452095,
-0.05502182,
0.024185732,
-0.052869923,
0.015245502,
-0.00438015,
0.09234898,
0.033873633,
-0.047367375,
0.032001555,
0.0013095026,
-0.051196218,
0.025864813,
0.081560105,
0.040911082,
0.019192263,
0.056467537,
-0.052748967,
0.030553715,
-0.016636984,
0.07878182,
-0.054208696,
-0.042150352,
-0.045420144,
-0.05269096,
0.11224785,
0.019874783,
-0.0423623,
-0.011692426,
0.024343297,
0.01916104,
-0.016559148,
-0.010328452,
-0.085476756,
0.02384857,
-0.042118136,
-0.024980163,
0.062104426,
-0.004581602,
-0.15367238,
0.001102325,
0.19421555,
-0.03386706,
0.026160223,
-0.020320892,
0.0012947157,
-0.0010485641,
-0.024099724,
0.017537115,
-0.009841853,
0.070402764,
-0.13768643,
-0.111146465,
-0.017362772,
0.06603636,
-0.051869333,
0.0019475558,
0.014572362,
0.060779307,
0.09626945,
0.0135371,
0.019355945,
-8.543184e-05,
-0.026694054,
-0.009353406,
0.07085975,
-0.0034419452,
-0.062405273,
-0.044579133,
-8.80938e-34,
-0.11187708,
-0.04253664,
0.027483786,
0.06572092,
0.0028295182,
-0.044070996,
0.0052582966,
-0.036901183,
-0.015558772,
0.020610636,
-0.059269626,
0.0072413837,
-0.028733822,
0.04047375,
0.13381885,
0.0068082553,
-0.016386433,
0.08218299,
-0.022658324,
-0.036435697,
0.06526089,
0.021031637,
-0.0054843347,
-0.038373824,
0.0014984249,
0.007331966,
0.01677609,
-0.06269722,
0.035417397,
-0.014398793,
0.027875954,
0.08376195,
-0.02777757,
-0.0036516306,
0.03904687,
-0.026841529,
-0.018736342,
0.01903094,
0.0651818,
0.0070574977,
0.0047951937,
-0.002987134,
0.04006833,
0.028001927,
-0.004688176,
0.012248329,
0.08704812,
-0.0070376135,
-0.037495255,
0.011267182,
0.015406452,
0.013771707,
0.017957818,
-0.009838073,
0.09011513,
0.051697087,
-0.034220304,
0.0043991045,
-0.018898288,
-0.031457234,
0.08212252,
0.016876385,
-0.022177191,
0.06844393,
0.015856383,
0.0203176,
0.0063723125,
0.016462969,
0.12720266,
0.014975143,
-0.010839063,
0.0017705995,
0.031662926,
-0.04433757,
-0.052297786,
0.022821713,
0.050960623,
-0.018954914,
0.0027527376,
-0.033637978,
-0.13569047,
-0.027035592,
-0.035660848,
-0.03351404,
0.047857523,
-0.0054172846,
0.02130265,
-0.040015485,
0.019387608,
0.012020892,
-0.043413315,
0.0005315479,
0.03484659,
0.017950043,
-0.062462628,
8.226272e-34,
-0.09449095,
0.013739951,
-0.025383765,
0.09899241,
0.04552389,
-0.020521628,
-0.029724384,
-0.059252843,
0.042447623,
0.08444559,
-0.043226957,
-0.0077667157,
0.049366944,
0.042077936,
-0.03653644,
0.014414636,
0.04032418,
-0.05892782,
0.010031362,
0.059879642,
-0.02792402,
0.03490713,
-0.08760264,
-0.060620386,
-0.0048639597,
0.087776646,
-0.005353071,
-0.02175546,
-0.048133314,
0.046915755,
0.008341115,
-0.05175852,
-0.02040021,
0.085782945,
-0.0226071,
0.034415677,
-0.014505325,
0.0030903826,
-0.046515204,
0.030268563,
0.039748456,
0.029745733,
-0.093127884,
0.051514212,
0.007829255,
-0.057012733,
-0.041812178,
0.089898124,
-0.008121904,
-0.040828798,
-0.05349857,
-0.034339238,
-0.045287646,
-0.097146384,
-0.058177214,
0.060921844,
-0.009064236,
0.0069495556,
0.012338063,
0.062054638,
-0.0060062264,
-0.08641508,
0.058708947,
0.053361338,
-0.05353899,
0.03950934,
-0.044963278,
0.07279474,
-0.0396003,
-0.051377922,
0.10337406,
0.021824561,
0.00013547574,
0.009485335,
0.021997929,
-0.0069047622,
-0.12891105,
-0.009861611,
-0.03639449,
-0.04249355,
0.0044484157,
-0.04767584,
0.0065166815,
0.1026327,
-0.053176586,
0.073318355,
0.015824493,
-0.029136809,
0.02512151,
-0.06307736,
-0.043478984,
0.067193694,
0.014923451,
-0.0011417158,
-0.098718524,
-1.4681537e-08,
0.00463343,
-0.06712206,
0.076443635,
-0.019814128,
0.0673915,
0.044810813,
-0.051008355,
-0.0077217882,
-0.02932436,
0.028841449,
0.018885555,
-0.024309436,
0.044141307,
0.044167083,
0.03432404,
0.046535607,
0.021588394,
-0.0017551337,
-0.0029986037,
0.014399799,
0.12530664,
0.034310702,
-0.0146423085,
0.03919942,
-0.002325517,
-0.014395083,
0.0100815315,
0.024295514,
-0.04172604,
0.08835341,
-0.031463772,
0.030068664,
-0.0029138532,
0.0048975134,
0.09590149,
0.09393541,
0.0141605595,
-0.07715167,
-0.039247666,
-0.010700626,
-0.008573732,
0.06410113,
-0.03301776,
-0.030493528,
0.09457071,
-0.008976579,
-0.029922878,
-0.13298088,
0.059931017,
-0.011697307,
0.007152748,
0.03558696,
0.0040925406,
0.056160007,
0.07656515,
-0.010041294,
0.0567585,
0.023536174,
-0.06379649,
0.08937482,
0.04375676,
0.043407574,
0.04633825,
-0.07037851
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 2,
"total_tokens": 2
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"test query"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.06829144,
0.061772227,
-0.0064161597,
0.082678765,
-0.07824987,
0.026521353,
0.13125585,
0.041369338,
-0.019540362,
-0.02709599,
0.0887907,
-0.10275329,
0.050712623,
-0.07134879,
-0.009282846,
-0.039247703,
0.028860288,
-0.01049117,
-0.024684245,
-0.035460133,
-0.04094595,
-0.009883736,
-0.026154075,
0.057957783,
-0.00061253883,
0.0076184087,
0.013905776,
-0.0016500223,
0.044650607,
-0.05900644,
-0.037936445,
0.037789088,
-0.03326097,
0.07172011,
0.09720765,
-0.082623295,
0.027609807,
-0.014166528,
0.018201344,
-0.0026497827,
-0.024251994,
-0.114919275,
0.08516042,
-0.01674906,
-0.0063111004,
0.06525075,
-0.058014978,
0.09666779,
-0.014186084,
-0.006836795,
-0.09889106,
-0.015126775,
-0.0783394,
-0.03557229,
-0.008273864,
-0.013632112,
-0.07621237,
-0.03039195,
-0.0135569805,
0.050146695,
-0.01059567,
-0.03840819,
0.0674032,
0.035650622,
0.010801949,
-0.07822949,
-0.0068962453,
-0.03009482,
0.055947337,
-0.07680802,
-0.009078504,
-0.002788809,
-0.02937109,
0.06879565,
0.013748122,
0.030850956,
-0.03644146,
-0.07147028,
0.05473256,
-0.028970802,
-0.064664625,
-0.059753876,
-0.067655295,
0.022762805,
0.07949517,
0.051779337,
0.14793634,
-0.0025083658,
-0.05545431,
-0.027768994,
0.019383226,
0.06685648,
-0.0795505,
0.01904091,
-0.00094253226,
0.0134609025,
0.03820869,
-0.040206373,
0.0649827,
0.13925305,
0.059302386,
0.018050361,
-0.049063586,
-0.057463937,
-0.17034325,
0.0098234955,
0.04479311,
-0.08709996,
0.046848226,
-0.02031104,
-0.062256135,
0.030291956,
0.04995267,
-0.03062274,
-0.007244306,
-0.06063938,
-0.0057327296,
0.028709931,
-0.055921447,
-0.006099839,
0.07552849,
0.073059924,
-0.031967085,
-0.027995033,
-0.0013227675,
0.0237769,
0.08236448,
-2.0790976e-33,
0.014696224,
-0.0849667,
0.05938996,
-0.007827523,
-0.015969144,
0.025970377,
0.03762491,
0.1256464,
-0.04001108,
0.024740757,
0.014459392,
-0.063038975,
0.0340931,
-0.0076668505,
0.008167134,
0.10462719,
0.018821232,
-0.021525906,
-0.04383254,
0.05684103,
0.016244315,
-0.07351815,
0.02012839,
0.05243149,
0.015002977,
-0.06589196,
-0.032537818,
0.024986163,
0.018428918,
-0.0003134351,
-0.06270619,
-0.0061910586,
-0.16043852,
0.028163772,
0.033009354,
0.03727067,
0.05406701,
-0.007932531,
-0.008608034,
0.054109853,
-0.046951395,
-0.03869324,
0.084930494,
-0.005905675,
0.021937586,
-0.052074514,
-0.047481276,
-0.054886986,
0.034032077,
-0.02832154,
-0.032060325,
-0.0013834401,
-0.040383566,
-0.017775834,
0.05222146,
0.0038051854,
0.008726582,
0.032692313,
0.010791591,
0.11194475,
-0.019752404,
-0.045764305,
-0.0020202047,
0.020939285,
-0.006159919,
-0.0017409867,
-0.0068266885,
-0.081341885,
0.091841556,
0.048661314,
0.07770758,
-0.058719456,
0.0063417573,
0.0036042097,
-0.071244255,
0.022036737,
0.019486615,
0.101281255,
0.0066442927,
-0.044674896,
0.06144362,
-0.09196092,
-0.0133002605,
0.014585881,
-0.017600225,
0.007354116,
0.006177494,
-0.048051644,
0.013157643,
-0.07767093,
0.014147597,
0.035391673,
-0.026176892,
0.002718191,
0.08641935,
9.148517e-34,
-0.022012252,
0.05088286,
-0.02727955,
0.028613139,
0.013718326,
-0.07109317,
0.09039982,
-0.090625234,
-0.06567498,
0.06685471,
0.066993244,
-0.05015442,
0.019033352,
-0.041487213,
0.012605603,
0.06907699,
0.0281946,
-0.070972204,
-0.061149873,
0.031668104,
-0.09625139,
0.13133687,
-0.0035538,
-0.027149519,
-0.06298852,
-0.0009207272,
-0.008693039,
-0.031348817,
-0.018568903,
0.011527607,
0.07185478,
-0.071952716,
-0.0059043416,
0.09352268,
0.046653684,
-0.031974927,
0.069581434,
-0.045875963,
0.010133493,
0.064104505,
0.07243221,
0.04723149,
0.04880478,
0.06762142,
0.005496453,
0.035764992,
0.01831371,
-0.038210426,
0.050088413,
0.041379653,
-0.02544787,
0.021565115,
0.014279919,
-0.0071081445,
-0.014286643,
-0.010122217,
-0.091654085,
0.009356054,
0.0043320316,
-0.009591156,
-0.029850187,
0.17471492,
-0.0045922897,
0.05783941,
-0.044838578,
-0.051453117,
-0.045911513,
0.007451434,
0.0054590874,
0.039563954,
-0.05625489,
-0.0022330268,
0.047820278,
-0.039598763,
0.027334856,
0.039694488,
-0.07971524,
0.03508072,
0.029276432,
0.010155507,
-0.039020576,
-0.027874392,
-0.040846046,
0.046112783,
-0.069308,
0.061977327,
0.039240442,
0.025863856,
0.0064374707,
0.053631745,
0.06962397,
-0.008001055,
-0.03827026,
-0.10952415,
0.018512232,
-1.3332562e-08,
-0.025684418,
-0.07470214,
-0.019860886,
0.0385072,
0.027302178,
-0.010903615,
-0.03522558,
0.036009304,
-0.06320341,
0.011506822,
0.03339635,
-0.012044345,
0.004013396,
0.016582591,
-0.007978201,
-0.041656163,
-0.07090684,
0.008757652,
0.004474724,
-0.038768765,
-0.05130229,
0.017759493,
-0.018255858,
0.043951545,
-0.04284978,
0.08247418,
0.015467272,
0.022083104,
0.044421837,
0.022857197,
0.08298176,
-0.012647776,
0.013097686,
-0.06692538,
0.047861587,
-0.04503364,
0.006510086,
0.0056154854,
-0.019552445,
-0.017313117,
-0.038419757,
-0.00048296133,
-0.008638455,
-0.026783587,
-0.06596831,
-0.14337558,
0.041494913,
-0.04859091,
0.012739855,
-0.085007615,
-0.010923813,
-0.03816371,
0.03006815,
-0.03887654,
-0.036665756,
0.046499304,
0.036260363,
0.052359663,
-0.09627654,
-0.041531097,
0.05020932,
-7.9168685e-06,
0.0019163007,
0.0195528
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 2,
"total_tokens": 2
}
}
},
"is_streaming": false
}
}
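These recordings all share one request shape: a POST to the OpenAI-compatible embeddings endpoint of a local Ollama server with model all-minilm:l6-v2 and encoding_format float. A minimal sketch of how such a call could be reproduced follows; the openai client usage and the api_key value are assumptions, not part of the recordings, and the doubled /v1/v1 in the recorded URL appears to come from a base URL already ending in /v1 joined with the /v1/embeddings endpoint path.

# Sketch only: issuing the recorded embeddings call against a local Ollama server.
from openai import OpenAI

# api_key is required by the client but ignored by Ollama (assumed value)
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input=["test query"],
    encoding_format="float",
)
# all-minilm:l6-v2 produces a 384-dimensional vector; usage mirrors the recording
print(len(resp.data[0].embedding), resp.usage.prompt_tokens)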

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"What is Python programming language?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.0623061,
0.043155346,
-0.056864023,
0.03486763,
-0.045145836,
-0.13253546,
0.021805322,
0.039048277,
-0.04841761,
-0.031872153,
-0.039334167,
0.0063758655,
0.07872078,
-0.0042740484,
0.023612525,
-0.02170506,
-0.055740308,
-0.0094528515,
0.039697133,
-0.11445638,
-0.011568856,
0.06161228,
-0.02625024,
0.024374798,
0.029430348,
-0.0035586308,
-0.0014398397,
-0.00313635,
0.013770647,
-0.0002185752,
-0.014788754,
0.084392585,
0.06679723,
0.042302314,
0.007701145,
0.073157564,
-0.008342027,
-0.09463514,
-0.09247907,
0.00763349,
-0.07390047,
0.015466744,
-0.04406345,
-0.044970937,
-0.041317657,
0.06967893,
-0.02747757,
0.014388817,
-0.036104802,
-0.006673772,
-0.08029175,
-6.000176e-05,
-0.038977537,
-0.049003445,
0.017844146,
-0.0064918958,
0.059797343,
-0.003170151,
-0.024797099,
-0.11498058,
-0.047404848,
0.0185016,
-0.009826349,
0.09572491,
-0.009429792,
-0.03576324,
-0.031269584,
-0.0032131649,
0.07714364,
-0.07617566,
-0.118788,
-0.06321078,
-0.0046245204,
0.06524506,
0.04577385,
-0.13796814,
0.04598187,
-0.03355735,
-0.013584839,
0.0045000566,
0.017061453,
-0.0016859988,
-0.051290352,
0.102515854,
0.015375054,
-0.053396687,
0.046739385,
0.11428208,
-0.0060018655,
0.010324239,
-0.031606395,
-0.051939677,
0.020962074,
0.008873621,
-0.06903091,
0.08133413,
0.012089255,
-0.06411361,
-0.03635769,
0.046689924,
0.011246541,
-0.05031814,
0.073784724,
-0.021187203,
0.03246321,
-0.026193537,
0.06816752,
-0.03795416,
0.030822705,
-0.0371306,
-0.03693002,
-0.029442247,
-0.032879222,
-0.005807539,
0.04255175,
0.054692194,
-0.0192783,
0.12276652,
0.0037922377,
0.0320851,
0.023700258,
0.019210111,
0.019973421,
-0.012249867,
-0.03246148,
-0.0044806604,
-0.035679862,
-6.954278e-33,
-0.0220099,
-0.06862265,
-0.03537707,
0.008910154,
0.071089186,
-0.025226729,
0.091465496,
-0.009329111,
-0.05951072,
-0.034704443,
0.04334736,
0.03334519,
0.024234882,
0.08795047,
0.020609507,
-0.0008948477,
-0.013011299,
0.08836162,
0.045687113,
0.025813619,
0.0542986,
0.09676311,
0.023140479,
0.024307383,
0.014198938,
-0.018661225,
-0.024505567,
-0.03258764,
0.025222383,
0.016810626,
-0.07629099,
0.012676406,
-0.021304907,
0.006898141,
0.030808464,
-0.000315505,
0.0005437531,
-0.08589918,
0.04053157,
0.006305948,
-0.010008999,
0.0015841384,
0.012631508,
-0.036505677,
-0.023090534,
0.012400456,
-0.00514819,
0.020243159,
-0.08760989,
0.045204975,
-0.0012632157,
-0.06573619,
0.07478642,
0.08402555,
-0.013935989,
0.05592361,
0.019318154,
-0.019661061,
-0.016006675,
-0.02916137,
0.0373911,
0.06808347,
0.06916834,
-0.0076644514,
0.02114384,
0.04043145,
0.03511955,
0.08206532,
0.08808922,
0.050526854,
-0.059352025,
0.04576268,
-0.025140414,
0.03584363,
-0.02806783,
0.019853832,
0.033893492,
-0.07974513,
0.023001093,
0.062465888,
-0.034909748,
-0.05390039,
-0.016120961,
-0.0057214363,
-0.030499708,
-0.02269443,
-0.010363369,
0.067623645,
-0.010582917,
-0.09608072,
-0.07854503,
-0.085294046,
0.029974943,
-0.005945623,
-0.039578382,
2.9788035e-33,
0.0114961,
0.010420429,
-0.06988839,
0.019277215,
-0.08453786,
-0.085693836,
0.06625677,
0.063027605,
0.050445113,
0.033733714,
-0.0058911345,
-0.06960736,
0.12548403,
0.021376437,
0.07414455,
0.034223642,
-0.045840543,
0.014842206,
-0.0126910545,
0.003648386,
-0.08023818,
0.06729063,
-0.056022517,
-0.08669063,
-0.027885731,
-0.033907417,
-0.038715098,
-0.07791038,
-0.017792802,
0.061793778,
0.014706543,
0.020005805,
-0.08145671,
0.05236086,
0.06286303,
-0.0015804858,
0.040509794,
-0.027593212,
-0.009631841,
-0.017296297,
0.11391202,
0.04420345,
0.03534961,
0.12113969,
0.018799841,
0.049258087,
-0.036080077,
0.07791577,
-0.029658308,
-0.070674755,
-0.0067282193,
0.006079021,
0.04225839,
-0.039644253,
-0.04860991,
-0.039792407,
0.032389786,
0.033703297,
-0.0924961,
-0.04988354,
-0.06596082,
-0.04236528,
0.03126068,
0.011825378,
-0.044250805,
0.046862055,
-0.123014495,
-0.034661833,
-0.01387497,
-0.13120808,
0.14482524,
0.0056040953,
-0.0031055296,
0.022885982,
-0.07644984,
0.016439024,
-0.019532247,
-0.024956707,
-0.0685838,
0.07072798,
0.026639467,
-0.0351677,
-0.0015660838,
0.02932653,
-0.089445055,
-0.022545021,
-0.03112053,
0.053812344,
0.007873327,
0.023094172,
-0.0043896562,
0.05380028,
0.017278776,
0.056359384,
-0.05330339,
-1.3478282e-08,
-0.039658625,
0.013374887,
0.03682183,
0.009698332,
0.0046835328,
0.06660773,
0.022911774,
-0.047426622,
-0.040507935,
0.006813708,
0.0086692255,
-0.0063030533,
-0.04566467,
-0.06387448,
-0.013173488,
0.11698006,
0.016895978,
-0.0013877428,
0.02321246,
0.022267532,
0.078508325,
-0.045089863,
-0.009183129,
0.066403426,
-0.06653049,
-0.0154824555,
0.054102156,
0.07644729,
0.008254354,
-0.124090366,
0.012699053,
-0.017593145,
-0.020621033,
0.032500766,
-0.012999753,
0.022328354,
0.010528125,
-0.08832318,
0.02148152,
-0.0029870127,
-0.03183275,
0.07181985,
0.01038717,
0.0036043858,
0.048932884,
0.07041019,
-0.036562778,
-0.03517641,
-0.03654687,
-0.07017274,
-0.03033558,
0.02860758,
-0.019075464,
-0.002551204,
0.02127327,
0.074368805,
-0.11424493,
-0.027312418,
-0.010811127,
0.010405173,
-0.02275616,
0.11514236,
0.18532485,
-0.026541265
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"What makes Python different from other languages?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.054539014,
-0.016468922,
-0.010608761,
0.02301095,
0.011758054,
-0.11193683,
-0.0096305525,
0.019113416,
0.048967674,
-0.040160257,
-0.022335947,
0.016229406,
0.009204825,
0.05479278,
0.049229205,
-0.09585555,
-0.031133035,
-0.010217964,
-0.029166166,
-0.08954575,
-0.0006925836,
0.034955315,
0.016062167,
0.0034184188,
0.039653763,
-0.016046634,
-0.02841708,
0.021410936,
0.046111625,
-0.062207576,
-0.023055006,
0.1013955,
0.025184965,
-0.03625098,
-0.032918476,
0.03443538,
-0.01667641,
-0.066225745,
-0.06069369,
0.0005895856,
-0.063880995,
0.0077826553,
-0.0051208152,
-0.03670025,
-0.023568328,
0.07426548,
-0.017221872,
0.064796105,
-0.009619924,
-0.0011168239,
-0.0946396,
0.029776908,
-0.082821324,
-0.053136017,
-0.014514815,
-0.015186634,
0.03710505,
0.07176102,
-0.01892326,
-0.11193171,
-0.11862717,
0.029721867,
0.030640045,
0.103079796,
-0.02800051,
-0.045588907,
0.0014006048,
0.0046053855,
0.03230686,
-0.027150096,
-0.06602394,
-0.015831675,
0.019209974,
0.06880736,
0.04709176,
-0.105855644,
0.046280492,
-0.03096076,
-0.069832,
-0.014894174,
-0.0014720439,
0.026728554,
-0.04701634,
0.07608865,
0.05755428,
-0.020295804,
0.038703557,
0.06851399,
-0.068138964,
-0.017405631,
0.057037257,
-0.07952873,
-0.014248788,
0.0036484832,
-0.052898604,
0.049604755,
0.021487204,
0.035027836,
0.02545877,
-0.004785061,
0.051205274,
-0.08541501,
0.07143089,
0.04468161,
0.03930722,
-0.0135141155,
0.07088695,
-0.0660048,
0.0592439,
-0.023046793,
-0.027459674,
-0.04689379,
-0.037509903,
-0.0084943585,
0.05313619,
0.0038019137,
-0.02021957,
0.043566354,
-0.034341905,
0.042827673,
-0.007318655,
-0.0016014964,
0.04183553,
-0.025132777,
-0.03014748,
0.056046948,
-0.03387941,
-4.800238e-33,
0.008938797,
-0.105446324,
-0.022468172,
-0.0046421383,
0.10120766,
-0.024071503,
0.0720334,
0.00824967,
-0.017588114,
-0.012572595,
0.011187751,
0.09430494,
0.025195174,
0.061279986,
0.028598385,
0.07013615,
-0.028032323,
0.042044032,
0.012670473,
0.05118446,
0.069872275,
0.113011226,
0.06393332,
0.046133682,
0.00069346296,
-0.04742425,
-0.0076766815,
-0.016270984,
-0.03935856,
-0.0060400777,
-0.057824753,
-0.032809503,
0.030087646,
0.04949177,
0.0065082232,
-0.015118406,
0.027426325,
-0.13929617,
0.04686397,
-0.0001376871,
0.023311358,
0.014268379,
0.0005033175,
-0.019155173,
-0.021629533,
0.012334637,
-0.035448097,
-0.015012808,
-0.12478333,
0.017866643,
-0.015385203,
-0.030914769,
0.07756115,
0.067938074,
-0.0029891697,
0.03446983,
0.072096206,
-0.008727331,
-0.0039063273,
-0.048090436,
0.021224795,
0.065839365,
0.07848987,
0.014581675,
0.06676033,
0.07221585,
0.033575963,
0.08418111,
0.016567666,
0.042123966,
-0.05935007,
0.020415181,
-0.06571829,
0.04579863,
0.002951678,
0.0034759378,
-0.008463108,
-0.14008056,
0.056221444,
0.05469431,
-0.060425404,
-0.035049956,
-0.05707458,
-0.010413291,
-0.08953148,
-0.023625003,
0.034471046,
0.033661205,
0.06720743,
-0.07255193,
-0.041828338,
-0.08223931,
0.010640704,
-0.042891644,
-0.0014475408,
8.39199e-34,
-0.07032797,
0.0070702634,
-0.035070483,
0.021509597,
-0.11257678,
-0.04567272,
0.08481507,
0.050335176,
0.053387776,
0.012060723,
-0.0019196937,
-0.08608223,
0.09600442,
0.0037239613,
0.060983595,
0.015279161,
-0.040586337,
0.10490671,
0.07111468,
-0.0050306814,
-0.048980962,
0.09183541,
-0.09862482,
-0.012065119,
-0.016891332,
-0.028088856,
-0.12471142,
-0.078602985,
-0.018680012,
0.021758018,
0.005759521,
0.051118605,
-0.082707904,
0.072964445,
0.0141024105,
0.0010097212,
-0.03685827,
0.00568948,
0.017905025,
0.013780462,
0.04993993,
0.021444008,
0.110891685,
0.061709184,
0.01853852,
0.036215156,
-0.06684297,
0.036332514,
-0.021102918,
-0.07972151,
0.065229,
0.0030138723,
0.018853001,
-0.008725459,
-0.058164038,
-0.040056095,
0.051841468,
0.016301498,
-0.08781288,
-0.02227259,
-0.013245076,
-0.03801183,
0.025480323,
0.030531729,
-0.054035358,
0.04038695,
-0.116109855,
-0.026073342,
-0.0043725744,
-0.15029478,
0.08059584,
-0.05766878,
0.02516043,
-0.0038830324,
-0.064506546,
0.020497749,
-0.034779944,
-0.02932536,
-0.052795924,
0.05048031,
-0.036627516,
-0.009295713,
-0.03128295,
-0.0010504925,
-0.089731686,
0.044538505,
-0.058741618,
0.028392328,
0.05705406,
-0.021216048,
0.024795407,
0.023279097,
-0.025490018,
0.066466905,
0.011147595,
-1.57812e-08,
-0.043579992,
0.050845813,
0.009048856,
0.036609128,
0.0027812773,
0.03891625,
-0.013210705,
0.0068475637,
-0.0067914757,
0.020505553,
-0.029885264,
-0.0055864784,
-0.06776668,
-0.054356683,
0.024375776,
0.13760787,
-0.07139099,
0.007762989,
0.051617414,
0.05973323,
0.042459413,
-0.03560275,
-0.05791632,
0.04441552,
-0.10566783,
0.009725281,
-0.016063722,
0.035676833,
0.023308199,
-0.079277165,
0.0054484066,
-0.060464006,
-0.044717573,
0.013122884,
-0.015911829,
-0.012086337,
0.005874884,
-0.070992075,
0.017624497,
0.036101837,
0.023521954,
-0.007950616,
-0.036010865,
0.0059945653,
0.059922658,
0.0058807023,
-0.058820717,
-0.04119291,
-0.038226888,
-0.03001563,
0.019165142,
-0.020903448,
-0.0089449985,
-0.02588891,
0.08723996,
0.04226809,
-0.09462471,
-0.0349857,
0.05150947,
0.04254913,
-0.01820297,
0.06036542,
0.19380692,
0.014680669
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"Why are data structures important?"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.003989132,
0.051404107,
-0.00056249514,
-0.038048144,
0.00023617804,
-0.07165115,
-0.032934345,
0.029131265,
0.089478746,
0.027012052,
0.022988115,
0.029467529,
0.013449345,
0.02187333,
0.024701167,
0.02318687,
-0.067904875,
0.042214446,
-0.06686454,
-0.044817198,
-0.019499827,
-0.017647728,
-0.047033403,
0.01010371,
-0.035198584,
0.1279292,
-0.03992792,
-0.03702997,
0.021821143,
-0.06663628,
0.020529605,
0.03141518,
0.121698014,
0.037880983,
-0.07562467,
0.035962664,
0.11100028,
-0.025674157,
-0.0779127,
0.016963888,
-0.0807954,
0.042507604,
0.00820509,
0.07316419,
0.01111272,
0.01623341,
0.019468198,
-0.05727617,
-0.026948903,
0.02756721,
-0.10366233,
0.061819006,
-0.02805692,
0.04555006,
0.038514387,
0.102219224,
0.010187554,
0.0038878673,
-0.07438772,
-0.009772767,
-0.014589378,
0.005427063,
-0.04896932,
0.024673788,
0.08042059,
-0.0013942291,
0.0008588407,
0.0016949617,
0.016265066,
0.0036070896,
0.05801152,
-0.010051563,
-0.008403578,
0.06814287,
0.03398574,
-0.011672763,
-0.049353864,
-0.034604926,
0.022498535,
0.016111419,
0.02527047,
0.03502525,
-0.018208683,
0.068031214,
0.059953574,
-0.025391363,
0.04580482,
-0.04296594,
-0.10485879,
-0.028135728,
0.079018995,
-0.01712349,
0.012407565,
0.04061926,
-0.020135157,
0.026930887,
0.041811634,
-0.04416108,
0.080970354,
0.021775935,
0.081765614,
0.033288363,
0.021744251,
0.0920779,
-0.052091073,
-0.13620377,
0.01355201,
-0.019836528,
-0.03622741,
-0.050273415,
-0.03297705,
0.046637394,
-0.062427662,
-0.05683662,
-0.027652364,
-0.15121156,
-0.09399186,
-0.011023118,
-0.024265675,
-0.046763826,
-0.002908067,
-0.066486366,
-0.025612496,
0.018278103,
0.0020231954,
-0.062278572,
-0.11748546,
-4.4292726e-33,
-0.009130088,
-0.037159156,
-0.026047857,
0.052019667,
0.00085722556,
0.006592443,
-0.0045248135,
-0.04015857,
0.004117024,
0.0428665,
-0.049716696,
0.045335494,
0.042848498,
0.044919603,
0.11100728,
0.021570923,
-0.031257298,
0.07225882,
-0.01912497,
-0.034713253,
0.06771385,
-0.016151445,
0.05971066,
-0.022954458,
0.028852448,
0.015406495,
-0.00031955744,
-0.012751747,
-0.03327897,
-0.00012636236,
-0.02479355,
-0.042213496,
-0.002454921,
0.041260865,
0.0919246,
0.06857511,
-0.0152807245,
-0.12649235,
0.016997697,
-0.08620996,
0.055064507,
0.030273788,
0.00431866,
0.031995468,
-0.03225614,
0.004922506,
0.009020533,
-0.023137338,
-0.040697925,
-0.09105851,
0.03639921,
0.024429396,
0.013554936,
0.032427397,
0.04099883,
0.037522644,
-0.041546755,
-0.079021014,
-0.053779483,
0.06449904,
-0.08023162,
0.021288263,
0.062299646,
0.0457609,
0.03245626,
0.08930955,
-0.040566627,
-0.031877786,
0.09784694,
0.018440586,
0.0055373674,
0.033386778,
-0.069314316,
0.0050042598,
-0.011121069,
0.04041817,
-0.018704956,
-0.06160915,
-0.019937823,
0.05572433,
-0.033941865,
-0.03284764,
0.039774805,
0.032533348,
-0.014803814,
-0.04081455,
0.090428285,
-0.07119735,
-0.045317948,
0.0044284705,
-0.011297022,
0.010466631,
-0.0050936122,
-0.032272205,
-0.014571677,
1.9730937e-33,
-0.014730757,
-0.011375904,
-0.018987043,
-0.030017996,
-0.03238378,
0.00021963792,
-0.012572021,
-0.121466525,
0.0020859565,
0.031917855,
-0.0047694035,
0.009451863,
0.07091064,
-0.10059175,
0.025064182,
0.06191513,
-0.0040704445,
-0.09924964,
-0.011796679,
-0.047690243,
-0.030504584,
0.06266709,
-0.07385124,
-0.0061550937,
-0.01423386,
0.0073556406,
-0.12380783,
-0.12357105,
0.049844977,
0.013651552,
-0.042339053,
-0.05773099,
0.008854461,
-0.039381962,
-0.010391537,
0.01995317,
0.06865881,
-0.0034758614,
0.034933414,
0.016901772,
-0.041236185,
0.1275965,
-0.010944973,
-0.038379222,
0.03352998,
0.024260346,
-0.009189018,
0.08945688,
-0.037322775,
-0.033685952,
0.083590224,
0.024379434,
0.013052954,
-0.082478285,
0.081726134,
0.025851976,
-0.040732652,
0.011625263,
0.045134045,
0.05800952,
-0.043148052,
-0.02189082,
0.0076365937,
0.07503425,
-0.0371004,
-0.04029487,
-0.044494897,
-0.10995023,
-0.024031844,
-0.08961193,
0.020242436,
0.030619737,
-0.021178389,
0.04682225,
-0.08384518,
-0.04420498,
-0.041840017,
0.031129008,
0.010757745,
0.06393576,
-0.0031622013,
-0.012325239,
0.03960315,
0.038744513,
0.04009258,
0.012087899,
0.060512736,
-0.04624927,
0.00929668,
-0.051231515,
-0.0496359,
-0.015559894,
-0.08582702,
0.07392022,
-0.02927744,
-1.4551534e-08,
-0.060233776,
-0.056502644,
-0.0039323824,
-0.030575769,
0.033688147,
-0.051516674,
0.011328192,
0.14126065,
0.02396768,
0.019315943,
0.06601706,
0.030757405,
-0.106958,
0.0033853063,
0.073158585,
0.024177559,
0.08089344,
-0.078784004,
-0.032134753,
0.07526011,
0.054319587,
0.009856976,
-0.12708029,
0.06313889,
0.09004333,
-0.0015960654,
0.058387086,
0.059561662,
-0.0047651688,
0.0229759,
0.03569084,
-0.034010228,
0.07279012,
0.07974487,
0.091203436,
0.022210982,
0.04596847,
0.044025153,
-0.083589375,
-0.10002216,
0.020842535,
0.023079954,
-0.04795557,
0.08441458,
0.0771154,
0.009332128,
-0.08095578,
0.092889085,
-0.020154007,
-0.0008010522,
-0.03861009,
0.016097447,
0.0070208795,
-0.017685603,
-0.002207989,
-0.02192508,
0.033382397,
-0.03214206,
-0.012332422,
-0.002134471,
0.021111421,
0.016544258,
0.017546006,
-0.07716502
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is a test file 0"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.021827588,
0.08818103,
-0.10864717,
0.0027738505,
0.049183175,
-0.030155653,
-0.015535575,
0.027562236,
-0.025055608,
0.016142149,
0.12481904,
0.0027390872,
-0.033304155,
-0.007155499,
-0.07006565,
-0.028012667,
-0.0974939,
-0.09156265,
0.013381448,
0.08751534,
0.013976399,
0.036656633,
-0.0363098,
-0.019737098,
0.04459191,
-0.009628102,
-0.018323021,
0.048807826,
-0.015294308,
-0.071472056,
0.04096934,
0.08271212,
0.06394962,
0.014480425,
0.13194743,
0.030426797,
0.10103986,
-0.030337727,
-0.047615312,
0.044662375,
0.027032219,
-0.029383352,
0.038528103,
0.005350361,
0.014771562,
0.02561623,
0.0041866824,
0.0035074751,
0.029762248,
-0.036631253,
-0.045908086,
0.031111827,
-0.07789252,
-0.019519411,
0.053894877,
-0.015229676,
-0.0016866667,
0.016928526,
0.019906636,
0.071048684,
0.009945389,
0.031127382,
-0.010339295,
0.029969081,
0.1150558,
0.0257364,
-0.05285643,
-0.042424288,
0.00530526,
-0.09986522,
-0.12739678,
-0.012008937,
-0.013796879,
0.052672364,
-0.017240625,
0.009655106,
-0.07752442,
0.001446598,
0.06974642,
-0.084652565,
-0.06148656,
-0.1424512,
0.00971367,
-0.008617611,
-0.03184207,
0.12822424,
0.05323436,
0.021975016,
0.0026292745,
0.015444466,
-0.042529456,
0.031529475,
-0.062093526,
0.044023193,
-0.006063745,
0.06960859,
0.0050675236,
0.05936227,
0.006593922,
0.08395398,
-0.0067747384,
-0.041917052,
0.027087294,
0.1064389,
-0.03939661,
-0.053915743,
0.0969116,
-0.008478297,
0.03400473,
-0.033850323,
0.0022322247,
-0.08182309,
-0.008227045,
-0.112729885,
0.0058874753,
-0.09516338,
-0.07956543,
0.0528746,
-0.08121418,
0.034270033,
0.079010375,
-0.026773734,
-0.043880418,
0.0067898994,
-0.054401524,
-0.021739269,
0.08060149,
-3.9385423e-33,
-0.0072775874,
-0.07965713,
0.024867468,
0.115594625,
0.035952598,
-0.07256428,
0.01264772,
0.05078877,
-0.1001076,
0.019520493,
0.003609843,
-0.07002774,
0.00796547,
0.029297192,
-0.017813923,
0.026997875,
0.016828112,
0.035944253,
-0.020945141,
-0.032345034,
0.056713093,
-0.009717346,
-0.059717353,
-0.053816583,
-0.055860512,
0.0652541,
-0.024728304,
-0.07780815,
0.038602088,
0.008995879,
0.009711051,
-0.02800488,
-0.02488407,
-0.001753672,
0.025541821,
0.03461599,
3.1180356e-05,
0.0034299733,
-0.04524332,
0.034621477,
-0.025317375,
-0.029820684,
-0.019064484,
-0.023168772,
0.049378216,
-0.0614278,
0.00038631904,
0.0028947273,
0.027602436,
0.0069355685,
-0.020665208,
0.0607627,
0.015200459,
0.038925096,
-0.025373906,
-0.0017942133,
-0.019378444,
-0.005707356,
-0.01781858,
0.03804118,
0.032033492,
0.039991416,
-0.096098565,
0.0007088372,
-0.018460834,
-0.06865977,
-0.007682667,
-0.083552696,
0.10225278,
0.05144313,
-0.033060983,
-0.05033815,
0.043931242,
0.017761385,
-0.006623071,
-0.018680306,
0.012787289,
0.016647147,
-0.095078625,
-0.023556676,
0.0068797185,
-0.07225466,
-0.0030222975,
-0.06930809,
-0.027324349,
-0.06728827,
-0.0066746464,
-0.06802411,
0.044557177,
-0.09791178,
0.05094532,
0.010023194,
-0.04618695,
-0.067631915,
0.044459086,
2.564085e-33,
0.0148239555,
0.071699664,
-0.05235211,
0.011046101,
-0.01389393,
0.07070217,
0.09194932,
-0.019197263,
-0.01579352,
0.14807871,
0.03188067,
0.022338957,
0.070754,
-0.037077773,
0.08807045,
-0.018151604,
-0.013233297,
-0.04176197,
-0.05230764,
-0.0027928778,
-0.024819419,
0.13973284,
0.07498215,
0.05643386,
-0.02942886,
0.017126264,
0.03372573,
0.068746336,
0.020448433,
-0.018980682,
0.081244655,
0.06527421,
-0.09341324,
0.0037619828,
0.06348108,
-0.08774056,
0.092889525,
-0.024263546,
0.029117694,
0.0034306366,
0.055297706,
0.102015935,
-0.023556657,
0.065803,
0.015247541,
0.034352973,
0.105588056,
0.011606838,
0.04098301,
-0.056642916,
0.037729684,
-0.04976193,
0.047909457,
0.0042117573,
-0.014169,
0.07561971,
-0.0096767275,
0.055205546,
-0.031133024,
0.019914651,
-0.025017431,
0.031833746,
-0.019527186,
-0.009863273,
-0.020237885,
-0.033213306,
-0.026289295,
0.038861252,
0.012964407,
-0.041289695,
0.012831493,
0.028716395,
-0.054101057,
-0.07626151,
0.021948934,
-0.023362676,
-0.026700463,
-0.029420532,
0.0052917786,
0.012322609,
0.052309964,
0.005428001,
-0.0063846395,
0.046033006,
0.042387757,
-0.018442502,
0.012625506,
0.093027025,
-0.0059689214,
-0.015190377,
-0.011668946,
0.048090797,
0.025912488,
0.050898798,
0.005562451,
-1.5056784e-08,
-0.030993447,
-0.07005236,
-0.032605737,
-0.00874509,
-0.004551062,
0.07593507,
-0.032746524,
-0.08790053,
-0.032251474,
-0.024588991,
0.051248234,
-0.0345528,
-0.08264784,
0.013345202,
-0.020562632,
-0.05624872,
-0.009445643,
-0.015907064,
-0.036610577,
0.010109376,
-0.0343682,
0.0315048,
-0.00014384133,
0.010448328,
0.017060373,
0.015475448,
0.074810885,
0.07080812,
-0.050022244,
-0.047005255,
0.013738294,
0.060728636,
-0.009370956,
-0.015692767,
-0.01834865,
0.12297243,
0.11857768,
0.123661466,
0.022802081,
-0.019996397,
-0.07401723,
-0.004714934,
-0.02488245,
0.006072489,
-0.066606365,
-0.081319734,
-0.08740771,
-0.06348687,
-0.039211858,
-0.11271469,
-0.030644065,
0.026577946,
-0.06322251,
0.042043004,
-0.03901968,
-0.009200455,
0.0050292667,
0.001581719,
-0.058653522,
0.04309485,
0.066819645,
0.062200524,
0.021176148,
-0.04108276
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"machine learning and artificial intelligence"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.043112263,
0.008686894,
0.06879597,
0.018093547,
0.04600579,
0.0026370327,
-0.0032194739,
-0.04128641,
-0.090751864,
-0.03311354,
-0.026625047,
0.007723082,
0.02020638,
-0.032501053,
-0.03582959,
0.031117352,
-0.03921459,
-0.011261255,
-0.10972644,
-0.12942035,
0.0180839,
0.011446483,
-0.07227963,
-0.013646516,
0.035441313,
0.024786202,
0.033887945,
0.072541736,
-0.012643559,
-0.058576923,
0.05788946,
-0.08161914,
0.064951725,
0.0013679718,
-0.067565694,
0.03500105,
-0.04499739,
-0.004745917,
0.04001028,
-0.010447466,
0.01971203,
-0.09853681,
-0.012831109,
0.018893523,
0.09566803,
0.11574249,
-0.040688448,
-0.026871145,
-0.046950754,
0.022665758,
-0.088503055,
-0.02349465,
-0.022964876,
-0.031086901,
-0.052040946,
0.042409953,
0.011587446,
0.06698339,
0.027131157,
-0.0021599897,
0.04676616,
-0.08205926,
-0.038376193,
0.052162487,
0.097754784,
-0.0006300649,
-0.051922448,
0.09102494,
-0.016122114,
-0.068757266,
0.007674277,
0.07676188,
-0.0017702047,
0.014375106,
0.038056612,
-0.0044639558,
0.01128439,
0.0006278256,
0.08837875,
-0.059357397,
-0.042713538,
-0.048170365,
-0.053083148,
0.03308664,
0.008073919,
-0.042588204,
-0.038085114,
-0.0071590515,
0.010923276,
-0.05467666,
0.039005354,
-0.06774879,
-0.023520455,
-0.038865313,
0.03465567,
0.015331597,
0.0073779793,
-0.123536974,
0.03618996,
0.13191763,
-0.06441666,
0.03345934,
-0.014335858,
0.0014165065,
0.031064518,
-0.039842315,
0.02367409,
-0.0028713108,
0.09695666,
-0.13332556,
-0.054217666,
0.019605756,
0.069848165,
-0.05345,
0.0018457369,
0.021261381,
0.019834742,
0.0364726,
0.008800545,
0.01899199,
-0.07162491,
-0.018764688,
0.030988883,
0.09103274,
0.016486289,
-0.08622413,
-0.083044365,
-1.3872017e-34,
-0.07202043,
-0.04547031,
-0.02789685,
0.058260243,
-0.010473749,
-0.06121573,
0.026039537,
-0.06574506,
0.029187253,
0.012286592,
-0.0634218,
0.040592846,
0.036436044,
0.019791061,
0.087508686,
0.02819681,
0.044173952,
0.076273374,
0.029475076,
-0.0022728525,
0.043047428,
0.025950495,
5.87631e-06,
-0.038482204,
-0.016193746,
0.03337992,
0.021100886,
-0.023393923,
0.009839609,
0.033582654,
0.030119505,
0.060411848,
-0.06525265,
-0.016019775,
0.01918547,
-0.0026020391,
-0.046634916,
0.02794535,
0.02097679,
0.007491536,
-0.048716933,
-0.007056093,
0.019862399,
0.01642084,
-0.06380952,
0.0312326,
0.09198801,
-0.031442497,
0.022264522,
-0.015000218,
0.002577486,
-0.031360134,
-0.015259252,
-0.025491642,
0.082340494,
0.14332701,
-0.02549817,
-0.005105692,
-0.023140578,
-0.031175751,
0.069945835,
0.030767307,
0.048112787,
0.03713218,
0.006838781,
0.0676382,
0.049743734,
0.008490252,
0.0717143,
0.007724331,
-0.0051555126,
-0.0031412526,
0.024659572,
-0.06878996,
0.052448474,
-0.009324618,
0.10184338,
-0.01364986,
-0.022692662,
0.0214144,
-0.09594176,
0.024049604,
-0.07207682,
-0.044615954,
0.03346317,
-0.03939876,
0.020151427,
-0.07493882,
-0.008306699,
0.013818277,
-0.098477356,
0.03363548,
0.08237572,
-0.0034042797,
-0.05002446,
-2.0284525e-33,
-0.1366396,
0.06461703,
0.05217467,
0.10100113,
0.01633431,
-0.012683015,
-0.09023996,
-0.023585103,
0.005757103,
0.102958955,
-0.025938109,
-0.04024086,
0.03442524,
0.019281812,
-0.05693542,
0.019865949,
0.01892263,
-0.03937148,
0.011244816,
0.05603835,
-0.015989995,
0.058931332,
-0.03825127,
-0.030448802,
-0.021279855,
0.031412993,
-0.021256046,
-0.013973024,
-0.051028315,
0.048959594,
0.018415732,
-0.015543872,
-0.050339997,
0.053825643,
-0.05102614,
0.016936453,
-0.03276066,
-0.025018891,
0.00083950633,
0.10212479,
0.047226448,
0.01013783,
-0.11656542,
0.012194899,
-0.029693797,
-0.099592775,
-0.05208683,
0.068527095,
0.05462999,
-0.06600112,
0.025495205,
0.013553149,
0.008376301,
-0.10753366,
-0.08184969,
0.07179369,
0.008020084,
-0.013001388,
0.02034551,
0.07830072,
-0.073259205,
-0.11530623,
0.040887818,
0.04355819,
-0.001209231,
0.045809098,
-0.00439629,
0.07479018,
-0.017603617,
-0.046038117,
0.022736022,
0.057742845,
-0.015455795,
0.0078048306,
-0.043795776,
-0.05287881,
-0.08780934,
0.016208123,
-0.018338274,
-0.05680242,
0.036081936,
-0.040417098,
0.039246004,
0.083620116,
-0.019201642,
0.055849098,
0.047579776,
-0.07378654,
0.033696014,
-0.08679882,
-0.0106773665,
0.052387673,
0.009724484,
0.023857431,
-0.08621698,
-1.7164837e-08,
0.021028662,
-0.05131077,
0.11875527,
-0.04681493,
0.06569432,
0.05875326,
-0.050507378,
0.05572548,
-0.040579688,
0.05569073,
0.025022164,
-0.001695402,
-0.03103065,
0.022217639,
0.02812072,
0.031644266,
-0.025532138,
0.020890266,
-0.023071108,
0.013451792,
0.07502988,
0.022283832,
0.028922528,
-0.014248503,
0.025503293,
-0.051433153,
-0.0144749675,
0.014626067,
-0.028012041,
0.08404862,
-0.07754722,
0.03867142,
-0.004333606,
0.025680339,
0.12575574,
0.07000303,
0.0059297155,
-0.104100324,
-0.041432552,
0.016101085,
-0.040745873,
0.017750472,
-0.09112738,
-0.026067602,
0.055624463,
0.016697235,
0.016438706,
-0.11938217,
0.027880691,
0.015196545,
0.042352572,
0.06814026,
0.057811365,
0.063263096,
0.067467265,
0.059775982,
0.06467763,
-0.067497864,
-0.035580758,
0.06402302,
0.008630453,
0.0031874685,
0.009377425,
-0.08392178
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is a test file 2"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.028391164,
0.08177924,
-0.078595236,
0.02794012,
0.0501054,
-0.03523528,
-0.0040212795,
0.029318463,
-0.057719484,
0.013758128,
0.14608414,
-0.012030242,
-0.0244042,
-0.05507163,
-0.026622117,
-0.0132702645,
-0.109127365,
-0.037243392,
-0.003585629,
0.047631495,
0.062134072,
0.0070668682,
-0.015537441,
-0.0080097895,
0.03766712,
0.015882641,
-0.041853406,
0.09733282,
-0.025634848,
-0.11367206,
0.035507742,
0.07039588,
0.016794816,
0.022213018,
0.12344487,
0.007708932,
0.12549855,
0.00806089,
-0.02614805,
0.0028652712,
0.018172521,
-0.046700634,
0.04102468,
0.001336475,
0.0019230411,
0.008665353,
0.016688382,
0.022002129,
0.0020729597,
-0.03286714,
-0.08643458,
0.008018572,
-0.07433228,
-0.01628817,
0.060542718,
0.005992304,
0.016035207,
0.021369386,
0.009568174,
0.03177933,
0.023040457,
0.03435853,
-0.042258766,
0.024753148,
0.11620828,
-0.02494626,
-0.03897831,
-0.024997817,
-0.020839883,
-0.08836877,
-0.15072803,
0.020933837,
-0.022511186,
0.0023899842,
0.0057860566,
-0.001578469,
-0.11986527,
-0.003025397,
0.055101633,
-0.11829019,
-0.05885812,
-0.1504569,
0.01861341,
-0.009307191,
-0.028901236,
0.08401475,
0.043742407,
-0.0006705526,
-0.052525397,
0.00025590818,
0.040425412,
0.0066513056,
0.026082706,
0.051888794,
0.01259031,
0.061460704,
0.013889724,
0.03844097,
0.048208673,
0.10407735,
-0.02645537,
-0.021476867,
-0.020856835,
0.050631326,
-0.05169685,
-0.07577173,
0.05749261,
-0.0499922,
0.06527451,
-0.02872225,
0.03874818,
-0.062776215,
-0.014480463,
-0.06345894,
0.06641256,
-0.014838074,
-0.03524914,
0.07739568,
-0.039939843,
0.032204024,
0.10169046,
-0.022527538,
-0.05930125,
0.00039771595,
-0.057792112,
-0.070337616,
0.06377354,
-4.088526e-33,
-0.021773575,
-0.079873994,
-0.013886454,
0.14922747,
0.025207443,
-0.042269774,
-0.0067705857,
0.054603398,
-0.092237934,
0.008083855,
-0.03861146,
-0.11771469,
0.012989592,
0.034553546,
-0.017051153,
0.011906159,
0.012945488,
0.042745717,
-0.01759736,
-0.018408326,
0.06513165,
0.0405268,
-0.022535695,
-0.06094611,
-0.018629104,
0.011654488,
0.014083773,
-0.067636594,
0.08541857,
0.030126775,
0.010824449,
-0.054840527,
-0.024132056,
0.048314847,
0.007516418,
0.013355685,
0.024563083,
-0.005942082,
-0.045623902,
-0.004832818,
0.004424451,
-0.0023969507,
0.013589571,
-0.0168692,
0.06961138,
-0.07734751,
0.020551285,
0.0048098145,
0.055662792,
0.013124815,
-0.011720894,
0.04093993,
0.007497743,
0.042012148,
0.010350773,
0.019379916,
0.01108285,
0.017257342,
0.018258827,
0.0773061,
0.01962173,
0.052673563,
-0.05859421,
0.039764106,
-0.05021828,
-0.04896494,
-0.05262346,
-0.09227966,
0.07557037,
0.08099812,
-0.02225778,
-0.04215297,
0.056577113,
0.02356105,
0.0015294012,
-0.049797468,
0.0023656262,
0.028645845,
-0.06897522,
-0.0477758,
-0.04864175,
-0.0766266,
-0.032856915,
-0.046002492,
-0.057314955,
-0.08091142,
-0.008058203,
-0.09362831,
0.0512433,
-0.05832409,
-0.00059281266,
0.022221608,
-0.046930317,
-0.08964614,
0.11954097,
2.044738e-33,
0.01219642,
0.08643133,
-0.023233324,
0.002765521,
-0.0010344109,
0.034877002,
0.07328553,
-0.04988436,
-0.04193409,
0.13485521,
-0.006909938,
0.0062319604,
0.059107542,
-0.028918913,
0.09142895,
-0.018481337,
0.00771716,
-0.04420843,
-0.025174472,
-0.0150115965,
-0.03543459,
0.124125846,
0.13119355,
0.08100271,
-0.033272874,
0.0039677722,
0.02646281,
0.026607113,
0.017331243,
-0.0036059914,
0.03546072,
0.059571866,
-0.12454768,
0.021932347,
0.02564387,
-0.11062035,
0.09607079,
-0.06733944,
-0.01182028,
0.0423393,
0.0378881,
0.1058394,
0.00734931,
0.066321366,
0.022943782,
0.049426265,
0.14638706,
-0.0067357672,
0.0043576923,
-0.029188734,
-0.009015755,
-0.08637437,
0.035848346,
0.0030120711,
-0.029328048,
0.070184804,
0.014865788,
0.028357765,
-0.040338036,
0.019171577,
0.015582609,
0.028644681,
-0.019528968,
-0.018315561,
-0.0054145255,
-0.09313447,
-0.061137658,
0.03881072,
0.02792733,
0.034151476,
-0.027465515,
0.010710185,
-0.055215303,
-0.073805,
0.021541798,
-0.015463418,
-0.024991987,
-0.004779671,
0.030454708,
-0.02407339,
0.034101877,
-0.010341885,
-0.012655972,
0.036309235,
-0.0044550677,
-0.014974223,
0.027874243,
0.09782822,
-0.026438858,
-0.005190334,
-0.019119462,
0.06202614,
0.052122016,
0.037861902,
0.012597777,
-1.7054827e-08,
-0.04997221,
-0.08913875,
-0.0035288178,
-0.015788937,
-0.021885982,
0.07185057,
-0.050171196,
-0.010661625,
-0.03058095,
-0.015772644,
0.01322944,
-0.0025733304,
-0.04212318,
0.009266956,
-0.041135434,
-0.029588273,
0.0021936113,
-0.033001017,
-0.050396364,
-0.02149836,
-0.0068135546,
0.008485492,
0.03569217,
0.025194813,
-0.016510937,
0.04917863,
0.018346637,
0.04907251,
-0.0582019,
-0.015061549,
0.04578192,
0.049921762,
0.02044503,
-0.052017137,
-0.033587772,
0.06185581,
0.11143413,
0.07770764,
0.02244692,
0.0025846648,
-0.04391288,
0.008592464,
-0.036181543,
0.0296719,
-0.017300868,
-0.094585225,
-0.05786905,
-0.065796606,
-0.061245505,
-0.104576424,
-0.029241998,
0.0013673713,
0.0060772314,
0.04078779,
-0.036728922,
0.016783627,
0.005292796,
0.030990785,
-0.054467708,
0.0048806495,
0.07091143,
0.06684519,
0.01770421,
-0.029248381
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,422 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is a test file"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.034272887,
0.0900405,
-0.114585444,
0.0021513691,
0.059019327,
-0.02748151,
-0.020571338,
0.03373777,
-0.03872984,
0.026010917,
0.1147871,
0.027154561,
-0.015938662,
-0.02185328,
-0.046722047,
-0.04638079,
-0.07416656,
-0.052859545,
-0.028124748,
0.06325527,
0.029144203,
0.047097813,
-0.05268828,
-0.0053592497,
0.030669667,
0.01769888,
-0.01687185,
0.08683223,
-0.014155632,
-0.08387485,
0.019995376,
0.07114902,
0.08367812,
0.030923046,
0.11826658,
0.028755534,
0.06955482,
-0.017287154,
-0.005806163,
0.005812646,
0.0011825147,
-0.06533827,
0.037360404,
0.018541763,
-0.0034888012,
-0.0011040586,
-0.029778237,
-0.021269588,
0.005844319,
-0.035600223,
-0.037232384,
0.012353592,
-0.06692711,
-0.023162046,
0.05686014,
0.0014791423,
0.01440185,
-0.017189784,
0.009246685,
0.06083274,
0.024673132,
0.036989614,
-0.050630055,
0.051760096,
0.10160539,
0.008477512,
-0.048004184,
-0.013003718,
0.031101642,
-0.1659611,
-0.14100891,
0.009773047,
-0.025983926,
0.05229989,
-0.007893064,
0.0078570945,
-0.08468617,
-0.044539623,
0.054151334,
-0.07042244,
-0.05768138,
-0.10078619,
0.021822996,
0.022160508,
0.0072028935,
0.13064505,
0.08020654,
-0.0044225734,
-0.018743401,
0.0075993463,
-0.031649683,
0.031955328,
-0.022171712,
0.030735254,
-0.023809722,
0.0695489,
0.016647533,
0.0095261615,
0.027464647,
0.10212388,
0.02145324,
-0.021429047,
0.015128828,
0.039440226,
-0.09434037,
-0.11546961,
0.09468322,
-0.011139115,
0.072680146,
-0.03602365,
-0.011743472,
-0.066524595,
-0.034747,
-0.10301544,
0.030228501,
-0.06316883,
-0.090848505,
0.041170754,
-0.03368485,
0.045751248,
0.07133673,
-0.031778056,
-0.05968261,
-0.017208954,
-0.032287136,
-0.058584064,
0.0673487,
-5.023248e-33,
-0.005809502,
-0.071970925,
-0.00930889,
0.09656616,
0.037086118,
-0.034771495,
-0.00472216,
0.016682126,
-0.098648354,
0.005475455,
-0.014123589,
-0.08407786,
0.0027178645,
0.04443311,
-0.01269345,
0.034540884,
-0.0005944164,
0.06320702,
-0.026761396,
-0.013525239,
0.024135783,
0.015422592,
-0.04138039,
-0.05520989,
-0.06454275,
0.031492148,
-0.0072836457,
-0.039476894,
0.059850004,
0.026700241,
0.013972591,
-0.038822647,
-0.04851447,
0.017551823,
0.020952301,
0.03522171,
0.011540296,
-0.00842795,
-0.044636253,
0.014627958,
3.2639466e-05,
-0.046966836,
0.027031295,
0.006612757,
0.06439624,
-0.044763926,
-0.02612974,
-0.016271371,
0.055233188,
0.014105759,
-0.008459233,
0.04205111,
0.050489996,
0.021618336,
0.011294852,
0.0485963,
0.017674806,
-0.004992791,
0.00193088,
0.063277334,
0.035901506,
0.03502828,
-0.06643911,
0.008779193,
-0.027297689,
-0.059879173,
-0.027194038,
-0.087292045,
0.11242319,
0.05879699,
-0.041721053,
-0.069260724,
0.064383894,
0.015849635,
-0.027780458,
-0.03755858,
-0.011723025,
0.06948493,
-0.07109373,
-0.039075296,
-0.043134894,
-0.1120962,
-0.030726664,
-0.06376309,
-0.03524182,
-0.061186828,
-0.015275632,
-0.100939795,
0.047502656,
-0.08317205,
-0.0029857687,
0.013144553,
-0.056699008,
-0.05796209,
0.06137419,
2.7670645e-33,
0.003669078,
0.06695531,
-0.055944078,
0.025168538,
0.0147572905,
0.033805534,
0.0934766,
-0.010511114,
-0.046672594,
0.14254896,
-0.015461952,
0.0067206374,
0.07682516,
-0.045769565,
0.07989758,
0.0036198904,
0.023618277,
-0.06530977,
-0.04256109,
-0.025923597,
-0.07477869,
0.1001957,
0.1257842,
0.064083636,
-0.01666794,
0.014075608,
0.025267936,
0.0017376567,
-0.013351121,
0.0117214825,
0.037724674,
0.040572807,
-0.12054958,
0.024336847,
0.034385506,
-0.10165844,
0.11865242,
-0.035707537,
-0.012689929,
0.022641081,
0.039234713,
0.10621312,
0.010647405,
0.07653686,
0.020896297,
0.06464065,
0.08582743,
-0.03212417,
0.043577865,
0.01106648,
0.023217985,
-0.06711702,
0.05536062,
-0.008119422,
-0.0268995,
0.077022836,
-0.011600607,
0.04498788,
-0.024568135,
0.020904513,
-0.0016571331,
0.029054169,
-0.038968027,
-0.013624052,
-0.019825684,
-0.057037495,
-0.014532248,
0.010170884,
0.016871484,
0.012004644,
0.019911213,
0.019217802,
-0.06554125,
-0.050251007,
0.05082798,
-0.07560525,
-0.018781837,
-0.0122035425,
0.0019368301,
-0.00351373,
0.07000184,
-0.029289605,
-0.008412919,
0.04744267,
-0.00043944066,
-0.014024816,
-0.0035281784,
0.0844005,
-0.0015739133,
0.0016869568,
-0.023196274,
0.059908636,
0.019615034,
0.054351386,
0.012312578,
-1.5289404e-08,
-0.038118448,
-0.084228516,
-0.013602922,
-0.032792244,
-0.020994218,
0.08923806,
0.005445469,
-0.07045531,
-0.03966009,
-0.018226359,
0.05718637,
-0.026399894,
-0.098825626,
0.017524764,
-0.019498266,
-0.062369697,
-0.019561017,
-0.011198561,
-0.03005754,
0.010641676,
-0.005561297,
0.053242564,
0.04418294,
0.025771322,
0.005914542,
0.059626196,
0.06883921,
0.08894957,
-0.062240407,
-0.038899083,
0.028789395,
0.087763906,
0.017739464,
-0.050055157,
-0.0009801601,
0.1297665,
0.08312503,
0.08157199,
0.0117320195,
0.006869762,
-0.072692566,
-0.0019829427,
-0.018348025,
0.0088948505,
-0.038234424,
-0.09056964,
-0.06433111,
-0.042595394,
-0.030844258,
-0.09312696,
-0.043474108,
0.012029141,
-6.677036e-05,
0.040267132,
-0.049134284,
0.014589591,
0.017469455,
-0.005167336,
-0.03331327,
0.0075517776,
0.07486923,
0.0646153,
0.04480708,
-0.02847676
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@ -57,11 +57,13 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
"inline::sqlite-vec", "inline::sqlite-vec",
"remote::milvus", "remote::milvus",
"inline::milvus", "inline::milvus",
"remote::pgvector",
], ],
"hybrid": [ "hybrid": [
"inline::sqlite-vec", "inline::sqlite-vec",
"inline::milvus", "inline::milvus",
"remote::milvus", "remote::milvus",
"remote::pgvector",
], ],
} }
supported_providers = search_mode_support.get(search_mode, []) supported_providers = search_mode_support.get(search_mode, [])

View file

@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import boto3
import pytest
from moto import mock_aws
from llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
class MockUploadFile:
def __init__(self, content: bytes, filename: str, content_type: str = "text/plain"):
self.content = content
self.filename = filename
self.content_type = content_type
async def read(self):
return self.content
@pytest.fixture
def sample_text_file():
content = b"Hello, this is a test file for the S3 Files API!"
return MockUploadFile(content, "sample_text_file-0.txt")
@pytest.fixture
def sample_text_file2():
content = b"Hello, this is a second test file for the S3 Files API!"
return MockUploadFile(content, "sample_text_file-1.txt")
@pytest.fixture
def s3_config(tmp_path):
db_path = tmp_path / "s3_files_metadata.db"
return S3FilesImplConfig(
bucket_name=f"test-bucket-{tmp_path.name}",
region="not-a-region",
auto_create_bucket=True,
metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()),
)
@pytest.fixture
def s3_client():
# we use `with mock_aws()` because @mock_aws decorator does not support
# being a generator
with mock_aws():
# must yield or the mock will be reset before it is used
yield boto3.client("s3")
@pytest.fixture
async def s3_provider(s3_config, s3_client): # s3_client provides the moto mock, don't remove it
provider = await get_adapter_impl(s3_config, {})
yield provider
await provider.shutdown()
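As a usage note, here is a minimal sketch of a test consuming these shared fixtures. The test itself is illustrative, not part of this diff; the asyncio marker and the filename attribute on the returned file object are assumptions.

# Illustrative test built on the conftest fixtures above; `s3_provider`
# pulls in the moto-mocked S3 client transitively via `s3_client`.
import pytest

from llama_stack.apis.files import OpenAIFilePurpose


@pytest.mark.asyncio  # assumption: asyncio mode is not already set to auto
async def test_upload_roundtrip(s3_provider, sample_text_file):
    uploaded = await s3_provider.openai_upload_file(
        file=sample_text_file,
        purpose=OpenAIFilePurpose.ASSISTANTS,
    )
    # the provider should echo back the original filename (assumed attribute name)
    assert uploaded.filename == sample_text_file.filename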

View file

@ -6,63 +6,11 @@
 from unittest.mock import patch

-import boto3
 import pytest
 from botocore.exceptions import ClientError
-from moto import mock_aws

 from llama_stack.apis.common.errors import ResourceNotFoundError
 from llama_stack.apis.files import OpenAIFilePurpose
-from llama_stack.providers.remote.files.s3 import (
-    S3FilesImplConfig,
-    get_adapter_impl,
-)
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
-
-
-class MockUploadFile:
-    def __init__(self, content: bytes, filename: str, content_type: str = "text/plain"):
-        self.content = content
-        self.filename = filename
-        self.content_type = content_type
-
-    async def read(self):
-        return self.content
-
-
-@pytest.fixture
-def s3_config(tmp_path):
-    db_path = tmp_path / "s3_files_metadata.db"
-    return S3FilesImplConfig(
-        bucket_name="test-bucket",
-        region="not-a-region",
-        auto_create_bucket=True,
-        metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()),
-    )
-
-
-@pytest.fixture
-def s3_client():
-    """Create a mocked S3 client for testing."""
-    # we use `with mock_aws()` because @mock_aws decorator does not support being a generator
-    with mock_aws():
-        # must yield or the mock will be reset before it is used
-        yield boto3.client("s3")
-
-
-@pytest.fixture
-async def s3_provider(s3_config, s3_client):
-    """Create an S3 files provider with mocked S3 for testing."""
-    provider = await get_adapter_impl(s3_config, {})
-    yield provider
-    await provider.shutdown()
-
-
-@pytest.fixture
-def sample_text_file():
-    content = b"Hello, this is a test file for the S3 Files API!"
-    return MockUploadFile(content, "sample_text_file.txt")
-

 class TestS3FilesImpl:
@ -143,7 +91,7 @@ class TestS3FilesImpl:
             s3_client.head_object(Bucket=s3_config.bucket_name, Key=uploaded.id)
         assert exc_info.value.response["Error"]["Code"] == "404"

-    async def test_list_files(self, s3_provider, sample_text_file):
+    async def test_list_files(self, s3_provider, sample_text_file, sample_text_file2):
         """Test listing files after uploading some."""
         sample_text_file.filename = "test_list_files_with_content_file1"
         file1 = await s3_provider.openai_upload_file(
@ -151,9 +99,9 @@ class TestS3FilesImpl:
             purpose=OpenAIFilePurpose.ASSISTANTS,
         )

-        file2_content = MockUploadFile(b"Second file content", "test_list_files_with_content_file2")
+        sample_text_file2.filename = "test_list_files_with_content_file2"
         file2 = await s3_provider.openai_upload_file(
-            file=file2_content,
+            file=sample_text_file2,
             purpose=OpenAIFilePurpose.BATCH,
         )
@ -164,7 +112,7 @@ class TestS3FilesImpl:
         assert file1.id in file_ids
         assert file2.id in file_ids

-    async def test_list_files_with_purpose_filter(self, s3_provider, sample_text_file):
+    async def test_list_files_with_purpose_filter(self, s3_provider, sample_text_file, sample_text_file2):
         """Test listing files with purpose filter."""
         sample_text_file.filename = "test_list_files_with_purpose_filter_file1"
         file1 = await s3_provider.openai_upload_file(
@ -172,9 +120,9 @@ class TestS3FilesImpl:
             purpose=OpenAIFilePurpose.ASSISTANTS,
         )

-        file2_content = MockUploadFile(b"Batch file content", "test_list_files_with_purpose_filter_file2")
+        sample_text_file2.filename = "test_list_files_with_purpose_filter_file2"
         await s3_provider.openai_upload_file(
-            file=file2_content,
+            file=sample_text_file2,
             purpose=OpenAIFilePurpose.BATCH,
         )
@ -249,3 +197,104 @@ class TestS3FilesImpl:
files_list = await s3_provider.openai_list_files() files_list = await s3_provider.openai_list_files()
assert len(files_list.data) == 0, "No file metadata should remain after failed upload" assert len(files_list.data) == 0, "No file metadata should remain after failed upload"
@pytest.mark.parametrize("purpose", [p for p in OpenAIFilePurpose if p != OpenAIFilePurpose.BATCH])
async def test_default_no_expiration(self, s3_provider, sample_text_file, purpose):
"""Test that by default files have no expiration."""
sample_text_file.filename = "test_default_no_expiration"
uploaded = await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=purpose,
)
assert uploaded.expires_at is None, "By default files should have no expiration"
async def test_default_batch_expiration(self, s3_provider, sample_text_file):
"""Test that by default batch files have an expiration."""
sample_text_file.filename = "test_default_batch_an_expiration"
uploaded = await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.BATCH,
)
assert uploaded.expires_at is not None, "By default batch files should have an expiration"
thirty_days_seconds = 30 * 24 * 3600
assert uploaded.expires_at == uploaded.created_at + thirty_days_seconds, (
"Batch default expiration should be 30 days"
)
async def test_expired_file_is_unavailable(self, s3_provider, sample_text_file, s3_config, s3_client):
"""Uploaded file that has expired should not be listed or retrievable/deletable."""
with patch.object(s3_provider, "_now") as mock_now: # control time
two_hours = 2 * 60 * 60
mock_now.return_value = 0
sample_text_file.filename = "test_expired_file"
uploaded = await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.ASSISTANTS,
expires_after_anchor="created_at",
expires_after_seconds=two_hours,
)
mock_now.return_value = two_hours * 2 # fast forward 4 hours
listed = await s3_provider.openai_list_files()
assert uploaded.id not in [f.id for f in listed.data]
with pytest.raises(ResourceNotFoundError, match="not found"):
await s3_provider.openai_retrieve_file(uploaded.id)
with pytest.raises(ResourceNotFoundError, match="not found"):
await s3_provider.openai_retrieve_file_content(uploaded.id)
with pytest.raises(ResourceNotFoundError, match="not found"):
await s3_provider.openai_delete_file(uploaded.id)
with pytest.raises(ClientError) as exc_info:
s3_client.head_object(Bucket=s3_config.bucket_name, Key=uploaded.id)
assert exc_info.value.response["Error"]["Code"] == "404"
with pytest.raises(ResourceNotFoundError, match="not found"):
await s3_provider._get_file(uploaded.id, return_expired=True)
async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
"""Unsupported anchor value should raise ValueError."""
sample_text_file.filename = "test_unsupported_expires_after_anchor"
with pytest.raises(ValueError, match="Input should be 'created_at'"):
await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.ASSISTANTS,
expires_after_anchor="now",
expires_after_seconds=3600,
)
async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
"""Non-integer seconds in expires_after should raise ValueError."""
sample_text_file.filename = "test_nonint_expires_after_seconds"
with pytest.raises(ValueError, match="should be a valid integer"):
await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.ASSISTANTS,
expires_after_anchor="created_at",
expires_after_seconds="many",
)
async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
"""Seconds outside allowed range should raise ValueError."""
with pytest.raises(ValueError, match="greater than or equal to 3600"):
await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.ASSISTANTS,
expires_after_anchor="created_at",
expires_after_seconds=3599,
)
with pytest.raises(ValueError, match="less than or equal to 2592000"):
await s3_provider.openai_upload_file(
file=sample_text_file,
purpose=OpenAIFilePurpose.ASSISTANTS,
expires_after_anchor="created_at",
expires_after_seconds=2592001,
)
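# A minimal usage sketch of the expiration parameters exercised above, assuming a
# configured s3_provider and an upload payload named `upload_file`; the helper
# function itself is hypothetical, only the provider call and its parameters come
# from these tests.
async def upload_with_two_hour_expiry(s3_provider, upload_file):
    two_hours = 2 * 60 * 60  # must lie within [3600, 2592000] seconds
    uploaded = await s3_provider.openai_upload_file(
        file=upload_file,
        purpose=OpenAIFilePurpose.ASSISTANTS,
        expires_after_anchor="created_at",  # the only anchor these tests accept
        expires_after_seconds=two_hours,
    )
    # Expiration is anchored to creation time; batch files default to 30 days.
    assert uploaded.expires_at == uploaded.created_at + two_hours
    return uploaded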

View file

@ -0,0 +1,89 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import patch
import pytest
from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.files import OpenAIFilePurpose
from llama_stack.core.datatypes import User
from llama_stack.providers.remote.files.s3.files import S3FilesImpl
async def test_listing_hides_other_users_file(s3_provider, sample_text_file):
"""Listing should not show files uploaded by other users."""
user_a = User("user-a", {"roles": ["team-a"]})
user_b = User("user-b", {"roles": ["team-b"]})
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_a
uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS)
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_b
listed = await s3_provider.openai_list_files()
assert all(f.id != uploaded.id for f in listed.data)
@pytest.mark.parametrize(
"op",
[S3FilesImpl.openai_retrieve_file, S3FilesImpl.openai_retrieve_file_content, S3FilesImpl.openai_delete_file],
ids=["retrieve", "content", "delete"],
)
async def test_cannot_access_other_user_file(s3_provider, sample_text_file, op):
"""Operations (metadata/content/delete) on another user's file should raise ResourceNotFoundError.
`op` is an async callable (provider, file_id) -> awaits the requested operation.
"""
user_a = User("user-a", {"roles": ["team-a"]})
user_b = User("user-b", {"roles": ["team-b"]})
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_a
uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS)
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_b
with pytest.raises(ResourceNotFoundError):
await op(s3_provider, uploaded.id)
async def test_shared_role_allows_listing(s3_provider, sample_text_file):
"""Listing should show files uploaded by other users when roles are shared."""
user_a = User("user-a", {"roles": ["shared-role"]})
user_b = User("user-b", {"roles": ["shared-role"]})
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_a
uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS)
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_b
listed = await s3_provider.openai_list_files()
assert any(f.id == uploaded.id for f in listed.data)
@pytest.mark.parametrize(
"op",
[S3FilesImpl.openai_retrieve_file, S3FilesImpl.openai_retrieve_file_content, S3FilesImpl.openai_delete_file],
ids=["retrieve", "content", "delete"],
)
async def test_shared_role_allows_access(s3_provider, sample_text_file, op):
"""Operations (metadata/content/delete) on another user's file should succeed when users share a role.
`op` is an async callable (provider, file_id) -> awaits the requested operation.
"""
user_x = User("user-x", {"roles": ["shared-role"]})
user_y = User("user-y", {"roles": ["shared-role"]})
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_x
uploaded = await s3_provider.openai_upload_file(file=sample_text_file, purpose=OpenAIFilePurpose.ASSISTANTS)
with patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") as mock_get_user:
mock_get_user.return_value = user_y
await op(s3_provider, uploaded.id)
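# A rough sketch of the access rule these tests encode, assuming row-level
# ownership metadata carrying the uploader's roles; this helper is hypothetical
# and not the provider's actual implementation.
def can_access_sketch(owner_roles: set[str], requester_roles: set[str]) -> bool:
    # Access is granted only when the requester shares at least one role with
    # the file's owner; otherwise the file behaves as if it does not exist,
    # surfacing ResourceNotFoundError rather than a permission error.
    return bool(owner_roles & requester_roles)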

View file

@ -0,0 +1,248 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.utils.memory.vector_store import RERANKER_TYPE_RRF, RERANKER_TYPE_WEIGHTED
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
class TestNormalizeScores:
"""Test cases for score normalization."""
def test_normalize_scores_basic(self):
"""Test basic score normalization."""
scores = {"doc1": 10.0, "doc2": 5.0, "doc3": 0.0}
normalized = WeightedInMemoryAggregator._normalize_scores(scores)
assert normalized["doc1"] == 1.0 # Max score
assert normalized["doc3"] == 0.0 # Min score
assert normalized["doc2"] == 0.5 # Middle score
assert all(0 <= score <= 1 for score in normalized.values())
def test_normalize_scores_identical(self):
"""Test normalization when all scores are identical."""
scores = {"doc1": 5.0, "doc2": 5.0, "doc3": 5.0}
normalized = WeightedInMemoryAggregator._normalize_scores(scores)
# All scores should be 1.0 when identical
assert all(score == 1.0 for score in normalized.values())
def test_normalize_scores_empty(self):
"""Test normalization with empty scores."""
scores = {}
normalized = WeightedInMemoryAggregator._normalize_scores(scores)
assert normalized == {}
def test_normalize_scores_single(self):
"""Test normalization with single score."""
scores = {"doc1": 7.5}
normalized = WeightedInMemoryAggregator._normalize_scores(scores)
assert normalized["doc1"] == 1.0
class TestWeightedRerank:
"""Test cases for weighted reranking."""
def test_weighted_rerank_basic(self):
"""Test basic weighted reranking."""
vector_scores = {"doc1": 0.9, "doc2": 0.7, "doc3": 0.5}
keyword_scores = {"doc1": 0.6, "doc2": 0.8, "doc4": 0.9}
combined = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=0.5)
# Should include all documents
expected_docs = {"doc1", "doc2", "doc3", "doc4"}
assert set(combined.keys()) == expected_docs
# All scores should be between 0 and 1
assert all(0 <= score <= 1 for score in combined.values())
# doc1 appears in both searches, should have higher combined score
assert combined["doc1"] > 0
def test_weighted_rerank_alpha_zero(self):
"""Test weighted reranking with alpha=0 (keyword only)."""
vector_scores = {"doc1": 0.9, "doc2": 0.7, "doc3": 0.5} # All docs present in vector
keyword_scores = {"doc1": 0.1, "doc2": 0.3, "doc3": 0.9} # All docs present in keyword
combined = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=0.0)
# Alpha=0 means vector scores are ignored, keyword scores dominate
# doc3 should score highest since it has highest keyword score
assert combined["doc3"] > combined["doc2"] > combined["doc1"]
def test_weighted_rerank_alpha_one(self):
"""Test weighted reranking with alpha=1 (vector only)."""
vector_scores = {"doc1": 0.9, "doc2": 0.7, "doc3": 0.5} # All docs present in vector
keyword_scores = {"doc1": 0.1, "doc2": 0.3, "doc3": 0.9} # All docs present in keyword
combined = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=1.0)
# Alpha=1 means keyword scores are ignored, vector scores dominate
# doc1 should score highest since it has highest vector score
assert combined["doc1"] > combined["doc2"] > combined["doc3"]
def test_weighted_rerank_no_overlap(self):
"""Test weighted reranking with no overlapping documents."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc3": 0.8, "doc4": 0.6}
combined = WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha=0.5)
assert len(combined) == 4
# With min-max normalization, lowest scoring docs in each group get 0.0
# but highest scoring docs should get positive scores
assert all(score >= 0 for score in combined.values())
assert combined["doc1"] > 0 # highest vector score
assert combined["doc3"] > 0 # highest keyword score
class TestRRFRerank:
"""Test cases for RRF (Reciprocal Rank Fusion) reranking."""
def test_rrf_rerank_basic(self):
"""Test basic RRF reranking."""
vector_scores = {"doc1": 0.9, "doc2": 0.7, "doc3": 0.5}
keyword_scores = {"doc1": 0.6, "doc2": 0.8, "doc4": 0.9}
combined = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=60.0)
# Should include all documents
expected_docs = {"doc1", "doc2", "doc3", "doc4"}
assert set(combined.keys()) == expected_docs
# All scores should be positive
assert all(score > 0 for score in combined.values())
# Documents appearing in both searches should have higher scores
# doc1 and doc2 appear in both, doc3 and doc4 appear in only one
assert combined["doc1"] > combined["doc3"]
assert combined["doc2"] > combined["doc4"]
def test_rrf_rerank_rank_calculation(self):
"""Test that RRF correctly calculates ranks."""
# Create clear ranking order
vector_scores = {"doc1": 1.0, "doc2": 0.8, "doc3": 0.6} # Ranks: 1, 2, 3
keyword_scores = {"doc1": 0.5, "doc2": 1.0, "doc3": 0.7} # Ranks: 3, 1, 2
combined = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=60.0)
# doc1: rank 1 in vector, rank 3 in keyword
# doc2: rank 2 in vector, rank 1 in keyword
# doc3: rank 3 in vector, rank 2 in keyword
# doc2 should have the highest combined score (ranks 2+1=3)
# followed by doc1 (ranks 1+3=4) and doc3 (ranks 3+2=5)
# Remember: lower rank sum = higher RRF score
assert combined["doc2"] > combined["doc1"] > combined["doc3"]
def test_rrf_rerank_impact_factor(self):
"""Test that impact factor affects RRF scores."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc1": 0.8, "doc2": 0.6}
combined_low = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=10.0)
combined_high = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=100.0)
# Higher impact factor should generally result in lower scores
# (because 1/(k+r) decreases as k increases)
assert combined_low["doc1"] > combined_high["doc1"]
assert combined_low["doc2"] > combined_high["doc2"]
def test_rrf_rerank_missing_documents(self):
"""Test RRF handling of documents missing from one search."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc1": 0.8, "doc3": 0.6}
combined = WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor=60.0)
# Should include all documents
assert len(combined) == 3
# doc1 appears in both searches, should have highest score
assert combined["doc1"] > combined["doc2"]
assert combined["doc1"] > combined["doc3"]
class TestCombineSearchResults:
"""Test cases for the main combine_search_results function."""
def test_combine_search_results_rrf_default(self):
"""Test combining with RRF as default."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc1": 0.6, "doc3": 0.8}
combined = WeightedInMemoryAggregator.combine_search_results(vector_scores, keyword_scores)
# Should default to RRF
assert len(combined) == 3
assert all(score > 0 for score in combined.values())
def test_combine_search_results_rrf_explicit(self):
"""Test combining with explicit RRF."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc1": 0.6, "doc3": 0.8}
combined = WeightedInMemoryAggregator.combine_search_results(
vector_scores, keyword_scores, reranker_type=RERANKER_TYPE_RRF, reranker_params={"impact_factor": 50.0}
)
assert len(combined) == 3
assert all(score > 0 for score in combined.values())
def test_combine_search_results_weighted(self):
"""Test combining with weighted reranking."""
vector_scores = {"doc1": 0.9, "doc2": 0.7}
keyword_scores = {"doc1": 0.6, "doc3": 0.8}
combined = WeightedInMemoryAggregator.combine_search_results(
vector_scores, keyword_scores, reranker_type=RERANKER_TYPE_WEIGHTED, reranker_params={"alpha": 0.3}
)
assert len(combined) == 3
assert all(0 <= score <= 1 for score in combined.values())
def test_combine_search_results_unknown_type(self):
"""Test combining with unknown reranker type defaults to RRF."""
vector_scores = {"doc1": 0.9}
keyword_scores = {"doc2": 0.8}
combined = WeightedInMemoryAggregator.combine_search_results(
vector_scores, keyword_scores, reranker_type="unknown_type"
)
# Should fall back to RRF
assert len(combined) == 2
assert all(score > 0 for score in combined.values())
def test_combine_search_results_empty_params(self):
"""Test combining with empty parameters."""
vector_scores = {"doc1": 0.9}
keyword_scores = {"doc2": 0.8}
combined = WeightedInMemoryAggregator.combine_search_results(vector_scores, keyword_scores, reranker_params={})
# Should use default parameters
assert len(combined) == 2
assert all(score > 0 for score in combined.values())
def test_combine_search_results_empty_scores(self):
"""Test combining with empty score dictionaries."""
# Test with empty vector scores
combined = WeightedInMemoryAggregator.combine_search_results({}, {"doc1": 0.8})
assert len(combined) == 1
assert combined["doc1"] > 0
# Test with empty keyword scores
combined = WeightedInMemoryAggregator.combine_search_results({"doc1": 0.9}, {})
assert len(combined) == 1
assert combined["doc1"] > 0
# Test with both empty
combined = WeightedInMemoryAggregator.combine_search_results({}, {})
assert len(combined) == 0
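# Typical call shape for the dispatcher covered above; an unknown or missing
# reranker_type falls back to RRF, and defaults apply when reranker_params is omitted.
example_combined = WeightedInMemoryAggregator.combine_search_results(
    {"doc1": 0.9, "doc2": 0.7},  # vector scores
    {"doc1": 0.6, "doc3": 0.8},  # keyword scores
    reranker_type=RERANKER_TYPE_WEIGHTED,  # or RERANKER_TYPE_RRF
    reranker_params={"alpha": 0.3},  # or {"impact_factor": 60.0} for RRF
)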

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import random
from unittest.mock import AsyncMock, MagicMock, patch
import numpy as np
import pytest
@ -12,7 +13,7 @@ from chromadb import PersistentClient
from pymilvus import MilvusClient, connections
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.providers.inline.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
@ -22,6 +23,8 @@ from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConf
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex, MilvusVectorIOAdapter
from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter
from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
EMBEDDING_DIMENSION = 384
@ -29,7 +32,7 @@ COLLECTION_PREFIX = "test_collection"
MILVUS_ALIAS = "test_milvus"
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma"])
@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma", "pgvector"])
def vector_provider(request):
    return request.param
@ -333,15 +336,127 @@ async def qdrant_vec_index(qdrant_vec_db_path, embedding_dimension):
await index.delete()
@pytest.fixture
def mock_psycopg2_connection():
connection = MagicMock()
cursor = MagicMock()
cursor.__enter__ = MagicMock(return_value=cursor)
cursor.__exit__ = MagicMock()
connection.cursor.return_value = cursor
return connection, cursor
@pytest.fixture
async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
provider_id="pgvector",
provider_resource_id="pgvector:test-vector-db",
)
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"):
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
index._test_chunks = []
original_add_chunks = index.add_chunks
async def mock_add_chunks(chunks, embeddings):
index._test_chunks = list(chunks)
await original_add_chunks(chunks, embeddings)
index.add_chunks = mock_add_chunks
async def mock_query_vector(embedding, k, score_threshold):
chunks = index._test_chunks[:k] if hasattr(index, "_test_chunks") else []
scores = [1.0] * len(chunks)
return QueryChunksResponse(chunks=chunks, scores=scores)
index.query_vector = mock_query_vector
yield index
@pytest.fixture
async def pgvector_vec_adapter(mock_inference_api, embedding_dimension):
config = PGVectorVectorIOConfig(
host="localhost",
port=5432,
db="test_db",
user="test_user",
password="test_password",
kvstore=SqliteKVStoreConfig(),
)
adapter = PGVectorVectorIOAdapter(config, mock_inference_api, None)
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2.connect") as mock_connect:
mock_conn = MagicMock()
mock_cursor = MagicMock()
mock_cursor.__enter__ = MagicMock(return_value=mock_cursor)
mock_cursor.__exit__ = MagicMock()
mock_conn.cursor.return_value = mock_cursor
mock_conn.autocommit = True
mock_connect.return_value = mock_conn
with patch(
"llama_stack.providers.remote.vector_io.pgvector.pgvector.check_extension_version"
) as mock_check_version:
mock_check_version.return_value = "0.5.1"
with patch("llama_stack.providers.utils.kvstore.kvstore_impl") as mock_kvstore_impl:
mock_kvstore = AsyncMock()
mock_kvstore_impl.return_value = mock_kvstore
with patch.object(adapter, "initialize_openai_vector_stores", new_callable=AsyncMock):
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.upsert_models"):
await adapter.initialize()
adapter.conn = mock_conn
async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None):
index = await adapter._get_and_cache_vector_db_index(vector_db_id)
if not index:
raise ValueError(f"Vector DB {vector_db_id} not found")
await index.insert_chunks(chunks)
adapter.insert_chunks = mock_insert_chunks
async def mock_query_chunks(vector_db_id, query, params=None):
index = await adapter._get_and_cache_vector_db_index(vector_db_id)
if not index:
raise ValueError(f"Vector DB {vector_db_id} not found")
return await index.query_chunks(query, params)
adapter.query_chunks = mock_query_chunks
test_vector_db = VectorDB(
identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}",
provider_id="test_provider",
embedding_model="test_model",
embedding_dimension=embedding_dimension,
)
await adapter.register_vector_db(test_vector_db)
adapter.test_collection_id = test_vector_db.identifier
yield adapter
await adapter.shutdown()
@pytest.fixture
def vector_io_adapter(vector_provider, request):
"""Returns the appropriate vector IO adapter based on the provider parameter."""
vector_provider_dict = {
"milvus": "milvus_vec_adapter",
"faiss": "faiss_vec_adapter",
"sqlite_vec": "sqlite_vec_adapter",
"chroma": "chroma_vec_adapter",
"qdrant": "qdrant_vec_adapter",
"pgvector": "pgvector_vec_adapter",
}
return request.getfixturevalue(vector_provider_dict[vector_provider])

View file

@ -0,0 +1,138 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
from unittest.mock import patch
import pytest
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex
PGVECTOR_PROVIDER = "pgvector"
@pytest.fixture(scope="session")
def loop():
return asyncio.new_event_loop()
@pytest.fixture
def embedding_dimension():
"""Default embedding dimension for tests."""
return 384
@pytest.fixture
async def pgvector_index(embedding_dimension, mock_psycopg2_connection):
"""Create a PGVectorIndex instance with mocked database connection."""
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
provider_id=PGVECTOR_PROVIDER,
provider_resource_id=f"{PGVECTOR_PROVIDER}:test-vector-db",
)
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
# Use explicit COSINE distance metric for consistent testing
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
return index, cursor
class TestPGVectorIndex:
def test_distance_metric_validation(self, embedding_dimension, mock_psycopg2_connection):
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
provider_id=PGVECTOR_PROVIDER,
provider_resource_id=f"{PGVECTOR_PROVIDER}:test-vector-db",
)
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="L2")
assert index.distance_metric == "L2"
with pytest.raises(ValueError, match="Distance metric 'INVALID' is not supported"):
PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="INVALID")
def test_get_pgvector_search_function(self, pgvector_index):
index, cursor = pgvector_index
supported_metrics = index.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION
for metric, function in supported_metrics.items():
index.distance_metric = metric
assert index.get_pgvector_search_function() == function
def test_check_distance_metric_availability(self, pgvector_index):
index, cursor = pgvector_index
supported_metrics = index.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION
for metric in supported_metrics:
index.check_distance_metric_availability(metric)
with pytest.raises(ValueError, match="Distance metric 'INVALID' is not supported"):
index.check_distance_metric_availability("INVALID")
def test_constructor_invalid_distance_metric(self, embedding_dimension, mock_psycopg2_connection):
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
provider_id=PGVECTOR_PROVIDER,
provider_resource_id=f"{PGVECTOR_PROVIDER}:test-vector-db",
)
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
with pytest.raises(ValueError, match="Distance metric 'INVALID_METRIC' is not supported by PGVector"):
PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="INVALID_METRIC")
with pytest.raises(ValueError, match="Supported metrics are:"):
PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="UNKNOWN")
try:
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
assert index.distance_metric == "COSINE"
except ValueError:
pytest.fail("Valid distance metric 'COSINE' should not raise ValueError")
def test_constructor_all_supported_distance_metrics(self, embedding_dimension, mock_psycopg2_connection):
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
provider_id=PGVECTOR_PROVIDER,
provider_resource_id=f"{PGVECTOR_PROVIDER}:test-vector-db",
)
supported_metrics = ["L2", "L1", "COSINE", "INNER_PRODUCT", "HAMMING", "JACCARD"]
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
for metric in supported_metrics:
try:
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric=metric)
assert index.distance_metric == metric
expected_operators = {
"L2": "<->",
"L1": "<+>",
"COSINE": "<=>",
"INNER_PRODUCT": "<#>",
"HAMMING": "<~>",
"JACCARD": "<%>",
}
assert index.get_pgvector_search_function() == expected_operators[metric]
except Exception as e:
pytest.fail(f"Valid distance metric '{metric}' should not raise exception: {e}")

View file

@ -11,7 +11,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from llama_stack.apis.inference import EmbeddingsResponse, Inference
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference.inference import OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage
from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorDB,
@ -68,7 +69,13 @@ def mock_vector_db_store(mock_vector_db) -> MagicMock:
@pytest.fixture
def mock_api_service(sample_embeddings):
mock_api_service = MagicMock(spec=Inference)
mock_api_service.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings))
mock_api_service.openai_embeddings = AsyncMock(
return_value=OpenAIEmbeddingsResponse(
model="mock-embedding-model",
data=[OpenAIEmbeddingData(embedding=sample, index=i) for i, sample in enumerate(sample_embeddings)],
usage=OpenAIEmbeddingUsage(prompt_tokens=10, total_tokens=10),
)
)
return mock_api_service

View file

@ -13,6 +13,7 @@ from unittest.mock import AsyncMock, MagicMock
import numpy as np
import pytest
from llama_stack.apis.inference.inference import OpenAIEmbeddingData
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_io import Chunk
from llama_stack.providers.utils.memory.vector_store import (
@ -218,11 +219,16 @@ class TestVectorDBWithIndex:
Chunk(content="Test 2", embedding=None, metadata={}), Chunk(content="Test 2", embedding=None, metadata={}),
] ]
mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] mock_inference_api.openai_embeddings.return_value.data = [
OpenAIEmbeddingData(embedding=[0.1, 0.2, 0.3], index=0),
OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
]
await vector_db_with_index.insert_chunks(chunks) await vector_db_with_index.insert_chunks(chunks)
mock_inference_api.embeddings.assert_called_once_with("test-model without embeddings", ["Test 1", "Test 2"]) mock_inference_api.openai_embeddings.assert_called_once_with(
"test-model without embeddings", ["Test 1", "Test 2"]
)
mock_index.add_chunks.assert_called_once() mock_index.add_chunks.assert_called_once()
args = mock_index.add_chunks.call_args[0] args = mock_index.add_chunks.call_args[0]
assert args[0] == chunks assert args[0] == chunks
@ -246,7 +252,7 @@ class TestVectorDBWithIndex:
await vector_db_with_index.insert_chunks(chunks)
mock_inference_api.embeddings.assert_not_called()
mock_inference_api.openai_embeddings.assert_not_called()
mock_index.add_chunks.assert_called_once()
args = mock_index.add_chunks.call_args[0]
assert args[0] == chunks
@ -288,7 +294,7 @@ class TestVectorDBWithIndex:
with pytest.raises(ValueError, match="has dimension 4, expected 3"): with pytest.raises(ValueError, match="has dimension 4, expected 3"):
await vector_db_with_index.insert_chunks(chunks_wrong_dim) await vector_db_with_index.insert_chunks(chunks_wrong_dim)
mock_inference_api.embeddings.assert_not_called() mock_inference_api.openai_embeddings.assert_not_called()
mock_index.add_chunks.assert_not_called() mock_index.add_chunks.assert_not_called()
async def test_insert_chunks_with_partially_precomputed_embeddings(self): async def test_insert_chunks_with_partially_precomputed_embeddings(self):
@ -308,11 +314,14 @@ class TestVectorDBWithIndex:
Chunk(content="Test 3", embedding=None, metadata={}), Chunk(content="Test 3", embedding=None, metadata={}),
] ]
mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.1, 0.1], [0.3, 0.3, 0.3]] mock_inference_api.openai_embeddings.return_value.data = [
OpenAIEmbeddingData(embedding=[0.1, 0.1, 0.1], index=0),
OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
]
await vector_db_with_index.insert_chunks(chunks) await vector_db_with_index.insert_chunks(chunks)
mock_inference_api.embeddings.assert_called_once_with( mock_inference_api.openai_embeddings.assert_called_once_with(
"test-model with partial embeddings", ["Test 1", "Test 3"] "test-model with partial embeddings", ["Test 1", "Test 3"]
) )
mock_index.add_chunks.assert_called_once() mock_index.add_chunks.assert_called_once()

View file

@ -88,3 +88,10 @@ def test_nested_structures(setup_env_vars):
}
expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}}
assert replace_env_vars(data) == expected
def test_explicit_strings_preserved(setup_env_vars):
# Explicit strings that look like numbers/booleans should remain strings
data = {"port": "8080", "enabled": "true", "count": "123", "ratio": "3.14"}
expected = {"port": "8080", "enabled": "true", "count": "123", "ratio": "3.14"}
assert replace_env_vars(data) == expected

View file

@ -332,6 +332,63 @@ async def test_sqlstore_pagination_error_handling():
)
async def test_where_operator_gt_and_update_delete():
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path))
await store.create_table(
"items",
{
"id": ColumnType.INTEGER,
"value": ColumnType.INTEGER,
"name": ColumnType.STRING,
},
)
await store.insert("items", {"id": 1, "value": 10, "name": "one"})
await store.insert("items", {"id": 2, "value": 20, "name": "two"})
await store.insert("items", {"id": 3, "value": 30, "name": "three"})
result = await store.fetch_all("items", where={"value": {">": 15}})
assert {r["id"] for r in result.data} == {2, 3}
row = await store.fetch_one("items", where={"value": {">=": 30}})
assert row["id"] == 3
await store.update("items", {"name": "small"}, {"value": {"<": 25}})
rows = (await store.fetch_all("items")).data
names = {r["id"]: r["name"] for r in rows}
assert names[1] == "small"
assert names[2] == "small"
assert names[3] == "three"
await store.delete("items", {"id": {"==": 2}})
rows_after = (await store.fetch_all("items")).data
assert {r["id"] for r in rows_after} == {1, 3}
async def test_where_operator_edge_cases():
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path))
await store.create_table(
"events",
{"id": ColumnType.STRING, "ts": ColumnType.INTEGER},
)
base = 1024
await store.insert("events", {"id": "a", "ts": base - 10})
await store.insert("events", {"id": "b", "ts": base + 10})
row = await store.fetch_one("events", where={"id": "a"})
assert row["id"] == "a"
with pytest.raises(ValueError, match="Unsupported operator"):
await store.fetch_all("events", where={"ts": {"!=": base}})
async def test_sqlstore_pagination_custom_key_column():
"""Test pagination with custom primary key column (not 'id')."""
with TemporaryDirectory() as tmp_dir:

uv.lock generated
View file

@ -1748,26 +1748,10 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" }, { url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" },
] ]
[[package]]
name = "llama-api-client"
version = "0.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "distro" },
{ name = "httpx" },
{ name = "pydantic" },
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/59/41/fa8521a0faff96bf5f810e2ab5b78c638f5ba44afd09aa86f94b6a1226ad/llama_api_client-0.2.0.tar.gz", hash = "sha256:b9bd5f5ad332b9133f0775a105f0940f057cbb311891f1d4487247d001c31f17", size = 117108, upload-time = "2025-08-12T17:07:07.734Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1d/11/198e65c1a50d9e839b4e3d346b4bd0f624e532446e468d1aba6c74ed7484/llama_api_client-0.2.0-py3-none-any.whl", hash = "sha256:50614ed991e1a72439e6a624a97e6000615ada1b9e2046ecc026fe62f107663c", size = 85002, upload-time = "2025-08-12T17:07:06.293Z" },
]
[[package]]
name = "llama-stack"
version = "0.2.19"
version = "0.2.20"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@ -1780,7 +1764,6 @@ dependencies = [
{ name = "huggingface-hub" }, { name = "huggingface-hub" },
{ name = "jinja2" }, { name = "jinja2" },
{ name = "jsonschema" }, { name = "jsonschema" },
{ name = "llama-api-client" },
{ name = "llama-stack-client" }, { name = "llama-stack-client" },
{ name = "openai" }, { name = "openai" },
{ name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-exporter-otlp-proto-http" },
@ -1859,6 +1842,7 @@ test = [
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite" }, { name = "milvus-lite" },
{ name = "openai" }, { name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" }, { name = "pymilvus" },
{ name = "pypdf" }, { name = "pypdf" },
{ name = "requests" }, { name = "requests" },
@ -1884,6 +1868,7 @@ unit = [
{ name = "moto", extra = ["s3"] }, { name = "moto", extra = ["s3"] },
{ name = "ollama" }, { name = "ollama" },
{ name = "openai" }, { name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" }, { name = "pymilvus" },
{ name = "pypdf" }, { name = "pypdf" },
{ name = "qdrant-client" }, { name = "qdrant-client" },
@ -1904,10 +1889,9 @@ requires-dist = [
{ name = "huggingface-hub", specifier = ">=0.34.0,<1.0" }, { name = "huggingface-hub", specifier = ">=0.34.0,<1.0" },
{ name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" }, { name = "jsonschema" },
{ name = "llama-api-client", specifier = ">=0.1.2" }, { name = "llama-stack-client", specifier = ">=0.2.20" },
{ name = "llama-stack-client", specifier = ">=0.2.19" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.20" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.19" }, { name = "openai", specifier = ">=1.99.6" },
{ name = "openai", specifier = ">=1.99.6,<1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
{ name = "pandas", marker = "extra == 'ui'" }, { name = "pandas", marker = "extra == 'ui'" },
@ -1928,7 +1912,7 @@ requires-dist = [
provides-extras = ["ui"] provides-extras = ["ui"]
[package.metadata.requires-dev] [package.metadata.requires-dev]
benchmark = [{ name = "locust", specifier = ">=2.37.14" }] benchmark = [{ name = "locust", specifier = ">=2.39.1" }]
codegen = [ codegen = [
{ name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", specifier = ">=3.1.6" },
{ name = "pydantic" }, { name = "pydantic" },
@ -1977,8 +1961,9 @@ test = [
{ name = "datasets" }, { name = "datasets" },
{ name = "mcp" }, { name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" }, { name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai" }, { name = "openai", specifier = ">=1.100.0" },
{ name = "pymilvus", specifier = ">=2.5.12" }, { name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" }, { name = "pypdf" },
{ name = "requests" }, { name = "requests" },
{ name = "sqlalchemy" }, { name = "sqlalchemy" },
@ -2002,7 +1987,8 @@ unit = [
{ name = "moto", extras = ["s3"], specifier = ">=5.1.10" }, { name = "moto", extras = ["s3"], specifier = ">=5.1.10" },
{ name = "ollama" }, { name = "ollama" },
{ name = "openai" }, { name = "openai" },
{ name = "pymilvus", specifier = ">=2.5.12" }, { name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" }, { name = "pypdf" },
{ name = "qdrant-client" }, { name = "qdrant-client" },
{ name = "sqlalchemy" }, { name = "sqlalchemy" },
@ -2013,7 +1999,7 @@ unit = [
[[package]]
name = "llama-stack-client"
version = "0.2.19"
version = "0.2.20"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -2032,14 +2018,14 @@ dependencies = [
{ name = "tqdm" }, { name = "tqdm" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/14/e4/72683c10188ae93e97551ab6eeac725e46f13ec215618532505a7d91bf2b/llama_stack_client-0.2.19.tar.gz", hash = "sha256:6c857e528b83af7821120002ebe4d3db072fd9f7bf867a152a34c70fe606833f", size = 318325, upload-time = "2025-08-26T21:54:20.592Z" } sdist = { url = "https://files.pythonhosted.org/packages/21/91/c5e32219a5192825dd601700e68205c815c5cfee60c64c22172e46a0c83e/llama_stack_client-0.2.20.tar.gz", hash = "sha256:356257f0a4bbb64205f89e113d715925853d5e34ec744e72466da72790ba415b", size = 318311, upload-time = "2025-08-29T21:10:12.854Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/51/c8dde9fae58193a539eac700502876d8edde8be354c2784ff7b707a47432/llama_stack_client-0.2.19-py3-none-any.whl", hash = "sha256:478565a54541ca03ca9f8fe2019f4136f93ab6afe9591bdd44bc6dde6ddddbd9", size = 369905, upload-time = "2025-08-26T21:54:18.929Z" }, { url = "https://files.pythonhosted.org/packages/b0/ba/84914c4eead2fd9251c149fd6a7da28b78acd620793e3c4506116645cb60/llama_stack_client-0.2.20-py3-none-any.whl", hash = "sha256:6e178981d2ce971da2145c79d5b2b123fa50e063ed431494975c2ba01c5b8016", size = 369899, upload-time = "2025-08-29T21:10:11.113Z" },
] ]
[[package]] [[package]]
name = "locust" name = "locust"
version = "2.39.0" version = "2.39.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "configargparse" },
@ -2051,6 +2037,7 @@ dependencies = [
{ name = "locust-cloud" }, { name = "locust-cloud" },
{ name = "msgpack" }, { name = "msgpack" },
{ name = "psutil" }, { name = "psutil" },
{ name = "python-engineio" },
{ name = "python-socketio", extra = ["client"] }, { name = "python-socketio", extra = ["client"] },
{ name = "pywin32", marker = "sys_platform == 'win32'" }, { name = "pywin32", marker = "sys_platform == 'win32'" },
{ name = "pyzmq" }, { name = "pyzmq" },
@ -2058,9 +2045,9 @@ dependencies = [
{ name = "setuptools" }, { name = "setuptools" },
{ name = "werkzeug" }, { name = "werkzeug" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/c4/6f/d6ca4483f4795747fbbd610d28e798ca4f5d4358e03f309343eb5bab128f/locust-2.39.0.tar.gz", hash = "sha256:71e82a68324f9d63d4b800035288488c08eab12811fa4c24ff07f031643b7b39", size = 1409879, upload-time = "2025-08-20T13:39:55.233Z" } sdist = { url = "https://files.pythonhosted.org/packages/95/c8/10aa5445c404eed389b56877e6714c1787190cc09dd70059ce3765979ec5/locust-2.39.1.tar.gz", hash = "sha256:6bdd19e27edf9a1c84391d6cf6e9a737dfb832be7dfbf39053191ae31b9cc498", size = 1409902, upload-time = "2025-08-29T17:41:01.544Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/94/7dc9a2b4ccb18a5b0c4be4bfadfa79b6c0fd860267a7114641402627e7db/locust-2.39.0-py3-none-any.whl", hash = "sha256:3817c4d7cca387b4b871da779c9e145c2a95fbb0b5602be5833976902b967a8f", size = 1428138, upload-time = "2025-08-20T13:39:52.549Z" }, { url = "https://files.pythonhosted.org/packages/ec/b3/b2f4b2ca88b1e72eba7be2b2982533b887f8b709d222db78eb9602aa5121/locust-2.39.1-py3-none-any.whl", hash = "sha256:fd5148f2f1a4ed34aee968abc4393674e69d1b5e1b54db50a397f6eb09ce0b04", size = 1428155, upload-time = "2025-08-29T17:41:00.245Z" },
] ]
[[package]]
@ -2634,7 +2621,7 @@ wheels = [
[[package]]
name = "openai"
version = "1.99.6"
version = "1.102.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -2646,9 +2633,9 @@ dependencies = [
{ name = "tqdm" }, { name = "tqdm" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/11/45/38a87bd6949236db5ae3132f41d5861824702b149f86d2627d6900919103/openai-1.99.6.tar.gz", hash = "sha256:f48f4239b938ef187062f3d5199a05b69711d8b600b9a9b6a3853cd271799183", size = 505364, upload-time = "2025-08-09T15:20:54.438Z" } sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = "sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/d6/dd/9aa956485c2856346b3181542fbb0aea4e5b457fa7a523944726746da8da/openai-1.99.6-py3-none-any.whl", hash = "sha256:e40d44b2989588c45ce13819598788b77b8fb80ba2f7ae95ce90d14e46f1bd26", size = 786296, upload-time = "2025-08-09T15:20:51.95Z" }, { url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" },
] ]
[[package]] [[package]]
@ -3139,6 +3126,37 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" }, { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" },
] ]
[[package]]
name = "psycopg2-binary"
version = "2.9.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764, upload-time = "2024-10-16T11:24:58.126Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771, upload-time = "2024-10-16T11:20:35.234Z" },
{ url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336, upload-time = "2024-10-16T11:20:38.742Z" },
{ url = "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637, upload-time = "2024-10-16T11:20:42.145Z" },
{ url = "https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 3082097, upload-time = "2024-10-16T11:20:46.185Z" },
{ url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776, upload-time = "2024-10-16T11:20:50.879Z" },
{ url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968, upload-time = "2024-10-16T11:20:56.819Z" },
{ url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334, upload-time = "2024-10-16T11:21:02.411Z" },
{ url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722, upload-time = "2024-10-16T11:21:09.01Z" },
{ url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132, upload-time = "2024-10-16T11:21:16.339Z" },
{ url = "https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312, upload-time = "2024-10-16T11:21:25.584Z" },
{ url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191, upload-time = "2024-10-16T11:21:29.912Z" },
{ url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031, upload-time = "2024-10-16T11:21:34.211Z" },
{ url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699, upload-time = "2024-10-16T11:21:42.841Z" },
{ url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245, upload-time = "2024-10-16T11:21:51.989Z" },
{ url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631, upload-time = "2024-10-16T11:21:57.584Z" },
{ url = "https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140, upload-time = "2024-10-16T11:22:02.005Z" },
{ url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762, upload-time = "2024-10-16T11:22:06.412Z" },
{ url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967, upload-time = "2024-10-16T11:22:11.583Z" },
{ url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326, upload-time = "2024-10-16T11:22:16.406Z" },
{ url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712, upload-time = "2024-10-16T11:22:21.366Z" },
{ url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155, upload-time = "2024-10-16T11:22:25.684Z" },
{ url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356, upload-time = "2024-10-16T11:22:30.562Z" },
{ url = "https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224, upload-time = "2025-01-04T20:09:19.234Z" },
]
[[package]]
name = "ptyprocess"
version = "0.7.0"
@ -3473,7 +3491,7 @@ wheels = [
[[package]]
name = "pymilvus"
version = "2.6.0"
version = "2.6.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
@ -3484,9 +3502,9 @@ dependencies = [
{ name = "setuptools" }, { name = "setuptools" },
{ name = "ujson" }, { name = "ujson" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/86/21/5c25a975299415a5a8f26d4759ddf7852aefdf3595f002b5203c4aaf5c8e/pymilvus-2.6.0.tar.gz", hash = "sha256:2b2ca487e098abc34231755e33af2f5294e9f6a64d92d03551532defbac0a3fb", size = 1292994, upload-time = "2025-08-06T09:09:01.705Z" } sdist = { url = "https://files.pythonhosted.org/packages/70/a9/b25af985972082d1bb0b26739fece8cea3f56370733b4b1de690c42a77cc/pymilvus-2.6.1.tar.gz", hash = "sha256:ef1d7f5039719398d131ca80c19e55bc2bccc7ab6609f2cca9a04217dcb0a7fb", size = 1322169, upload-time = "2025-08-29T10:03:50.523Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/a2/dfc2a2225aeb90a7dff9443f2d26fe9d04f6f7bcefe537945b5d5220fddd/pymilvus-2.6.0-py3-none-any.whl", hash = "sha256:d743fdd928c9007184d24a52b4f5dfdd18d405a37b4dba66b5ea4bf196fac526", size = 248299, upload-time = "2025-08-06T09:08:58.272Z" }, { url = "https://files.pythonhosted.org/packages/d4/1a/8b677e0f4ef683bbfb00d495960573fff0844ed509b3cf0abede79a48e90/pymilvus-2.6.1-py3-none-any.whl", hash = "sha256:e3d76d45ce04d3555a6849645a18a1e2992706e248d5b6dc58a00504d0b60165", size = 254252, upload-time = "2025-08-29T10:03:48.539Z" },
] ]
[[package]] [[package]]