mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 06:00:48 +00:00

Merge 84a26339c8 into 61582f327c

This commit is contained in commit 538bec9cdb.

8 changed files with 247 additions and 17 deletions
GitHub Actions composite action:

```diff
@@ -39,6 +39,17 @@ runs:
     if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-vllm

+  - name: Set provider URLs for replay mode
+    if: ${{ inputs.inference-mode == 'replay' }}
+    shell: bash
+    run: |
+      # setting so providers get registered in replay mode
+      if [ "${{ inputs.provider }}" == "ollama" ]; then
+        echo "OLLAMA_URL=http://localhost:11434" >> $GITHUB_ENV
+      elif [ "${{ inputs.provider }}" == "vllm" ]; then
+        echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
+      fi
+
   - name: Build Llama Stack
     shell: bash
     run: |
```
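The added step runs only in replay mode and exports the provider's base URL through `$GITHUB_ENV`, so subsequent steps see `OLLAMA_URL` or `VLLM_URL` and the matching provider gets registered without a live inference server. As a rough sketch of how downstream code can consume these variables (the helper name and mapping below are hypothetical, not the stack's actual registration path):

```python
import os


def resolve_provider_url(provider: str) -> str | None:
    """Hypothetical helper: look up the env var exported by the CI step above."""
    env_vars = {"ollama": "OLLAMA_URL", "vllm": "VLLM_URL"}
    var = env_vars.get(provider)
    # None means the variable was not exported, so the provider would stay unregistered.
    return os.environ.get(var) if var else None


# With OLLAMA_URL=http://localhost:11434 in the environment (as the step sets),
# this prints that URL; otherwise it prints None.
print(resolve_provider_url("ollama"))
```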
docs/_static/llama-stack-spec.html (vendored, 41 lines changed):

```diff
@@ -14997,6 +14997,47 @@
             "text": {
               "type": "string",
               "description": "The actual text content"
+            },
+            "embedding": {
+              "type": "array",
+              "items": {
+                "type": "number"
+              },
+              "description": "(Optional) Embedding vector for the content, if available"
+            },
+            "created_timestamp": {
+              "type": "integer",
+              "description": "(Optional) Timestamp when the content was created"
+            },
+            "metadata": {
+              "type": "object",
+              "additionalProperties": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "type": "boolean"
+                  },
+                  {
+                    "type": "number"
+                  },
+                  {
+                    "type": "string"
+                  },
+                  {
+                    "type": "array"
+                  },
+                  {
+                    "type": "object"
+                  }
+                ]
+              },
+              "description": "(Optional) Metadata associated with the content, such as source, author, etc."
+            },
+            "chunk_metadata": {
+              "$ref": "#/components/schemas/ChunkMetadata",
+              "description": "(Optional) Metadata associated with the chunk, such as document ID, source, etc."
             }
           },
           "additionalProperties": false,
```
docs/_static/llama-stack-spec.yaml (vendored, 28 lines changed):

```diff
@@ -11143,6 +11143,34 @@ components:
       text:
         type: string
         description: The actual text content
+      embedding:
+        type: array
+        items:
+          type: number
+        description: >-
+          (Optional) Embedding vector for the content, if available
+      created_timestamp:
+        type: integer
+        description: >-
+          (Optional) Timestamp when the content was created
+      metadata:
+        type: object
+        additionalProperties:
+          oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+        description: >-
+          (Optional) Metadata associated with the content, such as source, author,
+          etc.
+      chunk_metadata:
+        $ref: '#/components/schemas/ChunkMetadata'
+        description: >-
+          (Optional) Metadata associated with the chunk, such as document ID, source,
+          etc.
     additionalProperties: false
     required:
       - type
```
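Both spec files describe the same extension of the `VectorStoreContent` schema: optional `embedding`, `created_timestamp`, `metadata`, and `chunk_metadata` fields alongside the existing `type` and `text`. A payload conforming to the extended schema could look like this (values are illustrative only; the `chunk_metadata` keys shown are ones the tests below exercise):

```python
# Illustrative payload; the field names come from the schema diffs above,
# the values are invented for the example.
content_item = {
    "type": "text",
    "text": "This is a test document.",
    "embedding": [0.5, 0.7, 0.9],      # (Optional) embedding vector for the content
    "created_timestamp": 1625133600,   # (Optional) integer creation timestamp
    "metadata": {"lang": "en"},        # (Optional) source/author style metadata
    "chunk_metadata": {                # (Optional) resolves to the ChunkMetadata schema
        "chunk_id": "chunk123",
        "document_id": "doc456",
        "chunk_window": "0-100",
    },
}
```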
```diff
@@ -226,10 +226,18 @@ class VectorStoreContent(BaseModel):

     :param type: Content type, currently only "text" is supported
     :param text: The actual text content
+    :param embedding: (Optional) Embedding vector for the content, if available
+    :param created_timestamp: (Optional) Timestamp when the content was created
+    :param metadata: (Optional) Metadata associated with the content, such as source, author, etc.
+    :param chunk_metadata: (Optional) Metadata associated with the chunk, such as document ID, source, etc.
    """

    type: Literal["text"]
    text: str
+    embedding: list[float] | None = None
+    created_timestamp: int | None = None
+    metadata: dict[str, Any] | None = None
+    chunk_metadata: ChunkMetadata | None = None


 @json_schema_type
```
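Because all four new fields default to `None`, existing `VectorStoreContent(...)` call sites keep validating unchanged. A minimal sketch of old and new style construction (assuming both classes are importable from `llama_stack.apis.vector_io`, as the mixin diff below does, and that `ChunkMetadata` accepts a partial set of fields):

```python
from llama_stack.apis.vector_io import ChunkMetadata, VectorStoreContent

# Pre-existing call style still works: the new fields default to None.
plain = VectorStoreContent(type="text", text="hello world")
assert plain.embedding is None and plain.chunk_metadata is None

# Opt-in call style populating the extended fields.
rich = VectorStoreContent(
    type="text",
    text="hello world",
    embedding=[0.5, 0.7, 0.9],
    created_timestamp=1625133600,
    metadata={"lang": "en"},
    # Assumes ChunkMetadata's fields are individually optional.
    chunk_metadata=ChunkMetadata(chunk_id="chunk123", document_id="doc456"),
)
```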
```diff
@@ -17,6 +17,7 @@ from llama_stack.apis.files import Files, OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
+    ChunkMetadata,
     QueryChunksResponse,
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
@@ -520,31 +521,68 @@ class OpenAIVectorStoreMixin(ABC):
         raise ValueError(f"Unsupported filter type: {filter_type}")

     def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
+        created_ts = None
+        if chunk.chunk_metadata is not None:
+            created_ts = getattr(chunk.chunk_metadata, "created_timestamp", None)
+
+        metadata_dict = {}
+        if chunk.chunk_metadata:
+            if hasattr(chunk.chunk_metadata, "model_dump"):
+                metadata_dict = chunk.chunk_metadata.model_dump()
+            else:
+                metadata_dict = vars(chunk.chunk_metadata)
+
+        user_metadata = chunk.metadata or {}
+        base_meta = {**metadata_dict, **user_metadata}
+
         # content is InterleavedContent
         if isinstance(chunk.content, str):
             content = [
                 VectorStoreContent(
                     type="text",
                     text=chunk.content,
+                    embedding=chunk.embedding,
+                    created_timestamp=created_ts,
+                    metadata=user_metadata,
+                    chunk_metadata=ChunkMetadata(**base_meta) if base_meta else None,
                 )
             ]
         elif isinstance(chunk.content, list):
             # TODO: Add support for other types of content
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=item.text,
-                )
-                for item in chunk.content
-                if item.type == "text"
-            ]
+            content = []
+            for item in chunk.content:
+                if hasattr(item, "type") and item.type == "text":
+                    item_meta = {**base_meta}
+                    item_user_meta = getattr(item, "metadata", {}) or {}
+                    if item_user_meta:
+                        item_meta.update(item_user_meta)
+
+                    content.append(
+                        VectorStoreContent(
+                            type="text",
+                            text=item.text,
+                            embedding=getattr(item, "embedding", None),
+                            created_timestamp=created_ts,
+                            metadata=item_user_meta,
+                            chunk_metadata=ChunkMetadata(**item_meta) if item_meta else None,
+                        )
+                    )
         else:
-            if chunk.content.type != "text":
-                raise ValueError(f"Unsupported content type: {chunk.content.type}")
+            content_item = chunk.content
+            if content_item.type != "text":
+                raise ValueError(f"Unsupported content type: {content_item.type}")
+
+            item_user_meta = getattr(content_item, "metadata", {}) or {}
+            combined_meta = {**base_meta, **item_user_meta}
+
             content = [
                 VectorStoreContent(
                     type="text",
-                    text=chunk.content.text,
+                    text=content_item.text,
+                    embedding=getattr(content_item, "embedding", None),
+                    created_timestamp=created_ts,
+                    metadata=item_user_meta,
+                    chunk_metadata=ChunkMetadata(**combined_meta) if combined_meta else None,
                 )
             ]
         return content
```
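The rewritten `_chunk_to_vector_store_content` threads the chunk's embedding, creation timestamp, and metadata through every `VectorStoreContent` it emits: `base_meta` merges the serialized `chunk_metadata` with user metadata, and per-item metadata overrides both. A condensed sketch of the call, mirroring the unit test at the end of this diff (`adapter` stands for any provider instance that mixes in `OpenAIVectorStoreMixin`, such as the `vector_io_adapter` fixture; the partial `ChunkMetadata` construction is an assumption):

```python
from llama_stack.apis.vector_io import Chunk, ChunkMetadata

# `adapter` is assumed: any object mixing in OpenAIVectorStoreMixin.
chunk = Chunk(
    content="hello world",
    metadata={"lang": "en"},
    embedding=[0.5, 0.7, 0.9],
    chunk_metadata=ChunkMetadata(chunk_id="chunk123", document_id="doc456"),
)

vsc = adapter._chunk_to_vector_store_content(chunk)[0]
assert vsc.embedding == [0.5, 0.7, 0.9]   # copied from the chunk (str-content branch)
assert vsc.metadata == {"lang": "en"}     # user metadata only
assert vsc.chunk_metadata is not None     # rebuilt from base_meta via ChunkMetadata(**...)
```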
````diff
@@ -108,9 +108,7 @@ pytest -s -v tests/integration/inference/ \
 Running Vector IO tests for a number of embedding models:

 ```bash
-EMBEDDING_MODELS=all-MiniLM-L6-v2
-pytest -s -v tests/integration/vector_io/ \
-   --stack-config=inference=sentence-transformers,vector_io=sqlite-vec \
-   --embedding-model=$EMBEDDING_MODELS
+uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=inline::sqlite-vec,files=localfs" \
+        tests/integration/vector_io --embedding-model \
+        sentence-transformers/all-MiniLM-L6-v2
 ```
````
```diff
@@ -6,6 +6,7 @@

 import logging
 import time
+import uuid
 from io import BytesIO

 import pytest
@@ -897,3 +898,76 @@ def test_openai_vector_store_search_modes(llama_stack_client, client_with_models
             search_mode=search_mode,
         )
         assert search_response is not None
+
+
+def test_openai_vector_store_file_contents_with_extended_fields(compat_client_with_empty_stores, client_with_models):
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+
+    compat_client = compat_client_with_empty_stores
+    vector_store = compat_client.vector_stores.create(
+        name="extended_fields_test_store", metadata={"purpose": "extended_fields_testing"}
+    )
+
+    test_content = b"This is a test document."
+    file_name = f"extended_fields_test_{uuid.uuid4().hex}.txt"
+    attributes = {"test_type": "extended_fields", "version": "1.0"}
+
+    with BytesIO(test_content) as file_buffer:
+        file_buffer.name = file_name
+        file = compat_client.files.create(file=file_buffer, purpose="assistants")
+
+    file_attach_response = compat_client.vector_stores.files.create(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+        attributes=attributes,
+    )
+
+    assert file_attach_response.status == "completed", f"File attach failed: {file_attach_response.last_error}"
+    assert file_attach_response.attributes == attributes
+
+    file_contents = compat_client.vector_stores.files.content(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+    )
+
+    assert file_contents
+    assert file_contents.filename == file_name
+    assert file_contents.attributes == attributes
+    assert len(file_contents.content) > 0
+
+    for content_item in file_contents.content:
+        if isinstance(compat_client, LlamaStackClient):
+            content_item = content_item.to_dict()
+        assert content_item["type"] == "text"
+        assert "text" in content_item
+        assert isinstance(content_item["text"], str)
+        assert len(content_item["text"]) > 0
+
+        if "embedding" in content_item:
+            assert isinstance(content_item["embedding"], list)
+            assert all(isinstance(x, (int | float)) for x in content_item["embedding"])
+
+        if "created_timestamp" in content_item:
+            assert isinstance(content_item["created_timestamp"], int)
+            assert content_item["created_timestamp"] > 0
+
+        if "chunk_metadata" in content_item:
+            assert isinstance(content_item["chunk_metadata"], dict)
+            if "chunk_id" in content_item["chunk_metadata"]:
+                assert isinstance(content_item["chunk_metadata"]["chunk_id"], str)
+            if "chunk_window" in content_item["chunk_metadata"]:
+                assert isinstance(content_item["chunk_metadata"]["chunk_window"], str)
+
+    search_response = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id, query="test document", max_num_results=5
+    )
+
+    assert search_response is not None
+    assert len(search_response.data) > 0
+
+    for result_object in search_response.data:
+        result = result_object.to_dict()
+        assert "content" in result
+        assert len(result["content"]) > 0
+        assert result["content"][0]["type"] == "text"
+        assert "text" in result["content"][0]
```
```diff
@@ -12,7 +12,7 @@ import numpy as np
 import pytest

 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse, VectorStoreContent
 from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREFIX


 # This test is a unit test for the inline VectorIO providers. This should only contain
```
```diff
@@ -294,3 +294,35 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t
     assert loaded_file_info == {}
     loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
     assert loaded_contents == []
+
+
+async def test_chunk_to_vector_store_content_with_new_fields(vector_io_adapter):
+    sample_chunk_metadata = ChunkMetadata(
+        chunk_id="chunk123",
+        document_id="doc456",
+        source="test_source",
+        created_timestamp=1625133600,
+        updated_timestamp=1625133600,
+        chunk_window="0-100",
+        chunk_tokenizer="test_tokenizer",
+        chunk_embedding_model="dummy_model",
+        chunk_embedding_dimension=384,
+        content_token_count=100,
+        metadata_token_count=100,
+    )
+
+    sample_chunk = Chunk(
+        content="hello world", metadata={"lang": "en"}, embedding=[0.5, 0.7, 0.9], chunk_metadata=sample_chunk_metadata
+    )
+
+    vsc_list: list[VectorStoreContent] = vector_io_adapter._chunk_to_vector_store_content(sample_chunk)
+    assert isinstance(vsc_list, list)
+    assert len(vsc_list) > 0
+
+    vsc = vsc_list[0]
+    assert vsc.text == "hello world"
+    assert vsc.type == "text"
+    assert vsc.metadata == {"lang": "en"}
+    assert vsc.chunk_metadata == sample_chunk_metadata
+    assert vsc.embedding == [0.5, 0.7, 0.9]
+    assert vsc.created_timestamp == 1625133600
```