mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-24 12:38:04 +00:00
Merge branch 'main' into allow-dynamic-models-nvidia
This commit is contained in:
commit
6173d7a308
71 changed files with 3107 additions and 2381 deletions
|
|
@ -19,6 +19,7 @@ class VectorDB(Resource):
|
|||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
vector_db_name: str | None = None
|
||||
|
||||
@property
|
||||
def vector_db_id(self) -> str:
|
||||
|
|
@ -70,6 +71,7 @@ class VectorDBs(Protocol):
|
|||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorDB:
|
||||
"""Register a vector database.
|
||||
|
|
@ -78,6 +80,7 @@ class VectorDBs(Protocol):
|
|||
:param embedding_model: The embedding model to use.
|
||||
:param embedding_dimension: The dimension of the embedding model.
|
||||
:param provider_id: The identifier of the provider.
|
||||
:param vector_db_name: The name of the vector database.
|
||||
:param provider_vector_db_id: The identifier of the vector database in the provider.
|
||||
:returns: A VectorDB.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -346,7 +346,6 @@ class VectorIO(Protocol):
|
|||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store.
|
||||
|
||||
|
|
@ -358,7 +357,6 @@ class VectorIO(Protocol):
|
|||
:param embedding_model: The embedding model to use for this vector store.
|
||||
:param embedding_dimension: The dimension of the embedding vectors (default: 384).
|
||||
:param provider_id: The ID of the provider to use for this vector store.
|
||||
:param provider_vector_db_id: The provider-specific vector database ID.
|
||||
:returns: A VectorStoreObject representing the created vector store.
|
||||
"""
|
||||
...
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from llama_stack.distribution.distribution import (
|
|||
builtin_automatically_routed_apis,
|
||||
get_provider_registry,
|
||||
)
|
||||
from llama_stack.distribution.stack import replace_env_vars
|
||||
from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
|
||||
|
|
@ -164,7 +164,8 @@ def upgrade_from_routing_table(
|
|||
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
|
||||
version = config_dict.get("version", None)
|
||||
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
|
||||
return StackRunConfig(**replace_env_vars(config_dict))
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
|
||||
if "routing_table" in config_dict:
|
||||
logger.info("Upgrading config...")
|
||||
|
|
@ -175,4 +176,5 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
|
|||
if not config_dict.get("external_providers_dir", None):
|
||||
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
|
||||
|
||||
return StackRunConfig(**replace_env_vars(config_dict))
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ def validate_and_prepare_providers(
|
|||
specs = {}
|
||||
for provider in providers:
|
||||
if not provider.provider_id or provider.provider_id == "__disabled__":
|
||||
logger.warning(f"Provider `{provider.provider_type}` for API `{api}` is disabled")
|
||||
logger.debug(f"Provider `{provider.provider_type}` for API `{api}` is disabled")
|
||||
continue
|
||||
|
||||
validate_provider(provider, api, provider_registry)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
|
|
@ -81,6 +82,7 @@ class VectorIORouter(VectorIO):
|
|||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> None:
|
||||
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
|
||||
|
|
@ -89,6 +91,7 @@ class VectorIORouter(VectorIO):
|
|||
embedding_model,
|
||||
embedding_dimension,
|
||||
provider_id,
|
||||
vector_db_name,
|
||||
provider_vector_db_id,
|
||||
)
|
||||
|
||||
|
|
@ -123,7 +126,6 @@ class VectorIORouter(VectorIO):
|
|||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = None,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}")
|
||||
|
||||
|
|
@ -135,17 +137,17 @@ class VectorIORouter(VectorIO):
|
|||
embedding_model, embedding_dimension = embedding_model_info
|
||||
logger.info(f"No embedding model specified, using first available: {embedding_model}")
|
||||
|
||||
vector_db_id = name
|
||||
vector_db_id = f"vs_{uuid.uuid4()}"
|
||||
registered_vector_db = await self.routing_table.register_vector_db(
|
||||
vector_db_id,
|
||||
embedding_model,
|
||||
embedding_dimension,
|
||||
provider_id,
|
||||
provider_vector_db_id,
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model=embedding_model,
|
||||
embedding_dimension=embedding_dimension,
|
||||
provider_id=provider_id,
|
||||
provider_vector_db_id=vector_db_id,
|
||||
vector_db_name=name,
|
||||
)
|
||||
|
||||
return await self.routing_table.get_provider_impl(registered_vector_db.identifier).openai_create_vector_store(
|
||||
vector_db_id,
|
||||
name=name,
|
||||
file_ids=file_ids,
|
||||
expires_after=expires_after,
|
||||
chunking_strategy=chunking_strategy,
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
) -> VectorDB:
|
||||
if provider_vector_db_id is None:
|
||||
provider_vector_db_id = vector_db_id
|
||||
|
|
@ -62,6 +63,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
"provider_resource_id": provider_vector_db_id,
|
||||
"embedding_model": embedding_model,
|
||||
"embedding_dimension": model.metadata["embedding_dimension"],
|
||||
"vector_db_name": vector_db_name,
|
||||
}
|
||||
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
|
||||
await self.register_object(vector_db)
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ from llama_stack.distribution.server.routes import (
|
|||
initialize_route_impls,
|
||||
)
|
||||
from llama_stack.distribution.stack import (
|
||||
cast_image_name_to_string,
|
||||
construct_stack,
|
||||
replace_env_vars,
|
||||
validate_env_pair,
|
||||
|
|
@ -439,7 +440,7 @@ def main(args: argparse.Namespace | None = None):
|
|||
logger.error(f"Error: {str(e)}")
|
||||
sys.exit(1)
|
||||
config = replace_env_vars(config_contents)
|
||||
config = StackRunConfig(**config)
|
||||
config = StackRunConfig(**cast_image_name_to_string(config))
|
||||
|
||||
# now that the logger is initialized, print the line about which type of config we are using.
|
||||
logger.info(log_line)
|
||||
|
|
|
|||
|
|
@ -267,6 +267,13 @@ def _convert_string_to_proper_type(value: str) -> Any:
|
|||
return value
|
||||
|
||||
|
||||
def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Ensure that any value for a key 'image_name' in a config_dict is a string"""
|
||||
if "image_name" in config_dict and config_dict["image_name"] is not None:
|
||||
config_dict["image_name"] = str(config_dict["image_name"])
|
||||
return config_dict
|
||||
|
||||
|
||||
def validate_env_pair(env_pair: str) -> tuple[str, str]:
|
||||
"""Validate and split an environment variable key-value pair."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -51,6 +51,9 @@ class LocalfsFilesImpl(Files):
|
|||
},
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
def _generate_file_id(self) -> str:
|
||||
"""Generate a unique file ID for OpenAI API."""
|
||||
return f"file-{uuid.uuid4().hex}"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
import struct
|
||||
from typing import Any
|
||||
|
|
@ -117,6 +118,10 @@ def _rrf_rerank(
|
|||
return rrf_scores
|
||||
|
||||
|
||||
def _make_sql_identifier(name: str) -> str:
|
||||
return re.sub(r"[^a-zA-Z0-9_]", "_", name)
|
||||
|
||||
|
||||
class SQLiteVecIndex(EmbeddingIndex):
|
||||
"""
|
||||
An index implementation that stores embeddings in a SQLite virtual table using sqlite-vec.
|
||||
|
|
@ -130,9 +135,9 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
self.dimension = dimension
|
||||
self.db_path = db_path
|
||||
self.bank_id = bank_id
|
||||
self.metadata_table = f"chunks_{bank_id}".replace("-", "_")
|
||||
self.vector_table = f"vec_chunks_{bank_id}".replace("-", "_")
|
||||
self.fts_table = f"fts_chunks_{bank_id}".replace("-", "_")
|
||||
self.metadata_table = _make_sql_identifier(f"chunks_{bank_id}")
|
||||
self.vector_table = _make_sql_identifier(f"vec_chunks_{bank_id}")
|
||||
self.fts_table = _make_sql_identifier(f"fts_chunks_{bank_id}")
|
||||
self.kvstore = kvstore
|
||||
|
||||
@classmethod
|
||||
|
|
@ -148,14 +153,14 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
try:
|
||||
# Create the table to store chunk metadata.
|
||||
cur.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.metadata_table} (
|
||||
CREATE TABLE IF NOT EXISTS [{self.metadata_table}] (
|
||||
id TEXT PRIMARY KEY,
|
||||
chunk TEXT
|
||||
);
|
||||
""")
|
||||
# Create the virtual table for embeddings.
|
||||
cur.execute(f"""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS {self.vector_table}
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS [{self.vector_table}]
|
||||
USING vec0(embedding FLOAT[{self.dimension}], id TEXT);
|
||||
""")
|
||||
connection.commit()
|
||||
|
|
@ -163,7 +168,7 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
# based on query. Implementation of the change on client side will allow passing the search_mode option
|
||||
# during initialization to make it easier to create the table that is required.
|
||||
cur.execute(f"""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS {self.fts_table}
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS [{self.fts_table}]
|
||||
USING fts5(id, content);
|
||||
""")
|
||||
connection.commit()
|
||||
|
|
@ -178,9 +183,9 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
connection = _create_sqlite_connection(self.db_path)
|
||||
cur = connection.cursor()
|
||||
try:
|
||||
cur.execute(f"DROP TABLE IF EXISTS {self.metadata_table};")
|
||||
cur.execute(f"DROP TABLE IF EXISTS {self.vector_table};")
|
||||
cur.execute(f"DROP TABLE IF EXISTS {self.fts_table};")
|
||||
cur.execute(f"DROP TABLE IF EXISTS [{self.metadata_table}];")
|
||||
cur.execute(f"DROP TABLE IF EXISTS [{self.vector_table}];")
|
||||
cur.execute(f"DROP TABLE IF EXISTS [{self.fts_table}];")
|
||||
connection.commit()
|
||||
finally:
|
||||
cur.close()
|
||||
|
|
@ -212,7 +217,7 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks]
|
||||
cur.executemany(
|
||||
f"""
|
||||
INSERT INTO {self.metadata_table} (id, chunk)
|
||||
INSERT INTO [{self.metadata_table}] (id, chunk)
|
||||
VALUES (?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET chunk = excluded.chunk;
|
||||
""",
|
||||
|
|
@ -230,7 +235,7 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
|
||||
]
|
||||
cur.executemany(
|
||||
f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);",
|
||||
f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);",
|
||||
embedding_data,
|
||||
)
|
||||
|
||||
|
|
@ -238,13 +243,13 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
|
||||
# DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
|
||||
cur.executemany(
|
||||
f"DELETE FROM {self.fts_table} WHERE id = ?;",
|
||||
f"DELETE FROM [{self.fts_table}] WHERE id = ?;",
|
||||
[(row[0],) for row in fts_data],
|
||||
)
|
||||
|
||||
# INSERT new entries
|
||||
cur.executemany(
|
||||
f"INSERT INTO {self.fts_table} (id, content) VALUES (?, ?);",
|
||||
f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);",
|
||||
fts_data,
|
||||
)
|
||||
|
||||
|
|
@ -280,8 +285,8 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
emb_blob = serialize_vector(emb_list)
|
||||
query_sql = f"""
|
||||
SELECT m.id, m.chunk, v.distance
|
||||
FROM {self.vector_table} AS v
|
||||
JOIN {self.metadata_table} AS m ON m.id = v.id
|
||||
FROM [{self.vector_table}] AS v
|
||||
JOIN [{self.metadata_table}] AS m ON m.id = v.id
|
||||
WHERE v.embedding MATCH ? AND k = ?
|
||||
ORDER BY v.distance;
|
||||
"""
|
||||
|
|
@ -322,9 +327,9 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|||
cur = connection.cursor()
|
||||
try:
|
||||
query_sql = f"""
|
||||
SELECT DISTINCT m.id, m.chunk, bm25({self.fts_table}) AS score
|
||||
FROM {self.fts_table} AS f
|
||||
JOIN {self.metadata_table} AS m ON m.id = f.id
|
||||
SELECT DISTINCT m.id, m.chunk, bm25([{self.fts_table}]) AS score
|
||||
FROM [{self.fts_table}] AS f
|
||||
JOIN [{self.metadata_table}] AS m ON m.id = f.id
|
||||
WHERE f.content MATCH ?
|
||||
ORDER BY score ASC
|
||||
LIMIT ?;
|
||||
|
|
|
|||
|
|
@ -3,16 +3,17 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import logging
|
||||
|
||||
from llama_stack.providers.remote.inference.llama_openai_compat.config import (
|
||||
LlamaCompatConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||
LiteLLMOpenAIMixin,
|
||||
)
|
||||
from llama_api_client import AsyncLlamaAPIClient, NotFoundError
|
||||
|
||||
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
|
||||
from .models import MODEL_ENTRIES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LlamaCompatInferenceAdapter(LiteLLMOpenAIMixin):
|
||||
_config: LlamaCompatConfig
|
||||
|
|
@ -27,8 +28,32 @@ class LlamaCompatInferenceAdapter(LiteLLMOpenAIMixin):
|
|||
)
|
||||
self.config = config
|
||||
|
||||
async def check_model_availability(self, model: str) -> bool:
|
||||
"""
|
||||
Check if a specific model is available from Llama API.
|
||||
|
||||
:param model: The model identifier to check.
|
||||
:return: True if the model is available dynamically, False otherwise.
|
||||
"""
|
||||
try:
|
||||
llama_api_client = self._get_llama_api_client()
|
||||
retrieved_model = await llama_api_client.models.retrieve(model)
|
||||
logger.info(f"Model {retrieved_model.id} is available from Llama API")
|
||||
return True
|
||||
|
||||
except NotFoundError:
|
||||
logger.error(f"Model {model} is not available from Llama API")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check model availability from Llama API: {e}")
|
||||
return False
|
||||
|
||||
async def initialize(self):
|
||||
await super().initialize()
|
||||
|
||||
async def shutdown(self):
|
||||
await super().shutdown()
|
||||
|
||||
def _get_llama_api_client(self) -> AsyncLlamaAPIClient:
|
||||
return AsyncLlamaAPIClient(api_key=self.get_api_key(), base_url=self.config.openai_compat_api_base)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
import logging
|
||||
import warnings
|
||||
from collections.abc import AsyncIterator
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
from openai import APIConnectionError, AsyncOpenAI, BadRequestError
|
||||
|
|
@ -98,41 +97,21 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
# If we can't retrieve the model, it's not available
|
||||
return False
|
||||
|
||||
@lru_cache # noqa: B019
|
||||
def _get_client(self, provider_model_id: str | None = None) -> AsyncOpenAI:
|
||||
@property
|
||||
def _client(self) -> AsyncOpenAI:
|
||||
"""
|
||||
For hosted models, https://integrate.api.nvidia.com/v1 is the primary base_url. However,
|
||||
some models are hosted on different URLs. This function returns the appropriate client
|
||||
for the given provider_model_id.
|
||||
Returns an OpenAI client for the configured NVIDIA API endpoint.
|
||||
|
||||
This relies on lru_cache and self._default_client to avoid creating a new client for each request
|
||||
or for each model that is hosted on https://integrate.api.nvidia.com/v1.
|
||||
|
||||
:param provider_model_id: The provider model ID (optional, defaults to primary endpoint)
|
||||
:return: An OpenAI client
|
||||
"""
|
||||
|
||||
@lru_cache # noqa: B019
|
||||
def _get_client_for_base_url(base_url: str) -> AsyncOpenAI:
|
||||
"""
|
||||
Maintain a single OpenAI client per base_url.
|
||||
"""
|
||||
return AsyncOpenAI(
|
||||
base_url=base_url,
|
||||
api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"),
|
||||
timeout=self._config.timeout,
|
||||
)
|
||||
|
||||
special_model_urls = {
|
||||
"meta/llama-3.2-11b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct",
|
||||
"meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct",
|
||||
}
|
||||
|
||||
base_url = f"{self._config.url}/v1" if self._config.append_api_version else self._config.url
|
||||
|
||||
if provider_model_id and _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls:
|
||||
base_url = special_model_urls[provider_model_id]
|
||||
return _get_client_for_base_url(base_url)
|
||||
return AsyncOpenAI(
|
||||
base_url=base_url,
|
||||
api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"),
|
||||
timeout=self._config.timeout,
|
||||
)
|
||||
|
||||
async def _get_provider_model_id(self, model_id: str) -> str:
|
||||
if not self.model_store:
|
||||
|
|
@ -174,7 +153,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
response = await self._get_client(provider_model_id).completions.create(**request)
|
||||
response = await self._client.completions.create(**request)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
||||
|
|
@ -227,7 +206,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
extra_body["input_type"] = task_type_options[task_type]
|
||||
|
||||
try:
|
||||
response = await self._get_client(provider_model_id).embeddings.create(
|
||||
response = await self._client.embeddings.create(
|
||||
model=provider_model_id,
|
||||
input=input,
|
||||
extra_body=extra_body,
|
||||
|
|
@ -288,7 +267,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
response = await self._get_client(provider_model_id).chat.completions.create(**request)
|
||||
response = await self._client.chat.completions.create(**request)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
||||
|
|
@ -344,7 +323,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
return await self._get_client(provider_model_id).completions.create(**params)
|
||||
return await self._client.completions.create(**params)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
||||
|
|
@ -403,6 +382,6 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
)
|
||||
|
||||
try:
|
||||
return await self._get_client(provider_model_id).chat.completions.create(**params)
|
||||
return await self._client.chat.completions.create(**params)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import logging
|
|||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from openai import AsyncOpenAI, NotFoundError
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
OpenAIChatCompletion,
|
||||
|
|
@ -60,6 +60,27 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
|
|||
# litellm specific model names, an abstraction leak.
|
||||
self.is_openai_compat = True
|
||||
|
||||
async def check_model_availability(self, model: str) -> bool:
|
||||
"""
|
||||
Check if a specific model is available from OpenAI.
|
||||
|
||||
:param model: The model identifier to check.
|
||||
:return: True if the model is available dynamically, False otherwise.
|
||||
"""
|
||||
try:
|
||||
openai_client = self._get_openai_client()
|
||||
retrieved_model = await openai_client.models.retrieve(model)
|
||||
logger.info(f"Model {retrieved_model.id} is available from OpenAI")
|
||||
return True
|
||||
|
||||
except NotFoundError:
|
||||
logger.error(f"Model {model} is not available from OpenAI")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check model availability from OpenAI: {e}")
|
||||
return False
|
||||
|
||||
async def initialize(self) -> None:
|
||||
await super().initialize()
|
||||
|
||||
|
|
|
|||
|
|
@ -217,7 +217,6 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import KVStoreConfig
|
||||
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
|
|
@ -17,7 +17,7 @@ class MilvusVectorIOConfig(BaseModel):
|
|||
uri: str = Field(description="The URI of the Milvus server")
|
||||
token: str | None = Field(description="The token of the Milvus server")
|
||||
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
|
||||
kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None)
|
||||
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
|
||||
|
||||
# This configuration allows additional fields to be passed through to the underlying Milvus client.
|
||||
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
|
||||
|
|
@ -25,4 +25,11 @@ class MilvusVectorIOConfig(BaseModel):
|
|||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {"uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}"}
|
||||
return {
|
||||
"uri": "${env.MILVUS_ENDPOINT}",
|
||||
"token": "${env.MILVUS_TOKEN}",
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="milvus_remote_registry.db",
|
||||
),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import re
|
|||
from typing import Any
|
||||
|
||||
from numpy.typing import NDArray
|
||||
from pymilvus import DataType, MilvusClient
|
||||
from pymilvus import DataType, Function, FunctionType, MilvusClient
|
||||
|
||||
from llama_stack.apis.files.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
|
|
@ -74,12 +74,66 @@ class MilvusIndex(EmbeddingIndex):
|
|||
assert len(chunks) == len(embeddings), (
|
||||
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
||||
)
|
||||
|
||||
if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
|
||||
logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
|
||||
# Create schema for vector search
|
||||
schema = self.client.create_schema()
|
||||
schema.add_field(
|
||||
field_name="chunk_id",
|
||||
datatype=DataType.VARCHAR,
|
||||
is_primary=True,
|
||||
max_length=100,
|
||||
)
|
||||
schema.add_field(
|
||||
field_name="content",
|
||||
datatype=DataType.VARCHAR,
|
||||
max_length=65535,
|
||||
enable_analyzer=True, # Enable text analysis for BM25
|
||||
)
|
||||
schema.add_field(
|
||||
field_name="vector",
|
||||
datatype=DataType.FLOAT_VECTOR,
|
||||
dim=len(embeddings[0]),
|
||||
)
|
||||
schema.add_field(
|
||||
field_name="chunk_content",
|
||||
datatype=DataType.JSON,
|
||||
)
|
||||
# Add sparse vector field for BM25 (required by the function)
|
||||
schema.add_field(
|
||||
field_name="sparse",
|
||||
datatype=DataType.SPARSE_FLOAT_VECTOR,
|
||||
)
|
||||
|
||||
# Create indexes
|
||||
index_params = self.client.prepare_index_params()
|
||||
index_params.add_index(
|
||||
field_name="vector",
|
||||
index_type="FLAT",
|
||||
metric_type="COSINE",
|
||||
)
|
||||
# Add index for sparse field (required by BM25 function)
|
||||
index_params.add_index(
|
||||
field_name="sparse",
|
||||
index_type="SPARSE_INVERTED_INDEX",
|
||||
metric_type="BM25",
|
||||
)
|
||||
|
||||
# Add BM25 function for full-text search
|
||||
bm25_function = Function(
|
||||
name="text_bm25_emb",
|
||||
input_field_names=["content"],
|
||||
output_field_names=["sparse"],
|
||||
function_type=FunctionType.BM25,
|
||||
)
|
||||
schema.add_function(bm25_function)
|
||||
|
||||
await asyncio.to_thread(
|
||||
self.client.create_collection,
|
||||
self.collection_name,
|
||||
dimension=len(embeddings[0]),
|
||||
auto_id=True,
|
||||
schema=schema,
|
||||
index_params=index_params,
|
||||
consistency_level=self.consistency_level,
|
||||
)
|
||||
|
||||
|
|
@ -88,8 +142,10 @@ class MilvusIndex(EmbeddingIndex):
|
|||
data.append(
|
||||
{
|
||||
"chunk_id": chunk.chunk_id,
|
||||
"content": chunk.content,
|
||||
"vector": embedding,
|
||||
"chunk_content": chunk.model_dump(),
|
||||
# sparse field will be handled by BM25 function automatically
|
||||
}
|
||||
)
|
||||
try:
|
||||
|
|
@ -107,6 +163,7 @@ class MilvusIndex(EmbeddingIndex):
|
|||
self.client.search,
|
||||
collection_name=self.collection_name,
|
||||
data=[embedding],
|
||||
anns_field="vector",
|
||||
limit=k,
|
||||
output_fields=["*"],
|
||||
search_params={"params": {"radius": score_threshold}},
|
||||
|
|
@ -121,7 +178,64 @@ class MilvusIndex(EmbeddingIndex):
|
|||
k: int,
|
||||
score_threshold: float,
|
||||
) -> QueryChunksResponse:
|
||||
raise NotImplementedError("Keyword search is not supported in Milvus")
|
||||
"""
|
||||
Perform BM25-based keyword search using Milvus's built-in full-text search.
|
||||
"""
|
||||
try:
|
||||
# Use Milvus's built-in BM25 search
|
||||
search_res = await asyncio.to_thread(
|
||||
self.client.search,
|
||||
collection_name=self.collection_name,
|
||||
data=[query_string], # Raw text query
|
||||
anns_field="sparse", # Use sparse field for BM25
|
||||
output_fields=["chunk_content"], # Output the chunk content
|
||||
limit=k,
|
||||
search_params={
|
||||
"params": {
|
||||
"drop_ratio_search": 0.2, # Ignore low-importance terms
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
chunks = []
|
||||
scores = []
|
||||
for res in search_res[0]:
|
||||
chunk = Chunk(**res["entity"]["chunk_content"])
|
||||
chunks.append(chunk)
|
||||
scores.append(res["distance"]) # BM25 score from Milvus
|
||||
|
||||
# Filter by score threshold
|
||||
filtered_chunks = [chunk for chunk, score in zip(chunks, scores, strict=False) if score >= score_threshold]
|
||||
filtered_scores = [score for score in scores if score >= score_threshold]
|
||||
|
||||
return QueryChunksResponse(chunks=filtered_chunks, scores=filtered_scores)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error performing BM25 search: {e}")
|
||||
# Fallback to simple text search
|
||||
return await self._fallback_keyword_search(query_string, k, score_threshold)
|
||||
|
||||
async def _fallback_keyword_search(
|
||||
self,
|
||||
query_string: str,
|
||||
k: int,
|
||||
score_threshold: float,
|
||||
) -> QueryChunksResponse:
|
||||
"""
|
||||
Fallback to simple text search when BM25 search is not available.
|
||||
"""
|
||||
# Simple text search using content field
|
||||
search_res = await asyncio.to_thread(
|
||||
self.client.query,
|
||||
collection_name=self.collection_name,
|
||||
filter='content like "%{content}%"',
|
||||
filter_params={"content": query_string},
|
||||
output_fields=["*"],
|
||||
limit=k,
|
||||
)
|
||||
chunks = [Chunk(**res["chunk_content"]) for res in search_res]
|
||||
scores = [1.0] * len(chunks) # Simple binary score for text search
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def query_hybrid(
|
||||
self,
|
||||
|
|
@ -247,6 +361,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
|
|||
if not index:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
if params and params.get("mode") == "keyword":
|
||||
# Check if this is inline Milvus (Milvus-Lite)
|
||||
if hasattr(self.config, "db_path"):
|
||||
raise NotImplementedError(
|
||||
"Keyword search is not supported in Milvus-Lite. "
|
||||
"Please use a remote Milvus server for keyword search functionality."
|
||||
)
|
||||
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def _save_openai_vector_store_file(
|
||||
|
|
|
|||
|
|
@ -218,9 +218,6 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
|
|||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
# Persist vector DB metadata in the KV store
|
||||
assert self.kvstore is not None
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
|
||||
|
||||
# Upsert model metadata in Postgres
|
||||
upsert_models(self.conn, [(vector_db.identifier, vector_db)])
|
||||
|
||||
|
|
@ -273,16 +270,120 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
|
|||
async def _save_openai_vector_store_file(
|
||||
self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]]
|
||||
) -> None:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||
"""Save vector store file metadata to Postgres database."""
|
||||
if self.conn is None:
|
||||
raise RuntimeError("PostgreSQL connection is not initialized")
|
||||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS openai_vector_store_files (
|
||||
store_id TEXT,
|
||||
file_id TEXT,
|
||||
metadata JSONB,
|
||||
PRIMARY KEY (store_id, file_id)
|
||||
)
|
||||
"""
|
||||
)
|
||||
cur.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS openai_vector_store_files_contents (
|
||||
store_id TEXT,
|
||||
file_id TEXT,
|
||||
contents JSONB,
|
||||
PRIMARY KEY (store_id, file_id)
|
||||
)
|
||||
"""
|
||||
)
|
||||
# Insert file metadata
|
||||
files_query = sql.SQL(
|
||||
"""
|
||||
INSERT INTO openai_vector_store_files (store_id, file_id, metadata)
|
||||
VALUES %s
|
||||
ON CONFLICT (store_id, file_id) DO UPDATE SET metadata = EXCLUDED.metadata
|
||||
"""
|
||||
)
|
||||
files_values = [(store_id, file_id, Json(file_info))]
|
||||
execute_values(cur, files_query, files_values, template="(%s, %s, %s)")
|
||||
# Insert file contents
|
||||
contents_query = sql.SQL(
|
||||
"""
|
||||
INSERT INTO openai_vector_store_files_contents (store_id, file_id, contents)
|
||||
VALUES %s
|
||||
ON CONFLICT (store_id, file_id) DO UPDATE SET contents = EXCLUDED.contents
|
||||
"""
|
||||
)
|
||||
contents_values = [(store_id, file_id, Json(file_contents))]
|
||||
execute_values(cur, contents_query, contents_values, template="(%s, %s, %s)")
|
||||
except Exception as e:
|
||||
log.error(f"Error saving openai vector store file {file_id} for store {store_id}: {e}")
|
||||
raise
|
||||
|
||||
async def _load_openai_vector_store_file(self, store_id: str, file_id: str) -> dict[str, Any]:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||
"""Load vector store file metadata from Postgres database."""
|
||||
if self.conn is None:
|
||||
raise RuntimeError("PostgreSQL connection is not initialized")
|
||||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
cur.execute(
|
||||
"SELECT metadata FROM openai_vector_store_files WHERE store_id = %s AND file_id = %s",
|
||||
(store_id, file_id),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row[0] if row and row[0] is not None else {}
|
||||
except Exception as e:
|
||||
log.error(f"Error loading openai vector store file {file_id} for store {store_id}: {e}")
|
||||
return {}
|
||||
|
||||
async def _load_openai_vector_store_file_contents(self, store_id: str, file_id: str) -> list[dict[str, Any]]:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||
"""Load vector store file contents from Postgres database."""
|
||||
if self.conn is None:
|
||||
raise RuntimeError("PostgreSQL connection is not initialized")
|
||||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
cur.execute(
|
||||
"SELECT contents FROM openai_vector_store_files_contents WHERE store_id = %s AND file_id = %s",
|
||||
(store_id, file_id),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row[0] if row and row[0] is not None else []
|
||||
except Exception as e:
|
||||
log.error(f"Error loading openai vector store file contents for {file_id} in store {store_id}: {e}")
|
||||
return []
|
||||
|
||||
async def _update_openai_vector_store_file(self, store_id: str, file_id: str, file_info: dict[str, Any]) -> None:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||
"""Update vector store file metadata in Postgres database."""
|
||||
if self.conn is None:
|
||||
raise RuntimeError("PostgreSQL connection is not initialized")
|
||||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
query = sql.SQL(
|
||||
"""
|
||||
INSERT INTO openai_vector_store_files (store_id, file_id, metadata)
|
||||
VALUES %s
|
||||
ON CONFLICT (store_id, file_id) DO UPDATE SET metadata = EXCLUDED.metadata
|
||||
"""
|
||||
)
|
||||
values = [(store_id, file_id, Json(file_info))]
|
||||
execute_values(cur, query, values, template="(%s, %s, %s)")
|
||||
except Exception as e:
|
||||
log.error(f"Error updating openai vector store file {file_id} for store {store_id}: {e}")
|
||||
raise
|
||||
|
||||
async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
|
||||
"""Delete vector store file metadata from Postgres database."""
|
||||
if self.conn is None:
|
||||
raise RuntimeError("PostgreSQL connection is not initialized")
|
||||
try:
|
||||
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
cur.execute(
|
||||
"DELETE FROM openai_vector_store_files WHERE store_id = %s AND file_id = %s",
|
||||
(store_id, file_id),
|
||||
)
|
||||
cur.execute(
|
||||
"DELETE FROM openai_vector_store_files_contents WHERE store_id = %s AND file_id = %s",
|
||||
(store_id, file_id),
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(f"Error deleting openai vector store file {file_id} for store {store_id}: {e}")
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -214,7 +214,6 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
|||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from llama_stack.apis.common.content_types import (
|
|||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.common.errors import UnsupportedModelError
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
|
|
@ -39,7 +38,6 @@ from llama_stack.apis.inference import (
|
|||
ToolDefinition,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
|
|
@ -90,12 +88,6 @@ class LiteLLMOpenAIMixin(
|
|||
async def shutdown(self):
|
||||
pass
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
model_id = self.get_provider_model_id(model.provider_resource_id)
|
||||
if model_id is None:
|
||||
raise UnsupportedModelError(model.provider_resource_id, self.alias_to_provider_id_map.keys())
|
||||
return model
|
||||
|
||||
def get_litellm_model_name(self, model_id: str) -> str:
|
||||
# users may be using openai/ prefix in their model names. the openai/models.py did this by default.
|
||||
# model_id.startswith("openai/") is for backwards compatibility.
|
||||
|
|
|
|||
|
|
@ -172,8 +172,9 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store."""
|
||||
store_id = name or str(uuid.uuid4())
|
||||
created_at = int(time.time())
|
||||
# Derive the canonical vector_db_id (allow override, else generate)
|
||||
vector_db_id = provider_vector_db_id or f"vs_{uuid.uuid4()}"
|
||||
|
||||
if provider_id is None:
|
||||
raise ValueError("Provider ID is required")
|
||||
|
|
@ -181,19 +182,19 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
if embedding_model is None:
|
||||
raise ValueError("Embedding model is required")
|
||||
|
||||
# Use provided embedding dimension or default to 384
|
||||
# Embedding dimension is required (defaulted to 384 if not provided)
|
||||
if embedding_dimension is None:
|
||||
raise ValueError("Embedding dimension is required")
|
||||
|
||||
provider_vector_db_id = provider_vector_db_id or store_id
|
||||
# Register the VectorDB backing this vector store
|
||||
vector_db = VectorDB(
|
||||
identifier=store_id,
|
||||
identifier=vector_db_id,
|
||||
embedding_dimension=embedding_dimension,
|
||||
embedding_model=embedding_model,
|
||||
provider_id=provider_id,
|
||||
provider_resource_id=provider_vector_db_id,
|
||||
provider_resource_id=vector_db_id,
|
||||
vector_db_name=name,
|
||||
)
|
||||
# Register the vector DB
|
||||
await self.register_vector_db(vector_db)
|
||||
|
||||
# Create OpenAI vector store metadata
|
||||
|
|
@ -207,11 +208,11 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
in_progress=0,
|
||||
total=0,
|
||||
)
|
||||
store_info = {
|
||||
"id": store_id,
|
||||
store_info: dict[str, Any] = {
|
||||
"id": vector_db_id,
|
||||
"object": "vector_store",
|
||||
"created_at": created_at,
|
||||
"name": store_id,
|
||||
"name": name,
|
||||
"usage_bytes": 0,
|
||||
"file_counts": file_counts.model_dump(),
|
||||
"status": status,
|
||||
|
|
@ -231,18 +232,18 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
store_info["metadata"] = metadata
|
||||
|
||||
# Save to persistent storage (provider-specific)
|
||||
await self._save_openai_vector_store(store_id, store_info)
|
||||
await self._save_openai_vector_store(vector_db_id, store_info)
|
||||
|
||||
# Store in memory cache
|
||||
self.openai_vector_stores[store_id] = store_info
|
||||
self.openai_vector_stores[vector_db_id] = store_info
|
||||
|
||||
# Now that our vector store is created, attach any files that were provided
|
||||
file_ids = file_ids or []
|
||||
tasks = [self.openai_attach_file_to_vector_store(store_id, file_id) for file_id in file_ids]
|
||||
tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids]
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
# Get the updated store info and return it
|
||||
store_info = self.openai_vector_stores[store_id]
|
||||
store_info = self.openai_vector_stores[vector_db_id]
|
||||
return VectorStoreObject.model_validate(store_info)
|
||||
|
||||
async def openai_list_vector_stores(
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
"@radix-ui/react-tooltip": "^1.2.6",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"llama-stack-client": "^0.2.14",
|
||||
"llama-stack-client": "^0.2.15",
|
||||
"lucide-react": "^0.510.0",
|
||||
"next": "15.3.3",
|
||||
"next-auth": "^4.24.11",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue