mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-27 17:01:59 +00:00
chore: Enabling Milvus for VectorIO CI
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
709eb7da33
commit
c8d41d45ec
115 changed files with 2919 additions and 184 deletions
|
|
@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have:
|
|||
provider_data_validator: str | None = Field(
|
||||
default=None,
|
||||
)
|
||||
description: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
A description of the provider. This is used to display in the documentation.
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have:
|
|||
provider_data_validator: str | None = Field(
|
||||
default=None,
|
||||
)
|
||||
description: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
A description of the provider. This is used to display in the documentation.
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class RemoteProviderConfig(BaseModel):
|
||||
|
|
|
|||
|
|
@ -359,3 +359,6 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
return await self.openai_responses_impl.list_openai_response_input_items(
|
||||
response_id, after, before, include, limit, order
|
||||
)
|
||||
|
||||
async def delete_openai_response(self, response_id: str) -> None:
|
||||
return await self.openai_responses_impl.delete_openai_response(response_id)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
AllowedToolsFilter,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseInputMessageContent,
|
||||
|
|
@ -574,6 +575,9 @@ class OpenAIResponsesImpl:
|
|||
input=input,
|
||||
)
|
||||
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
return await self.responses_store.delete_response_object(response_id)
|
||||
|
||||
async def _convert_response_tools_to_chat_tools(
|
||||
self, tools: list[OpenAIResponseInputTool]
|
||||
) -> tuple[
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel):
|
|||
description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
|
||||
)
|
||||
sqlite_db_path: str = Field(
|
||||
default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
|
||||
default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
|
||||
description="The path to the SQLite database to use for storing traces",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.tool_runtime,
|
||||
Api.tool_groups,
|
||||
],
|
||||
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.inline.datasetio.localfs",
|
||||
config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
|
||||
api_dependencies=[],
|
||||
description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.datasetio,
|
||||
|
|
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.datasetio.huggingface",
|
||||
config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
|
||||
description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.datasetio.nvidia",
|
||||
config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
|
||||
description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.inference,
|
||||
Api.agents,
|
||||
],
|
||||
description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.eval,
|
||||
|
|
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.eval.nvidia",
|
||||
config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
|
||||
description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
|
||||
),
|
||||
api_dependencies=[
|
||||
Api.datasetio,
|
||||
|
|
|
|||
|
|
@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=sql_store_pip_packages,
|
||||
module="llama_stack.providers.inline.files.localfs",
|
||||
config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
|
||||
description="Local filesystem-based file storage provider for managing files and documents locally.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=META_REFERENCE_DEPS,
|
||||
module="llama_stack.providers.inline.inference.meta_reference",
|
||||
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
|
||||
description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
|
|
@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.inline.inference.vllm",
|
||||
config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
|
||||
description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
|
|
@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.inline.inference.sentence_transformers",
|
||||
config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
|
||||
description="Sentence Transformers inference provider for text embeddings and similarity search.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
|
|
@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.inference.cerebras",
|
||||
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
|
||||
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
|
||||
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
|
||||
module="llama_stack.providers.remote.inference.ollama",
|
||||
description="Ollama inference provider for running local models through the Ollama runtime.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["openai"],
|
||||
module="llama_stack.providers.remote.inference.vllm",
|
||||
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
|
||||
description="Remote vLLM inference provider for connecting to vLLM servers.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
|
||||
description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
|
||||
description="HuggingFace Inference API serverless provider for on-demand model inference.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
|
||||
description="HuggingFace Inference Endpoints provider for dedicated model serving.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.fireworks",
|
||||
config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
|
||||
description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.together",
|
||||
config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
|
||||
description="Together AI inference provider for open-source models and collaborative AI development.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["boto3"],
|
||||
module="llama_stack.providers.remote.inference.bedrock",
|
||||
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
|
||||
description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.inference.databricks",
|
||||
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
||||
description="Databricks inference provider for running models on Databricks' unified analytics platform.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.remote.inference.nvidia",
|
||||
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
|
||||
description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["openai"],
|
||||
module="llama_stack.providers.remote.inference.runpod",
|
||||
config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
|
||||
description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.openai",
|
||||
config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
|
||||
description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.anthropic",
|
||||
config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
|
||||
description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.gemini",
|
||||
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
|
||||
description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.groq",
|
||||
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
|
||||
description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.fireworks_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
|
||||
description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.llama_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
|
||||
description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.together_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
|
||||
description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.groq_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
|
||||
description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.sambanova_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
|
||||
description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.cerebras_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
|
||||
description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.sambanova",
|
||||
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
|
||||
description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.passthrough",
|
||||
config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
|
||||
description="Passthrough inference provider for connecting to any external inference service not directly supported.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.inference.watsonx",
|
||||
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
|
||||
description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.datasetio,
|
||||
Api.datasets,
|
||||
],
|
||||
description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.post_training,
|
||||
|
|
@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.datasetio,
|
||||
Api.datasets,
|
||||
],
|
||||
description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.post_training,
|
||||
|
|
@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["requests", "aiohttp"],
|
||||
module="llama_stack.providers.remote.post_training.nvidia",
|
||||
config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
|
||||
description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.inline.safety.prompt_guard",
|
||||
config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
|
||||
description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.safety,
|
||||
|
|
@ -35,6 +36,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api_dependencies=[
|
||||
Api.inference,
|
||||
],
|
||||
description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.safety,
|
||||
|
|
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
],
|
||||
module="llama_stack.providers.inline.safety.code_scanner",
|
||||
config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
|
||||
description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.safety,
|
||||
|
|
@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["boto3"],
|
||||
module="llama_stack.providers.remote.safety.bedrock",
|
||||
config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
|
||||
description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["requests"],
|
||||
module="llama_stack.providers.remote.safety.nvidia",
|
||||
config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
|
||||
description="NVIDIA's safety provider for content moderation and safety filtering.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.safety.sambanova",
|
||||
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
|
||||
description="SambaNova's safety provider for content moderation and safety filtering.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.datasetio,
|
||||
Api.datasets,
|
||||
],
|
||||
description="Basic scoring provider for simple evaluation metrics and scoring functions.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.scoring,
|
||||
|
|
@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.datasets,
|
||||
Api.inference,
|
||||
],
|
||||
description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.scoring,
|
||||
|
|
@ -44,5 +46,6 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.datasets,
|
||||
],
|
||||
provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator",
|
||||
description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -24,5 +24,6 @@ def available_providers() -> list[ProviderSpec]:
|
|||
optional_api_dependencies=[Api.datasetio],
|
||||
module="llama_stack.providers.inline.telemetry.meta_reference",
|
||||
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
|
||||
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.inline.tool_runtime.rag",
|
||||
config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
|
||||
api_dependencies=[Api.vector_io, Api.inference],
|
||||
description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.tool_runtime,
|
||||
|
|
@ -42,6 +43,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig",
|
||||
pip_packages=["requests"],
|
||||
provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator",
|
||||
description="Brave Search tool for web search capabilities with privacy-focused results.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -52,6 +54,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig",
|
||||
pip_packages=["requests"],
|
||||
provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator",
|
||||
description="Bing Search tool for web search capabilities using Microsoft's search engine.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -62,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig",
|
||||
pip_packages=["requests"],
|
||||
provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator",
|
||||
description="Tavily Search tool for AI-optimized web search with structured results.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -72,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig",
|
||||
pip_packages=["requests"],
|
||||
provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator",
|
||||
description="Wolfram Alpha tool for computational knowledge and mathematical calculations.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig",
|
||||
pip_packages=["mcp"],
|
||||
provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator",
|
||||
description="Model Context Protocol (MCP) tool for standardized tool calling and context management.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
deprecation_warning="Please use the `inline::faiss` provider instead.",
|
||||
api_dependencies=[Api.inference],
|
||||
optional_api_dependencies=[Api.files],
|
||||
description="Meta's reference implementation of a vector database.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.vector_io,
|
||||
|
|
@ -34,6 +35,36 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
optional_api_dependencies=[Api.files],
|
||||
description="""
|
||||
[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly in memory.
|
||||
That means you'll get fast and efficient vector retrieval.
|
||||
|
||||
## Features
|
||||
|
||||
- Lightweight and easy to use
|
||||
- Fully integrated with Llama Stack
|
||||
- GPU support
|
||||
|
||||
## Usage
|
||||
|
||||
To use Faiss in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use Faiss.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install Faiss using pip:
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu
|
||||
```
|
||||
## Documentation
|
||||
See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
|
||||
more details about Faiss in general.
|
||||
""",
|
||||
),
|
||||
# NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
|
||||
# source distribution and the wheels are not available for all platforms.
|
||||
|
|
@ -45,6 +76,204 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
optional_api_dependencies=[Api.files],
|
||||
description="""
|
||||
[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly within an SQLite database.
|
||||
That means you're not limited to storing vectors in memory or in a separate service.
|
||||
|
||||
## Features
|
||||
|
||||
- Lightweight and easy to use
|
||||
- Fully integrated with Llama Stacks
|
||||
- Uses disk-based storage for persistence, allowing for larger vector storage
|
||||
|
||||
### Comparison to Faiss
|
||||
|
||||
The choice between Faiss and sqlite-vec should be made based on the needs of your application,
|
||||
as they have different strengths.
|
||||
|
||||
#### Choosing the Right Provider
|
||||
|
||||
Scenario | Recommended Tool | Reason
|
||||
-- |-----------------| --
|
||||
Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches
|
||||
Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads
|
||||
Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing
|
||||
Large datasets | sqlite-vec | Disk-based storage for larger vector storage
|
||||
Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration
|
||||
|
||||
#### Empirical Example
|
||||
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
:width: 400px
|
||||
```
|
||||
|
||||
You will notice that the average write time for `sqlite-vec` was 788ms, compared to
|
||||
47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather
|
||||
uniformly spread across the [1500, 100000] interval.
|
||||
|
||||
Looking at each individual write in the order that the documents are inserted you'll see the increase in
|
||||
write speed as Faiss reindexes the vectors after each write.
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
:width: 400px
|
||||
```
|
||||
|
||||
In comparison, the read times for Faiss was on average 10% faster than sqlite-vec.
|
||||
The modes of the two distributions highlight the differences much further where Faiss
|
||||
will likely yield faster read performance.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss read times
|
||||
:width: 400px
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
To use sqlite-vec in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use SQLite-Vec.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
The SQLite-vec provider supports three search modes:
|
||||
|
||||
1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings.
|
||||
2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5.
|
||||
3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates.
|
||||
|
||||
Example with hybrid search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
|
||||
# Using RRF ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
|
||||
"max_chunks": 3,
|
||||
"score_threshold": 0.7,
|
||||
"ranker": {"type": "rrf", "impact_factor": 60.0},
|
||||
},
|
||||
)
|
||||
|
||||
# Using weighted ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
|
||||
"max_chunks": 3,
|
||||
"score_threshold": 0.7,
|
||||
"ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Example with explicit vector search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
```
|
||||
|
||||
Example with keyword search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
```
|
||||
|
||||
## Supported Search Modes
|
||||
|
||||
The SQLite vector store supports three search modes:
|
||||
|
||||
1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks
|
||||
2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks
|
||||
3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker
|
||||
|
||||
### Hybrid Search
|
||||
|
||||
Hybrid search combines the strengths of both vector and keyword search by:
|
||||
- Computing vector similarity scores
|
||||
- Computing keyword match scores
|
||||
- Using a ranker to combine these scores
|
||||
|
||||
Two ranker types are supported:
|
||||
|
||||
1. **RRF (Reciprocal Rank Fusion)**:
|
||||
- Combines ranks from both vector and keyword results
|
||||
- Uses an impact factor (default: 60.0) to control the weight of higher-ranked results
|
||||
- Good for balancing between vector and keyword results
|
||||
- The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks
|
||||
|
||||
2. **Weighted**:
|
||||
- Linearly combines normalized vector and keyword scores
|
||||
- Uses an alpha parameter (0-1) to control the blend:
|
||||
- alpha=0: Only use keyword scores
|
||||
- alpha=1: Only use vector scores
|
||||
- alpha=0.5: Equal weight to both (default)
|
||||
|
||||
Example using RAGQueryConfig with different search modes:
|
||||
|
||||
```python
|
||||
from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
|
||||
|
||||
# Vector search
|
||||
config = RAGQueryConfig(mode="vector", max_chunks=5)
|
||||
|
||||
# Keyword search
|
||||
config = RAGQueryConfig(mode="keyword", max_chunks=5)
|
||||
|
||||
# Hybrid search with custom RRF ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid",
|
||||
max_chunks=5,
|
||||
ranker=RRFRanker(impact_factor=50.0), # Custom impact factor
|
||||
)
|
||||
|
||||
# Hybrid search with weighted ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid",
|
||||
max_chunks=5,
|
||||
ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword
|
||||
)
|
||||
|
||||
# Hybrid search with default RRF ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid", max_chunks=5
|
||||
) # Will use RRF with impact_factor=60.0
|
||||
```
|
||||
|
||||
Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install SQLite-Vec using pip:
|
||||
|
||||
```bash
|
||||
pip install sqlite-vec
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
|
||||
|
||||
[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
|
||||
""",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.vector_io,
|
||||
|
|
@ -55,6 +284,9 @@ def available_providers() -> list[ProviderSpec]:
|
|||
deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
|
||||
api_dependencies=[Api.inference],
|
||||
optional_api_dependencies=[Api.files],
|
||||
description="""
|
||||
Please refer to the sqlite-vec provider documentation.
|
||||
""",
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.vector_io,
|
||||
|
|
@ -63,6 +295,39 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["chromadb-client"],
|
||||
module="llama_stack.providers.remote.vector_io.chroma",
|
||||
config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
|
||||
description="""
|
||||
[Chroma](https://www.trychroma.com/) is an inline and remote vector
|
||||
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
|
||||
That means you're not limited to storing vectors in memory or in a separate service.
|
||||
|
||||
## Features
|
||||
Chroma supports:
|
||||
- Store embeddings and their metadata
|
||||
- Vector search
|
||||
- Full-text search
|
||||
- Document storage
|
||||
- Metadata filtering
|
||||
- Multi-modal retrieval
|
||||
|
||||
## Usage
|
||||
|
||||
To use Chrome in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use chroma.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install chroma using pip:
|
||||
|
||||
```bash
|
||||
pip install chromadb
|
||||
```
|
||||
|
||||
## Documentation
|
||||
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
|
||||
""",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
|
|
@ -73,6 +338,40 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.inline.vector_io.chroma",
|
||||
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
description="""
|
||||
[Chroma](https://www.trychroma.com/) is an inline and remote vector
|
||||
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
|
||||
That means you're not limited to storing vectors in memory or in a separate service.
|
||||
|
||||
## Features
|
||||
Chroma supports:
|
||||
- Store embeddings and their metadata
|
||||
- Vector search
|
||||
- Full-text search
|
||||
- Document storage
|
||||
- Metadata filtering
|
||||
- Multi-modal retrieval
|
||||
|
||||
## Usage
|
||||
|
||||
To use Chrome in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use chroma.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install chroma using pip:
|
||||
|
||||
```bash
|
||||
pip install chromadb
|
||||
```
|
||||
|
||||
## Documentation
|
||||
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
|
||||
|
||||
""",
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.vector_io,
|
||||
|
|
@ -81,6 +380,34 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["psycopg2-binary"],
|
||||
module="llama_stack.providers.remote.vector_io.pgvector",
|
||||
config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
|
||||
description="""
|
||||
[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly in memory.
|
||||
That means you'll get fast and efficient vector retrieval.
|
||||
|
||||
## Features
|
||||
|
||||
- Easy to use
|
||||
- Fully integrated with Llama Stack
|
||||
|
||||
## Usage
|
||||
|
||||
To use PGVector in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use Faiss.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install PGVector using docker:
|
||||
|
||||
```bash
|
||||
docker pull pgvector/pgvector:pg17
|
||||
```
|
||||
## Documentation
|
||||
See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
|
||||
""",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
|
|
@ -92,6 +419,36 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.vector_io.weaviate",
|
||||
config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
|
||||
description="""
|
||||
[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
|
||||
It allows you to store and query vectors directly within a Weaviate database.
|
||||
That means you're not limited to storing vectors in memory or in a separate service.
|
||||
|
||||
## Features
|
||||
Weaviate supports:
|
||||
- Store embeddings and their metadata
|
||||
- Vector search
|
||||
- Full-text search
|
||||
- Hybrid search
|
||||
- Document storage
|
||||
- Metadata filtering
|
||||
- Multi-modal retrieval
|
||||
|
||||
## Usage
|
||||
|
||||
To use Weaviate in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use chroma.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart).
|
||||
|
||||
## Documentation
|
||||
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
|
||||
""",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
|
|
@ -102,6 +459,49 @@ def available_providers() -> list[ProviderSpec]:
|
|||
module="llama_stack.providers.inline.vector_io.qdrant",
|
||||
config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
description=r"""
|
||||
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly in memory.
|
||||
That means you'll get fast and efficient vector retrieval.
|
||||
|
||||
> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
|
||||
> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
|
||||
>
|
||||
> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
|
||||
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
- Lightweight and easy to use
|
||||
- Fully integrated with Llama Stack
|
||||
- Apache 2.0 license terms
|
||||
- Store embeddings and their metadata
|
||||
- Supports search by
|
||||
[Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
|
||||
and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
|
||||
- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
|
||||
- [Medatata filtering](https://qdrant.tech/articles/vector-search-filtering/)
|
||||
- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)
|
||||
|
||||
## Usage
|
||||
|
||||
To use Qdrant in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use Qdrant.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install Qdrant using docker:
|
||||
|
||||
```bash
|
||||
docker pull qdrant/qdrant
|
||||
```
|
||||
## Documentation
|
||||
See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
|
||||
""",
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.vector_io,
|
||||
|
|
@ -110,6 +510,9 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=["qdrant-client"],
|
||||
module="llama_stack.providers.remote.vector_io.qdrant",
|
||||
config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
|
||||
description="""
|
||||
Please refer to the inline provider documentation.
|
||||
""",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
|
|
@ -117,9 +520,113 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.vector_io,
|
||||
AdapterSpec(
|
||||
adapter_type="milvus",
|
||||
pip_packages=["pymilvus"],
|
||||
pip_packages=["pymilvus[marshmallow<3.13.0]"],
|
||||
module="llama_stack.providers.remote.vector_io.milvus",
|
||||
config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
|
||||
description="""
|
||||
[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
|
||||
allows you to store and query vectors directly within a Milvus database.
|
||||
That means you're not limited to storing vectors in memory or in a separate service.
|
||||
|
||||
## Features
|
||||
|
||||
- Easy to use
|
||||
- Fully integrated with Llama Stack
|
||||
|
||||
## Usage
|
||||
|
||||
To use Milvus in your Llama Stack project, follow these steps:
|
||||
|
||||
1. Install the necessary dependencies.
|
||||
2. Configure your Llama Stack project to use Milvus.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install Milvus using pymilvus:
|
||||
|
||||
```bash
|
||||
pip install pymilvus
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
In Llama Stack, Milvus can be configured in two ways:
|
||||
- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
|
||||
- **Remote Configuration** - Connects to a remote Milvus server
|
||||
|
||||
### Inline (Local) Configuration
|
||||
|
||||
The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:
|
||||
|
||||
```yaml
|
||||
vector_io:
|
||||
- provider_id: milvus
|
||||
provider_type: inline::milvus
|
||||
config:
|
||||
db_path: ~/.llama/distributions/together/milvus_store.db
|
||||
```
|
||||
|
||||
### Remote Configuration
|
||||
|
||||
Remote configuration is suitable for larger data storage requirements:
|
||||
|
||||
#### Standard Remote Connection
|
||||
|
||||
```yaml
|
||||
vector_io:
|
||||
- provider_id: milvus
|
||||
provider_type: remote::milvus
|
||||
config:
|
||||
uri: "http://<host>:<port>"
|
||||
token: "<user>:<password>"
|
||||
```
|
||||
|
||||
#### TLS-Enabled Remote Connection (One-way TLS)
|
||||
|
||||
For connections to Milvus instances with one-way TLS enabled:
|
||||
|
||||
```yaml
|
||||
vector_io:
|
||||
- provider_id: milvus
|
||||
provider_type: remote::milvus
|
||||
config:
|
||||
uri: "https://<host>:<port>"
|
||||
token: "<user>:<password>"
|
||||
secure: True
|
||||
server_pem_path: "/path/to/server.pem"
|
||||
```
|
||||
|
||||
#### Mutual TLS (mTLS) Remote Connection
|
||||
|
||||
For connections to Milvus instances with mutual TLS (mTLS) enabled:
|
||||
|
||||
```yaml
|
||||
vector_io:
|
||||
- provider_id: milvus
|
||||
provider_type: remote::milvus
|
||||
config:
|
||||
uri: "https://<host>:<port>"
|
||||
token: "<user>:<password>"
|
||||
secure: True
|
||||
ca_pem_path: "/path/to/ca.pem"
|
||||
client_pem_path: "/path/to/client.pem"
|
||||
client_key_path: "/path/to/client.key"
|
||||
```
|
||||
|
||||
#### Key Parameters for TLS Configuration
|
||||
|
||||
- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
|
||||
- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
|
||||
- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
|
||||
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
|
||||
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
|
||||
|
||||
## Documentation
|
||||
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
|
||||
|
||||
For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
|
||||
""",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
|
|
@ -131,5 +638,8 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
optional_api_dependencies=[Api.files],
|
||||
description="""
|
||||
Please refer to the remote provider documentation.
|
||||
""",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import base64
|
||||
import uuid
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from typing import Any
|
||||
|
|
@ -77,6 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
content_has_media,
|
||||
convert_image_content_to_url,
|
||||
interleaved_content_as_str,
|
||||
localize_image_content,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
|
@ -496,6 +498,21 @@ class OllamaInferenceAdapter(
|
|||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
model_obj = await self._get_model(model)
|
||||
|
||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
||||
async def _convert_message(m: OpenAIMessageParam) -> OpenAIMessageParam:
|
||||
if isinstance(m.content, list):
|
||||
for c in m.content:
|
||||
if c.type == "image_url" and c.image_url and c.image_url.url:
|
||||
localize_result = await localize_image_content(c.image_url.url)
|
||||
if localize_result is None:
|
||||
raise ValueError(f"Failed to localize image content from {c.image_url.url}")
|
||||
|
||||
content, format = localize_result
|
||||
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
|
||||
return m
|
||||
|
||||
messages = [await _convert_message(m) for m in messages]
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
|
|
|
|||
|
|
@ -6,17 +6,19 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MilvusVectorIOConfig(BaseModel):
|
||||
uri: str
|
||||
token: str | None = None
|
||||
consistency_level: str = "Strong"
|
||||
uri: str = Field(description="The URI of the Milvus server")
|
||||
token: str | None = Field(description="The token of the Milvus server")
|
||||
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
|
||||
|
||||
# This configuration allows additional fields to be passed through to the underlying Milvus client.
|
||||
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -180,11 +180,10 @@ def request_has_media(request: ChatCompletionRequest | CompletionRequest):
|
|||
return content_has_media(request.content)
|
||||
|
||||
|
||||
async def localize_image_content(media: ImageContentItem) -> tuple[bytes, str]:
|
||||
image = media.image
|
||||
if image.url and image.url.uri.startswith("http"):
|
||||
async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
|
||||
if uri.startswith("http"):
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(image.url.uri)
|
||||
r = await client.get(uri)
|
||||
content = r.content
|
||||
content_type = r.headers.get("content-type")
|
||||
if content_type:
|
||||
|
|
@ -194,11 +193,7 @@ async def localize_image_content(media: ImageContentItem) -> tuple[bytes, str]:
|
|||
|
||||
return content, format
|
||||
else:
|
||||
# data is a base64 encoded string, decode it to bytes first
|
||||
# TODO(mf): do this more efficiently, decode less
|
||||
data_bytes = base64.b64decode(image.data)
|
||||
pil_image = PIL_Image.open(io.BytesIO(data_bytes))
|
||||
return data_bytes, pil_image.format
|
||||
return None
|
||||
|
||||
|
||||
async def convert_image_content_to_url(
|
||||
|
|
@ -208,7 +203,18 @@ async def convert_image_content_to_url(
|
|||
if image.url and (not download or image.url.uri.startswith("data")):
|
||||
return image.url.uri
|
||||
|
||||
content, format = await localize_image_content(media)
|
||||
if image.data:
|
||||
# data is a base64 encoded string, decode it to bytes first
|
||||
# TODO(mf): do this more efficiently, decode less
|
||||
content = base64.b64decode(image.data)
|
||||
pil_image = PIL_Image.open(io.BytesIO(content))
|
||||
format = pil_image.format
|
||||
else:
|
||||
localize_result = await localize_image_content(image.url.uri)
|
||||
if localize_result is None:
|
||||
raise ValueError(f"Failed to localize image content from {image.url.uri}")
|
||||
content, format = localize_result
|
||||
|
||||
if include_format:
|
||||
return f"data:image/{format};base64," + base64.b64encode(content).decode("utf-8")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from llama_stack.apis.agents import (
|
|||
from llama_stack.apis.agents.openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseObject,
|
||||
OpenAIResponseObjectWithInput,
|
||||
|
|
@ -114,6 +115,13 @@ class ResponsesStore:
|
|||
|
||||
return OpenAIResponseObjectWithInput(**row["response_object"])
|
||||
|
||||
async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}, policy=self.policy)
|
||||
if not row:
|
||||
raise ValueError(f"Response with id {response_id} not found")
|
||||
await self.sql_store.delete("openai_responses", where={"id": response_id})
|
||||
return OpenAIDeleteResponseObject(id=response_id)
|
||||
|
||||
async def list_response_input_items(
|
||||
self,
|
||||
response_id: str,
|
||||
|
|
|
|||
|
|
@ -166,6 +166,10 @@ class AuthorizedSqlStore:
|
|||
|
||||
return results.data[0] if results.data else None
|
||||
|
||||
async def delete(self, table: str, where: Mapping[str, Any]) -> None:
|
||||
"""Delete rows with automatic access control filtering."""
|
||||
await self.sql_store.delete(table, where)
|
||||
|
||||
def _build_access_control_where_clause(self, policy: list[AccessRule]) -> str:
|
||||
"""Build SQL WHERE clause for access control filtering.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue