Ben Browning 2025-06-25 20:53:41 -04:00 committed by GitHub
commit 7467fa21ee
24 changed files with 316 additions and 18 deletions

View file

@@ -24,7 +24,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `inline::synthetic-data-kit`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |

View file

@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.providers.datatypes import Api

from .config import SyntheticDataKitToolRuntimeConfig


async def get_provider_impl(config: SyntheticDataKitToolRuntimeConfig, deps: dict[Api, Any]):
    from .synthetic_data_kit import SyntheticDataKitToolRuntimeImpl

    impl = SyntheticDataKitToolRuntimeImpl(config, deps[Api.files])
    await impl.initialize()
    return impl

View file

@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel


class SyntheticDataKitToolRuntimeConfig(BaseModel):
    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {}

View file

@@ -0,0 +1,123 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import logging
import mimetypes
import os
import tempfile
from typing import Any

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.files.files import Files
from llama_stack.apis.tools import (
    ListToolDefsResponse,
    ToolDef,
    ToolGroup,
    ToolInvocationResult,
    ToolParameter,
    ToolRuntime,
)
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type

from .config import SyntheticDataKitToolRuntimeConfig

log = logging.getLogger(__name__)


class SyntheticDataKitToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
    def __init__(
        self,
        config: SyntheticDataKitToolRuntimeConfig,
        files_api: Files,
    ):
        self.config = config
        self.files_api = files_api

    async def initialize(self):
        pass

    async def shutdown(self):
        pass

    async def register_toolgroup(self, toolgroup: ToolGroup) -> None:
        pass

    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
        return

    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
    ) -> ListToolDefsResponse:
        return ListToolDefsResponse(
            data=[
                ToolDef(
                    name="convert_file_to_text",
                    description="Convert a file to text",
                    parameters=[
                        ToolParameter(
                            name="file_id",
                            description="The id of the file to convert.",
                            parameter_type="string",
                        ),
                    ],
                ),
            ]
        )

    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
        if tool_name != "convert_file_to_text":
            raise ValueError(f"Unknown tool: {tool_name}")

        file_id = kwargs["file_id"]
        file_response = await self.files_api.openai_retrieve_file(file_id)
        mime_type = self._guess_mime_type(file_response.filename)
        content_response = await self.files_api.openai_retrieve_file_content(file_id)
        mime_category = mime_type.split("/")[0] if mime_type else None

        if mime_category == "text":
            # Don't use synthetic-data-kit if the file is already text
            content = content_from_data_and_mime_type(content_response.body, mime_type)
            return ToolInvocationResult(
                content=content,
                metadata={},
            )
        else:
            return await asyncio.to_thread(
                self._synthetic_data_kit_convert, content_response.body, file_response.filename
            )

    def _guess_mime_type(self, filename: str) -> str | None:
        mime_type, _ = mimetypes.guess_type(filename)
        if mime_type is None and filename.endswith(".md"):
            mime_type = "text/markdown"
        return mime_type

    def _synthetic_data_kit_convert(self, content_body: bytes, filename: str) -> ToolInvocationResult:
        from synthetic_data_kit.core.ingest import process_file

        try:
            with tempfile.TemporaryDirectory() as tmpdir:
                file_path = os.path.join(tmpdir, filename)
                with open(file_path, "wb") as f:
                    f.write(content_body)
                output_path = process_file(file_path, tmpdir)
                with open(output_path) as f:
                    content = f.read()
                return ToolInvocationResult(
                    content=content,
                    metadata={},
                )
        except Exception as e:
            return ToolInvocationResult(
                content="",
                error_message=f"Error converting file: {e}",
                error_code=1,
                metadata={},
            )
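
For orientation, here is a minimal usage sketch, not part of this commit: it assumes `impl` is the SyntheticDataKitToolRuntimeImpl returned by get_provider_impl() and `file_id` refers to a document already uploaded through the Files API; both names are placeholders.

# Illustrative sketch only; `impl` and `file_id` are assumed to exist as described above.
import asyncio


async def convert_to_text(impl, file_id: str) -> str:
    # invoke_tool dispatches on the single tool name advertised by list_runtime_tools().
    result = await impl.invoke_tool("convert_file_to_text", {"file_id": file_id})
    if result.error_code or result.error_message:
        raise RuntimeError(f"Conversion failed: {result.error_message}")
    return result.content


# text = asyncio.run(convert_to_text(impl, file_id))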

View file

@@ -16,6 +16,8 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"

-    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
+    impl = FaissVectorIOAdapter(
+        config, deps[Api.inference], deps.get(Api.files, None), deps.get(Api.tool_runtime, None)
+    )
     await impl.initialize()
     return impl

View file

@@ -18,6 +18,7 @@ from numpy.typing import NDArray
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.tools.tools import ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -150,10 +151,17 @@ class FaissIndex(EmbeddingIndex):
 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
-    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self,
+        config: FaissVectorIOConfig,
+        inference_api: Inference,
+        files_api: Files | None = None,
+        tool_runtime_api: ToolRuntime | None = None,
+    ) -> None:
         self.config = config
         self.inference_api = inference_api
         self.files_api = files_api
+        self.tool_runtime_api = tool_runtime_api
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.kvstore: KVStore | None = None
         self.openai_vector_stores: dict[str, dict[str, Any]] = {}

View file

@@ -15,6 +15,8 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
     from .sqlite_vec import SQLiteVecVectorIOAdapter

     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
+    impl = SQLiteVecVectorIOAdapter(
+        config, deps[Api.inference], deps.get(Api.files, None), deps.get(Api.tool_runtime, None)
+    )
     await impl.initialize()
     return impl

View file

@@ -17,6 +17,7 @@ from numpy.typing import NDArray
 from llama_stack.apis.files.files import Files
 from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.tools.tools import ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -419,10 +420,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
     and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
     """

-    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self, config, inference_api: Inference, files_api: Files | None, tool_runtime_api: ToolRuntime | None
+    ) -> None:
         self.config = config
         self.inference_api = inference_api
         self.files_api = files_api
+        self.tool_runtime_api = tool_runtime_api
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.openai_vector_stores: dict[str, dict[str, Any]] = {}

View file

@@ -34,6 +34,14 @@ def available_providers() -> list[ProviderSpec]:
             config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
             api_dependencies=[Api.vector_io, Api.inference],
         ),
+        InlineProviderSpec(
+            api=Api.tool_runtime,
+            provider_type="inline::synthetic-data-kit",
+            pip_packages=["synthetic-data-kit"],
+            module="llama_stack.providers.inline.tool_runtime.synthetic-data-kit",
+            config_class="llama_stack.providers.inline.tool_runtime.synthetic-data-kit.config.SyntheticDataKitToolRuntimeConfig",
+            api_dependencies=[Api.files],
+        ),
         remote_provider_spec(
             api=Api.tool_runtime,
             adapter=AdapterSpec(

View file

@@ -24,7 +24,7 @@ def available_providers() -> list[ProviderSpec]:
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             deprecation_warning="Please use the `inline::faiss` provider instead.",
             api_dependencies=[Api.inference],
-            optional_api_dependencies=[Api.files],
+            optional_api_dependencies=[Api.files, Api.tool_runtime],
         ),
         InlineProviderSpec(
             api=Api.vector_io,
@@ -33,7 +33,7 @@ def available_providers() -> list[ProviderSpec]:
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             api_dependencies=[Api.inference],
-            optional_api_dependencies=[Api.files],
+            optional_api_dependencies=[Api.files, Api.tool_runtime],
         ),
         # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
         # source distribution and the wheels are not available for all platforms.
@@ -44,7 +44,7 @@ def available_providers() -> list[ProviderSpec]:
             module="llama_stack.providers.inline.vector_io.sqlite_vec",
             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
             api_dependencies=[Api.inference],
-            optional_api_dependencies=[Api.files],
+            optional_api_dependencies=[Api.files, Api.tool_runtime],
         ),
         InlineProviderSpec(
             api=Api.vector_io,
@@ -54,7 +54,7 @@ def available_providers() -> list[ProviderSpec]:
             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
             deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
             api_dependencies=[Api.inference],
-            optional_api_dependencies=[Api.files],
+            optional_api_dependencies=[Api.files, Api.tool_runtime],
         ),
         remote_provider_spec(
             Api.vector_io,

View file

@@ -6,14 +6,14 @@
 import asyncio
 import logging
-import mimetypes
 import time
 import uuid
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Any, cast

 from llama_stack.apis.files import Files
 from llama_stack.apis.files.files import OpenAIFileObject
+from llama_stack.apis.tools.tools import ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -36,7 +36,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
-from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
+from llama_stack.providers.utils.memory.vector_store import make_overlapped_chunks

 logger = logging.getLogger(__name__)
@@ -54,6 +54,7 @@ class OpenAIVectorStoreMixin(ABC):
     # These should be provided by the implementing class
     openai_vector_stores: dict[str, dict[str, Any]]
     files_api: Files | None
+    tool_runtime_api: ToolRuntime | None

     @abstractmethod
     async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
@@ -526,6 +527,14 @@
             )
             return vector_store_file_object

+        if not hasattr(self, "tool_runtime_api") or not self.tool_runtime_api:
+            vector_store_file_object.status = "failed"
+            vector_store_file_object.last_error = VectorStoreFileLastError(
+                code="server_error",
+                message="Tool runtime API is not available",
+            )
+            return vector_store_file_object
+
         if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
             max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
             chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
@@ -536,11 +545,13 @@
         try:
             file_response = await self.files_api.openai_retrieve_file(file_id)
-            mime_type, _ = mimetypes.guess_type(file_response.filename)
-            content_response = await self.files_api.openai_retrieve_file_content(file_id)
-
-            content = content_from_data_and_mime_type(content_response.body, mime_type)
+            tool_result = await self.tool_runtime_api.invoke_tool(
+                "convert_file_to_text",
+                {"file_id": file_id},
+            )
+            if tool_result.error_code or tool_result.error_message:
+                raise ValueError(f"Failed to convert file to text: {tool_result.error_message}")
+            content = cast(str, tool_result.content)  # The tool always returns strings

             chunks = make_overlapped_chunks(
                 file_id,
                 content,
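
Because vector-io providers now delegate file-to-text conversion to the tool runtime, the attach-file path can be exercised in tests without installing synthetic-data-kit by swapping in a stub. A minimal sketch, assuming only the invoke_tool signature and ToolInvocationResult shown above; the stub class itself is hypothetical and not part of this commit.

# Hypothetical test stub; returns canned text instead of invoking synthetic-data-kit.
from llama_stack.apis.tools import ToolInvocationResult


class StubToolRuntime:
    async def invoke_tool(self, tool_name: str, kwargs: dict) -> ToolInvocationResult:
        assert tool_name == "convert_file_to_text"
        return ToolInvocationResult(content="Llama 4 Maverick has 128 experts", metadata={})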

View file

@@ -31,6 +31,7 @@ distribution_spec:
     - remote::brave-search
     - remote::tavily-search
     - inline::rag-runtime
+    - inline::synthetic-data-kit
     - remote::model-context-protocol
     - remote::wolfram-alpha
 image_type: conda

View file

@@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::brave-search",
             "remote::tavily-search",
             "inline::rag-runtime",
+            "inline::synthetic-data-kit",
             "remote::model-context-protocol",
             "remote::wolfram-alpha",
         ],
@@ -91,6 +92,10 @@ def get_distribution_template() -> DistributionTemplate:
             toolgroup_id="builtin::wolfram_alpha",
             provider_id="wolfram-alpha",
         ),
+        ToolGroupInput(
+            toolgroup_id="builtin::document_conversion",
+            provider_id="synthetic-data-kit",
+        ),
     ]

     return DistributionTemplate(

View file

@@ -115,6 +115,9 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: synthetic-data-kit
+    provider_type: inline::synthetic-data-kit
+    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
@@ -159,5 +162,7 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::wolfram_alpha
   provider_id: wolfram-alpha
+- toolgroup_id: builtin::document_conversion
+  provider_id: synthetic-data-kit
 server:
   port: 8321

View file

@@ -113,6 +113,9 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: synthetic-data-kit
+    provider_type: inline::synthetic-data-kit
+    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
@@ -149,5 +152,7 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::wolfram_alpha
   provider_id: wolfram-alpha
+- toolgroup_id: builtin::document_conversion
+  provider_id: synthetic-data-kit
 server:
   port: 8321

View file

@@ -38,6 +38,7 @@ distribution_spec:
     - remote::brave-search
     - remote::tavily-search
     - inline::rag-runtime
+    - inline::synthetic-data-kit
     - remote::model-context-protocol
 image_type: conda
 additional_pip_packages:

View file

@@ -163,6 +163,9 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: synthetic-data-kit
+    provider_type: inline::synthetic-data-kit
+    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
@@ -822,5 +825,7 @@ tool_groups:
   provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::document_conversion
+  provider_id: synthetic-data-kit
 server:
   port: 8321

View file

@@ -163,6 +163,7 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::brave-search",
             "remote::tavily-search",
             "inline::rag-runtime",
+            "inline::synthetic-data-kit",
             "remote::model-context-protocol",
         ],
     }
@@ -214,6 +215,10 @@ def get_distribution_template() -> DistributionTemplate:
             toolgroup_id="builtin::rag",
             provider_id="rag-runtime",
         ),
+        ToolGroupInput(
+            toolgroup_id="builtin::document_conversion",
+            provider_id="synthetic-data-kit",
+        ),
     ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",

View file

@@ -0,0 +1,27 @@
# Llama Stack

## Llama Stack Overview

Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides

* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment.
* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android.
* Standalone applications as examples for how to build production-grade AI applications with Llama Stack.

## Llama Stack Benefits

* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.

# Llama 4 Maverick

Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts.

View file

@@ -0,0 +1,24 @@
Llama Stack
Llama Stack Overview
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment.
* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android.
* Standalone applications as examples for how to build production-grade AI applications with Llama Stack.
Llama Stack Benefits
* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
Llama 4 Maverick
Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts.

View file

@@ -42,12 +42,40 @@ test_response_file_search:
         # vector_store_ids param for file_search tool gets added by the test runner
         file_content: "Llama 4 Maverick has 128 experts"
     output: "128"
+  - case_id: "llama_experts_docx"
+    input: "How many experts does the Llama 4 Maverick model have?"
+    tools:
+      - type: file_search
+        # vector_store_ids param for file_search toolgets added by the test runner
+        file_path: "docs/llama_stack_and_models.docx"
+    output: "128"
+  - case_id: "llama_experts_md"
+    input: "How many experts does the Llama 4 Maverick model have?"
+    tools:
+      - type: file_search
+        # vector_store_ids param for file_search toolgets added by the test runner
+        file_path: "docs/llama_stack_and_models.md"
+    output: "128"
   - case_id: "llama_experts_pdf"
     input: "How many experts does the Llama 4 Maverick model have?"
     tools:
       - type: file_search
         # vector_store_ids param for file_search toolgets added by the test runner
-        file_path: "pdfs/llama_stack_and_models.pdf"
+        file_path: "docs/llama_stack_and_models.pdf"
     output: "128"
+  - case_id: "llama_experts_pptx"
+    input: "How many experts does the Llama 4 Maverick model have?"
+    tools:
+      - type: file_search
+        # vector_store_ids param for file_search toolgets added by the test runner
+        file_path: "docs/llama_stack_and_models.pptx"
+    output: "128"
+  - case_id: "llama_experts_txt"
+    input: "How many experts does the Llama 4 Maverick model have?"
+    tools:
+      - type: file_search
+        # vector_store_ids param for file_search toolgets added by the test runner
+        file_path: "docs/llama_stack_and_models.txt"
+    output: "128"

 test_response_mcp_tool: