diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index e09c79359..4a77d5bd9 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -24,7 +24,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `inline::synthetic-data-kit`, `remote::model-context-protocol`, `remote::wolfram-alpha` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/__init__.py b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/__init__.py new file mode 100644 index 000000000..fd722c0ec --- /dev/null +++ b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.providers.datatypes import Api + +from .config import SyntheticDataKitToolRuntimeConfig + + +async def get_provider_impl(config: SyntheticDataKitToolRuntimeConfig, deps: dict[Api, Any]): + from .synthetic_data_kit import SyntheticDataKitToolRuntimeImpl + + impl = SyntheticDataKitToolRuntimeImpl(config, deps[Api.files]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/config.py b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/config.py new file mode 100644 index 000000000..eae7c7550 --- /dev/null +++ b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/config.py @@ -0,0 +1,15 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel + + +class SyntheticDataKitToolRuntimeConfig(BaseModel): + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return {} diff --git a/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/synthetic_data_kit.py b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/synthetic_data_kit.py new file mode 100644 index 000000000..e06b5ee97 --- /dev/null +++ b/llama_stack/providers/inline/tool_runtime/synthetic-data-kit/synthetic_data_kit.py @@ -0,0 +1,123 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +import asyncio +import logging +import mimetypes +import os +import tempfile +from typing import Any + +from llama_stack.apis.common.content_types import URL +from llama_stack.apis.files.files import Files +from llama_stack.apis.tools import ( + ListToolDefsResponse, + ToolDef, + ToolGroup, + ToolInvocationResult, + ToolParameter, + ToolRuntime, +) +from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate +from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type + +from .config import SyntheticDataKitToolRuntimeConfig + +log = logging.getLogger(__name__) + + +class SyntheticDataKitToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): + def __init__( + self, + config: SyntheticDataKitToolRuntimeConfig, + files_api: Files, + ): + self.config = config + self.files_api = files_api + + async def initialize(self): + pass + + async def shutdown(self): + pass + + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: + pass + + async def unregister_toolgroup(self, toolgroup_id: str) -> None: + return + + async def list_runtime_tools( + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + ) -> ListToolDefsResponse: + return ListToolDefsResponse( + data=[ + ToolDef( + name="convert_file_to_text", + description="Convert a file to text", + parameters=[ + ToolParameter( + name="file_id", + description="The id of the file to convert.", + parameter_type="string", + ), + ], + ), + ] + ) + + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + if tool_name != "convert_file_to_text": + raise ValueError(f"Unknown tool: {tool_name}") + + file_id = kwargs["file_id"] + file_response = await self.files_api.openai_retrieve_file(file_id) + mime_type = self._guess_mime_type(file_response.filename) + content_response = await self.files_api.openai_retrieve_file_content(file_id) + + mime_category = mime_type.split("/")[0] if mime_type else None + if mime_category == "text": + # Don't use synthetic-data-kit if the file is already text + content = content_from_data_and_mime_type(content_response.body, mime_type) + return ToolInvocationResult( + content=content, + metadata={}, + ) + else: + return await asyncio.to_thread( + self._synthetic_data_kit_convert, content_response.body, file_response.filename + ) + + def _guess_mime_type(self, filename: str) -> str | None: + mime_type, _ = mimetypes.guess_type(filename) + if mime_type is None and filename.endswith(".md"): + mime_type = "text/markdown" + return mime_type + + def _synthetic_data_kit_convert(self, content_body: bytes, filename: str) -> ToolInvocationResult: + from synthetic_data_kit.core.ingest import process_file + + try: + with tempfile.TemporaryDirectory() as tmpdir: + file_path = os.path.join(tmpdir, filename) + with open(file_path, "wb") as f: + f.write(content_body) + output_path = process_file(file_path, tmpdir) + with open(output_path) as f: + content = f.read() + + return ToolInvocationResult( + content=content, + metadata={}, + ) + except Exception as e: + return ToolInvocationResult( + content="", + error_message=f"Error converting file: {e}", + error_code=1, + metadata={}, + ) diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index dd1c59b7b..d97cd91b2 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -16,6 +16,8 @@ async def get_provider_impl(config: 
FaissVectorIOConfig, deps: dict[Api, Any]): assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None)) + impl = FaissVectorIOAdapter( + config, deps[Api.inference], deps.get(Api.files, None), deps.get(Api.tool_runtime, None) + ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..c83c9e46d 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -18,6 +18,7 @@ from numpy.typing import NDArray from llama_stack.apis.files import Files from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.tools.tools import ToolRuntime from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -150,10 +151,17 @@ class FaissIndex(EmbeddingIndex): class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): - def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: + def __init__( + self, + config: FaissVectorIOConfig, + inference_api: Inference, + files_api: Files | None = None, + tool_runtime_api: ToolRuntime | None = None, + ) -> None: self.config = config self.inference_api = inference_api self.files_api = files_api + self.tool_runtime_api = tool_runtime_api self.cache: dict[str, VectorDBWithIndex] = {} self.kvstore: KVStore | None = None self.openai_vector_stores: dict[str, dict[str, Any]] = {} diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index e5200a755..3786bb12b 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -15,6 +15,8 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None)) + impl = SQLiteVecVectorIOAdapter( + config, deps[Api.inference], deps.get(Api.files, None), deps.get(Api.tool_runtime, None) + ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 3b3c5f486..c9d92d5db 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,6 +17,7 @@ from numpy.typing import NDArray from llama_stack.apis.files.files import Files from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.tools.tools import ToolRuntime from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -419,10 +420,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). 
""" - def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: + def __init__( + self, config, inference_api: Inference, files_api: Files | None, tool_runtime_api: ToolRuntime | None + ) -> None: self.config = config self.inference_api = inference_api self.files_api = files_api + self.tool_runtime_api = tool_runtime_api self.cache: dict[str, VectorDBWithIndex] = {} self.openai_vector_stores: dict[str, dict[str, Any]] = {} diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index fa359f6b5..9362a7297 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -34,6 +34,14 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", api_dependencies=[Api.vector_io, Api.inference], ), + InlineProviderSpec( + api=Api.tool_runtime, + provider_type="inline::synthetic-data-kit", + pip_packages=["synthetic-data-kit"], + module="llama_stack.providers.inline.tool_runtime.synthetic-data-kit", + config_class="llama_stack.providers.inline.tool_runtime.synthetic-data-kit.config.SyntheticDataKitToolRuntimeConfig", + api_dependencies=[Api.files], + ), remote_provider_spec( api=Api.tool_runtime, adapter=AdapterSpec( diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 55c1b5617..d367d4982 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -24,7 +24,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", deprecation_warning="Please use the `inline::faiss` provider instead.", api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files], + optional_api_dependencies=[Api.files, Api.tool_runtime], ), InlineProviderSpec( api=Api.vector_io, @@ -33,7 +33,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.faiss", config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files], + optional_api_dependencies=[Api.files, Api.tool_runtime], ), # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a # source distribution and the wheels are not available for all platforms. 
@@ -44,7 +44,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.sqlite_vec", config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files], + optional_api_dependencies=[Api.files, Api.tool_runtime], ), InlineProviderSpec( api=Api.vector_io, @@ -54,7 +54,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.", api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files], + optional_api_dependencies=[Api.files, Api.tool_runtime], ), remote_provider_spec( Api.vector_io, diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..442626b7c 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -6,14 +6,14 @@ import asyncio import logging -import mimetypes import time import uuid from abc import ABC, abstractmethod -from typing import Any +from typing import Any, cast from llama_stack.apis.files import Files from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.tools.tools import ToolRuntime from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -36,7 +36,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks +from llama_stack.providers.utils.memory.vector_store import make_overlapped_chunks logger = logging.getLogger(__name__) @@ -54,6 +54,7 @@ class OpenAIVectorStoreMixin(ABC): # These should be provided by the implementing class openai_vector_stores: dict[str, dict[str, Any]] files_api: Files | None + tool_runtime_api: ToolRuntime | None @abstractmethod async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None: @@ -526,6 +527,14 @@ class OpenAIVectorStoreMixin(ABC): ) return vector_store_file_object + if not hasattr(self, "tool_runtime_api") or not self.tool_runtime_api: + vector_store_file_object.status = "failed" + vector_store_file_object.last_error = VectorStoreFileLastError( + code="server_error", + message="Tool runtime API is not available", + ) + return vector_store_file_object + if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic): max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens @@ -536,11 +545,13 @@ class OpenAIVectorStoreMixin(ABC): try: file_response = await self.files_api.openai_retrieve_file(file_id) - mime_type, _ = mimetypes.guess_type(file_response.filename) - content_response = await self.files_api.openai_retrieve_file_content(file_id) - - content = content_from_data_and_mime_type(content_response.body, mime_type) - + tool_result = await self.tool_runtime_api.invoke_tool( + "convert_file_to_text", + {"file_id": file_id}, + ) + if tool_result.error_code or tool_result.error_message: + raise ValueError(f"Failed to convert file to text: {tool_result.error_message}") + content = cast(str, tool_result.content) # The tool always returns strings chunks = 
make_overlapped_chunks( file_id, content, diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..a75785b47 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -31,6 +31,7 @@ distribution_spec: - remote::brave-search - remote::tavily-search - inline::rag-runtime + - inline::synthetic-data-kit - remote::model-context-protocol - remote::wolfram-alpha image_type: conda diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..6c5a737bf 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::brave-search", "remote::tavily-search", "inline::rag-runtime", + "inline::synthetic-data-kit", "remote::model-context-protocol", "remote::wolfram-alpha", ], @@ -91,6 +92,10 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha", ), + ToolGroupInput( + toolgroup_id="builtin::document_conversion", + provider_id="synthetic-data-kit", + ), ] return DistributionTemplate( diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 2e1b7fdcc..891782e47 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -115,6 +115,9 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + - provider_id: synthetic-data-kit + provider_type: inline::synthetic-data-kit + config: {} - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} @@ -159,5 +162,7 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha +- toolgroup_id: builtin::document_conversion + provider_id: synthetic-data-kit server: port: 8321 diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 8c2b17ef1..cf1b5e4bd 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -113,6 +113,9 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + - provider_id: synthetic-data-kit + provider_type: inline::synthetic-data-kit + config: {} - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} @@ -149,5 +152,7 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::wolfram_alpha provider_id: wolfram-alpha +- toolgroup_id: builtin::document_conversion + provider_id: synthetic-data-kit server: port: 8321 diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..418b22ceb 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -38,6 +38,7 @@ distribution_spec: - remote::brave-search - remote::tavily-search - inline::rag-runtime + - inline::synthetic-data-kit - remote::model-context-protocol image_type: conda additional_pip_packages: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 30df39e5d..91be86d93 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -163,6 +163,9 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + - provider_id: synthetic-data-kit + provider_type: 
inline::synthetic-data-kit + config: {} - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} @@ -822,5 +825,7 @@ tool_groups: provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime +- toolgroup_id: builtin::document_conversion + provider_id: synthetic-data-kit server: port: 8321 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index ec01d08e9..bde750dc7 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -163,6 +163,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::brave-search", "remote::tavily-search", "inline::rag-runtime", + "inline::synthetic-data-kit", "remote::model-context-protocol", ], } @@ -214,6 +215,10 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::rag", provider_id="rag-runtime", ), + ToolGroupInput( + toolgroup_id="builtin::document_conversion", + provider_id="synthetic-data-kit", + ), ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", diff --git a/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.docx b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.docx new file mode 100644 index 000000000..b9d6223fe Binary files /dev/null and b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.docx differ diff --git a/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.md b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.md new file mode 100644 index 000000000..900cf757c --- /dev/null +++ b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.md @@ -0,0 +1,27 @@ +# Llama Stack + +## Llama Stack Overview + +Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides + +* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. + +* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile. + +* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment. + +* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. + +* Standalone applications as examples for how to build production-grade AI applications with Llama Stack. + +## Llama Stack Benefits + +* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices. + +* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior. + +* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models. + +# Llama 4 Maverick + +Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. 
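The fixture set above, together with the renames and additions that follow, exercises both branches of the mime-type routing in the new synthetic_data_kit.py provider: .txt and .md inputs are returned as text directly, while .docx, .pptx, and .pdf fall through to synthetic-data-kit's process_file. A minimal standalone sketch of that routing, using only the stdlib and the same .md special case as the provider's _guess_mime_type (the helper name and route labels here are illustrative, not part of the provider):

import mimetypes

def guess_mime_type(filename: str) -> str | None:
    # mimetypes.guess_type does not know ".md" on every platform,
    # so the provider special-cases it.
    mime_type, _ = mimetypes.guess_type(filename)
    if mime_type is None and filename.endswith(".md"):
        mime_type = "text/markdown"
    return mime_type

for name in (
    "llama_stack_and_models.txt",
    "llama_stack_and_models.md",
    "llama_stack_and_models.docx",
    "llama_stack_and_models.pptx",
    "llama_stack_and_models.pdf",
):
    mime = guess_mime_type(name)
    route = "return as text" if mime and mime.startswith("text/") else "synthetic-data-kit process_file"
    print(f"{name}: {mime} -> {route}")
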
diff --git a/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.pdf similarity index 100% rename from tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf rename to tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.pdf diff --git a/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.pptx b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.pptx new file mode 100644 index 000000000..d1ed754e2 Binary files /dev/null and b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.pptx differ diff --git a/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.txt b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.txt new file mode 100644 index 000000000..694c0e7f7 --- /dev/null +++ b/tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.txt @@ -0,0 +1,24 @@ +Llama Stack + + +Llama Stack Overview + +Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides + +* Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. +* Plugin architecture to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile. +* Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly and reliably in any environment. +* Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. +* Standalone applications as examples for how to build production-grade AI applications with Llama Stack. + + +Llama Stack Benefits + +* Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices. +* Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior. +* Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models. + + +Llama 4 Maverick + +Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml index 1acf06388..5b4efc098 100644 --- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml @@ -42,12 +42,40 @@ test_response_file_search: # vector_store_ids param for file_search tool gets added by the test runner file_content: "Llama 4 Maverick has 128 experts" output: "128" + - case_id: "llama_experts_docx" + input: "How many experts does the Llama 4 Maverick model have?" + tools: + - type: file_search + # vector_store_ids param for file_search tool gets added by the test runner + file_path: "docs/llama_stack_and_models.docx" + output: "128" + - case_id: "llama_experts_md" + input: "How many experts does the Llama 4 Maverick model have?"
+ tools: + - type: file_search + # vector_store_ids param for file_search tool gets added by the test runner + file_path: "docs/llama_stack_and_models.md" + output: "128" - case_id: "llama_experts_pdf" input: "How many experts does the Llama 4 Maverick model have?" tools: - type: file_search # vector_store_ids param for file_search toolgets added by the test runner - file_path: "pdfs/llama_stack_and_models.pdf" + file_path: "docs/llama_stack_and_models.pdf" + output: "128" + - case_id: "llama_experts_pptx" + input: "How many experts does the Llama 4 Maverick model have?" + tools: + - type: file_search + # vector_store_ids param for file_search tool gets added by the test runner + file_path: "docs/llama_stack_and_models.pptx" + output: "128" + - case_id: "llama_experts_txt" + input: "How many experts does the Llama 4 Maverick model have?" + tools: + - type: file_search + # vector_store_ids param for file_search tool gets added by the test runner + file_path: "docs/llama_stack_and_models.txt" output: "128" test_response_mcp_tool:
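End to end, the new llama_experts_* cases correspond to a flow like the one below against a running distribution: upload a document, attach it to a vector store (attachment now routes through the convert_file_to_text tool before chunking), then ask a question with the file_search tool. This is a sketch using the OpenAI Python SDK against Llama Stack's OpenAI-compatible endpoints; the base URL prefix, the model id, and whether vector_stores lives at the client top level or under client.beta depend on the server and SDK versions, so treat those as assumptions rather than a verified test:

from openai import OpenAI

# Assumed local server and OpenAI-compatible URL prefix; adjust to your deployment.
# The api_key is ignored by a default local Llama Stack server but required by the SDK.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# Upload a non-text fixture; attaching it to a vector store triggers the
# synthetic-data-kit provider's convert_file_to_text tool on the server side.
with open("tests/verifications/openai_api/fixtures/docs/llama_stack_and_models.docx", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

store = client.vector_stores.create(name="docs")
client.vector_stores.files.create(vector_store_id=store.id, file_id=uploaded.id)

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # assumed; use whichever model the distribution serves
    input="How many experts does the Llama 4 Maverick model have?",
    tools=[{"type": "file_search", "vector_store_ids": [store.id]}],
)
print(response.output_text)  # expected to mention 128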