From fa0b0c13d41a6125b743c774c8870a760a9d2ebe Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Wed, 25 Jun 2025 09:54:00 -0400 Subject: [PATCH 1/8] fix: Ollama should be optional in starter distro (#2482) # What does this PR do? Our starter distro required Ollama to be running (and a large list of models available in that Ollama) to successfully start. This adjusts things so that Ollama does not have to be running to use the starter template / distro. To accomplish this, a few changes were needed: * The Ollama provider is now configurable as to whether it raises an Exception or just logs a warning when it cannot reach the Ollama server on startup. The default is to raise an exception (same as previous behavior), but in the starter template we adjust this to just log a warning so that we can bring the stack up without needing a running Ollama server. * The starter template no longer specifies a default list of models for Ollama, as any models specified there need to actually be pulled and available in Ollama. Instead, it adds a new `OLLAMA_INFERENCE_MODEL` environment variable where users can provide an optional model to register with the Ollama provider on startup. Additional models can also be registered via the typical `models.register(...)` at runtime. * The vLLM template was adjusted to also allow an optional `VLLM_INFERENCE_MODEL` specified on startup, so that the behavior between vLLM and Ollama is consistent, making it easy to get up and running quickly. * The default vector store was changed from sqlite-vec to faiss. sqlite-vec can be enabled by setting the `ENABLE_SQLITE_VEC` environment variable, like we do for chromadb and pgvector. This is because sqlite-vec does not ship proper arm64 binaries, an issue we previously fixed in #1530 for the ollama distribution. ## Test Plan With this change, the following scenarios now work with the starter template that did not before: * no Ollama running * Ollama running but not all of the Llama models pulled locally * Ollama running with a custom model registered on startup * vLLM running with a custom model registered on startup * running the starter template on linux/arm64, like when running containers on Mac without Rosetta emulation --------- Signed-off-by: Ben Browning --- docs/source/distributions/configuration.md | 12 ++ llama_stack/distribution/stack.py | 9 + .../remote/inference/ollama/__init__.py | 2 +- .../remote/inference/ollama/config.py | 10 +- .../remote/inference/ollama/ollama.py | 22 ++- .../providers/remote/inference/vllm/vllm.py | 9 +- .../templates/ollama/run-with-safety.yaml | 1 + llama_stack/templates/ollama/run.yaml | 1 + llama_stack/templates/starter/run.yaml | 170 ++---------------- llama_stack/templates/starter/starter.py | 58 +++++- 10 files changed, 121 insertions(+), 173 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index dd73d93ea..4bc9b37e4 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -109,6 +109,18 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions.
When omitted, the server will set `provider_model_id` to be the same as `model_id`. +If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: + +```yaml +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL:__disabled__} + provider_id: ollama + provider_model_id: ${env.INFERENCE_MODEL:__disabled__} +``` + +The snippet above will only register this model if the environment variable `INFERENCE_MODEL` is set and non-empty. If the environment variable is not set, the model will not get registered at all. + ## Server Configuration The `server` section configures the HTTP server that serves the Llama Stack APIs: diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 5a9708497..b33b0d3f7 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): method = getattr(impls[api], register_method) for obj in objects: + # In complex templates, like our starter template, we may have dynamic model ids + # given by environment variables. This allows those environment variables to have + # a default value of __disabled__ to skip registration of the model if not set. + if ( + hasattr(obj, "provider_model_id") + and obj.provider_model_id is not None + and "__disabled__" in obj.provider_model_id + ): + continue # we want to maintain the type information in arguments to method. # instead of method(**obj.model_dump()), which may convert a typed attr to a dict, # we use model_dump() to find all the attrs and then getattr to get the still typed value. 
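To make the `__disabled__` mechanism above concrete, here is a minimal sketch of the substitution-plus-skip behavior; `substitute_env` is an illustrative stand-in for Llama Stack's actual `${env.VAR:default}` handling, not the real helper:

```python
import os

def substitute_env(template: str) -> str:
    # Illustrative only: resolve "${env.VAR:default}" to the variable's value,
    # falling back to the default when the variable is unset or empty.
    if template.startswith("${env.") and template.endswith("}"):
        name, _, default = template[6:-1].partition(":")
        return os.environ.get(name) or default
    return template

# Mirrors the registration loop above: any model whose provider_model_id still
# contains the __disabled__ sentinel after substitution is skipped entirely.
provider_model_id = substitute_env("${env.OLLAMA_INFERENCE_MODEL:__disabled__}")
if "__disabled__" in provider_model_id:
    print("OLLAMA_INFERENCE_MODEL not set; skipping registration")
else:
    print(f"registering {provider_model_id}")
```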
diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/llama_stack/providers/remote/inference/ollama/__init__.py index 073c31cde..491339451 100644 --- a/llama_stack/providers/remote/inference/ollama/__init__.py +++ b/llama_stack/providers/remote/inference/ollama/__init__.py @@ -10,6 +10,6 @@ from .config import OllamaImplConfig async def get_adapter_impl(config: OllamaImplConfig, _deps): from .ollama import OllamaInferenceAdapter - impl = OllamaInferenceAdapter(config.url) + impl = OllamaInferenceAdapter(config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 0e4aef0e1..37b827f4f 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL + raise_on_connect_error: bool = True @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]: - return {"url": url} + def sample_run_config( + cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + ) -> dict[str, Any]: + return { + "url": url, + "raise_on_connect_error": raise_on_connect_error, + } diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d51072fbf..2f51920b5 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -9,7 +9,6 @@ import uuid from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -import httpx from ollama import AsyncClient # type: ignore[attr-defined] from openai import AsyncOpenAI @@ -57,6 +56,7 @@ from llama_stack.providers.datatypes import ( HealthStatus, ModelsProtocolPrivate, ) +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -90,9 +90,10 @@ class OllamaInferenceAdapter( InferenceProvider, ModelsProtocolPrivate, ): - def __init__(self, url: str) -> None: + def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) - self.url = url + self.url = config.url + self.raise_on_connect_error = config.raise_on_connect_error @property def client(self) -> AsyncClient: @@ -103,8 +104,13 @@ class OllamaInferenceAdapter( return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama") async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.url}`...") - await self.health() + logger.debug(f"checking connectivity to Ollama at `{self.url}`...") + health_response = await self.health() + if health_response["status"] == HealthStatus.ERROR: + if self.raise_on_connect_error: + raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") + else: + logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal") async def health(self) -> HealthResponse: """ @@ -117,10 +123,8 @@ class OllamaInferenceAdapter( try: await self.client.ps() return HealthResponse(status=HealthStatus.OK) - except httpx.ConnectError as e: - raise RuntimeError( - "Ollama Server is not running, start it using `ollama serve` in a separate terminal" - ) 
from e + except Exception as e: + return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 3424be6b4..ae04f206a 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator from typing import Any import httpx -from openai import AsyncOpenAI +from openai import APIConnectionError, AsyncOpenAI from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -461,7 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): model = await self.register_helper.register_model(model) except ValueError: pass # Ignore statically unknown model, will check live listing - res = await client.models.list() + try: + res = await client.models.list() + except APIConnectionError as e: + raise ValueError( + f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL." + ) from e available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 85d5c813b..2e1b7fdcc 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -18,6 +18,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 2d10a99a4..8c2b17ef1 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -18,6 +18,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 960e96d01..30df39e5d 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -31,6 +31,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: @@ -60,7 +61,14 @@ providers: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: sqlite-vec + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec} provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db @@ -530,160 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/llama3.1:8b-instruct-fp16 + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__} provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-8B-Instruct - provider_id: ollama - 
provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:8b - provider_id: ollama - provider_model_id: llama3.1:8b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-70B-Instruct - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b - provider_id: ollama - provider_model_id: llama3.1:70b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b - provider_id: ollama - provider_model_id: llama3.1:405b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-1B-Instruct - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b - provider_id: ollama - provider_model_id: llama3.2:1b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-3B-Instruct - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b - provider_id: ollama - provider_model_id: llama3.2:3b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:11b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:latest - provider_id: ollama - provider_model_id: llama3.2-vision:latest - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b - provider_id: ollama - provider_model_id: llama3.2-vision:90b - model_type: llm -- metadata: {} - model_id: ollama/llama3.3:70b - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.3-70B-Instruct - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:8b - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-8B - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:1b - provider_id: ollama - provider_model_id: llama-guard3:1b - model_type: llm -- metadata: {} - model_id: 
ollama/meta-llama/Llama-Guard-3-1B - provider_id: ollama - provider_model_id: llama-guard3:1b + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__} model_type: llm - metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm:latest + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__} provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ollama/nomic-embed-text - provider_id: ollama - provider_model_id: nomic-embed-text + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -938,6 +801,11 @@ models: provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__} + provider_id: vllm + provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__} + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 2a44a0a37..ec01d08e9 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -16,6 +16,7 @@ from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplCo from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, ) @@ -36,9 +37,6 @@ from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.remote.inference.ollama.models import ( - MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES, -) from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, @@ -85,8 +83,22 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ), ( "ollama", - OLLAMA_MODEL_ENTRIES, - OllamaImplConfig.sample_run_config(), + [ + ProviderModelEntry( + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}", + model_type=ModelType.llm, + ), + ProviderModelEntry( + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}", + }, + ), + ], + OllamaImplConfig.sample_run_config( + url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False + ), ), ( "anthropic", @@ -110,7 +122,12 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ), ( "vllm", - [], + [ + ProviderModelEntry( + provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}", + model_type=ModelType.llm, + ), + ], VLLMInferenceAdapterConfig.sample_run_config( url="${env.VLLM_URL:http://localhost:8000/v1}", ), @@ -153,7 +170,12 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - 
provider_id="sqlite-vec", + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), @@ -257,7 +279,27 @@ def get_distribution_template() -> DistributionTemplate: ), "VLLM_URL": ( "http://localhost:8000/v1", - "VLLM URL", + "vLLM URL", + ), + "VLLM_INFERENCE_MODEL": ( + "", + "Optional vLLM Inference Model to register on startup", + ), + "OLLAMA_URL": ( + "http://localhost:11434", + "Ollama URL", + ), + "OLLAMA_INFERENCE_MODEL": ( + "", + "Optional Ollama Inference Model to register on startup", + ), + "OLLAMA_EMBEDDING_MODEL": ( + "", + "Optional Ollama Embedding Model to register on startup", + ), + "OLLAMA_EMBEDDING_DIMENSION": ( + "384", + "Ollama Embedding Dimension", ), }, ) From 82f13fe83e5c9c802595b05fb2bc2ef4ddedbe3c Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 25 Jun 2025 13:55:23 -0600 Subject: [PATCH 2/8] feat: Add ChunkMetadata to Chunk (#2497) # What does this PR do? Adding `ChunkMetadata` so we can properly delete embeddings later. More specifically, this PR refactors and extends the chunk metadata handling in the vector database and introduces a distinction between metadata used for model context and backend-only metadata required for chunk management, storage, and retrieval. It also improves chunk ID generation and propagation throughout the stack, enhances test coverage, and adds new utility modules. ```python class ChunkMetadata(BaseModel): """ `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will NOT be inserted into the context during inference, but is required for backend functionality. Use `metadata` in `Chunk` for metadata that will be used during inference. """ document_id: str | None = None chunk_id: str | None = None source: str | None = None created_timestamp: int | None = None updated_timestamp: int | None = None chunk_window: str | None = None chunk_tokenizer: str | None = None chunk_embedding_model: str | None = None chunk_embedding_dimension: int | None = None content_token_count: int | None = None metadata_token_count: int | None = None ``` Eventually we can migrate the document_id out of the `metadata` field. I've introduced the changes so that `ChunkMetadata` is backwards compatible with `metadata`. 
Closes https://github.com/meta-llama/llama-stack/issues/2501 ## Test Plan Added unit tests --------- Signed-off-by: Francisco Javier Arceo --- docs/_static/llama-stack-spec.html | 205 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 171 +++++++++------ llama_stack/apis/vector_io/vector_io.py | 62 +++++- .../inline/tool_runtime/rag/memory.py | 21 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 35 +-- .../remote/vector_io/qdrant/qdrant.py | 4 +- .../providers/utils/memory/vector_store.py | 24 +- .../providers/utils/vector_io/__init__.py | 5 + .../providers/utils/vector_io/chunk_utils.py | 14 ++ tests/unit/providers/vector_io/conftest.py | 16 +- .../providers/vector_io/test_chunk_utils.py | 66 ++++++ tests/unit/providers/vector_io/test_qdrant.py | 2 +- .../providers/vector_io/test_sqlite_vec.py | 38 ++-- tests/unit/rag/test_rag_query.py | 45 +++- 14 files changed, 490 insertions(+), 218 deletions(-) create mode 100644 llama_stack/providers/utils/vector_io/__init__.py create mode 100644 llama_stack/providers/utils/vector_io/chunk_utils.py create mode 100644 tests/unit/providers/vector_io/test_chunk_utils.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index affc426d6..801e8dc33 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -11190,6 +11190,115 @@ ], "title": "InsertRequest" }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." 
+ }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, "InsertChunksRequest": { "type": "object", "properties": { @@ -11200,53 +11309,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" }, "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." }, @@ -14671,53 +14734,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. 
If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" } }, "scores": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 1e1293dc2..b736cd904 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -7867,6 +7867,107 @@ components: - vector_db_id - chunk_size_in_tokens title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not + expected to change after. 
Use `Chunk.metadata` for metadata that will + be used in the context during inference. InsertChunksRequest: type: object properties: @@ -7877,40 +7978,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' description: >- The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` @@ -10231,40 +10299,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' scores: type: array items: diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index d6de0108c..2d4131315 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -8,6 +8,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -15,21 +16,80 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.strong_typing.schema import register_schema +@json_schema_type +class ChunkMetadata(BaseModel): + """ + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. 
+ :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. + :param document_id: The ID of the document this chunk belongs to. + :param source: The source of the content, such as a URL, file path, or other identifier. + :param created_timestamp: An optional timestamp indicating when the chunk was created. + :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. + :param chunk_window: The window of the chunk, which can be used to group related chunks together. + :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. + :param chunk_embedding_model: The embedding model used to create the chunk's embedding. + :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk. + :param content_token_count: The number of tokens in the content of the chunk. + :param metadata_token_count: The number of tokens in the metadata of the chunk. + """ + + chunk_id: str | None = None + document_id: str | None = None + source: str | None = None + created_timestamp: int | None = None + updated_timestamp: int | None = None + chunk_window: str | None = None + chunk_tokenizer: str | None = None + chunk_embedding_model: str | None = None + chunk_embedding_dimension: int | None = None + content_token_count: int | None = None + metadata_token_count: int | None = None + + +@json_schema_type class Chunk(BaseModel): """ A chunk of content that can be inserted into a vector database. :param content: The content of the chunk, which can be interleaved text, images, or other types. :param embedding: Optional embedding for the chunk. If not provided, it will be computed later. - :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information. + :param metadata: Metadata associated with the chunk that will be used in the model context during inference. + :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. + :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. 
""" content: InterleavedContent metadata: dict[str, Any] = Field(default_factory=dict) embedding: list[float] | None = None + # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id" + stored_chunk_id: str | None = Field(default=None, alias="chunk_id") + chunk_metadata: ChunkMetadata | None = None + + model_config = {"populate_by_name": True} + + def model_post_init(self, __context): + # Extract chunk_id from metadata if present + if self.metadata and "chunk_id" in self.metadata: + self.stored_chunk_id = self.metadata.pop("chunk_id") + + @property + def chunk_id(self) -> str: + """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set.""" + if self.stored_chunk_id: + return self.stored_chunk_id + + if "document_id" in self.metadata: + return generate_chunk_id(self.metadata["document_id"], str(self.content)) + + return generate_chunk_id(str(uuid.uuid4()), str(self.content)) @json_schema_type diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 7f4fe5dbd..6a7c7885c 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti chunks = [] for doc in documents: content = await content_from_doc(doc) + # TODO: we should add enrichment here as URLs won't be added to the metadata by default chunks.extend( make_overlapped_chunks( doc.document_id, @@ -157,8 +158,24 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ) break - metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]} - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset) + # Add useful keys from chunk_metadata to metadata and remove some from metadata + chunk_metadata_keys_to_include_from_context = [ + "chunk_id", + "document_id", + "source", + ] + metadata_keys_to_exclude_from_context = [ + "token_count", + "metadata_token_count", + ] + metadata_for_context = {} + for k in chunk_metadata_keys_to_include_from_context: + metadata_for_context[k] = getattr(chunk.chunk_metadata, k) + for k in metadata: + if k not in metadata_keys_to_exclude_from_context: + metadata_for_context[k] = metadata[k] + + text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context) picked.append(TextContentItem(text=text_content)) picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index d832e56f5..3b3c5f486 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -5,12 +5,10 @@ # the root directory of this source tree. 
import asyncio -import hashlib import json import logging import sqlite3 import struct -import uuid from typing import Any import numpy as np @@ -201,10 +199,7 @@ class SQLiteVecIndex(EmbeddingIndex): batch_embeddings = embeddings[i : i + batch_size] # Insert metadata - metadata_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json()) - for chunk in batch_chunks - ] + metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks] cur.executemany( f""" INSERT INTO {self.metadata_table} (id, chunk) @@ -218,7 +213,7 @@ class SQLiteVecIndex(EmbeddingIndex): embedding_data = [ ( ( - generate_chunk_id(chunk.metadata["document_id"], chunk.content), + chunk.chunk_id, serialize_vector(emb.tolist()), ) ) @@ -230,10 +225,7 @@ class SQLiteVecIndex(EmbeddingIndex): ) # Insert FTS content - fts_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content) - for chunk in batch_chunks - ] + fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) cur.executemany( f"DELETE FROM {self.fts_table} WHERE id = ?;", @@ -381,13 +373,12 @@ class SQLiteVecIndex(EmbeddingIndex): vector_response = await self.query_vector(embedding, k, score_threshold) keyword_response = await self.query_keyword(query_string, k, score_threshold) - # Convert responses to score dictionaries using generate_chunk_id + # Convert responses to score dictionaries using chunk_id vector_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score - for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) } keyword_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score + chunk.chunk_id: score for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) } @@ -408,13 +399,7 @@ class SQLiteVecIndex(EmbeddingIndex): filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] # Create a map of chunk_id to chunk for both responses - chunk_map = {} - for c in vector_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c - for c in keyword_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} # Use the map to look up chunks by their IDs chunks = [] @@ -757,9 +742,3 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not found") return await self.cache[vector_db_id].query_chunks(query, params) - - -def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index e9d6eec22..09ea08fa0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -70,8 +70,8 @@ class QdrantIndex(EmbeddingIndex): ) points = 
[] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): - chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}" + for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): + chunk_id = chunk.chunk_id points.append( PointStruct( id=convert_id(chunk_id), diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index a6e420feb..ab204a75a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -7,6 +7,7 @@ import base64 import io import logging import re +import time from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any @@ -23,12 +24,13 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id log = logging.getLogger(__name__) @@ -148,6 +150,7 @@ async def content_from_doc(doc: RAGDocument) -> str: def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: + default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER" tokenizer = Tokenizer.get_instance() tokens = tokenizer.encode(text, bos=False, eos=False) try: @@ -161,16 +164,32 @@ def make_overlapped_chunks( for i in range(0, len(tokens), window_len - overlap_len): toks = tokens[i : i + window_len] chunk = tokenizer.decode(toks) + chunk_id = generate_chunk_id(chunk, text) chunk_metadata = metadata.copy() + chunk_metadata["chunk_id"] = chunk_id chunk_metadata["document_id"] = document_id chunk_metadata["token_count"] = len(toks) chunk_metadata["metadata_token_count"] = len(metadata_tokens) + backend_chunk_metadata = ChunkMetadata( + chunk_id=chunk_id, + document_id=document_id, + source=metadata.get("source", None), + created_timestamp=metadata.get("created_timestamp", int(time.time())), + updated_timestamp=int(time.time()), + chunk_window=f"{i}-{i + len(toks)}", + chunk_tokenizer=default_tokenizer, + chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + content_token_count=len(toks), + metadata_token_count=len(metadata_tokens), + ) + # chunk is a string chunks.append( Chunk( content=chunk, metadata=chunk_metadata, + chunk_metadata=backend_chunk_metadata, ) ) @@ -237,6 +256,9 @@ class VectorDBWithIndex: for i, c in enumerate(chunks): if c.embedding is None: chunks_to_embed.append(c) + if c.chunk_metadata: + c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension else: _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/llama_stack/providers/utils/vector_io/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py new file mode 100644 index 000000000..68cf11cad --- /dev/null +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import hashlib +import uuid + + +def generate_chunk_id(document_id: str, chunk_text: str) -> str: + """Generate a unique chunk ID using a hash of document ID and chunk text.""" + hash_input = f"{document_id}:{chunk_text}".encode() + return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 3bcd0613f..5eaca8a25 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,7 @@ import random import numpy as np import pytest -from llama_stack.apis.vector_io import Chunk +from llama_stack.apis.vector_io import Chunk, ChunkMetadata EMBEDDING_DIMENSION = 384 @@ -33,6 +33,20 @@ def sample_chunks(): for j in range(k) for i in range(n) ] + sample.extend( + [ + Chunk( + content=f"Sentence {i} from document {j + k}", + chunk_metadata=ChunkMetadata( + document_id=f"document-{j + k}", + chunk_id=f"document-{j}-chunk-{i}", + source=f"example source-{j + k}-{i}", + ), + ) + for j in range(k) + for i in range(n) + ] + ) return sample diff --git a/tests/unit/providers/vector_io/test_chunk_utils.py b/tests/unit/providers/vector_io/test_chunk_utils.py new file mode 100644 index 000000000..941928b6d --- /dev/null +++ b/tests/unit/providers/vector_io/test_chunk_utils.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id + +# This test is a unit test for the chunk_utils.py helpers. This should only contain +# tests which are specific to this file. 
More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_chunk_utils.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +def test_generate_chunk_id(): + chunks = [ + Chunk(content="test", metadata={"document_id": "doc-1"}), + Chunk(content="test ", metadata={"document_id": "doc-1"}), + Chunk(content="test 3", metadata={"document_id": "doc-1"}), + ] + + chunk_ids = sorted([chunk.chunk_id for chunk in chunks]) + assert chunk_ids == [ + "177a1368-f6a8-0c50-6e92-18677f2c3de3", + "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", + "f68df25d-d9aa-ab4d-5684-64a233add20d", + ] + + +def test_chunk_id(): + # Test with existing chunk ID + chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"}) + assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350" + + # Test with document ID in metadata + chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"}) + assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test") + + # Test chunks with ChunkMetadata + chunk_with_metadata = Chunk( + content="test", + metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}, + chunk_metadata=ChunkMetadata(document_id="document_1"), + ) + assert chunk_with_metadata.chunk_id == "chunk-id-1" + + # Test with no ID or document ID + chunk_without_id = Chunk(content="test") + generated_id = chunk_without_id.chunk_id + assert isinstance(generated_id, str) and len(generated_id) == 36 # Should be a valid UUID + + +def test_stored_chunk_id_alias(): + # Test with existing chunk ID alias + chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}) + assert chunk_with_alias.chunk_id == "chunk-id-1" + serialized_chunk = chunk_with_alias.model_dump() + assert serialized_chunk["stored_chunk_id"] == "chunk-id-1" + # showing chunk_id is not serialized (i.e., a computed field) + assert "chunk_id" not in serialized_chunk + assert chunk_with_alias.stored_chunk_id == "chunk-id-1" diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 607eccb24..6902c8850 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -81,7 +81,7 @@ __QUERY = "Sample query" @pytest.mark.asyncio -@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, vector_db_id, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 6424b9e86..bbac717c7 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -15,7 +15,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, - generate_chunk_id, ) # This test is a unit test for the SQLiteVecVectorIOAdapter class. 
This should only contain @@ -65,6 +64,14 @@ async def test_query_chunks_vector(sqlite_vec_index, sample_chunks, sample_embed assert len(response.chunks) == 2 +@pytest.mark.xfail(reason="Chunk Metadata not yet supported for SQLite-vec", strict=True) +async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks, sample_embeddings): + await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) + query_embedding = sample_embeddings[0] + response = await sqlite_vec_index.query_vector(query_embedding, k=2, score_threshold=0.0) + assert response.chunks[-1].chunk_metadata == sample_chunks[-1].chunk_metadata + + @pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -150,21 +157,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -def test_generate_chunk_id(): - chunks = [ - Chunk(content="test", metadata={"document_id": "doc-1"}), - Chunk(content="test ", metadata={"document_id": "doc-1"}), - Chunk(content="test 3", metadata={"document_id": "doc-1"}), - ] - - chunk_ids = sorted([generate_chunk_id(chunk.metadata["document_id"], chunk.content) for chunk in chunks]) - assert chunk_ids == [ - "177a1368-f6a8-0c50-6e92-18677f2c3de3", - "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", - "f68df25d-d9aa-ab4d-5684-64a233add20d", - ] - - @pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" @@ -339,7 +331,7 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks # Verify scores are in descending order assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) # Verify we get results from both the vector-similar document and keyword-matched document - doc_ids = {chunk.metadata["document_id"] for chunk in response.chunks} + doc_ids = {chunk.metadata.get("document_id") or chunk.chunk_metadata.document_id for chunk in response.chunks} assert "document-0" in doc_ids # From vector search assert "document-2" in doc_ids # From keyword search @@ -364,7 +356,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 1.0}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) # alpha=0.0 (should behave like pure vector) response = await sqlite_vec_index.query_hybrid( @@ -389,7 +385,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 0.7}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) @pytest.mark.asyncio diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index b9fd8cca4..d2dd1783b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -4,10 +4,15 @@ # This source code is licensed under the terms described in the 
LICENSE file in # the root directory of this source tree. -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack.apis.vector_io import ( + Chunk, + ChunkMetadata, + QueryChunksResponse, +) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl @@ -17,3 +22,41 @@ class TestRagQuery: rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) + + @pytest.mark.asyncio + async def test_query_chunk_metadata_handling(self): + rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) + content = "test query content" + vector_db_ids = ["db1"] + + chunk_metadata = ChunkMetadata( + document_id="doc1", + chunk_id="chunk1", + source="test_source", + metadata_token_count=5, + ) + interleaved_content = MagicMock() + chunk = Chunk( + content=interleaved_content, + metadata={ + "key1": "value1", + "token_count": 10, + "metadata_token_count": 5, + # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert() + "document_id": "doc1", + }, + stored_chunk_id="chunk1", + chunk_metadata=chunk_metadata, + ) + + query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0]) + + rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response) + result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids) + + assert result is not None + expected_metadata_string = ( + "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}" + ) + assert expected_metadata_string in result.content[1].text + assert result.content is not None From 1d3f27fe5b359b8a6fde7741f72a6593ac0f0774 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Wed, 25 Jun 2025 14:43:37 -0700 Subject: [PATCH 3/8] fix: resume responses with tool call output (#2524) # What does this PR do? 
Closes #2522.

## Test Plan

Added an integration test:

```
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -v tests/integration/agents/test_openai_responses.py --text-model "accounts/fireworks/models/llama-v3p3-70b-instruct" -vv -k 'function_call'
```
---
 .../utils/inference/openai_compat.py          |  4 +-
 .../agents/test_openai_responses.py           | 53 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index ff95b12a7..01dfb8d61 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1026,7 +1026,9 @@ def openai_messages_to_messages(
     return converted_messages
 
 
-def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]):
+def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
+    if content is None:
+        return ""
     if isinstance(content, str):
         return content
     elif isinstance(content, list):
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index 26eac527b..b0b123c45 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -221,3 +221,56 @@ def test_list_response_input_items_with_limit_and_order(openai_client, client_wi
         assert hasattr(item, "type")
         assert item.type == "message"
         assert item.role in ["user", "assistant"]
+
+
+@pytest.mark.skip(reason="Tool calling is not reliable.")
+def test_function_call_output_response(openai_client, client_with_models, text_model_id):
+    """Test handling of function call outputs in responses."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    # First create a response that triggers a function call
+    response = client.responses.create(
+        model=text_model_id,
+        input=[
+            {
+                "role": "user",
+                "content": "what's the weather in tokyo? You MUST call the `get_weather` function to find out.",
+            }
+        ],
+        tools=[
+            {
+                "type": "function",
+                "name": "get_weather",
+                "description": "Get the weather in a given city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city to get the weather for"},
+                    },
+                },
+            }
+        ],
+        stream=False,
+    )
+
+    # Verify we got a function call
+    assert response.output[0].type == "function_call"
+    call_id = response.output[0].call_id
+
+    # Now send the function call output as a follow-up
+    response2 = client.responses.create(
+        model=text_model_id,
+        input=[{"type": "function_call_output", "call_id": call_id, "output": "sunny and warm"}],
+        previous_response_id=response.id,
+        stream=False,
+    )
+
+    # Verify the second response processed successfully
+    assert response2.id is not None
+    assert response2.output[0].type == "message"
+    assert (
+        "sunny" in response2.output[0].content[0].text.lower() or "warm" in response2.output[0].content[0].text.lower()
+    )

From 2d9fd041eb7c5d8d163d1f97e6e9942b6a366af4 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 25 Jun 2025 22:29:33 -0400
Subject: [PATCH 4/8] fix: annotations list and web_search_preview in
 Responses (#2520)

# What does this PR do?

These are a couple of fixes to get an example LangChain app working with
our OpenAI Responses API implementation.
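To make the two failure modes concrete, here is a minimal sketch of the
same request pattern using the plain `openai` client instead of LangChain
(the base URL, api key, and model id are illustrative and mirror the
LangChain example at the end of this description):

```python
from openai import OpenAI

# Plain OpenAI client pointed at a locally running Llama Stack server
# (base URL, api_key, and model id are illustrative).
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

response = client.responses.create(
    model="ollama/meta-llama/Llama-3.2-3B-Instruct",
    input="What was a positive news story from today?",
    # "web_search_preview" was previously rejected; only "web_search" and
    # "web_search_preview_2025_03_11" were accepted.
    tools=[{"type": "web_search_preview"}],
)

# Output text content parts now always carry an `annotations` list (empty
# for now), which clients like LangChain expect to be present.
for item in response.output:
    if item.type == "message":
        for part in item.content:
            if part.type == "output_text":
                print(part.text, part.annotations)
```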
The Responses API spec requires an annotations array in `output[*].content[*].annotations` and we were not providing one. So, this adds that as an empty list, even though we don't do anything to populate it yet. This prevents an error from client libraries like Langchain that expect this field to always exist, even if an empty list. The other fix is `web_search_preview` is a valid name for the web search tool in the Responses API, but we only responded to `web_search` or `web_search_preview_2025_03_11`. ## Test Plan The existing Responses unit tests were expanded to test these cases, via: ``` pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py ``` The existing test_openai_responses.py integration tests still pass with this change, tested as below with Fireworks: ``` uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv tests/integration/agents/test_openai_responses.py \ --text-model accounts/fireworks/models/llama4-scout-instruct-basic ``` Lastly, this example LangChain app now works with Llama stack (tested with Ollama in the starter template in this case). This LangChain code is using the example snippets for using Responses API at https://python.langchain.com/docs/integrations/chat/openai/#responses-api ```python from langchain_openai import ChatOpenAI llm = ChatOpenAI( base_url="http://localhost:8321/v1/openai/v1", api_key="fake", model="ollama/meta-llama/Llama-3.2-3B-Instruct", ) tool = {"type": "web_search_preview"} llm_with_tools = llm.bind_tools([tool]) response = llm_with_tools.invoke("What was a positive news story from today?") print(response.content) ``` Signed-off-by: Ben Browning --- docs/_static/llama-stack-spec.html | 154 +++++++++++++++++- docs/_static/llama-stack-spec.yaml | 107 ++++++++++++ llama_stack/apis/agents/openai_responses.py | 54 +++++- .../agents/meta_reference/openai_responses.py | 3 +- .../meta_reference/test_openai_responses.py | 73 +++++---- 5 files changed, 355 insertions(+), 36 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 801e8dc33..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": 
"string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b736cd904..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - 
filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match 
type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") # TODO: add user_location diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..cf3293ed0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.inference.inference import ( @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..7772dd2cc 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.inference.inference import ( OpenAIAssistantMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" 
model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+        assert first_call.kwargs["tools"] is not None
+        assert first_call.kwargs["temperature"] == 0.1
 
-    openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
-    openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
-        tool_name="web_search",
-        kwargs={"query": "What is the capital of Ireland?"},
-    )
+        second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
+        assert second_call.kwargs["messages"][-1].content == "Dublin"
+        assert second_call.kwargs["temperature"] == 0.1
 
-    openai_responses_impl.responses_store.store_response_object.assert_called_once()
+        openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
+        openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
+            tool_name="web_search",
+            kwargs={"query": "What is the capital of Ireland?"},
+        )
 
-    # Check that we got the content from our mocked tool execution result
-    assert len(result.output) >= 1
-    assert isinstance(result.output[1], OpenAIResponseMessage)
-    assert result.output[1].content[0].text == "Dublin"
+        openai_responses_impl.responses_store.store_response_object.assert_called_once()
+
+        # Check that we got the content from our mocked tool execution result
+        assert len(result.output) >= 1
+        assert isinstance(result.output[1], OpenAIResponseMessage)
+        assert result.output[1].content[0].text == "Dublin"
+        assert result.output[1].content[0].annotations == []
 
 
 @pytest.mark.asyncio

From ac5fd57387f8fded5e6129789e2d09d01f6d67ba Mon Sep 17 00:00:00 2001
From: Sébastien Han
Date: Thu, 26 Jun 2025 04:31:05 +0200
Subject: [PATCH 5/8] chore: remove nested imports (#2515)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

* Given that our API packages use "import *" in `__init__.py` we don't
  need to do `from llama_stack.apis.models.models` but simply
  `from llama_stack.apis.models`. The decision to use `import *` is
  debatable and should probably be revisited at one point.
* Remove unneeded Ruff F401 rule
* Consolidate Ruff F403 rule in the pyproject

Signed-off-by: Sébastien Han
---
 llama_stack/apis/agents/__init__.py           |  2 +-
 llama_stack/apis/batch_inference/__init__.py  |  2 +-
 llama_stack/apis/benchmarks/__init__.py       |  2 +-
 llama_stack/apis/datasetio/__init__.py        |  2 +-
 llama_stack/apis/datasets/__init__.py         |  2 +-
 llama_stack/apis/eval/__init__.py             |  2 +-
 llama_stack/apis/files/__init__.py            |  2 +-
 llama_stack/apis/inference/__init__.py        |  2 +-
 llama_stack/apis/inference/inference.py       |  2 +-
 llama_stack/apis/inspect/__init__.py          |  2 +-
 llama_stack/apis/models/__init__.py           |  2 +-
 llama_stack/apis/post_training/__init__.py    |  2 +-
 llama_stack/apis/providers/__init__.py        |  2 +-
 llama_stack/apis/safety/__init__.py           |  2 +-
 llama_stack/apis/scoring/__init__.py          |  2 +-
 .../apis/scoring_functions/__init__.py        |  2 +-
 llama_stack/apis/shields/__init__.py          |  2 +-
 .../synthetic_data_generation/__init__.py     |  2 +-
 llama_stack/apis/telemetry/__init__.py        |  2 +-
 llama_stack/apis/tools/__init__.py            |  4 ++--
 llama_stack/apis/vector_dbs/__init__.py       |  2 +-
 llama_stack/apis/vector_io/__init__.py        |  2 +-
 llama_stack/distribution/routers/inference.py | 14 +++++-------
 llama_stack/distribution/routers/vector_io.py | 10 ++++-----
 llama_stack/distribution/ui/modules/api.py    |  2 +-
 llama_stack/log.py                            |  2 +-
 .../agents/meta_reference/openai_responses.py |  4 ++--
 .../inline/eval/meta_reference/eval.py        |  2 +-
 .../inline/scoring/llm_as_judge/scoring.py    |  2 +-
 .../scoring_fn/llm_as_judge_scoring_fn.py     |  2 +-
 .../providers/inline/vector_io/faiss/faiss.py |  3 +--
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |  6 ++---
 .../remote/datasetio/nvidia/datasetio.py      |  2 +-
 .../remote/inference/anthropic/models.py      |  2 +-
 .../remote/inference/fireworks/fireworks.py   | 14 +++++-------
 .../remote/inference/fireworks/models.py      |  2 +-
 .../remote/inference/gemini/models.py         |  2 +-
 .../providers/remote/inference/groq/groq.py   |  2 +-
 .../remote/inference/nvidia/nvidia.py         | 12 +++++-----
 .../remote/inference/ollama/models.py         |  2 +-
 .../remote/inference/ollama/ollama.py         | 16 ++++++--------
 .../remote/inference/openai/models.py         |  2 +-
 .../remote/inference/openai/openai.py         |  2 +-
 .../inference/passthrough/passthrough.py      | 12 +++++-----
 .../remote/inference/runpod/runpod.py         |  2 +-
 .../remote/inference/together/models.py       |  2 +-
 .../remote/inference/together/together.py     | 12 +++++-----
 .../providers/remote/inference/vllm/vllm.py   | 10 ++++-----
 .../remote/inference/watsonx/watsonx.py       | 14 +++++-------
 .../utils/inference/litellm_openai_mixin.py   | 18 +++++++--------
 .../utils/inference/model_registry.py         |  2 +-
 .../utils/inference/openai_compat.py          | 22 +++++++++----------
 .../utils/memory/openai_vector_store_mixin.py |  3 +--
 .../providers/utils/telemetry/tracing.py      |  2 +-
 llama_stack/templates/cerebras/cerebras.py    |  2 +-
 llama_stack/templates/ci-tests/ci_tests.py    |  2 +-
 llama_stack/templates/dell/dell.py            |  2 +-
 llama_stack/templates/fireworks/fireworks.py  |  2 +-
 llama_stack/templates/groq/groq.py            |  2 +-
 .../templates/hf-endpoint/hf_endpoint.py      |  2 +-
 .../templates/hf-serverless/hf_serverless.py  |  2 +-
 llama_stack/templates/llama_api/llama_api.py  |  2 +-
 .../meta-reference-gpu/meta_reference.py      |  2 +-
 llama_stack/templates/ollama/ollama.py        |  2 +-
 .../open-benchmark/open_benchmark.py          |  2 +-
 .../templates/passthrough/passthrough.py      |  2 +-
 .../templates/postgres-demo/postgres_demo.py  |  2 +-
 llama_stack/templates/remote-vllm/vllm.py     |  2 +-
 llama_stack/templates/sambanova/sambanova.py  |  2 +-
llama_stack/templates/starter/starter.py | 2 +- llama_stack/templates/template.py | 2 +- llama_stack/templates/tgi/tgi.py | 2 +- llama_stack/templates/together/together.py | 2 +- llama_stack/templates/vllm-gpu/vllm.py | 2 +- llama_stack/templates/watsonx/watsonx.py | 2 +- pyproject.toml | 3 +++ .../routers/test_routing_tables.py | 2 +- .../meta_reference/fixtures/__init__.py | 2 +- .../meta_reference/test_openai_responses.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 2 +- .../utils/inference/test_openai_compat.py | 2 +- .../providers/utils/test_model_registry.py | 2 +- 82 files changed, 143 insertions(+), 164 deletions(-) diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git 
a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index cf3293ed0..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -45,7 +45,7 @@ from llama_stack.apis.agents.openai_responses import ( WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -584,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from 
llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 3b3c5f486..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -15,8 +15,8 @@ import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, "namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2f51920b5..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -32,15 +32,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, - ResponseFormat, - SamplingParams, - TextTruncation, - ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, @@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, OpenAIResponseFormatParam, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 
f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index ae04f206a..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- 
a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 01dfb8d61..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, 
OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..4ae68ee1d 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -180,7 +180,7 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..723cc44a3 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..f0738ae5b 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..a1a2aa2b7 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..38df6a4be 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index ec01d08e9..8e111e80a 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..97624fade 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,6 +204,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 7772dd2cc..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -29,7 +29,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTextFormat, WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from 
llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry From 36d70637b98441da9e7c7035dc5f35048bcd9e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 04:31:26 +0200 Subject: [PATCH 6/8] fix: finish conversion to StrEnum (#2514) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? We still had a few enums declared to behave like both a string and an enum. Let's use StrEnum for those. Signed-off-by: Sébastien Han --- llama_stack/apis/datasets/datasets.py | 4 ++-- llama_stack/apis/files/files.py | 4 ++-- llama_stack/apis/models/models.py | 4 ++-- llama_stack/distribution/access_control/datatypes.py | 4 ++-- llama_stack/distribution/datatypes.py | 6 +++--- llama_stack/models/llama/datatypes.py | 4 ++-- llama_stack/providers/datatypes.py | 4 ++-- .../providers/inline/telemetry/meta_reference/config.py | 4 ++-- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index e3de3d5cb..8bf7a48d0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import Enum, StrEnum from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -class DatasetPurpose(str, Enum): +class DatasetPurpose(StrEnum): """ Purpose of the dataset. Each purpose has a required input data schema. diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 4dfeed448..a72dcd8d4 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from enum import Enum +from enum import StrEnum from typing import Annotated, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod # OpenAI Files API Models -class OpenAIFilePurpose(str, Enum): +class OpenAIFilePurpose(StrEnum): """ Valid purpose values for OpenAI Files API. """ diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 3d90a92a0..36da97e62 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -22,7 +22,7 @@ class CommonModelFields(BaseModel): @json_schema_type -class ModelType(str, Enum): +class ModelType(StrEnum): llm = "llm" embedding = "embedding" diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/distribution/access_control/datatypes.py index bc5ed6645..c833ed51b 100644 --- a/llama_stack/distribution/access_control/datatypes.py +++ b/llama_stack/distribution/access_control/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Self from pydantic import BaseModel, model_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator from .conditions import parse_conditions -class Action(str, Enum): +class Action(StrEnum): CREATE = "create" READ = "read" UPDATE = "update" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index abc3f0065..5324e4c29 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from pathlib import Path from typing import Annotated, Any @@ -159,7 +159,7 @@ class LoggingConfig(BaseModel): ) -class AuthProviderType(str, Enum): +class AuthProviderType(StrEnum): """Supported authentication provider types.""" OAUTH2_TOKEN = "oauth2_token" @@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception): pass -class QuotaPeriod(str, Enum): +class QuotaPeriod(StrEnum): DAY = "day" diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index f9f094c3d..7f1ebed55 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -from enum import Enum +from enum import Enum, StrEnum from io import BytesIO from typing import Annotated, Any, Literal @@ -171,7 +171,7 @@ class GenerationResult(BaseModel): ignore_token: bool -class QuantizationMode(str, Enum): +class QuantizationMode(StrEnum): none = "none" fp8_mixed = "fp8_mixed" int4_mixed = "int4_mixed" diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 60b05545b..221ed9027 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from enum import Enum +from enum import StrEnum from typing import Any, Protocol from urllib.parse import urlparse @@ -225,7 +225,7 @@ def remote_provider_spec( ) -class HealthStatus(str, Enum): +class HealthStatus(StrEnum): OK = "OK" ERROR = "Error" NOT_IMPLEMENTED = "Not Implemented" diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 93509040c..2baa204c9 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, Field, field_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR -class TelemetrySink(str, Enum): +class TelemetrySink(StrEnum): OTEL_TRACE = "otel_trace" OTEL_METRIC = "otel_metric" SQLITE = "sqlite" From 43c1f39bd6fc9450f10dadfc23c27b4eaff37233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 04:50:08 +0200 Subject: [PATCH 7/8] refactor(env)!: enhanced environment variable substitution (#2490) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This commit significantly improves the environment variable substitution functionality in Llama Stack configuration files: * The version field in configuration files has been changed from string to integer type for better type consistency across build and run configurations. * The environment variable substitution system for ${env.FOO:} was fixed and now properly returns an error. * The environment variable substitution system for ${env.FOO+} now returns None instead of an empty string, which better matches type annotations in config fields. * The system includes automatic type conversion for boolean, integer, and float values. * The error messages have been enhanced to provide clearer guidance when environment variables are missing, including suggestions for using default values or conditional syntax. * Comprehensive documentation has been added to the configuration guide explaining all supported syntax patterns, best practices, and runtime override capabilities. * Multiple provider configurations have been updated to use the new conditional syntax for optional API keys, making the system more flexible for different deployment scenarios. The telemetry configuration has been improved to properly handle optional endpoints with appropriate validation, ensuring that required endpoints are specified when their corresponding sinks are enabled. * There were many instances of ${env.NVIDIA_API_KEY:} that should have caused the code to fail. However, due to a bug, the distro server was still being started, and early validation wasn’t triggered. As a result, failures were likely being handled downstream by the providers. I’ve maintained similar behavior by using ${env.NVIDIA_API_KEY:+}, though I believe this is incorrect for many configurations. I’ll leave it to each provider to correct it as needed. * Environment variable substitution now uses the same syntax as Bash parameter expansion.
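To make the new rules concrete, here is a minimal, self-contained sketch of the bash-style expansion semantics described above. It mirrors the `replace_env_vars` and `_convert_string_to_proper_type` changes to `llama_stack/distribution/stack.py` shown in the diff below, but the `expand` and `_coerce` helpers and their error messages are illustrative simplifications, not the actual implementation:

```python
import os
import re
from typing import Any

# Matches ${env.VAR}, ${env.VAR:=default}, and ${env.VAR:+value_if_set}
_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::([=+])([^}]*))?\}")


def _coerce(value: str) -> Any:
    """Best-effort typing: '' -> None, 'true'/'false' -> bool, numeric strings -> int/float."""
    if value == "":
        return None
    if value.lower() in ("true", "false"):
        return value.lower() == "true"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value


def expand(text: str) -> Any:
    def repl(match: re.Match) -> str:
        var, op, rest = match.group(1), match.group(2), match.group(3)
        env_value = os.environ.get(var)
        if op == "=":  # ${env.VAR:=default} -> fall back to the default when unset
            if env_value:
                return env_value
            if rest == "":  # empty defaults are rejected
                raise ValueError(f"environment variable '{var}' is not set and has an empty default")
            return rest
        if op == "+":  # ${env.VAR:+value} -> emit the value only when the variable is set
            return rest if env_value else ""
        if env_value is None:  # bare ${env.VAR} -> required
            raise ValueError(f"environment variable '{var}' is not set")
        return env_value

    return _coerce(_PATTERN.sub(repl, text))


os.environ.pop("OLLAMA_URL", None)
print(expand("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434
print(expand("${env.PORT:=8321}"))   # 8321, coerced to an int
print(expand("${env.DEBUG:+true}"))  # None, because DEBUG is unset
```

Note how the type coercion runs after substitution, which is why `${env.PORT:=8321}` yields an integer and an unset conditional collapses to `None`.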
Signed-off-by: Sébastien Han --- docs/source/distributions/configuration.md | 105 ++++++++++++++++-- .../distributions/k8s/stack-configmap.yaml | 60 +++++----- .../distributions/k8s/stack_run_config.yaml | 72 ++++++------ llama_stack/distribution/datatypes.py | 8 +- llama_stack/distribution/stack.py | 54 +++++++-- .../providers/inline/files/localfs/config.py | 2 +- .../inline/inference/meta_reference/config.py | 10 +- .../providers/inline/inference/vllm/config.py | 12 +- .../inline/scoring/braintrust/config.py | 2 +- .../inline/telemetry/meta_reference/config.py | 14 +-- .../telemetry/meta_reference/telemetry.py | 4 + .../inline/vector_io/qdrant/config.py | 2 +- .../inline/vector_io/sqlite_vec/config.py | 2 +- .../remote/datasetio/nvidia/config.py | 8 +- .../providers/remote/eval/nvidia/config.py | 2 +- .../remote/inference/nvidia/config.py | 6 +- .../remote/inference/ollama/config.py | 2 +- .../remote/inference/runpod/config.py | 4 +- .../remote/inference/together/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 9 +- .../remote/inference/watsonx/config.py | 6 +- .../remote/post_training/nvidia/config.py | 8 +- .../providers/remote/safety/nvidia/config.py | 4 +- .../tool_runtime/brave_search/config.py | 2 +- .../tool_runtime/tavily_search/config.py | 2 +- .../tool_runtime/wolfram_alpha/config.py | 2 +- .../remote/vector_io/pgvector/config.py | 4 +- llama_stack/providers/utils/kvstore/config.py | 24 ++-- .../providers/utils/sqlstore/sqlstore.py | 12 +- llama_stack/templates/bedrock/build.yaml | 2 +- llama_stack/templates/bedrock/run.yaml | 30 ++--- llama_stack/templates/cerebras/build.yaml | 2 +- llama_stack/templates/cerebras/run.yaml | 30 ++--- llama_stack/templates/ci-tests/build.yaml | 2 +- llama_stack/templates/ci-tests/run.yaml | 30 ++--- llama_stack/templates/dell/build.yaml | 2 +- .../templates/dell/run-with-safety.yaml | 28 ++--- llama_stack/templates/dell/run.yaml | 28 ++--- llama_stack/templates/fireworks/build.yaml | 2 +- .../templates/fireworks/run-with-safety.yaml | 36 +++--- llama_stack/templates/fireworks/run.yaml | 36 +++--- llama_stack/templates/groq/build.yaml | 2 +- llama_stack/templates/groq/run.yaml | 30 ++--- llama_stack/templates/hf-endpoint/build.yaml | 2 +- .../hf-endpoint/run-with-safety.yaml | 30 ++--- llama_stack/templates/hf-endpoint/run.yaml | 30 ++--- .../templates/hf-serverless/build.yaml | 2 +- .../hf-serverless/run-with-safety.yaml | 30 ++--- llama_stack/templates/hf-serverless/run.yaml | 30 ++--- llama_stack/templates/llama_api/build.yaml | 2 +- llama_stack/templates/llama_api/llama_api.py | 14 +-- llama_stack/templates/llama_api/run.yaml | 48 ++++---- .../templates/meta-reference-gpu/build.yaml | 2 +- .../meta-reference-gpu/run-with-safety.yaml | 46 ++++---- .../templates/meta-reference-gpu/run.yaml | 38 +++---- llama_stack/templates/nvidia/build.yaml | 2 +- .../templates/nvidia/run-with-safety.yaml | 52 ++++----- llama_stack/templates/nvidia/run.yaml | 46 ++++---- llama_stack/templates/ollama/build.yaml | 2 +- .../templates/ollama/run-with-safety.yaml | 38 +++---- llama_stack/templates/ollama/run.yaml | 38 +++---- .../templates/open-benchmark/build.yaml | 2 +- .../open-benchmark/open_benchmark.py | 12 +- llama_stack/templates/open-benchmark/run.yaml | 48 ++++---- llama_stack/templates/passthrough/build.yaml | 2 +- .../passthrough/run-with-safety.yaml | 32 +++--- llama_stack/templates/passthrough/run.yaml | 32 +++--- .../templates/postgres-demo/build.yaml | 2 +- .../templates/postgres-demo/postgres_demo.py | 10 +- 
llama_stack/templates/postgres-demo/run.yaml | 64 +++++------ llama_stack/templates/remote-vllm/build.yaml | 2 +- .../remote-vllm/run-with-safety.yaml | 44 ++++---- llama_stack/templates/remote-vllm/run.yaml | 38 +++---- llama_stack/templates/sambanova/build.yaml | 2 +- llama_stack/templates/sambanova/run.yaml | 40 +++---- llama_stack/templates/sambanova/sambanova.py | 12 +- llama_stack/templates/starter/build.yaml | 2 +- llama_stack/templates/starter/run.yaml | 90 +++++++-------- llama_stack/templates/starter/starter.py | 38 +++---- llama_stack/templates/tgi/build.yaml | 2 +- .../templates/tgi/run-with-safety.yaml | 30 ++--- llama_stack/templates/tgi/run.yaml | 30 ++--- llama_stack/templates/together/build.yaml | 2 +- .../templates/together/run-with-safety.yaml | 34 +++--- llama_stack/templates/together/run.yaml | 34 +++--- llama_stack/templates/vllm-gpu/build.yaml | 2 +- llama_stack/templates/vllm-gpu/run.yaml | 42 +++---- llama_stack/templates/watsonx/build.yaml | 2 +- llama_stack/templates/watsonx/run.yaml | 36 +++--- .../llama-stack-provider-ollama/run.yaml | 72 ++++++++---- tests/unit/server/test_replace_env_vars.py | 31 +++--- 91 files changed, 1053 insertions(+), 892 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 4bc9b37e4..1b50ee712 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -18,7 +18,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,7 +38,7 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -46,7 +46,7 @@ providers: metadata_store: namespace: null type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -85,7 +85,7 @@ providers: # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: - A _provider instance_ is identified with an (id, type, configuration) triplet. @@ -94,6 +94,95 @@ A few things to note: - The configuration dictionary is provider-specific. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +### Environment Variable Substitution + +Llama Stack supports environment variable substitution in configuration values using the +`${env.VARIABLE_NAME}` syntax. 
This allows you to externalize configuration values and provide +different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) +and follows similar patterns. + +#### Basic Syntax + +The basic syntax for environment variable substitution is: + +```yaml +config: + api_key: ${env.API_KEY} + url: ${env.SERVICE_URL} +``` + +If the environment variable is not set, the server will raise an error during startup. + +#### Default Values + +You can provide default values using the `:=` operator: + +```yaml +config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + port: ${env.PORT:=8321} + timeout: ${env.TIMEOUT:=60} +``` + +If the environment variable is not set, the default value after `:=` is used; for example, `OLLAMA_URL` above falls back to `http://localhost:11434`. +Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set. + +#### Conditional Values + +You can use the `:+` operator to provide a value only when the environment variable is set: + +```yaml +config: + # Only include this field if ENVIRONMENT is set + environment: ${env.ENVIRONMENT:+production} +``` + +If the environment variable is set, the value after `:+` will be used. If it's not set, the field +will be omitted with a `None` value. +An empty conditional value such as `${env.ENVIRONMENT:+}` is also supported; it means that the field will be omitted if the environment +variable is not set. This can be used to make a field optional and then enable it at runtime when desired. + +#### Examples + +Here are some common patterns: + +```yaml +# Required environment variable (will error if not set) +api_key: ${env.OPENAI_API_KEY} + +# Optional with default +base_url: ${env.API_BASE_URL:=https://api.openai.com/v1} + +# Conditional field +debug_mode: ${env.DEBUG:+true} + +# Optional field that becomes None if not set +optional_token: ${env.OPTIONAL_TOKEN:+} +``` + +#### Runtime Override + +You can override environment variables at runtime when starting the server: + +```bash +# Override specific environment variables +llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com + +# Or set them in your shell +export API_KEY=sk-123 +export BASE_URL=https://custom-api.com +llama stack run --config run.yaml +``` + +#### Type Safety + +The environment variable substitution system is type-safe: + +- String values remain strings +- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None` +- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer) +- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean) + ## Resources Finally, let's look at the `models` section: @@ -152,7 +241,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" - token: "${env.TOKEN:}" + token: "${env.TOKEN:+}" key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" @@ -396,12 +485,12 @@ providers: - provider_id: vllm-0 provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000} + url: ${env.VLLM_URL:=http://localhost:8000} # this vLLM server serves the llama-guard model (e.g., llama-guard:3b) - provider_id: vllm-1 provider_type: remote::vllm config: - url: ${env.SAFETY_VLLM_URL:http://localhost:8001} + url: ${env.SAFETY_VLLM_URL:=http://localhost:8001} ... 
models: - metadata: {} diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index fa7bacd8f..0a08bca03 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -15,10 +15,10 @@ data: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: @@ -30,10 +30,10 @@ data: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,34 +45,34 @@ data: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} + service_name: ${env.OTEL_SERVICE_NAME:+} sinks: ${env.TELEMETRY_SINKS:console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -82,19 +82,19 @@ data: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: 
${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -106,11 +106,11 @@ data: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: - - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index 8e2773dd1..5ac08134c 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -12,25 +12,25 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: - url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,34 +42,34 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + service_name: ${env.OTEL_SERVICE_NAME:+console} + sinks: ${env.TELEMETRY_SINKS:+console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: 
rag-runtime provider_type: inline::rag-runtime @@ -79,19 +79,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -103,11 +103,11 @@ models: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: -- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} +- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 5324e4c29..e07da001e 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig -LLAMA_STACK_BUILD_CONFIG_VERSION = "2" -LLAMA_STACK_RUN_CONFIG_VERSION = "2" +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 RoutingKey = str | list[str] @@ -229,7 +229,7 @@ class ServerConfig(BaseModel): class StackRunConfig(BaseModel): - version: str = LLAMA_STACK_RUN_CONFIG_VERSION + version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( ..., @@ -300,7 +300,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): - version: str = LLAMA_STACK_BUILD_CONFIG_VERSION + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index b33b0d3f7..c86880669 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -127,7 +127,12 @@ class EnvVarError(Exception): def __init__(self, var_name: str, path: str = ""): self.var_name = var_name self.path = path - super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}") + super().__init__( + f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. " + f"Use ${{env.{var_name}:=default_value}} to provide a default value, " + f"${{env.{var_name}:+value_if_set}} to make the field conditional, " + f"or ensure the environment variable is set." 
+ ) def replace_env_vars(config: Any, path: str = "") -> Any: @@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return result elif isinstance(config, str): - # Updated pattern to support both default values (:) and conditional values (+) - pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}" + # Pattern supports bash-like syntax: := for default and :+ for conditional, with an optional value + pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}" - def get_env_var(match): + def get_env_var(match: re.Match): env_var = match.group(1) - operator = match.group(2) # ':' for default, '+' for conditional + operator = match.group(2) # '=' for default, '+' for conditional value_expr = match.group(3) env_value = os.environ.get(env_var) - if operator == ":": # Default value syntax: ${env.FOO:default} + if operator == "=": # Default value syntax: ${env.FOO:=default} if not env_value: - if value_expr is None: + # value_expr returns empty string (not None) when not matched + # This means ${env.FOO:=} is an error + if value_expr == "": raise EnvVarError(env_var, path) else: value = value_expr else: value = env_value - elif operator == "+": # Conditional value syntax: ${env.FOO+value_if_set} + elif operator == "+": # Conditional value syntax: ${env.FOO:+value_if_set} if env_value: value = value_expr else: @@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return os.path.expanduser(value) try: - return re.sub(pattern, get_env_var, config) + result = re.sub(pattern, get_env_var, config) + return _convert_string_to_proper_type(result) except EnvVarError as e: raise EnvVarError(e.var_name, e.path) from None return config +def _convert_string_to_proper_type(value: str) -> Any: + # This might be tricky depending on what the config type is, if 'str | None' we are + # good, if 'str' we need to keep the empty string... 'str | None' is more common and + # providers config should be typed this way. 
+ # TODO: we could try to load the config class and see if the config has a field with type 'str | None' + # and then convert the empty string to None or not + if value == "": + return None + + lowered = value.lower() + if lowered == "true": + return True + elif lowered == "false": + return False + + try: + return int(value) + except ValueError: + pass + + try: + return float(value) + except ValueError: + pass + + return value + + def validate_env_pair(env_pair: str) -> tuple[str, str]: """Validate and split an environment variable key-value pair.""" try: diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 757a70742..6c767af8f 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", + "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", "metadata_store": SqliteSqlStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="files_metadata.db", diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 7bc961443..9556b026a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel): def sample_run_config( cls, model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", - quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", **kwargs, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index ce8743c74..660ef206b 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -44,10 +44,10 @@ class VLLMConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", - "max_tokens": "${env.MAX_TOKENS:4096}", - "max_model_len": "${env.MAX_MODEL_LEN:4096}", - "max_num_seqs": "${env.MAX_NUM_SEQS:4}", - "enforce_eager": "${env.ENFORCE_EAGER:False}", - "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", + "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}", + "max_tokens": "${env.MAX_TOKENS:=4096}", + "max_model_len": "${env.MAX_MODEL_LEN:=4096}", + "max_num_seqs": "${env.MAX_NUM_SEQS:=4}", + "enforce_eager": "${env.ENFORCE_EAGER:=False}", + "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}", } diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index 4a80f1e4f..f44d27f96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ 
b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "openai_api_key": "${env.OPENAI_API_KEY:}", + "openai_api_key": "${env.OPENAI_API_KEY:+}", } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 2baa204c9..50dd8a788 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -20,12 +20,12 @@ class TelemetrySink(StrEnum): class TelemetryConfig(BaseModel): - otel_trace_endpoint: str = Field( - default="http://localhost:4318/v1/traces", + otel_trace_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for traces", ) - otel_metric_endpoint: str = Field( - default="http://localhost:4318/v1/metrics", + otel_metric_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:\u200b}", - "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", + "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 0f6cf8619..98f5bf5a1 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider if TelemetrySink.OTEL_TRACE in self.config.sinks: + if self.config.otel_trace_endpoint is None: + raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled") span_exporter = OTLPSpanExporter( endpoint=self.config.otel_trace_endpoint, ) span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: + if self.config.otel_metric_endpoint is None: + raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled") metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( endpoint=self.config.otel_metric_endpoint, diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 283724b41..7cc91d918 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py 
index cb806cb39..4c57f4aba 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py index e616ce25c..0f5ea22e9 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index 5c8f9ff76..7a1c04304 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}", } diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 4c449edc2..6369928bb 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", + "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", } diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 37b827f4f..b2cc4d8a7 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel): @classmethod def sample_run_config( - cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs ) -> dict[str, Any]: return { "url": url, diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index e3913dc35..ff32a971c 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ 
b/llama_stack/providers/remote/inference/runpod/config.py @@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:}", - "api_token": "${env.RUNPOD_API_TOKEN:}", + "url": "${env.RUNPOD_URL:+}", + "api_token": "${env.RUNPOD_API_TOKEN:+}", } diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 5c7f60519..121e2cae7 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:}", + "api_key": "${env.TOGETHER_API_KEY:+}", } diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 99abddf51..e11efa7f0 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def validate_tls_verify(cls, v): if isinstance(v, str): - # Check if it's a boolean string - if v.lower() in ("true", "false"): - return v.lower() == "true" # Otherwise, treat it as a cert path cert_path = Path(v).expanduser().resolve() if not cert_path.exists(): @@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel): ): return { "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", - "api_token": "${env.VLLM_API_TOKEN:fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:true}", + "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", } diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 5eda9c5c0..9534eceeb 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:}", - "project_id": "${env.WATSONX_PROJECT_ID:}", + "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:+}", + "project_id": "${env.WATSONX_PROJECT_ID:+}", } diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index fa08b6e3f..ea6dff0b5 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}", } diff --git 
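[Editor's note] The vLLM hunk above drops the `"true"`/`"false"` string parsing from `validate_tls_verify`: with `${env.VLLM_TLS_VERIFY:=true}` now resolving through the substitution machinery, booleans arrive as booleans, and any remaining string is treated strictly as a certificate path. A condensed reconstruction of the resulting validator, with the surrounding class details assumed:

```python
# Condensed reconstruction of the validator after this hunk; error
# messages and class shape are assumptions, not copied from the patch.
from pathlib import Path
from pydantic import BaseModel, field_validator

class TLSConfig(BaseModel):  # hypothetical stand-in for the adapter config
    tls_verify: bool | str = True

    @field_validator("tls_verify")
    @classmethod
    def validate_tls_verify(cls, v):
        if isinstance(v, str):
            # Any string is now a cert path; booleans pass through untouched.
            cert_path = Path(v).expanduser().resolve()
            if not cert_path.exists():
                raise ValueError(f"TLS certificate file does not exist: {v}")
            if not cert_path.is_file():
                raise ValueError(f"TLS certificate path is not a file: {v}")
        return v
```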
a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index ddf5a3a0b..1c618f4f4 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", } diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 37ba21304..93b97a1b2 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "api_key": "${env.BRAVE_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index c9b18d30d..5bdd27807 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index aefc86bd6..b5b10e371 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", + "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}", } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 04b92a2e4..041e864ca 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel): @classmethod def sample_run_config( cls, - host: str = "${env.PGVECTOR_HOST:localhost}", - port: int = "${env.PGVECTOR_PORT:5432}", + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index e966e13ba..a45ff4ce8 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig): return { "type": "redis", "namespace": None, - "host": "${env.REDIS_HOST:localhost}", - "port": "${env.REDIS_PORT:6379}", 
+ "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", } @@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig): return { "type": "sqlite", "namespace": None, - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig): return { "type": "postgres", "namespace": None, - "host": "${env.POSTGRES_HOST:localhost}", - "port": "${env.POSTGRES_PORT:5432}", - "db": "${env.POSTGRES_DB:llamastack}", - "user": "${env.POSTGRES_USER:llamastack}", - "password": "${env.POSTGRES_PASSWORD:llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", } @classmethod @@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig): return { "type": "mongodb", "namespace": None, - "host": "${env.MONGODB_HOST:localhost}", - "port": "${env.MONGODB_PORT:5432}", + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", "user": "${env.MONGODB_USER}", "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", } diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index edc7672a3..d558a2a26 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): return cls( type="sqlite", - db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, ) @property @@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, **kwargs): return cls( type="postgres", - host="${env.POSTGRES_HOST:localhost}", - port="${env.POSTGRES_PORT:5432}", - db="${env.POSTGRES_DB:llamastack}", - user="${env.POSTGRES_USER:llamastack}", - password="${env.POSTGRES_PASSWORD:llamastack}", + host="${env.POSTGRES_HOST:=localhost}", + port="${env.POSTGRES_PORT:=5432}", + db="${env.POSTGRES_DB:=llamastack}", + user="${env.POSTGRES_USER:=llamastack}", + password="${env.POSTGRES_PASSWORD:=llamastack}", ) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 97a06f77a..1a2c883fa 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use AWS Bedrock for running LLM inference and safety providers: diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 8033b2086..61bc83f02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: bedrock apis: - agents @@ -22,7 +22,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock provider_type: remote::bedrock @@ -34,17 +34,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -52,7 +52,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -60,14 +60,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -78,17 +78,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -98,10 +98,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index f26f4ed9b..ecd0ac418 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Cerebras for running LLM inference providers: diff --git a/llama_stack/templates/cerebras/run.yaml 
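[Editor's note] The `version: '2'` → `version: 2` change repeated across every build.yaml and run.yaml swaps a quoted string scalar for a plain integer scalar, presumably so the field parses as an int rather than a str. The difference is easy to confirm with any YAML loader:

```python
# The quoted and unquoted scalars parse to different Python types:
import yaml

print(yaml.safe_load("version: '2'"))  # {'version': '2'}  (str)
print(yaml.safe_load("version: 2"))    # {'version': 2}    (int)
```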
b/llama_stack/templates/cerebras/run.yaml index 490648302..9bd8fcc7c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: cerebras apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,10 +40,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -51,7 +51,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -59,14 +59,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -77,34 +77,34 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b diff --git a/llama_stack/templates/ci-tests/build.yaml 
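[Editor's note] The telemetry blocks in these templates rely on the meta-reference telemetry changes earlier in this patch, where the OTLP endpoints became `str | None` with `None` defaults and the adapter fails fast when a sink is enabled without its endpoint. A minimal sketch of that guard, with the sink enum and wiring trimmed down:

```python
# Minimal sketch of the fail-fast guard added in telemetry.py above;
# the enum and function shape are stand-ins for the adapter's own code.
from enum import StrEnum

class TelemetrySink(StrEnum):
    OTEL_TRACE = "otel_trace"
    OTEL_METRIC = "otel_metric"

def check_endpoints(sinks: list[TelemetrySink],
                    otel_trace_endpoint: str | None,
                    otel_metric_endpoint: str | None) -> None:
    # Endpoints default to None now, so enabling a sink without one is a
    # configuration error rather than a silent localhost:4318 default.
    if TelemetrySink.OTEL_TRACE in sinks and otel_trace_endpoint is None:
        raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
    if TelemetrySink.OTEL_METRIC in sinks and otel_metric_endpoint is None:
        raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
```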
b/llama_stack/templates/ci-tests/build.yaml index 9f4fbbdda..c061d0793 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 92497b0bf..4b7de1c0c 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ci-tests apis: - agents @@ -24,7 +24,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -37,17 +37,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -55,7 +55,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -63,14 +63,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -81,17 +81,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -101,10 +101,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 513df16c1..ff8d58a08 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Dell's distribution of Llama Stack. TGI inference via Dell's custom container diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 22cf1fd24..7f1d0a8c0 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: 
inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index aeca2fc26..310f3cc20 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -36,17 +36,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -54,7 +54,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -62,14 +62,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -80,27 +80,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 53b47da41..eb08c1d43 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Fireworks.AI for running LLM inference providers: diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 302328486..6265f5cae 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -46,17 +46,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -64,7 +64,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -72,14 +72,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -90,30 +90,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -122,10 +122,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index a31ed732b..e10404e92 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - 
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,30 +85,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -117,10 +117,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 819df22f0..7e50a899f 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Groq for running LLM inference providers: diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 7f1912a6f..21c8f7e0f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: groq apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index 8ede83694..9fca9ac22 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8b00f4ba5..2ae1d7685 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 8a9cd5c49..3ec5ae9c1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index d0752db9a..214245116 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index fec64c1df..3871b77e7 100644 --- 
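[Editor's note] In the tool_runtime and scoring hunks of every template, bare `${env.X:}` keys become `${env.X:+}`, so an unset key now resolves to an empty field instead of tripping the placeholder parser. One plausible way a provider might treat the resolved value is sketched below; the actual providers' behavior is not shown in these hunks, so this is an assumption about the downstream handling.

```python
# Hypothetical downstream handling of an optional api_key resolved via
# '${env.BRAVE_SEARCH_API_KEY:+}': empty means "not configured", and the
# provider can degrade gracefully instead of failing at startup.
import logging

logger = logging.getLogger(__name__)

def resolve_search_api_key(raw: str | None) -> str | None:
    if not raw:  # unset env var -> placeholder resolved to "" or None
        logger.warning("BRAVE_SEARCH_API_KEY not set; search tool disabled")
        return None
    return raw
```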
a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} 
model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d4a6286d7..0a5b59400 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml index 857e5f014..44a42594a 100644 --- a/llama_stack/templates/llama_api/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index 723cc44a3..7631781af 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -41,7 +41,7 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: ( "llama-openai-compat", LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), ), ] inference_providers = [] @@ -85,17 +85,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 2185eb4fc..b627ed2f1 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: llama_api apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::llama-openai-compat config: openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:} + api_key: ${env.LLAMA_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -24,19 +24,19 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: 
- provider_id: llama-guard provider_type: inline::llama-guard @@ -49,17 +49,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -67,7 +67,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -75,14 +75,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -93,17 +93,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db models: - metadata: {} model_id: Llama-3.3-70B-Instruct diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 53ad411e3..2119eeddd 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Meta Reference for running LLM inference providers: diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml 
b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index e65445a9e..6b15a1e01 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,10 +31,10 @@ providers: model: ${env.SAFETY_MODEL} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -42,7 +42,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -55,17 +55,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -73,7 +73,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -81,14 +81,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -99,17 +99,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,10 +119,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ef02f14d..1b44a0b3e 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 6bd8a0100..51685b2e3 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index eebfa1066..875fccc9d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,14 +16,14 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: 
${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -31,13 +31,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -45,30 +45,30 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: localfs provider_type: inline::localfs @@ -76,14 +76,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -94,10 +94,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index be0e3f6d1..4477d5244 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,9 +16,9 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,13 +26,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,38 +40,38 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - 
datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,10 +82,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..cbf4281a2 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Ollama server for running LLM inference providers: diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 2e1b7fdcc..5e906a12c 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,15 +85,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -105,12 +105,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -121,13 +121,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 8c2b17ef1..d2b4e3978 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: 
"${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,15 +83,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -103,12 +103,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,13 +119,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 840f1e1db..5f82c5243 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running open benchmarks providers: diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py 
b/llama_stack/templates/open-benchmark/open_benchmark.py index f0738ae5b..b4cfbdb52 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -120,17 +120,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 051ca6f8e..403b0fd3d 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: open-benchmark apis: - agents @@ -33,24 +33,24 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -63,17 +63,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: 
meta-reference provider_type: inline::meta-reference @@ -81,7 +81,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +89,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -107,17 +107,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -127,10 +127,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 46b99cb75..e2e041dbc 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Passthrough hosted llama-stack endpoint for LLM inference providers: diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 3168eeb9f..c5b047511 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,22 +89,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 48abf8577..896b3c91e 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,22 +84,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 6416cd00f..645b59613 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git 
a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index a1a2aa2b7..5d42b8901 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -50,9 +50,9 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), ] default_tool_groups = [ @@ -114,9 +114,9 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", + service_name="${env.OTEL_SERVICE_NAME:+}", + sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), ) ], diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 0e0d020b2..03b7a59fb 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: postgres-demo apis: - agents @@ -13,17 +13,17 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -35,35 +35,35 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} + 
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -73,19 +73,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 16fe5d4fd..0298b01c7 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) vLLM server for running LLM inference providers: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 64f71087a..b297f1489 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,16 +16,16 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -36,7 +36,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,10 +49,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -60,7 +60,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -68,14 +68,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -86,24 +86,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -114,13 +114,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 353b9902d..6bd332cc9 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,9 +16,9 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: 
${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -29,7 +29,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,10 +42,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -53,7 +53,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -61,14 +61,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -79,24 +79,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -107,13 +107,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db 
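Alongside the operator conversion, every `build.yaml` and `run.yaml` touched by this series also drops the quotes around the `version` field, so YAML parses it as an integer rather than a string:

```yaml
# before
version: '2'
# after
version: 2
```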
inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index 14b1c8974..ba70f88c6 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use SambaNova for running LLM inference and safety providers: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 58d0d36e3..b96621b58 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: sambanova apis: - agents @@ -24,19 +24,19 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: sambanova provider_type: remote::sambanova @@ -50,27 +50,27 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -81,13 +81,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} model_id: sambanova/Meta-Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 38df6a4be..428577697 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -73,17 +73,17 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..3b48dcf7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 30df39e5d..f7c53170b 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: starter apis: - agents @@ -16,47 +16,47 @@ providers: - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY:} + api_key: ${env.OPENAI_API_KEY:+} - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} + api_key: ${env.FIREWORKS_API_KEY:+} - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY:} + api_key: ${env.ANTHROPIC_API_KEY:+} - provider_id: gemini provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY:} + api_key: ${env.GEMINI_API_KEY:+} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} + api_key: ${env.GROQ_API_KEY:+} - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} + api_key: ${env.SAMBANOVA_API_KEY:+} - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: 
${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -67,31 +67,31 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -104,17 +104,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -122,7 +122,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -130,14 +130,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs 
provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -148,17 +148,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -168,10 +168,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -538,15 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__} + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: - embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384} - model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -802,9 +802,9 @@ models: provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} provider_id: vllm - provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 8e111e80a..df31fed84 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -69,67 +69,67 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ( "openai", OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), + OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( "fireworks", FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( "together", TOGETHER_MODEL_ENTRIES, - TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + 
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( "ollama", [ ProviderModelEntry( - provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ProviderModelEntry( - provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", model_type=ModelType.embedding, metadata={ - "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}", + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", }, ), ], OllamaImplConfig.sample_run_config( - url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False + url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False ), ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, - AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"), + AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( "gemini", GEMINI_MODEL_ENTRIES, - GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"), + GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( "groq", GROQ_MODEL_ENTRIES, - GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), + GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( "sambanova", SAMBANOVA_MODEL_ENTRIES, - SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( "vllm", [ ProviderModelEntry( - provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ], VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:http://localhost:8000/v1}", + url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), ] @@ -180,17 +180,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 361b0b680..3ac3968e8 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) TGI server for running LLM inference providers: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 22b7bcde6..63da62a03 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index dd012323c..430494121 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,17 +83,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -103,10 +103,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 5ffeac873..518a843da 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Together.AI for running LLM inference providers: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index a24843416..7ae2a1d1a 100644 --- 
a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -110,13 +110,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index c71f960bd..dc09aeac9 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search 
config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,13 +105,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index d5ff0f1f4..147dca50d 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use a built-in vLLM engine for running LLM inference providers: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6878c22b2..104b3a239 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: vllm-gpu apis: - agents @@ -15,12 +15,12 @@ providers: - provider_id: vllm provider_type: inline::vllm config: - tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1} - max_tokens: ${env.MAX_TOKENS:4096} - max_model_len: ${env.MAX_MODEL_LEN:4096} - max_num_seqs: ${env.MAX_NUM_SEQS:4} - enforce_eager: ${env.ENFORCE_EAGER:False} - gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3} + tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} + max_tokens: ${env.MAX_TOKENS:=4096} + max_model_len: ${env.MAX_MODEL_LEN:=4096} + max_num_seqs: ${env.MAX_NUM_SEQS:=4} + enforce_eager: ${env.ENFORCE_EAGER:=False} + gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,7 +31,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -44,17 +44,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,7 +62,7 @@ providers: kvstore: 
type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -70,14 +70,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -88,17 +88,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml index e68ace183..08ee2c5ce 100644 --- a/llama_stack/templates/watsonx/build.yaml +++ b/llama_stack/templates/watsonx/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use watsonx for running LLM inference providers: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d60a87906..678bf72ff 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: watsonx apis: - agents @@ -15,9 +15,9 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:} - project_id: ${env.WATSONX_PROJECT_ID:} + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:+} + project_id: ${env.WATSONX_PROJECT_ID:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,17 +85,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,10 +105,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db models: - metadata: {} model_id: meta-llama/llama-3-3-70b-instruct diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 158f6800f..60cff7503 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,71 +1,101 @@ -version: '2' +version: 2 image_name: ollama apis: +- agents +- datasetio +- eval - inference +- safety +- scoring - telemetry - tool_runtime -- datasetio - vector_io + providers: inference: - - provider_id: custom_ollama - provider_type: remote::custom_ollama + - provider_id: ollama + provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} 
vector_io: - provider_id: faiss provider_type: inline::faiss config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + agents_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200b}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + metadata_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db datasetio: - provider_id: huggingface provider_type: remote::huggingface config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 7fcbbfde9..0fb7c395e 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -26,39 +26,44 @@ class TestReplaceEnvVars(unittest.TestCase): self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value") def test_default_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default") + 
self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default") def test_default_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value") + self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value") def test_default_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default") + + def test_empty_var_no_default(self): + self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None) def test_conditional_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional") def test_conditional_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "") + self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None) def test_conditional_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None) def test_conditional_value_with_zero(self): - self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional") def test_mixed_syntax(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ") self.assertEqual( - replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional" + replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and " + ) + self.assertEqual( + replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional" ) def test_nested_structures(self): data = { - "key1": "${env.TEST_VAR:default}", - "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"], - "key3": {"nested": "${env.NOT_SET+conditional}"}, + "key1": "${env.TEST_VAR:=default}", + "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], + "key3": {"nested": "${env.NOT_SET:+conditional}"}, } - expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}} + expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} self.assertEqual(replace_env_vars(data), expected) From dbdc811d1684cfac8056dacbc09784b0803eef40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 10:14:27 +0200 Subject: [PATCH 8/8] chore: isolate bare minimum project dependencies (#2282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? The goal is to promote the minimal set of dependencies the project needs to run, this includes: * dependencies needed to work with the CLI * dependencies needed for the server to run with no providers This also: * Relocate redundant dependencies out of the core project and into the individual providers that actually require them. * Include all necessary server dependencies so the project can run standalone, even without any providers. ## Test Plan Build and run distro a server. 
Signed-off-by: Sébastien Han --- .github/workflows/providers-build.yml | 3 ++ llama_stack/providers/registry/inference.py | 2 +- llama_stack/providers/registry/safety.py | 2 +- llama_stack/providers/registry/scoring.py | 2 +- pyproject.toml | 19 +++---- requirements.txt | 53 +++++++++++++++++-- uv.lock | 58 ++++++++++++++++----- 7 files changed, 111 insertions(+), 28 deletions(-) diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 8268a0085..6de72cd60 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -11,6 +11,8 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' + pull_request: paths: - 'llama_stack/cli/stack/build.py' @@ -19,6 +21,7 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 66f2e8bce..47be57eee 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama", "aiohttp"], + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", ), diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index e0a04be48..f0fe1e9f5 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.safety, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=["litellm"], + pip_packages=["litellm", "requests"], module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 7980d6a13..244b06842 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.scoring, provider_type="inline::basic", - pip_packages=[], + pip_packages=["requests"], module="llama_stack.providers.inline.scoring.basic", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", api_dependencies=[ diff --git a/pyproject.toml b/pyproject.toml index 97624fade..99be1a80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", - "fire", + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.30.0,<1.0", "jinja2>=3.1.6", @@ -34,15 +34,18 @@ dependencies = [ "python-dotenv", "python-jose", "pydantic>=2", - "requests", "rich", - "setuptools", "starlette", "termcolor", "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk", # server + 
"opentelemetry-exporter-otlp-proto-http", # server + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store ] [project.optional-dependencies] @@ -67,7 +70,6 @@ dev = [ "types-requests", "types-setuptools", "pre-commit", - "uvicorn", "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. @@ -80,7 +82,6 @@ unit = [ "mcp", "chardet", "qdrant-client", - "opentelemetry-exporter-otlp-proto-http", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", "blobfile", @@ -96,8 +97,6 @@ test = [ "aiohttp", "torch>=2.6.0", "torchvision>=0.21.0", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", "chardet", "pypdf", "mcp", @@ -106,6 +105,7 @@ test = [ "transformers", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", + "requests", ] docs = [ "sphinx-autobuild", @@ -122,6 +122,7 @@ docs = [ "tomli", "linkify", "sphinxcontrib.openapi", + "requests", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] diff --git a/requirements.txt b/requirements.txt index 7e7aa38ce..2e016ef72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ aiohttp==3.11.13 # via llama-stack aiosignal==1.3.2 # via aiohttp +aiosqlite==0.21.0 + # via llama-stack annotated-types==0.7.0 # via pydantic anyio==4.8.0 @@ -14,6 +16,8 @@ anyio==4.8.0 # llama-stack-client # openai # starlette +asyncpg==0.30.0 + # via llama-stack attrs==25.1.0 # via # aiohttp @@ -27,11 +31,18 @@ certifi==2025.1.31 charset-normalizer==3.4.1 # via requests click==8.1.8 - # via llama-stack-client + # via + # llama-stack-client + # uvicorn colorama==0.4.6 ; sys_platform == 'win32' # via # click # tqdm +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distro==1.9.0 # via # llama-stack-client @@ -50,10 +61,13 @@ frozenlist==1.5.0 # aiosignal fsspec==2024.12.0 # via huggingface-hub +googleapis-common-protos==1.67.0 + # via opentelemetry-exporter-otlp-proto-http h11==0.16.0 # via # httpcore # llama-stack + # uvicorn hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub httpcore==1.0.9 @@ -71,6 +85,8 @@ idna==3.10 # httpx # requests # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api jinja2==3.1.6 # via llama-stack jiter==0.8.2 @@ -95,6 +111,25 @@ numpy==2.2.3 # via pandas openai==1.71.0 # via llama-stack +opentelemetry-api==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.30.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.30.0 + # via llama-stack +opentelemetry-proto==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.30.0 + # via + # llama-stack + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.51b0 + # via opentelemetry-sdk packaging==24.2 # via huggingface-hub pandas==2.2.3 @@ -109,6 +144,10 @@ propcache==0.3.0 # via # aiohttp # yarl +protobuf==5.29.3 + # via + # googleapis-common-protos + # opentelemetry-proto pyaml==25.1.0 # via llama-stack-client pyasn1==0.4.8 @@ -148,7 +187,7 @@ regex==2024.11.6 requests==2.32.4 # via # huggingface-hub - # llama-stack + # opentelemetry-exporter-otlp-proto-http # tiktoken rich==13.9.4 # via @@ -160,8 +199,6 @@ rpds-py==0.22.3 # referencing rsa==4.9 # via python-jose 
-setuptools==80.8.0 - # via llama-stack six==1.17.0 # via # ecdsa @@ -189,11 +226,13 @@ tqdm==4.67.1 # openai typing-extensions==4.12.2 # via + # aiosqlite # anyio # fastapi # huggingface-hub # llama-stack-client # openai + # opentelemetry-sdk # pydantic # pydantic-core # referencing @@ -201,7 +240,13 @@ tzdata==2025.1 # via pandas urllib3==2.3.0 # via requests +uvicorn==0.34.0 + # via llama-stack wcwidth==0.2.13 # via prompt-toolkit +wrapt==1.17.2 + # via deprecated yarl==1.18.3 # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/uv.lock b/uv.lock index 42eece4e1..31e296642 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, ] +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload-time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload-time = "2024-10-20T00:29:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload-time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload-time = "2024-10-20T00:29:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload-time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload-time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload-time = 
"2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload-time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "attrs" version = "25.1.0" @@ -1187,6 +1219,8 @@ version = "0.2.12" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, { name = "fastapi" }, { name = "fire" }, { name = "h11" }, @@ -1196,18 +1230,19 @@ dependencies = [ { name = "jsonschema" }, { name = "llama-stack-client" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn" }, ] [package.optional-dependencies] @@ -1238,11 +1273,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1264,9 +1299,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -1282,7 +1316,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -1292,6 +1325,8 @@ unit = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg" }, { name = "fastapi", specifier = ">=0.115.0,<1.0" }, { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, @@ -1302,6 +1337,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.12" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.12" }, { name = "openai", specifier = ">=1.66" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1309,14 +1346,13 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart", specifier = ">=0.0.20" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "streamlit", marker = "extra == 'ui'" }, { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] provides-extras = ["ui"] @@ -1340,11 +1376,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1366,9 +1402,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy" }, { name = 
"sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "torch", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, @@ -1383,7 +1418,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy" },