diff --git a/docs/source/providers/inference/remote_anthropic.md b/docs/source/providers/inference/remote_anthropic.md index 79d5a3f6e..4680608b1 100644 --- a/docs/source/providers/inference/remote_anthropic.md +++ b/docs/source/providers/inference/remote_anthropic.md @@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv ## Sample Configuration ```yaml -api_key: ${env.ANTHROPIC_API_KEY} +api_key: ${env.ANTHROPIC_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_cerebras-openai-compat.md b/docs/source/providers/inference/remote_cerebras-openai-compat.md index 64b899246..f0a75d0ed 100644 --- a/docs/source/providers/inference/remote_cerebras-openai-compat.md +++ b/docs/source/providers/inference/remote_cerebras-openai-compat.md @@ -15,7 +15,7 @@ Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API fo ```yaml openai_compat_api_base: https://api.cerebras.ai/v1 -api_key: ${env.CEREBRAS_API_KEY} +api_key: ${env.CEREBRAS_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_cerebras.md b/docs/source/providers/inference/remote_cerebras.md index c9793d7de..7aa03dd0b 100644 --- a/docs/source/providers/inference/remote_cerebras.md +++ b/docs/source/providers/inference/remote_cerebras.md @@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform. 
```yaml base_url: https://api.cerebras.ai -api_key: ${env.CEREBRAS_API_KEY} +api_key: ${env.CEREBRAS_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_databricks.md b/docs/source/providers/inference/remote_databricks.md index c611d9414..d0ac89055 100644 --- a/docs/source/providers/inference/remote_databricks.md +++ b/docs/source/providers/inference/remote_databricks.md @@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic ## Sample Configuration ```yaml -url: ${env.DATABRICKS_URL} -api_token: ${env.DATABRICKS_API_TOKEN} +url: ${env.DATABRICKS_URL:=} +api_token: ${env.DATABRICKS_API_TOKEN:=} ``` diff --git a/docs/source/providers/inference/remote_fireworks-openai-compat.md b/docs/source/providers/inference/remote_fireworks-openai-compat.md index 0a2bd0fe8..f9ff1dc80 100644 --- a/docs/source/providers/inference/remote_fireworks-openai-compat.md +++ b/docs/source/providers/inference/remote_fireworks-openai-compat.md @@ -15,7 +15,7 @@ Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI A ```yaml openai_compat_api_base: https://api.fireworks.ai/inference/v1 -api_key: ${env.FIREWORKS_API_KEY} +api_key: ${env.FIREWORKS_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_fireworks.md b/docs/source/providers/inference/remote_fireworks.md index 351586c34..c950cb7d7 100644 --- a/docs/source/providers/inference/remote_fireworks.md +++ b/docs/source/providers/inference/remote_fireworks.md @@ -15,7 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire ```yaml url: https://api.fireworks.ai/inference/v1 -api_key: ${env.FIREWORKS_API_KEY} +api_key: ${env.FIREWORKS_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_gemini.md b/docs/source/providers/inference/remote_gemini.md index cafcd787d..14b3223f2 100644 --- a/docs/source/providers/inference/remote_gemini.md +++ b/docs/source/providers/inference/remote_gemini.md @@ -13,7 
+13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser ## Sample Configuration ```yaml -api_key: ${env.GEMINI_API_KEY} +api_key: ${env.GEMINI_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_groq-openai-compat.md b/docs/source/providers/inference/remote_groq-openai-compat.md index e424bedd2..c04f78d7e 100644 --- a/docs/source/providers/inference/remote_groq-openai-compat.md +++ b/docs/source/providers/inference/remote_groq-openai-compat.md @@ -15,7 +15,7 @@ Groq OpenAI-compatible provider for using Groq models with OpenAI API format. ```yaml openai_compat_api_base: https://api.groq.com/openai/v1 -api_key: ${env.GROQ_API_KEY} +api_key: ${env.GROQ_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_groq.md b/docs/source/providers/inference/remote_groq.md index 4f734f263..68bd4d5b3 100644 --- a/docs/source/providers/inference/remote_groq.md +++ b/docs/source/providers/inference/remote_groq.md @@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. ```yaml url: https://api.groq.com -api_key: ${env.GROQ_API_KEY} +api_key: ${env.GROQ_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_llama-openai-compat.md b/docs/source/providers/inference/remote_llama-openai-compat.md index 5c97aebc3..c2f960bf5 100644 --- a/docs/source/providers/inference/remote_llama-openai-compat.md +++ b/docs/source/providers/inference/remote_llama-openai-compat.md @@ -15,7 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. 
```yaml openai_compat_api_base: https://api.llama.com/compat/v1/ -api_key: ${env.LLAMA_API_KEY} +api_key: ${env.LLAMA_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_openai.md b/docs/source/providers/inference/remote_openai.md index b4cfb5880..36e4b5454 100644 --- a/docs/source/providers/inference/remote_openai.md +++ b/docs/source/providers/inference/remote_openai.md @@ -13,7 +13,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. ## Sample Configuration ```yaml -api_key: ${env.OPENAI_API_KEY} +api_key: ${env.OPENAI_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_passthrough.md b/docs/source/providers/inference/remote_passthrough.md index 9005e5339..e5633097e 100644 --- a/docs/source/providers/inference/remote_passthrough.md +++ b/docs/source/providers/inference/remote_passthrough.md @@ -15,7 +15,7 @@ Passthrough inference provider for connecting to any external inference service ```yaml url: ${env.PASSTHROUGH_URL} -api_key: ${env.PASSTHROUGH_API_KEY} +api_key: ${env.PASSTHROUGH_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_sambanova-openai-compat.md b/docs/source/providers/inference/remote_sambanova-openai-compat.md index c213d962f..3074a5885 100644 --- a/docs/source/providers/inference/remote_sambanova-openai-compat.md +++ b/docs/source/providers/inference/remote_sambanova-openai-compat.md @@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API ```yaml openai_compat_api_base: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_sambanova.md b/docs/source/providers/inference/remote_sambanova.md index 006c41ac1..9d15c97d5 100644 --- a/docs/source/providers/inference/remote_sambanova.md +++ b/docs/source/providers/inference/remote_sambanova.md @@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow 
architec ```yaml url: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_together-openai-compat.md b/docs/source/providers/inference/remote_together-openai-compat.md index 833fa8cb0..5dafef8e8 100644 --- a/docs/source/providers/inference/remote_together-openai-compat.md +++ b/docs/source/providers/inference/remote_together-openai-compat.md @@ -15,7 +15,7 @@ Together AI OpenAI-compatible provider for using Together models with OpenAI API ```yaml openai_compat_api_base: https://api.together.xyz/v1 -api_key: ${env.TOGETHER_API_KEY} +api_key: ${env.TOGETHER_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_together.md b/docs/source/providers/inference/remote_together.md index f33ff42f2..1e19021d2 100644 --- a/docs/source/providers/inference/remote_together.md +++ b/docs/source/providers/inference/remote_together.md @@ -15,7 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel ```yaml url: https://api.together.xyz/v1 -api_key: ${env.TOGETHER_API_KEY} +api_key: ${env.TOGETHER_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_vllm.md b/docs/source/providers/inference/remote_vllm.md index 6c725fb41..cba7aac52 100644 --- a/docs/source/providers/inference/remote_vllm.md +++ b/docs/source/providers/inference/remote_vllm.md @@ -16,7 +16,7 @@ Remote vLLM inference provider for connecting to vLLM servers. 
## Sample Configuration ```yaml -url: ${env.VLLM_URL} +url: ${env.VLLM_URL:=http://localhost:8000/v1} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} diff --git a/docs/source/providers/safety/remote_sambanova.md b/docs/source/providers/safety/remote_sambanova.md index c680f9764..7e608f1b7 100644 --- a/docs/source/providers/safety/remote_sambanova.md +++ b/docs/source/providers/safety/remote_sambanova.md @@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering. ```yaml url: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md index 14c821f35..1667e4e3b 100644 --- a/docs/source/providers/vector_io/remote_qdrant.md +++ b/docs/source/providers/vector_io/remote_qdrant.md @@ -24,7 +24,7 @@ Please refer to the inline provider documentation. 
## Sample Configuration ```yaml -api_key: ${env.QDRANT_API_KEY} +api_key: ${env.QDRANT_API_KEY:=} ``` diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/llama_stack/providers/remote/inference/anthropic/config.py index 10da0025e..a74b97a9e 100644 --- a/llama_stack/providers/remote/inference/anthropic/config.py +++ b/llama_stack/providers/remote/inference/anthropic/config.py @@ -26,7 +26,7 @@ class AnthropicConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 952118e24..9845f0d69 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -65,9 +65,10 @@ class CerebrasInferenceAdapter( ) self.config = config + api_key = self.config.api_key.get_secret_value() if self.config.api_key else "" self.client = AsyncCerebras( base_url=self.config.base_url, - api_key=self.config.api_key.get_secret_value(), + api_key=api_key, ) async def initialize(self) -> None: diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py index 5ad7376fc..699f6a1ef 100644 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ b/llama_stack/providers/remote/inference/cerebras/config.py @@ -26,7 +26,7 @@ class CerebrasImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "base_url": DEFAULT_BASE_URL, "api_key": api_key, diff --git 
a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py index cb8daff6a..11fd07ebb 100644 --- a/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/cerebras_openai_compat/config.py @@ -31,7 +31,7 @@ class CerebrasCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.cerebras.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/llama_stack/providers/remote/inference/databricks/config.py index 5710dcef3..cc2a2c302 100644 --- a/llama_stack/providers/remote/inference/databricks/config.py +++ b/llama_stack/providers/remote/inference/databricks/config.py @@ -25,8 +25,8 @@ class DatabricksImplConfig(BaseModel): @classmethod def sample_run_config( cls, - url: str = "${env.DATABRICKS_URL}", - api_token: str = "${env.DATABRICKS_API_TOKEN}", + url: str = "${env.DATABRICKS_URL:=}", + api_token: str = "${env.DATABRICKS_API_TOKEN:=}", **kwargs: Any, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/llama_stack/providers/remote/inference/fireworks/config.py index 072d558f4..2f81e1cef 100644 --- a/llama_stack/providers/remote/inference/fireworks/config.py +++ b/llama_stack/providers/remote/inference/fireworks/config.py @@ -23,7 +23,7 @@ class FireworksImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.fireworks.ai/inference/v1", "api_key": api_key,
diff --git a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py index bf38cdd2b..3eafcfdb6 100644 --- a/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/fireworks_openai_compat/config.py @@ -31,7 +31,7 @@ class FireworksCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.fireworks.ai/inference/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/llama_stack/providers/remote/inference/gemini/config.py index 63ef4de01..c897777f7 100644 --- a/llama_stack/providers/remote/inference/gemini/config.py +++ b/llama_stack/providers/remote/inference/gemini/config.py @@ -26,7 +26,7 @@ class GeminiConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/groq/config.py 
b/llama_stack/providers/remote/inference/groq/config.py index fe060507a..67e9fa358 100644 --- a/llama_stack/providers/remote/inference/groq/config.py +++ b/llama_stack/providers/remote/inference/groq/config.py @@ -32,7 +32,7 @@ class GroqConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.groq.com", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/groq_openai_compat/config.py b/llama_stack/providers/remote/inference/groq_openai_compat/config.py index 481f740f9..a3bb3bd51 100644 --- a/llama_stack/providers/remote/inference/groq_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/groq_openai_compat/config.py @@ -31,7 +31,7 @@ class GroqCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.groq.com/openai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/llama_stack/providers/remote/inference/llama_openai_compat/config.py index 57bc7240d..833ecfa87 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -31,7 +31,7 @@ class LlamaCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.llama.com/compat/v1/", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/openai/config.py 
b/llama_stack/providers/remote/inference/openai/config.py index 17fb98831..2768e98d0 100644 --- a/llama_stack/providers/remote/inference/openai/config.py +++ b/llama_stack/providers/remote/inference/openai/config.py @@ -26,7 +26,7 @@ class OpenAIConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/llama_stack/providers/remote/inference/passthrough/config.py index 647b2db46..ef6849707 100644 --- a/llama_stack/providers/remote/inference/passthrough/config.py +++ b/llama_stack/providers/remote/inference/passthrough/config.py @@ -25,7 +25,7 @@ class PassthroughImplConfig(BaseModel): @classmethod def sample_run_config( - cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs + cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY:=}", **kwargs ) -> dict[str, Any]: return { "url": url, diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/llama_stack/providers/remote/inference/sambanova/config.py index abbf9430f..50ad53d06 100644 --- a/llama_stack/providers/remote/inference/sambanova/config.py +++ b/llama_stack/providers/remote/inference/sambanova/config.py @@ -30,7 +30,7 @@ class SambaNovaImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py index 072fa85d1..751665651 100644 --- 
a/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/sambanova_openai_compat/config.py @@ -31,7 +31,7 @@ class SambaNovaCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index f166e4277..de80d3d3c 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY}", + "api_key": "${env.TOGETHER_API_KEY:=}", } diff --git a/llama_stack/providers/remote/inference/together_openai_compat/config.py b/llama_stack/providers/remote/inference/together_openai_compat/config.py index 0c6d4f748..7fd5c04a3 100644 --- a/llama_stack/providers/remote/inference/together_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/together_openai_compat/config.py @@ -31,7 +31,7 @@ class TogetherCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.TOGETHER_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.together.xyz/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index e11efa7f0..8be8d5b8b 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ 
b/llama_stack/providers/remote/inference/vllm/config.py @@ -46,7 +46,7 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def sample_run_config( cls, - url: str = "${env.VLLM_URL}", + url: str = "${env.VLLM_URL:=http://localhost:8000/v1}", **kwargs, ): return { diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/llama_stack/providers/remote/safety/sambanova/config.py index 383cea244..2cde97098 100644 --- a/llama_stack/providers/remote/safety/sambanova/config.py +++ b/llama_stack/providers/remote/safety/sambanova/config.py @@ -30,7 +30,7 @@ class SambaNovaSafetyConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index 314d3f5f1..22b73c6d4 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -27,5 +27,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.QDRANT_API_KEY}", + "api_key": "${env.QDRANT_API_KEY:=}", } diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 76c029864..41ba578c4 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -33,7 +33,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} + api_key: ${env.TOGETHER_API_KEY:=} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 
dc7565d46..8c7337720 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -6,27 +6,13 @@ distribution_spec: - remote::cerebras - remote::ollama - remote::vllm - - remote::tgi - - remote::hf::serverless - - remote::hf::endpoint - remote::fireworks - remote::together - - remote::bedrock - - remote::databricks - - remote::nvidia - - remote::runpod - remote::openai - remote::anthropic - remote::gemini - remote::groq - - remote::fireworks-openai-compat - - remote::llama-openai-compat - - remote::together-openai-compat - - remote::groq-openai-compat - - remote::sambanova-openai-compat - - remote::cerebras-openai-compat - remote::sambanova - - remote::passthrough - inline::sentence-transformers vector_io: - inline::faiss diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index b3dfe32d5..aee334b51 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -14,122 +14,54 @@ apis: - vector_io providers: inference: - - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + - provider_id: cerebras provider_type: remote::cerebras config: base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY} - - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ollama provider_type: remote::ollama config: url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.ENABLE_VLLM:=__disabled__} + - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL} + url: ${env.VLLM_URL:=http://localhost:8000/v1} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.ENABLE_TGI:=__disabled__} - provider_type: remote::tgi - config: - url: ${env.TGI_URL} - - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} - provider_type: remote::hf::serverless - config: - huggingface_repo: 
${env.INFERENCE_MODEL} - api_token: ${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} - - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_type: remote::bedrock - config: {} - - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_type: remote::databricks - config: - url: ${env.DATABRICKS_URL} - api_token: ${env.DATABRICKS_API_TOKEN} - - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_type: remote::runpod - config: - url: ${env.RUNPOD_URL:=} - api_token: ${env.RUNPOD_API_TOKEN} - - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY} - - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY} - - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY} - - provider_id: ${env.ENABLE_GROQ:=__disabled__} + api_key: 
${env.GEMINI_API_KEY:=} + - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY} - - provider_id: ${env.ENABLE_FIREWORKS_OPENAI_COMPAT:=__disabled__} - provider_type: remote::fireworks-openai-compat - config: - openai_compat_api_base: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} - provider_type: remote::llama-openai-compat - config: - openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY} - - provider_id: ${env.ENABLE_TOGETHER_OPENAI_COMPAT:=__disabled__} - provider_type: remote::together-openai-compat - config: - openai_compat_api_base: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} - - provider_id: ${env.ENABLE_GROQ_OPENAI_COMPAT:=__disabled__} - provider_type: remote::groq-openai-compat - config: - openai_compat_api_base: https://api.groq.com/openai/v1 - api_key: ${env.GROQ_API_KEY} - - provider_id: ${env.ENABLE_SAMBANOVA_OPENAI_COMPAT:=__disabled__} - provider_type: remote::sambanova-openai-compat - config: - openai_compat_api_base: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - - provider_id: ${env.ENABLE_CEREBRAS_OPENAI_COMPAT:=__disabled__} - provider_type: remote::cerebras-openai-compat - config: - openai_compat_api_base: https://api.cerebras.ai/v1 - api_key: ${env.CEREBRAS_API_KEY} - - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} - provider_type: remote::passthrough - config: - url: ${env.PASSTHROUGH_URL} - api_key: ${env.PASSTHROUGH_API_KEY} + api_key: ${env.SAMBANOVA_API_KEY:=} - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} provider_type: inline::sentence-transformers config: {} @@ 
-257,909 +189,671 @@ inference_store: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama3.1-8b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/llama3.1-8b + provider_id: cerebras provider_model_id: llama3.1-8b model_type: llm - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/meta-llama/Llama-3.1-8B-Instruct + provider_id: cerebras provider_model_id: llama3.1-8b model_type: llm - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-3.3-70b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/llama-3.3-70b + provider_id: cerebras provider_model_id: llama-3.3-70b model_type: llm - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/meta-llama/Llama-3.3-70B-Instruct + provider_id: cerebras provider_model_id: llama-3.3-70b model_type: llm - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/llama-4-scout-17b-16e-instruct + provider_id: cerebras provider_model_id: llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + model_id: cerebras/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: cerebras provider_model_id: llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} + provider_id: ollama 
provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + model_id: ollama/${env.SAFETY_MODEL:=__disabled__} + provider_id: ollama provider_model_id: ${env.SAFETY_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} + provider_id: ollama provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} - model_id: ${env.ENABLE_VLLM:=__disabled__}/${env.VLLM_INFERENCE_MODEL:=__disabled__} - provider_id: ${env.ENABLE_VLLM:=__disabled__} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} + provider_id: vllm provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks provider_model_id: 
accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks provider_model_id: 
accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: 
accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/nomic-ai/nomic-embed-text-v1.5 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-8b - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: 
${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-Guard-3-8B + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.1-8B-Instruct + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: 
together/meta-llama/Llama-3.1-70B-Instruct + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-3B-Instruct + provider_id: together provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: together 
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-3.3-70B-Instruct + provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-8k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: togethercomputer/m2-bert-80M-8k-retrieval + provider_id: together provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval model_type: embedding - metadata: embedding_dimension: 768 context_length: 32768 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-32k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: togethercomputer/m2-bert-80M-32k-retrieval + provider_id: 
together provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval model_type: embedding - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + 
provider_id: together provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-Guard-3-8B + provider_id: together provider_model_id: meta-llama/Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-Guard-3-8B + provider_id: together provider_model_id: meta-llama/Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + model_id: together/meta-llama/Llama-Guard-3-11B-Vision + provider_id: together provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-8b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-70b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-405b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-70b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-405b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- 
metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-405b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-1b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: 
${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-3b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-11b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-90b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.3-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: - embedding_dimension: 2048 - context_length: 8192 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/llama-3.2-nv-embedqa-1b-v2 - 
provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-e5-v5 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-e5-v5 - model_type: embedding -- metadata: - embedding_dimension: 4096 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-mistral-7b-v2 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-mistral-7b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/snowflake/arctic-embed-l - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: snowflake/arctic-embed-l - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: 
Llama3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-1B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-1B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-3B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-3B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/gpt-4o - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o + provider_id: openai provider_model_id: openai/gpt-4o model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/gpt-4o-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o-mini + provider_id: openai provider_model_id: openai/gpt-4o-mini model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/chatgpt-4o-latest - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/chatgpt-4o-latest + provider_id: openai provider_model_id: openai/chatgpt-4o-latest model_type: llm - metadata: {} - model_id: 
${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-0125 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-3.5-turbo-0125 + provider_id: openai provider_model_id: gpt-3.5-turbo-0125 model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-3.5-turbo + provider_id: openai provider_model_id: gpt-3.5-turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-instruct - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-3.5-turbo-instruct + provider_id: openai provider_model_id: gpt-3.5-turbo-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4 + provider_id: openai provider_model_id: gpt-4 model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4-turbo - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4-turbo + provider_id: openai provider_model_id: gpt-4-turbo model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o + provider_id: openai provider_model_id: gpt-4o model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-2024-08-06 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o-2024-08-06 + provider_id: openai provider_model_id: gpt-4o-2024-08-06 model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o-mini + provider_id: openai provider_model_id: gpt-4o-mini model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-audio-preview - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/gpt-4o-audio-preview + provider_id: 
openai provider_model_id: gpt-4o-audio-preview model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/chatgpt-4o-latest - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/chatgpt-4o-latest + provider_id: openai provider_model_id: chatgpt-4o-latest model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/o1 + provider_id: openai provider_model_id: o1 model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/o1-mini + provider_id: openai provider_model_id: o1-mini model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o3-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/o3-mini + provider_id: openai provider_model_id: o3-mini model_type: llm - metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o4-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/o4-mini + provider_id: openai provider_model_id: o4-mini model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/text-embedding-3-small - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/text-embedding-3-small + provider_id: openai provider_model_id: openai/text-embedding-3-small model_type: embedding - metadata: embedding_dimension: 3072 context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/text-embedding-3-large - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/text-embedding-3-large + provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding - metadata: embedding_dimension: 1536 context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-small - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: 
openai/text-embedding-3-small + provider_id: openai provider_model_id: text-embedding-3-small model_type: embedding - metadata: embedding_dimension: 3072 context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-large - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + model_id: openai/text-embedding-3-large + provider_id: openai provider_model_id: text-embedding-3-large model_type: embedding - metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/claude-3-5-sonnet-latest + provider_id: anthropic provider_model_id: anthropic/claude-3-5-sonnet-latest model_type: llm - metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-7-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/claude-3-7-sonnet-latest + provider_id: anthropic provider_model_id: anthropic/claude-3-7-sonnet-latest model_type: llm - metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-haiku-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/claude-3-5-haiku-latest + provider_id: anthropic provider_model_id: anthropic/claude-3-5-haiku-latest model_type: llm - metadata: embedding_dimension: 1024 context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/voyage-3 + provider_id: anthropic provider_model_id: anthropic/voyage-3 model_type: embedding - metadata: embedding_dimension: 512 context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3-lite - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/voyage-3-lite + provider_id: anthropic provider_model_id: anthropic/voyage-3-lite model_type: embedding - metadata: embedding_dimension: 1024 context_length: 32000 - model_id: 
${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-code-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + model_id: anthropic/voyage-code-3 + provider_id: anthropic provider_model_id: anthropic/voyage-code-3 model_type: embedding - metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/gemini-1.5-flash + provider_id: gemini provider_model_id: gemini/gemini-1.5-flash model_type: llm - metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/gemini-1.5-pro + provider_id: gemini provider_model_id: gemini/gemini-1.5-pro model_type: llm - metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.0-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/gemini-2.0-flash + provider_id: gemini provider_model_id: gemini/gemini-2.0-flash model_type: llm - metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/gemini-2.5-flash + provider_id: gemini provider_model_id: gemini/gemini-2.5-flash model_type: llm - metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/gemini-2.5-pro + provider_id: gemini provider_model_id: gemini/gemini-2.5-pro model_type: llm - metadata: embedding_dimension: 768 context_length: 2048 - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/text-embedding-004 - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + model_id: gemini/text-embedding-004 + provider_id: gemini provider_model_id: gemini/text-embedding-004 model_type: embedding - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192 - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama3-8b-8192 + provider_id: groq 
provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-3.1-8B-Instruct + provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.1-8b-instant - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-70b-8192 - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama3-70b-8192 + provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-3-70B-Instruct + provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.3-70b-versatile - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-3.3-70B-Instruct + provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.2-3b-preview - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama-3.2-3b-preview + provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: 
${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-3.2-3B-Instruct + provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama-4-scout-17b-16e-instruct + provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: groq provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/llama-4-maverick-17b-128e-instruct + provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - 
model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: groq provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-3.1-8B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-3.1-405B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: 
${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-3.2-1B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-3.2-3B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-3.3-70B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct 
model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm - metadata: {} - model_id: 
${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/Meta-Llama-Guard-3-8B + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + model_id: sambanova/meta-llama/Llama-Guard-3-8B + provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: @@ -1168,24 +862,24 @@ models: provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} model_type: embedding shields: -- shield_id: ${env.ENABLE_OLLAMA:=__disabled__} +- shield_id: ollama provider_id: llama-guard - provider_shield_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=llama-guard3:1b} -- shield_id: ${env.ENABLE_FIREWORKS:=__disabled__} + provider_shield_id: ollama/${env.SAFETY_MODEL:=llama-guard3:1b} +- shield_id: fireworks provider_id: llama-guard - provider_shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-8b} -- shield_id: ${env.ENABLE_FIREWORKS:=__disabled__} + provider_shield_id: fireworks/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-8b} +- shield_id: fireworks provider_id: llama-guard - provider_shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-11b-vision} -- shield_id: ${env.ENABLE_TOGETHER:=__disabled__} + provider_shield_id: fireworks/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-11b-vision} +- 
shield_id: together provider_id: llama-guard - provider_shield_id: ${env.ENABLE_TOGETHER:=__disabled__}/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-8B} -- shield_id: ${env.ENABLE_TOGETHER:=__disabled__} + provider_shield_id: together/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-8B} +- shield_id: together provider_id: llama-guard - provider_shield_id: ${env.ENABLE_TOGETHER:=__disabled__}/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-11B-Vision-Turbo} -- shield_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + provider_shield_id: together/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-11B-Vision-Turbo} +- shield_id: sambanova provider_id: llama-guard - provider_shield_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/${env.SAFETY_MODEL:=sambanova/Meta-Llama-Guard-3-8B} + provider_shield_id: sambanova/${env.SAFETY_MODEL:=sambanova/Meta-Llama-Guard-3-8B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 6b8aa8974..d7218b26e 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -113,6 +113,19 @@ from llama_stack.templates.template import ( get_model_registry, ) +REMOTE_INFERENCE_PROVIDERS_FOR_STARTER = { + "anthropic", + "cerebras", + "fireworks", + "gemini", + "groq", + "ollama", + "openai", + "sambanova", + "together", + "vllm", +} + def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]: """Get model entries for a specific provider type.""" @@ -207,41 +220,27 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[Pro remote_providers = [ provider for provider in all_providers - # TODO: re-add once the Python 3.13 issue is fixed - # discussion: https://github.com/meta-llama/llama-stack/pull/2327#discussion_r2156883828 - if hasattr(provider, "adapter") and provider.adapter.adapter_type != "watsonx" + if hasattr(provider, "adapter") and provider.adapter.adapter_type in 
REMOTE_INFERENCE_PROVIDERS_FOR_STARTER ] - providers = [] + inference_providers = [] available_models = {} for provider_spec in remote_providers: provider_type = provider_spec.adapter.adapter_type - # Build the environment variable name for enabling this provider - env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}" model_entries = _get_model_entries_for_provider(provider_type) config = _get_config_for_provider(provider_spec) - providers.append( - ( - f"${{env.{env_var}:=__disabled__}}", - provider_type, - model_entries, - config, - ) - ) - available_models[f"${{env.{env_var}:=__disabled__}}"] = model_entries - inference_providers = [] - for provider_id, provider_type, model_entries, config in providers: inference_providers.append( Provider( - provider_id=provider_id, + provider_id=provider_type, provider_type=f"remote::{provider_type}", config=config, ) ) - available_models[provider_id] = model_entries + available_models[provider_type] = model_entries + return inference_providers, available_models