diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index c7b7fc55b..62352520f 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -117,17 +117,13 @@ jobs: EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" if [ "${{ matrix.provider }}" == "ollama" ]; then - export ENABLE_OLLAMA="ollama" export OLLAMA_URL="http://0.0.0.0:11434" - export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" - export TEXT_MODEL=ollama/$OLLAMA_INFERENCE_MODEL + export TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16 export SAFETY_MODEL="llama-guard3:1b" EXTRA_PARAMS="--safety-shield=$SAFETY_MODEL" else - export ENABLE_VLLM="vllm" export VLLM_URL="http://localhost:8000/v1" - export VLLM_INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" - export TEXT_MODEL=vllm/$VLLM_INFERENCE_MODEL + export TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct # TODO: remove the not(test_inference_store_tool_calls) once we can get the tool called consistently EXTRA_PARAMS= EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb index 91cfb569c..482815aa5 100644 --- a/docs/quick_start.ipynb +++ b/docs/quick_start.ipynb @@ -249,12 +249,6 @@ ], "source": [ "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n", - "import os\n", - "\n", - "os.environ[\"ENABLE_OLLAMA\"] = \"ollama\"\n", - "os.environ[\"OLLAMA_INFERENCE_MODEL\"] = \"llama3.2:3b\"\n", - "os.environ[\"OLLAMA_EMBEDDING_MODEL\"] = \"all-minilm:l6-v2\"\n", - "os.environ[\"OLLAMA_EMBEDDING_DIMENSION\"] = \"384\"\n", "\n", "vector_db_id = \"my_demo_vector_db\"\n", "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n", diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 928be15ad..aeb14e6a6 100644 --- 
a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -40,16 +40,16 @@ The following environment variables can be configured: The following models are available by default: -- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)` -- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)` -- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` -- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `meta/llama3-8b-instruct ` +- `meta/llama3-70b-instruct ` +- `meta/llama-3.1-8b-instruct ` +- `meta/llama-3.1-70b-instruct ` +- `meta/llama-3.1-405b-instruct ` +- `meta/llama-3.2-1b-instruct ` +- `meta/llama-3.2-3b-instruct ` +- `meta/llama-3.2-11b-vision-instruct ` +- `meta/llama-3.2-90b-vision-instruct ` +- `meta/llama-3.3-70b-instruct ` - `nvidia/llama-3.2-nv-embedqa-1b-v2 ` - `nvidia/nv-embedqa-e5-v5 ` - `nvidia/nv-embedqa-mistral-7b-v2 ` diff --git a/docs/source/distributions/self_hosted_distro/starter.md b/docs/source/distributions/self_hosted_distro/starter.md index 56cdd5e73..58a3e4411 100644 --- a/docs/source/distributions/self_hosted_distro/starter.md +++ b/docs/source/distributions/self_hosted_distro/starter.md @@ -158,7 +158,7 @@ export ENABLE_PGVECTOR=__disabled__ The starter distribution uses several patterns for provider IDs: 1. **Direct provider IDs**: `faiss`, `ollama`, `vllm` -2. 
**Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC+sqlite-vec}` +2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC:+sqlite-vec}` 3. **Model-based provider IDs**: `${env.OLLAMA_INFERENCE_MODEL:__disabled__}` When using the `:+` pattern (like `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`), the provider is enabled by default and can be disabled by setting the environment variable to `__disabled__`. diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md index 7ceae9072..c6589e758 100644 --- a/docs/source/getting_started/detailed_tutorial.md +++ b/docs/source/getting_started/detailed_tutorial.md @@ -59,7 +59,7 @@ Now let's build and run the Llama Stack config for Ollama. We use `starter` as template. By default all providers are disabled, this requires enabling Ollama by passing environment variables. ```bash -ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type venv --run +llama stack build --template starter --image-type venv --run ``` ::: :::{tab-item} Using `conda` @@ -70,7 +70,7 @@ which defines the providers and their settings. Now let's build and run the Llama Stack config for Ollama. ```bash -ENABLE_OLLAMA=ollama INFERENCE_MODEL="llama3.2:3b" llama stack build --template starter --image-type conda --run +llama stack build --template starter --image-type conda --run ``` ::: :::{tab-item} Using a Container @@ -80,8 +80,6 @@ component that works with different inference providers out of the box. For this configurations, please check out [this guide](../distributions/building_distro.md). First let's set up some environment variables and create a local directory to mount into the container’s file system.
```bash -export INFERENCE_MODEL="llama3.2:3b" -export ENABLE_OLLAMA=ollama export LLAMA_STACK_PORT=8321 mkdir -p ~/.llama ``` @@ -94,7 +92,6 @@ docker run -it \ -v ~/.llama:/root/.llama \ llamastack/distribution-starter \ --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env OLLAMA_URL=http://host.docker.internal:11434 ``` Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with @@ -116,7 +113,6 @@ docker run -it \ --network=host \ llamastack/distribution-starter \ --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env OLLAMA_URL=http://localhost:11434 ``` ::: diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md index 5549f412c..b66fabc77 100644 --- a/docs/source/getting_started/quickstart.md +++ b/docs/source/getting_started/quickstart.md @@ -19,7 +19,7 @@ ollama run llama3.2:3b --keepalive 60m #### Step 2: Run the Llama Stack server We will use `uv` to run the Llama Stack server. ```bash -ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stack llama stack build --template starter --image-type venv --run +uv run --with llama-stack llama stack build --template starter --image-type venv --run ``` #### Step 3: Run the demo Now open up a new terminal and copy the following script into a file named `demo_script.py`. 
diff --git a/docs/source/providers/inference/remote_anthropic.md b/docs/source/providers/inference/remote_anthropic.md index 79d5a3f6e..4680608b1 100644 --- a/docs/source/providers/inference/remote_anthropic.md +++ b/docs/source/providers/inference/remote_anthropic.md @@ -13,7 +13,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv ## Sample Configuration ```yaml -api_key: ${env.ANTHROPIC_API_KEY} +api_key: ${env.ANTHROPIC_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_cerebras.md b/docs/source/providers/inference/remote_cerebras.md index c9793d7de..7aa03dd0b 100644 --- a/docs/source/providers/inference/remote_cerebras.md +++ b/docs/source/providers/inference/remote_cerebras.md @@ -15,7 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform. ```yaml base_url: https://api.cerebras.ai -api_key: ${env.CEREBRAS_API_KEY} +api_key: ${env.CEREBRAS_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_databricks.md b/docs/source/providers/inference/remote_databricks.md index c611d9414..d0ac89055 100644 --- a/docs/source/providers/inference/remote_databricks.md +++ b/docs/source/providers/inference/remote_databricks.md @@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic ## Sample Configuration ```yaml -url: ${env.DATABRICKS_URL} -api_token: ${env.DATABRICKS_API_TOKEN} +url: ${env.DATABRICKS_URL:=} +api_token: ${env.DATABRICKS_API_TOKEN:=} ``` diff --git a/docs/source/providers/inference/remote_fireworks.md b/docs/source/providers/inference/remote_fireworks.md index 862860c29..28dbf1d3f 100644 --- a/docs/source/providers/inference/remote_fireworks.md +++ b/docs/source/providers/inference/remote_fireworks.md @@ -16,7 +16,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire ```yaml url: https://api.fireworks.ai/inference/v1 -api_key: ${env.FIREWORKS_API_KEY} +api_key: ${env.FIREWORKS_API_KEY:=} ``` diff 
--git a/docs/source/providers/inference/remote_gemini.md b/docs/source/providers/inference/remote_gemini.md index cafcd787d..14b3223f2 100644 --- a/docs/source/providers/inference/remote_gemini.md +++ b/docs/source/providers/inference/remote_gemini.md @@ -13,7 +13,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser ## Sample Configuration ```yaml -api_key: ${env.GEMINI_API_KEY} +api_key: ${env.GEMINI_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_groq.md b/docs/source/providers/inference/remote_groq.md index 4f734f263..68bd4d5b3 100644 --- a/docs/source/providers/inference/remote_groq.md +++ b/docs/source/providers/inference/remote_groq.md @@ -15,7 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. ```yaml url: https://api.groq.com -api_key: ${env.GROQ_API_KEY} +api_key: ${env.GROQ_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_openai.md b/docs/source/providers/inference/remote_openai.md index b4cfb5880..36e4b5454 100644 --- a/docs/source/providers/inference/remote_openai.md +++ b/docs/source/providers/inference/remote_openai.md @@ -13,7 +13,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. 
## Sample Configuration ```yaml -api_key: ${env.OPENAI_API_KEY} +api_key: ${env.OPENAI_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_sambanova-openai-compat.md b/docs/source/providers/inference/remote_sambanova-openai-compat.md index c213d962f..3074a5885 100644 --- a/docs/source/providers/inference/remote_sambanova-openai-compat.md +++ b/docs/source/providers/inference/remote_sambanova-openai-compat.md @@ -15,7 +15,7 @@ SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API ```yaml openai_compat_api_base: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_sambanova.md b/docs/source/providers/inference/remote_sambanova.md index 006c41ac1..9d15c97d5 100644 --- a/docs/source/providers/inference/remote_sambanova.md +++ b/docs/source/providers/inference/remote_sambanova.md @@ -15,7 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec ```yaml url: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/source/providers/inference/remote_tgi.md b/docs/source/providers/inference/remote_tgi.md index c4a749b0b..125984fab 100644 --- a/docs/source/providers/inference/remote_tgi.md +++ b/docs/source/providers/inference/remote_tgi.md @@ -13,7 +13,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. 
## Sample Configuration ```yaml -url: ${env.TGI_URL} +url: ${env.TGI_URL:=} ``` diff --git a/docs/source/providers/inference/remote_together.md b/docs/source/providers/inference/remote_together.md index d1fe3e82b..be764e635 100644 --- a/docs/source/providers/inference/remote_together.md +++ b/docs/source/providers/inference/remote_together.md @@ -16,7 +16,7 @@ Together AI inference provider for open-source models and collaborative AI devel ```yaml url: https://api.together.xyz/v1 -api_key: ${env.TOGETHER_API_KEY} +api_key: ${env.TOGETHER_API_KEY:=} ``` diff --git a/docs/source/providers/safety/remote_sambanova.md b/docs/source/providers/safety/remote_sambanova.md index c680f9764..7e608f1b7 100644 --- a/docs/source/providers/safety/remote_sambanova.md +++ b/docs/source/providers/safety/remote_sambanova.md @@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering. ```yaml url: https://api.sambanova.ai/v1 -api_key: ${env.SAMBANOVA_API_KEY} +api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/llama_stack/distribution/routing_tables/models.py b/llama_stack/distribution/routing_tables/models.py index 022c3dd40..3928307c6 100644 --- a/llama_stack/distribution/routing_tables/models.py +++ b/llama_stack/distribution/routing_tables/models.py @@ -25,7 +25,8 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): async def refresh(self) -> None: for provider_id, provider in self.impls_by_provider_id.items(): refresh = await provider.should_refresh_models() - if not (refresh or provider_id in self.listed_providers): + refresh = refresh or provider_id not in self.listed_providers + if not refresh: continue try: @@ -138,6 +139,9 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): # avoid overwriting a non-provider-registered model entry continue + if model.identifier == model.provider_resource_id: + model.identifier = f"{provider_id}/{model.provider_resource_id}" + logger.debug(f"registering model {model.identifier} 
({model.provider_resource_id})") await self.register_object( ModelWithOwner( diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 9259fc243..96a0d60e7 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -611,11 +611,8 @@ def extract_path_params(route: str) -> list[str]: def remove_disabled_providers(obj): if isinstance(obj, dict): - if ( - obj.get("provider_id") == "__disabled__" - or obj.get("shield_id") == "__disabled__" - or obj.get("provider_model_id") == "__disabled__" - ): + keys = ["provider_id", "shield_id", "provider_model_id", "model_id"] + if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys): return None return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None} elif isinstance(obj, list): diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 0dfd12828..4b12cafcc 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -105,23 +105,10 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): method = getattr(impls[api], register_method) for obj in objects: logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}") - # Do not register models on disabled providers - if hasattr(obj, "provider_id") and obj.provider_id is not None and obj.provider_id == "__disabled__": - logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.") - continue - # In complex templates, like our starter template, we may have dynamic model ids - # given by environment variables. This allows those environment variables to have - # a default value of __disabled__ to skip registration of the model if not set. 
- if ( - hasattr(obj, "provider_model_id") - and obj.provider_model_id is not None - and "__disabled__" in obj.provider_model_id - ): - logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled model.") - continue - if hasattr(obj, "shield_id") and obj.shield_id is not None and obj.shield_id == "__disabled__": - logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled shield.") + # Do not register models on disabled providers + if hasattr(obj, "provider_id") and (not obj.provider_id or obj.provider_id == "__disabled__"): + logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.") continue # we want to maintain the type information in arguments to method. diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 9d359e053..0d1c4ffe1 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -146,9 +146,9 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate): pass async def register_shield(self, shield: Shield) -> None: - # Allow any model to be registered as a shield - # The model will be validated during runtime when making inference calls - pass + model_id = shield.provider_resource_id + if not model_id: + raise ValueError("Llama Guard shield must have a model id") async def run_shield( self, diff --git a/llama_stack/providers/remote/inference/anthropic/anthropic.py b/llama_stack/providers/remote/inference/anthropic/anthropic.py index fa0a7e10f..31626082b 100644 --- a/llama_stack/providers/remote/inference/anthropic/anthropic.py +++ b/llama_stack/providers/remote/inference/anthropic/anthropic.py @@ -15,6 +15,7 @@ class AnthropicInferenceAdapter(LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, MODEL_ENTRIES, + litellm_provider_name="anthropic", api_key_from_config=config.api_key, provider_data_api_key_field="anthropic_api_key", 
) diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/llama_stack/providers/remote/inference/anthropic/config.py index 10da0025e..a74b97a9e 100644 --- a/llama_stack/providers/remote/inference/anthropic/config.py +++ b/llama_stack/providers/remote/inference/anthropic/config.py @@ -26,7 +26,7 @@ class AnthropicConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 172e06c70..4cbe44b02 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -10,9 +10,9 @@ from llama_stack.providers.utils.inference.model_registry import ( ) LLM_MODEL_IDS = [ - "anthropic/claude-3-5-sonnet-latest", - "anthropic/claude-3-7-sonnet-latest", - "anthropic/claude-3-5-haiku-latest", + "claude-3-5-sonnet-latest", + "claude-3-7-sonnet-latest", + "claude-3-5-haiku-latest", ] SAFETY_MODELS_ENTRIES = [] @@ -21,17 +21,17 @@ MODEL_ENTRIES = ( [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + [ ProviderModelEntry( - provider_model_id="anthropic/voyage-3", + provider_model_id="voyage-3", model_type=ModelType.embedding, metadata={"embedding_dimension": 1024, "context_length": 32000}, ), ProviderModelEntry( - provider_model_id="anthropic/voyage-3-lite", + provider_model_id="voyage-3-lite", model_type=ModelType.embedding, metadata={"embedding_dimension": 512, "context_length": 32000}, ), ProviderModelEntry( - provider_model_id="anthropic/voyage-code-3", + provider_model_id="voyage-code-3", model_type=ModelType.embedding, metadata={"embedding_dimension": 1024, "context_length": 32000}, ), diff --git 
a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 952d86f1a..63ea196f6 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -63,18 +63,20 @@ class BedrockInferenceAdapter( def __init__(self, config: BedrockConfig) -> None: ModelRegistryHelper.__init__(self, MODEL_ENTRIES) self._config = config - - self._client = create_bedrock_client(config) + self._client = None @property def client(self) -> BaseClient: + if self._client is None: + self._client = create_bedrock_client(self._config) return self._client async def initialize(self) -> None: pass async def shutdown(self) -> None: - self.client.close() + if self._client is not None: + self._client.close() async def completion( self, diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 952118e24..5e07c49ee 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -65,6 +65,7 @@ class CerebrasInferenceAdapter( ) self.config = config + # TODO: make this use provider data, etc. 
like other providers self.client = AsyncCerebras( base_url=self.config.base_url, api_key=self.config.api_key.get_secret_value(), diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py index 5ad7376fc..699f6a1ef 100644 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ b/llama_stack/providers/remote/inference/cerebras/config.py @@ -26,7 +26,7 @@ class CerebrasImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "base_url": DEFAULT_BASE_URL, "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/llama_stack/providers/remote/inference/databricks/config.py index 5710dcef3..cc2a2c302 100644 --- a/llama_stack/providers/remote/inference/databricks/config.py +++ b/llama_stack/providers/remote/inference/databricks/config.py @@ -25,8 +25,8 @@ class DatabricksImplConfig(BaseModel): @classmethod def sample_run_config( cls, - url: str = "${env.DATABRICKS_URL}", - api_token: str = "${env.DATABRICKS_API_TOKEN}", + url: str = "${env.DATABRICKS_URL:=}", + api_token: str = "${env.DATABRICKS_API_TOKEN:=}", **kwargs: Any, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/llama_stack/providers/remote/inference/fireworks/config.py index b23f2d31b..cd28096a5 100644 --- a/llama_stack/providers/remote/inference/fireworks/config.py +++ b/llama_stack/providers/remote/inference/fireworks/config.py @@ -24,7 +24,7 @@ class FireworksImplConfig(RemoteInferenceProviderConfig): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": 
"https://api.fireworks.ai/inference/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/llama_stack/providers/remote/inference/gemini/config.py index 63ef4de01..c897777f7 100644 --- a/llama_stack/providers/remote/inference/gemini/config.py +++ b/llama_stack/providers/remote/inference/gemini/config.py @@ -26,7 +26,7 @@ class GeminiConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/gemini/gemini.py b/llama_stack/providers/remote/inference/gemini/gemini.py index 11f6f05ad..b6048eff7 100644 --- a/llama_stack/providers/remote/inference/gemini/gemini.py +++ b/llama_stack/providers/remote/inference/gemini/gemini.py @@ -15,6 +15,7 @@ class GeminiInferenceAdapter(LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, MODEL_ENTRIES, + litellm_provider_name="gemini", api_key_from_config=config.api_key, provider_data_api_key_field="gemini_api_key", ) diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index a7f4732ec..6fda35e0f 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -10,11 +10,11 @@ from llama_stack.providers.utils.inference.model_registry import ( ) LLM_MODEL_IDS = [ - "gemini/gemini-1.5-flash", - "gemini/gemini-1.5-pro", - "gemini/gemini-2.0-flash", - "gemini/gemini-2.5-flash", - "gemini/gemini-2.5-pro", + "gemini-1.5-flash", + "gemini-1.5-pro", + "gemini-2.0-flash", + "gemini-2.5-flash", + "gemini-2.5-pro", ] SAFETY_MODELS_ENTRIES = [] @@ -23,7 +23,7 @@ MODEL_ENTRIES = ( [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + [ ProviderModelEntry( - 
provider_model_id="gemini/text-embedding-004", + provider_model_id="text-embedding-004", model_type=ModelType.embedding, metadata={"embedding_dimension": 768, "context_length": 2048}, ), diff --git a/llama_stack/providers/remote/inference/groq/config.py b/llama_stack/providers/remote/inference/groq/config.py index fe060507a..67e9fa358 100644 --- a/llama_stack/providers/remote/inference/groq/config.py +++ b/llama_stack/providers/remote/inference/groq/config.py @@ -32,7 +32,7 @@ class GroqConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.groq.com", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 91c6b6c17..fd7212de4 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -34,6 +34,7 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, model_entries=MODEL_ENTRIES, + litellm_provider_name="groq", api_key_from_config=config.api_key, provider_data_api_key_field="groq_api_key", ) @@ -96,7 +97,7 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin): tool_choice = "required" params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id.replace("groq/", ""), + model=model_obj.provider_resource_id, messages=messages, frequency_penalty=frequency_penalty, function_call=function_call, diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py index 70c089c4a..fac66db72 100644 --- a/llama_stack/providers/remote/inference/groq/models.py +++ b/llama_stack/providers/remote/inference/groq/models.py @@ -14,19 +14,19 @@ SAFETY_MODELS_ENTRIES = [] MODEL_ENTRIES = [ build_hf_repo_model_entry( - 
"groq/llama3-8b-8192", + "llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), build_model_entry( - "groq/llama-3.1-8b-instant", + "llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), build_hf_repo_model_entry( - "groq/llama3-70b-8192", + "llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), build_hf_repo_model_entry( - "groq/llama-3.3-70b-versatile", + "llama-3.3-70b-versatile", CoreModelId.llama3_3_70b_instruct.value, ), # Groq only contains a preview version for llama-3.2-3b @@ -34,23 +34,15 @@ MODEL_ENTRIES = [ # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it build_hf_repo_model_entry( - "groq/llama-3.2-3b-preview", + "llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), build_hf_repo_model_entry( - "groq/llama-4-scout-17b-16e-instruct", + "meta-llama/llama-4-scout-17b-16e-instruct", CoreModelId.llama4_scout_17b_16e_instruct.value, ), build_hf_repo_model_entry( - "groq/meta-llama/llama-4-scout-17b-16e-instruct", - CoreModelId.llama4_scout_17b_16e_instruct.value, - ), - build_hf_repo_model_entry( - "groq/llama-4-maverick-17b-128e-instruct", - CoreModelId.llama4_maverick_17b_128e_instruct.value, - ), - build_hf_repo_model_entry( - "groq/meta-llama/llama-4-maverick-17b-128e-instruct", + "meta-llama/llama-4-maverick-17b-128e-instruct", CoreModelId.llama4_maverick_17b_128e_instruct.value, ), ] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 576080d99..707aacc7f 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -32,6 +32,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, model_entries=MODEL_ENTRIES, + litellm_provider_name="llama", api_key_from_config=config.api_key, 
provider_data_api_key_field="llama_api_key", openai_compat_api_base=config.openai_compat_api_base, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index ba20185d3..cb026bb94 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -166,7 +166,7 @@ class OllamaInferenceAdapter( ] for m in response.models: # kill embedding models since we don't know dimensions for them - if m.details.family in ["bert"]: + if "bert" in m.details.family: continue models.append( Model( diff --git a/llama_stack/providers/remote/inference/openai/config.py b/llama_stack/providers/remote/inference/openai/config.py index 17fb98831..2768e98d0 100644 --- a/llama_stack/providers/remote/inference/openai/config.py +++ b/llama_stack/providers/remote/inference/openai/config.py @@ -26,7 +26,7 @@ class OpenAIConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.OPENAI_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 9e1b77bde..f5d4afe3f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -45,6 +45,7 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, MODEL_ENTRIES, + litellm_provider_name="openai", api_key_from_config=config.api_key, provider_data_api_key_field="openai_api_key", ) diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/llama_stack/providers/remote/inference/sambanova/config.py index abbf9430f..50ad53d06 100644 --- a/llama_stack/providers/remote/inference/sambanova/config.py +++ 
b/llama_stack/providers/remote/inference/sambanova/config.py @@ -30,7 +30,7 @@ class SambaNovaImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 0b8c2e042..db781eb86 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -9,49 +9,20 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) -SAFETY_MODELS_ENTRIES = [ - build_hf_repo_model_entry( - "sambanova/Meta-Llama-Guard-3-8B", - CoreModelId.llama_guard_3_8b.value, - ), -] +SAFETY_MODELS_ENTRIES = [] MODEL_ENTRIES = [ build_hf_repo_model_entry( - "sambanova/Meta-Llama-3.1-8B-Instruct", + "Meta-Llama-3.1-8B-Instruct", CoreModelId.llama3_1_8b_instruct.value, ), build_hf_repo_model_entry( - "sambanova/Meta-Llama-3.1-405B-Instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Meta-Llama-3.2-1B-Instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Meta-Llama-3.2-3B-Instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Meta-Llama-3.3-70B-Instruct", + "Meta-Llama-3.3-70B-Instruct", CoreModelId.llama3_3_70b_instruct.value, ), build_hf_repo_model_entry( - "sambanova/Llama-3.2-11B-Vision-Instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Llama-3.2-90B-Vision-Instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Llama-4-Scout-17B-16E-Instruct", - 
CoreModelId.llama4_scout_17b_16e_instruct.value, - ), - build_hf_repo_model_entry( - "sambanova/Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Maverick-17B-128E-Instruct", CoreModelId.llama4_maverick_17b_128e_instruct.value, ), ] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index 9c2dda889..8ba705f59 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -182,6 +182,7 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): LiteLLMOpenAIMixin.__init__( self, model_entries=MODEL_ENTRIES, + litellm_provider_name="sambanova", api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None, provider_data_api_key_field="sambanova_api_key", ) diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py index d4448871f..55136c8ba 100644 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ b/llama_stack/providers/remote/inference/tgi/config.py @@ -19,7 +19,7 @@ class TGIImplConfig(BaseModel): @classmethod def sample_run_config( cls, - url: str = "${env.TGI_URL}", + url: str = "${env.TGI_URL:=}", **kwargs, ): return { diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 031200d4a..a5bb079ef 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -305,6 +305,8 @@ class _HfAdapter( class TGIAdapter(_HfAdapter): async def initialize(self, config: TGIImplConfig) -> None: + if not config.url: + raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") log.info(f"Initializing TGI client with url={config.url}") self.client = AsyncInferenceClient( model=config.url, diff --git 
a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 211be7efe..f6725333c 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -27,5 +27,5 @@ class TogetherImplConfig(RemoteInferenceProviderConfig): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY}", + "api_key": "${env.TOGETHER_API_KEY:=}", } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index 3d19f8dec..575ec1f3d 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -69,15 +69,9 @@ MODEL_ENTRIES = [ build_hf_repo_model_entry( "meta-llama/Llama-4-Scout-17B-16E-Instruct", CoreModelId.llama4_scout_17b_16e_instruct.value, - additional_aliases=[ - "together/meta-llama/Llama-4-Scout-17B-16E-Instruct", - ], ), build_hf_repo_model_entry( "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", CoreModelId.llama4_maverick_17b_128e_instruct.value, - additional_aliases=[ - "together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - ], ), ] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 621658a48..ac626874c 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -299,7 +299,10 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): self.client = None async def initialize(self) -> None: - pass + if not self.config.url: + raise ValueError( + "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." 
+ ) async def should_refresh_models(self) -> bool: return self.config.refresh_models @@ -337,9 +340,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): HealthResponse: A dictionary containing the health status. """ try: - if not self.config.url: - return HealthResponse(status=HealthStatus.ERROR, message="vLLM URL is not set") - client = self._create_client() if self.client is None else self.client _ = [m async for m in client.models.list()] # Ensure the client is initialized return HealthResponse(status=HealthStatus.OK) @@ -355,11 +355,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): if self.client is not None: return - if not self.config.url: - raise ValueError( - "You must provide a vLLM URL in the run.yaml file (or set the VLLM_URL environment variable)" - ) - log.info(f"Initializing vLLM client with base_url={self.config.url}") self.client = self._create_client() diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/llama_stack/providers/remote/safety/sambanova/config.py index 383cea244..2cde97098 100644 --- a/llama_stack/providers/remote/safety/sambanova/config.py +++ b/llama_stack/providers/remote/safety/sambanova/config.py @@ -30,7 +30,7 @@ class SambaNovaSafetyConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: return { "url": "https://api.sambanova.ai/v1", "api_key": api_key, diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 0de267f6c..02e650307 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -68,11 +68,14 @@ class LiteLLMOpenAIMixin( def __init__( self, model_entries, + litellm_provider_name: str, api_key_from_config: str | None, 
provider_data_api_key_field: str, openai_compat_api_base: str | None = None, ): ModelRegistryHelper.__init__(self, model_entries) + + self.litellm_provider_name = litellm_provider_name self.api_key_from_config = api_key_from_config self.provider_data_api_key_field = provider_data_api_key_field self.api_base = openai_compat_api_base @@ -91,7 +94,11 @@ class LiteLLMOpenAIMixin( def get_litellm_model_name(self, model_id: str) -> str: # users may be using openai/ prefix in their model names. the openai/models.py did this by default. # model_id.startswith("openai/") is for backwards compatibility. - return "openai/" + model_id if self.is_openai_compat and not model_id.startswith("openai/") else model_id + return ( + f"{self.litellm_provider_name}/{model_id}" + if self.is_openai_compat and not model_id.startswith(self.litellm_provider_name) + else model_id + ) async def completion( self, diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index bceeaf198..a79e4b6ae 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -50,7 +50,8 @@ def build_hf_repo_model_entry( additional_aliases: list[str] | None = None, ) -> ProviderModelEntry: aliases = [ - get_huggingface_repo(model_descriptor), + # NOTE: avoid HF aliases because they _cannot_ be unique across providers + # get_huggingface_repo(model_descriptor), ] if additional_aliases: aliases.extend(additional_aliases) @@ -75,7 +76,9 @@ class ModelRegistryHelper(ModelsProtocolPrivate): __provider_id__: str def __init__(self, model_entries: list[ProviderModelEntry], allowed_models: list[str] | None = None): + self.model_entries = model_entries self.allowed_models = allowed_models + self.alias_to_provider_id_map = {} self.provider_id_to_llama_model_map = {} for entry in model_entries: @@ -98,7 +101,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate): continue models.append( 
Model( - model_id=id, + identifier=id, provider_resource_id=entry.provider_model_id, model_type=ModelType.llm, metadata=entry.metadata, diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index 2421842ec..b5df01923 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -7,21 +7,15 @@ distribution_spec: - provider_type: remote::ollama - provider_type: remote::vllm - provider_type: remote::tgi - - provider_type: remote::hf::serverless - - provider_type: remote::hf::endpoint - provider_type: remote::fireworks - provider_type: remote::together - provider_type: remote::bedrock - - provider_type: remote::databricks - provider_type: remote::nvidia - - provider_type: remote::runpod - provider_type: remote::openai - provider_type: remote::anthropic - provider_type: remote::gemini - provider_type: remote::groq - - provider_type: remote::llama-openai-compat - provider_type: remote::sambanova - - provider_type: remote::passthrough - provider_type: inline::sentence-transformers vector_io: - provider_type: inline::faiss diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 6f8a192ee..4a9baaf90 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -14,127 +14,97 @@ apis: - vector_io providers: inference: - - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY} - - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ollama provider_type: remote::ollama config: url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.ENABLE_VLLM:=__disabled__} + - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: url: ${env.VLLM_URL:=} max_tokens: 
${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.ENABLE_TGI:=__disabled__} + - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL} - - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} - provider_type: remote::hf::serverless - config: - huggingface_repo: ${env.INFERENCE_MODEL} - api_token: ${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + url: ${env.TGI_URL:=} + - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} - - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock provider_type: remote::bedrock - - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_type: remote::databricks - config: - url: ${env.DATABRICKS_URL} - api_token: ${env.DATABRICKS_API_TOKEN} - - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:=} append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_type: remote::runpod - config: - url: ${env.RUNPOD_URL:=} - api_token: ${env.RUNPOD_API_TOKEN} - - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY} - - 
provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY} - - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY} - - provider_id: ${env.ENABLE_GROQ:=__disabled__} + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY} - - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} - provider_type: remote::llama-openai-compat - config: - openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY} - - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} - provider_type: remote::passthrough - config: - url: ${env.PASSTHROUGH_URL} - api_key: ${env.PASSTHROUGH_API_KEY} - - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: sentence-transformers provider_type: inline::sentence-transformers vector_io: - - provider_id: ${env.ENABLE_FAISS:=faiss} + - provider_id: faiss provider_type: inline::faiss config: kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} + - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db - - provider_id: ${env.ENABLE_MILVUS:=__disabled__} + - provider_id: 
${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db - - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} + - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db - - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} + - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: host: ${env.PGVECTOR_HOST:=localhost} @@ -233,892 +203,11 @@ metadata_store: inference_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db -models: -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama3.1-8b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-3.3-70b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: 
llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.SAFETY_MODEL:=__disabled__} - model_type: llm -- metadata: - embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_VLLM:=__disabled__}/${env.VLLM_INFERENCE_MODEL:=__disabled__} - provider_id: ${env.ENABLE_VLLM:=__disabled__} - provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: 
accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: 
${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - 
context_length: 8192 - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/nomic-ai/nomic-embed-text-v1.5 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-8b - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-8k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 32768 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-32k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm 
-- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-8b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: 
${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-70b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-405b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-70b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-405b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-405b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - 
provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-1b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-3b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-11b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-90b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.3-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - 
model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: - embedding_dimension: 2048 - context_length: 8192 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/llama-3.2-nv-embedqa-1b-v2 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-e5-v5 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-e5-v5 - model_type: embedding -- metadata: - embedding_dimension: 4096 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-mistral-7b-v2 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-mistral-7b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/snowflake/arctic-embed-l - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: snowflake/arctic-embed-l - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B - 
model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-1B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-1B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-3B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-3B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-0125 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo-0125 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-instruct - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4-turbo - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4-turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-2024-08-06 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-2024-08-06 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-audio-preview - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-audio-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/chatgpt-4o-latest - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: chatgpt-4o-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o1 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o1-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o3-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o3-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o4-mini - 
provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o4-mini - model_type: llm -- metadata: - embedding_dimension: 1536 - context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-small - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: text-embedding-3-small - model_type: embedding -- metadata: - embedding_dimension: 3072 - context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-large - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: text-embedding-3-large - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-5-sonnet-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-7-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-7-sonnet-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-haiku-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-5-haiku-latest - model_type: llm -- metadata: - embedding_dimension: 1024 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/voyage-3 - model_type: embedding -- metadata: - embedding_dimension: 512 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3-lite - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/voyage-3-lite - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-code-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - 
provider_model_id: anthropic/voyage-code-3 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-1.5-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-1.5-pro - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.0-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.0-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.5-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.5-pro - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 2048 - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/text-embedding-004 - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/text-embedding-004 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192 - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.1-8b-instant - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.1-8b-instant - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-70b-8192 - 
provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.3-70b-versatile - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.2-3b-preview - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: 
groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - 
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm +models: [] shields: -- shield_id: ${env.SAFETY_MODEL:=__disabled__} - provider_shield_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__} +- shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+inline::llama-guard} + provider_shield_id: 
${env.SAFETY_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index f087e89ee..8e915f586 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -89,101 +89,51 @@ models: provider_id: nvidia provider_model_id: meta/llama3-8b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3-8B-Instruct - provider_id: nvidia - provider_model_id: meta/llama3-8b-instruct - model_type: llm - metadata: {} model_id: meta/llama3-70b-instruct provider_id: nvidia provider_model_id: meta/llama3-70b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama3-70b-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.1-8b-instruct provider_id: nvidia provider_model_id: meta/llama-3.1-8b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.1-70b-instruct provider_id: nvidia provider_model_id: meta/llama-3.1-70b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.1-405b-instruct provider_id: nvidia provider_model_id: meta/llama-3.1-405b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: nvidia - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.2-1b-instruct provider_id: nvidia provider_model_id: meta/llama-3.2-1b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm - metadata: {} model_id: 
meta/llama-3.2-3b-instruct provider_id: nvidia provider_model_id: meta/llama-3.2-3b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.2-11b-vision-instruct provider_id: nvidia provider_model_id: meta/llama-3.2-11b-vision-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.2-90b-vision-instruct provider_id: nvidia provider_model_id: meta/llama-3.2-90b-vision-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm - metadata: {} model_id: meta/llama-3.3-70b-instruct provider_id: nvidia provider_model_id: meta/llama-3.3-70b-instruct model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm - metadata: embedding_dimension: 2048 context_length: 8192 diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index ba6a5e9d6..4e635d80f 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -33,7 +33,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} + api_key: ${env.TOGETHER_API_KEY:=} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 14e7e157b..1c67c433e 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -7,21 +7,15 @@ distribution_spec: - 
provider_type: remote::ollama - provider_type: remote::vllm - provider_type: remote::tgi - - provider_type: remote::hf::serverless - - provider_type: remote::hf::endpoint - provider_type: remote::fireworks - provider_type: remote::together - provider_type: remote::bedrock - - provider_type: remote::databricks - provider_type: remote::nvidia - - provider_type: remote::runpod - provider_type: remote::openai - provider_type: remote::anthropic - provider_type: remote::gemini - provider_type: remote::groq - - provider_type: remote::llama-openai-compat - provider_type: remote::sambanova - - provider_type: remote::passthrough - provider_type: inline::sentence-transformers vector_io: - provider_type: inline::faiss diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index d60800ebb..bc38387c9 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -14,127 +14,97 @@ apis: - vector_io providers: inference: - - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} provider_type: remote::cerebras config: base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY} - - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ollama provider_type: remote::ollama config: url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.ENABLE_VLLM:=__disabled__} + - provider_id: ${env.VLLM_URL:+vllm} provider_type: remote::vllm config: url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.ENABLE_TGI:=__disabled__} + - provider_id: ${env.TGI_URL:+tgi} provider_type: remote::tgi config: - url: ${env.TGI_URL} - - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} - provider_type: remote::hf::serverless - config: - huggingface_repo: ${env.INFERENCE_MODEL} - api_token: 
${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + url: ${env.TGI_URL:=} + - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY} - - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock provider_type: remote::bedrock - - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_type: remote::databricks - config: - url: ${env.DATABRICKS_URL} - api_token: ${env.DATABRICKS_API_TOKEN} - - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:=} append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_type: remote::runpod - config: - url: ${env.RUNPOD_URL:=} - api_token: ${env.RUNPOD_API_TOKEN} - - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY} - - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY} - - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY} - - provider_id: 
${env.ENABLE_GROQ:=__disabled__} + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY} - - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} - provider_type: remote::llama-openai-compat - config: - openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY} - - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} - provider_type: remote::passthrough - config: - url: ${env.PASSTHROUGH_URL} - api_key: ${env.PASSTHROUGH_API_KEY} - - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: sentence-transformers provider_type: inline::sentence-transformers vector_io: - - provider_id: ${env.ENABLE_FAISS:=faiss} + - provider_id: faiss provider_type: inline::faiss config: kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} + - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db - - provider_id: ${env.ENABLE_MILVUS:=__disabled__} + - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db - - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} + - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: 
${env.CHROMADB_URL:=} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db - - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} + - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: host: ${env.PGVECTOR_HOST:=localhost} @@ -233,892 +203,11 @@ metadata_store: inference_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db -models: -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama3.1-8b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-3.3-70b - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_CEREBRAS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} - provider_model_id: llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - provider_id: 
${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.SAFETY_MODEL:=__disabled__} - model_type: llm -- metadata: - embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} - model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} - provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_VLLM:=__disabled__}/${env.VLLM_INFERENCE_MODEL:=__disabled__} - provider_id: ${env.ENABLE_VLLM:=__disabled__} - provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-405b-instruct - 
provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/nomic-ai/nomic-embed-text-v1.5 - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-8b - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: 
accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: 
llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-8k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 32768 - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-32k-retrieval - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: 
meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-8b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-70b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: 
${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta.llama3-1-405b-instruct-v1:0 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_BEDROCK:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-70b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/databricks-meta-llama-3-1-405b-instruct - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_DATABRICKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} - provider_model_id: databricks-meta-llama-3-1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama3-70b-instruct - 
provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-8b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.1-405b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-1b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-1b-instruct - 
model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-3b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-11b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.2-90b-vision-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta/llama-3.3-70b-instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: - embedding_dimension: 2048 - context_length: 8192 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/llama-3.2-nv-embedqa-1b-v2 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: 
nvidia/llama-3.2-nv-embedqa-1b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-e5-v5 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-e5-v5 - model_type: embedding -- metadata: - embedding_dimension: 4096 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/nvidia/nv-embedqa-mistral-7b-v2 - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: nvidia/nv-embedqa-mistral-7b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: ${env.ENABLE_NVIDIA:=__disabled__}/snowflake/arctic-embed-l - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} - provider_model_id: snowflake/arctic-embed-l - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-8B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: 
${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-70B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp8 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp8 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.1-405B-Instruct:bf16-mp16 - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.1-405B-Instruct:bf16-mp16 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-1B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-1B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_RUNPOD:=__disabled__}/Llama3.2-3B - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} - provider_model_id: Llama3.2-3B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-0125 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo-0125 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-instruct - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-3.5-turbo-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4-turbo - provider_id: 
${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4-turbo - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-2024-08-06 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-2024-08-06 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-audio-preview - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: gpt-4o-audio-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/chatgpt-4o-latest - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: chatgpt-4o-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1 - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o1 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o1-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o3-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o3-mini - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_OPENAI:=__disabled__}/o4-mini - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: o4-mini - model_type: llm -- metadata: - embedding_dimension: 1536 - context_length: 8192 - model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-small - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: text-embedding-3-small - model_type: embedding -- metadata: - embedding_dimension: 3072 - context_length: 8192 - model_id: 
${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-large - provider_id: ${env.ENABLE_OPENAI:=__disabled__} - provider_model_id: text-embedding-3-large - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-5-sonnet-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-7-sonnet-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-7-sonnet-latest - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-haiku-latest - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/claude-3-5-haiku-latest - model_type: llm -- metadata: - embedding_dimension: 1024 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/voyage-3 - model_type: embedding -- metadata: - embedding_dimension: 512 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3-lite - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/voyage-3-lite - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 32000 - model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-code-3 - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} - provider_model_id: anthropic/voyage-code-3 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-1.5-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: 
gemini/gemini-1.5-pro - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.0-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.0-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-flash - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.5-flash - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-pro - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/gemini-2.5-pro - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 2048 - model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/text-embedding-004 - provider_id: ${env.ENABLE_GEMINI:=__disabled__} - provider_model_id: gemini/text-embedding-004 - model_type: embedding -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192 - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.1-8b-instant - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.1-8b-instant - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-70b-8192 - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.3-70b-versatile - provider_id: 
${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.2-3b-preview - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: 
${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_GROQ:=__disabled__} - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm 
-- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: 
${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-Guard-3-8B - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm +models: [] shields: -- shield_id: ${env.SAFETY_MODEL:=__disabled__} - provider_shield_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__} +- shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+inline::llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 6c4ffbc06..4931c6a42 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -7,20 +7,19 @@ from typing import Any -from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( 
BuildProvider, - ModelInput, Provider, ProviderSpec, + ShieldInput, ToolGroupInput, ) from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) -from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.milvus.config import ( MilvusVectorIOConfig, @@ -29,117 +28,17 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, ) from llama_stack.providers.registry.inference import available_providers -from llama_stack.providers.remote.inference.anthropic.models import ( - MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.bedrock.models import ( - MODEL_ENTRIES as BEDROCK_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.cerebras.models import ( - MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.databricks.databricks import ( - MODEL_ENTRIES as DATABRICKS_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.fireworks.models import ( - MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.gemini.models import ( - MODEL_ENTRIES as GEMINI_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.groq.models import ( - MODEL_ENTRIES as GROQ_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.nvidia.models import ( - MODEL_ENTRIES as NVIDIA_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.openai.models import ( - MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.runpod.runpod import ( - 
MODEL_ENTRIES as RUNPOD_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.sambanova.models import ( - MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.together.models import ( - MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES, -) from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) -from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.templates.template import ( DistributionTemplate, RunConfigSettings, - get_model_registry, - get_shield_registry, ) -def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]: - """Get model entries for a specific provider type.""" - model_entries_map = { - "openai": OPENAI_MODEL_ENTRIES, - "fireworks": FIREWORKS_MODEL_ENTRIES, - "together": TOGETHER_MODEL_ENTRIES, - "anthropic": ANTHROPIC_MODEL_ENTRIES, - "gemini": GEMINI_MODEL_ENTRIES, - "groq": GROQ_MODEL_ENTRIES, - "sambanova": SAMBANOVA_MODEL_ENTRIES, - "cerebras": CEREBRAS_MODEL_ENTRIES, - "bedrock": BEDROCK_MODEL_ENTRIES, - "databricks": DATABRICKS_MODEL_ENTRIES, - "nvidia": NVIDIA_MODEL_ENTRIES, - "runpod": RUNPOD_MODEL_ENTRIES, - } - - # Special handling for providers with dynamic model entries - if provider_type == "ollama": - return [ - ProviderModelEntry( - provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", - model_type=ModelType.llm, - ), - ProviderModelEntry( - provider_model_id="${env.SAFETY_MODEL:=__disabled__}", - model_type=ModelType.llm, - ), - ProviderModelEntry( - provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", - }, - ), - ] - elif provider_type == "vllm": - return [ - ProviderModelEntry( - 
provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", - model_type=ModelType.llm, - ), - ] - - return model_entries_map.get(provider_type, []) - - -def _get_model_safety_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]: - """Get model entries for a specific provider type.""" - safety_model_entries_map = { - "ollama": [ - ProviderModelEntry( - provider_model_id="${env.SAFETY_MODEL:=__disabled__}", - model_type=ModelType.llm, - ), - ], - } - - return safety_model_entries_map.get(provider_type, []) - - def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]: """Get configuration for a provider using its adapter's config class.""" config_class = instantiate_class_type(provider_spec.config_class) @@ -150,40 +49,48 @@ def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]: return {} -def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: - all_providers = available_providers() +ENABLED_INFERENCE_PROVIDERS = [ + "ollama", + "vllm", + "tgi", + "fireworks", + "together", + "gemini", + "groq", + "sambanova", + "anthropic", + "openai", + "cerebras", + "nvidia", + "bedrock", +] - # Filter out inline providers and watsonx - the starter distro only exposes remote providers +INFERENCE_PROVIDER_IDS = { + "vllm": "${env.VLLM_URL:+vllm}", + "tgi": "${env.TGI_URL:+tgi}", + "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}", + "nvidia": "${env.NVIDIA_API_KEY:+nvidia}", +} + + +def get_remote_inference_providers() -> list[Provider]: + # Filter out inline providers and some others - the starter distro only exposes remote providers remote_providers = [ provider - for provider in all_providers - # TODO: re-add once the Python 3.13 issue is fixed - # discussion: https://github.com/meta-llama/llama-stack/pull/2327#discussion_r2156883828 - if hasattr(provider, "adapter") and provider.adapter.adapter_type != "watsonx" + for provider in available_providers() + if isinstance(provider, 
RemoteProviderSpec) and provider.adapter.adapter_type in ENABLED_INFERENCE_PROVIDERS ] - providers = [] - available_models = {} - + inference_providers = [] for provider_spec in remote_providers: provider_type = provider_spec.adapter.adapter_type - # Build the environment variable name for enabling this provider - env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}" - model_entries = _get_model_entries_for_provider(provider_type) + if provider_type in INFERENCE_PROVIDER_IDS: + provider_id = INFERENCE_PROVIDER_IDS[provider_type] + else: + provider_id = provider_type.replace("-", "_").replace("::", "_") config = _get_config_for_provider(provider_spec) - providers.append( - ( - f"${{env.{env_var}:=__disabled__}}", - provider_type, - model_entries, - config, - ) - ) - available_models[f"${{env.{env_var}:=__disabled__}}"] = model_entries - inference_providers = [] - for provider_id, provider_type, model_entries, config in providers: inference_providers.append( Provider( provider_id=provider_id, @@ -191,31 +98,13 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[Pro config=config, ) ) - available_models[provider_id] = model_entries - return inference_providers, available_models - - -# build a list of shields for all possible providers -def get_safety_models_for_providers(providers: list[Provider]) -> dict[str, list[ProviderModelEntry]]: - available_models = {} - for provider in providers: - provider_type = provider.provider_type.split("::")[1] - safety_model_entries = _get_model_safety_entries_for_provider(provider_type) - if len(safety_model_entries) == 0: - continue - - env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}" - provider_id = f"${{env.{env_var}:=__disabled__}}" - - available_models[provider_id] = safety_model_entries - - return available_models + return inference_providers def get_distribution_template() -> DistributionTemplate: - remote_inference_providers, 
available_models = get_remote_inference_providers() + remote_inference_providers = get_remote_inference_providers() name = "starter" - # For build config, use BuildProvider with only provider_type and module + providers = { "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers] + [BuildProvider(provider_type="inline::sentence-transformers")], @@ -254,15 +143,10 @@ def get_distribution_template() -> DistributionTemplate: config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) embedding_provider = Provider( - provider_id="${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}", + provider_id="sentence-transformers", provider_type="inline::sentence-transformers", config=SentenceTransformersInferenceConfig.sample_run_config(), ) - post_training_provider = Provider( - provider_id="huggingface", - provider_type="inline::huggingface", - config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -273,19 +157,14 @@ def get_distribution_template() -> DistributionTemplate: provider_id="rag-runtime", ), ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - - default_models, ids_conflict_in_models = get_model_registry(available_models) - - available_safety_models = get_safety_models_for_providers(remote_inference_providers) - shields = get_shield_registry(available_safety_models, ids_conflict_in_models) + default_shields = [ + # if the + ShieldInput( + shield_id="llama-guard", + provider_id="${env.SAFETY_MODEL:+inline::llama-guard}", + provider_shield_id="${env.SAFETY_MODEL:=}", + ), + ] return DistributionTemplate( name=name, @@ -294,7 +173,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, 
template_path=None, providers=providers, - available_models_by_provider=available_models, additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), run_configs={ "run.yaml": RunConfigSettings( @@ -302,22 +180,22 @@ def get_distribution_template() -> DistributionTemplate: "inference": remote_inference_providers + [embedding_provider], "vector_io": [ Provider( - provider_id="${env.ENABLE_FAISS:=faiss}", + provider_id="faiss", provider_type="inline::faiss", config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_SQLITE_VEC:=__disabled__}", + provider_id="sqlite-vec", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_MILVUS:=__disabled__}", + provider_id="${env.MILVUS_URL:+milvus}", provider_type="inline::milvus", config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB:=__disabled__}", + provider_id="${env.CHROMADB_URL:+chromadb}", provider_type="remote::chromadb", config=ChromaVectorIOConfig.sample_run_config( f"~/.llama/distributions/{name}/", @@ -325,7 +203,7 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_PGVECTOR:=__disabled__}", + provider_id="${env.PGVECTOR_DB:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( f"~/.llama/distributions/{name}", @@ -336,12 +214,10 @@ def get_distribution_template() -> DistributionTemplate: ), ], "files": [files_provider], - "post_training": [post_training_provider], }, - default_models=[embedding_model] + default_models, + default_models=[], default_tool_groups=default_tool_groups, - # TODO: add a way to enable/disable shields on the fly - default_shields=shields, + default_shields=default_shields, ), }, run_config_env_vars={ @@ -385,17 +261,5 @@ def get_distribution_template() -> 
DistributionTemplate: "http://localhost:11434", "Ollama URL", ), - "OLLAMA_INFERENCE_MODEL": ( - "", - "Optional Ollama Inference Model to register on startup", - ), - "OLLAMA_EMBEDDING_MODEL": ( - "", - "Optional Ollama Embedding Model to register on startup", - ), - "OLLAMA_EMBEDDING_DIMENSION": ( - "384", - "Ollama Embedding Dimension", - ), }, ) diff --git a/scripts/install.sh b/scripts/install.sh index 5dc74fae1..e49924512 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -222,9 +222,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ -p "${PORT}:${PORT}" \ "${SERVER_IMAGE}" --port "${PORT}" \ - --env OLLAMA_INFERENCE_MODEL="${MODEL_ALIAS}" \ - --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ - --env ENABLE_OLLAMA=ollama) + --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}") log "🦙 Starting Llama Stack..." if ! execute_with_log $ENGINE "${cmd[@]}"; then