diff --git a/llama_stack/templates/ssambanova/__init__.py b/llama_stack/templates/ssambanova/__init__.py
new file mode 100644
index 000000000..5b2050dc9
--- /dev/null
+++ b/llama_stack/templates/ssambanova/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .ssambanova import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/ssambanova/build.yaml b/llama_stack/templates/ssambanova/build.yaml
new file mode 100644
index 000000000..e77317400
--- /dev/null
+++ b/llama_stack/templates/ssambanova/build.yaml
@@ -0,0 +1,17 @@
+version: "2"
+name: ssambanova
+distribution_spec:
+  description: Use SambaNova for running LLM inference
+  docker_image: null
+  providers:
+    inference:
+      - remote::ssambanova
+    memory:
+      - inline::faiss
+    safety:
+      - inline::llama-guard
+    agents:
+      - inline::meta-reference
+    telemetry:
+      - inline::meta-reference
+image_type: conda
diff --git a/llama_stack/templates/ssambanova/doc_template.md b/llama_stack/templates/ssambanova/doc_template.md
new file mode 100644
index 000000000..b3fc8ff28
--- /dev/null
+++ b/llama_stack/templates/ssambanova/doc_template.md
@@ -0,0 +1,67 @@
+# SambaNova Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+{% if run_config_env_vars %}
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+
+- `{{ model.model_id }} ({{ model.provider_model_id }})`
+{% endfor %}
+{% endif %}
+
+### Prerequisite: API Keys
+
+Make sure you have access to a SambaNova API key. You can get one by visiting [SambaNova](https://cloud.sambanova.ai/apis).
+
+## Running Llama Stack with SambaNova
+
+You can do this via Conda (build the code yourself) or Docker (use a pre-built image).
+
+### Via Docker
+
+This method lets you get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env SSAMBANOVA_API_KEY=$SSAMBANOVA_API_KEY
+```
+
+### Via Conda
+
+```bash
+llama stack build --template ssambanova --image-type conda
+llama stack run ./run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env SSAMBANOVA_API_KEY=$SSAMBANOVA_API_KEY
+```
diff --git a/llama_stack/templates/ssambanova/run.yaml b/llama_stack/templates/ssambanova/run.yaml
new file mode 100644
index 000000000..6e48ac8e4
--- /dev/null
+++ b/llama_stack/templates/ssambanova/run.yaml
@@ -0,0 +1,87 @@
+version: "2"
+image_name: ssambanova
+docker_image: null
+conda_env: ssambanova
+apis:
+  - inference
+  - safety
+  - agents
+  - memory
+  - datasetio
+  - scoring
+  - eval
+  - telemetry
+providers:
+  inference:
+    - provider_id: ssambanova
+      provider_type: remote::ssambanova
+      config:
+        url: https://api.sambanova.ai/v1
+        api_key: ${env.SSAMBANOVA_API_KEY}
+  safety:
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
+      config: {}
+  agents:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        persistence_store:
+          type: sqlite
+          namespace: null
+          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ssambanova}/agents_store.db
+  memory:
+    - provider_id: faiss
+      provider_type: inline::faiss
+      config:
+        kvstore:
+          type: sqlite
+          namespace: null
+          db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ssambanova}/faiss_store.db
+  datasetio:
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config: {}
+  scoring:
+    - provider_id: basic
+      provider_type: inline::basic
+      config: {}
+  eval:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config: {}
+  telemetry:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config: {}
+metadata_store: null
+models:
+  - metadata: {}
+    model_id: meta-llama/Llama-3.1-8B-Instruct
+    provider_id: null
+    provider_model_id: Meta-Llama-3.1-8B-Instruct
+  - metadata: {}
+    model_id: meta-llama/Llama-3.1-70B-Instruct
+    provider_id: null
+    provider_model_id: Meta-Llama-3.1-70B-Instruct
+  - metadata: {}
+    model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
+    provider_id: null
+    provider_model_id: Meta-Llama-3.1-405B-Instruct
+  - metadata: {}
+    model_id: meta-llama/Llama-3.2-1B-Instruct
+    provider_id: null
+    provider_model_id: Meta-Llama-3.2-1B-Instruct
+  - metadata: {}
+    model_id: meta-llama/Llama-3.2-3B-Instruct
+    provider_id: null
+    provider_model_id: Meta-Llama-3.2-3B-Instruct
+shields:
+  - params: null
+    shield_id: meta-llama/Llama-Guard-3-8B
+    provider_id: null
+    provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/ssambanova/ssambanova.py b/llama_stack/templates/ssambanova/ssambanova.py
new file mode 100644
index 000000000..e553462b5
--- /dev/null
+++ b/llama_stack/templates/ssambanova/ssambanova.py
@@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
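+"""Distribution template for the SambaNova-backed "ssambanova" distribution.
+
+Declares the providers, default models, default shield, and run-config
+settings rendered into build.yaml, run.yaml, and the doc template above.
+"""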
+
+from pathlib import Path
+
+from llama_models.sku_list import all_registered_models
+
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.remote.inference.ssambanova import SsambanovaImplConfig
+from llama_stack.providers.remote.inference.ssambanova.ssambanova import MODEL_ALIASES
+
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    providers = {
+        "inference": ["remote::ssambanova"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+    }
+
+    inference_provider = Provider(
+        provider_id="ssambanova",
+        provider_type="remote::ssambanova",
+        config=SsambanovaImplConfig.sample_run_config(),
+    )
+
+    # Register each provider model alias under its canonical Hugging Face
+    # repo name, so model_ids line up with the rest of the Llama Stack.
+    core_model_to_hf_repo = {
+        m.descriptor(): m.huggingface_repo for m in all_registered_models()
+    }
+    default_models = [
+        ModelInput(
+            model_id=core_model_to_hf_repo[m.llama_model],
+            provider_model_id=m.provider_model_id,
+        )
+        for m in MODEL_ALIASES
+    ]
+
+    return DistributionTemplate(
+        name="ssambanova",
+        distro_type="self_hosted",
+        description="Use SambaNova for running LLM inference",
+        docker_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        default_models=default_models,
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                },
+                default_models=default_models,
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+            ),
+        },
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "SSAMBANOVA_API_KEY": (
+                "",
+                "SambaNova API key",
+            ),
+        },
+    )
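
Reviewer note: since `run.yaml` points the provider at `https://api.sambanova.ai/v1`, the key can be sanity-checked before bringing the stack up. A minimal sketch, not part of this diff: it assumes the endpoint is OpenAI-compatible (the `/v1` URL suggests it is), that the `openai` Python package is installed, and it reuses a `provider_model_id` from `run.yaml`:

```python
# Hypothetical smoke test for the SambaNova key; not part of this PR.
# Assumes https://api.sambanova.ai/v1 speaks the OpenAI chat-completions
# protocol and that `pip install openai` has been run.
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://api.sambanova.ai/v1",
    api_key=os.environ["SSAMBANOVA_API_KEY"],
)

resp = client.chat.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",  # a provider_model_id from run.yaml
    messages=[{"role": "user", "content": "Reply with the word 'ok'."}],
)
print(resp.choices[0].message.content)
```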