Support llamaguade

This commit is contained in:
Edward Ma 2025-01-27 10:38:15 -08:00
parent e5936a8df8
commit 2565af31c5
7 changed files with 80 additions and 117 deletions

View file

@ -36,6 +36,7 @@ Here is a list of the various API providers and available distributions to devel
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | | **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:| |:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
| Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| SambaNova | Hosted | | :heavy_check_mark: | | | |
| Cerebras | Hosted | | :heavy_check_mark: | | | | | Cerebras | Hosted | | :heavy_check_mark: | | | |
| Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | | | Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
| AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | | | AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | |
@ -57,6 +58,7 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| Meta Reference Quantized | [llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-quantized-gpu.html) | | Meta Reference Quantized | [llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-quantized-gpu.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | | TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |

View file

@ -1,19 +1 @@
version: '2' ../../llama_stack/templates/sambanova/build.yaml
name: sambanova
distribution_spec:
description: Use SambaNova.AI for running LLM inference
docker_image: null
providers:
inference:
- remote::sambanova
memory:
- inline::faiss
- remote::chromadb
- remote::pgvector
safety:
- inline::llama-guard
agents:
- inline::meta-reference
telemetry:
- inline::meta-reference
image_type: conda

View file

@ -1,83 +1 @@
version: '2' ../../llama_stack/templates/sambanova/run.yaml
image_name: sambanova
docker_image: null
conda_env: sambanova
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1/
api_key: ${env.SAMBANOVA_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: null
provider_model_id: Meta-Llama-3.1-8B-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: null
provider_model_id: Meta-Llama-3.1-70B-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct
provider_id: null
provider_model_id: Meta-Llama-3.1-405B-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: null
provider_model_id: Meta-Llama-3.2-1B-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: null
provider_model_id: Meta-Llama-3.2-3B-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: null
provider_model_id: Llama-3.2-11B-Vision-Instruct
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: null
provider_model_id: Llama-3.2-90B-Vision-Instruct
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -22,7 +22,7 @@ class SambaNovaImplConfig(BaseModel):
) )
@classmethod @classmethod
def sample_run_config(cls) -> Dict[str, Any]: def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
return { return {
"url": "https://api.sambanova.ai/v1", "url": "https://api.sambanova.ai/v1",
"api_key": "${env.SAMBANOVA_API_KEY}", "api_key": "${env.SAMBANOVA_API_KEY}",

View file

@ -7,7 +7,7 @@
import json import json
from typing import AsyncGenerator from typing import AsyncGenerator
from llama_models.datatypes import CoreModelId, SamplingStrategy from llama_models.datatypes import CoreModelId, TopPSamplingStrategy, TopKSamplingStrategy, GreedySamplingStrategy
from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.llama3.api.tokenizer import Tokenizer
from openai import OpenAI from openai import OpenAI
@ -60,6 +60,10 @@ MODEL_ALIASES = [
"Llama-3.2-90B-Vision-Instruct", "Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias(
"Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
] ]
@ -197,12 +201,12 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
else: else:
params["max_completion_tokens"] = sampling_params.max_tokens params["max_completion_tokens"] = sampling_params.max_tokens
if sampling_params.strategy == SamplingStrategy.top_p: if isinstance(sampling_params.strategy, TopPSamplingStrategy):
params["top_p"] = sampling_params.top_p params["top_p"] = sampling_params.strategy.top_p
elif sampling_params.strategy == "top_k": if isinstance(sampling_params.strategy, TopKSamplingStrategy):
params["extra_body"]["top_k"] = sampling_params.top_k params["extra_body"]["top_k"] = sampling_params.strategy.top_k
elif sampling_params.strategy == "greedy": if isinstance(sampling_params.strategy, GreedySamplingStrategy):
params["temperature"] = sampling_params.temperature params["temperature"] = 0.0
return params return params

View file

@ -14,9 +14,20 @@ distribution_spec:
- inline::meta-reference - inline::meta-reference
telemetry: telemetry:
- inline::meta-reference - inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime: tool_runtime:
- remote::brave-search - remote::brave-search
- remote::tavily-search - remote::tavily-search
- inline::code-interpreter - inline::code-interpreter
- inline::rag-runtime - inline::rag-runtime
- remote::model-context-protocol
image_type: conda image_type: conda

View file

@ -2,8 +2,11 @@ version: '2'
image_name: sambanova image_name: sambanova
apis: apis:
- agents - agents
- datasetio
- eval
- inference - inference
- safety - safety
- scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io - vector_io
@ -22,12 +25,6 @@ providers:
type: sqlite type: sqlite
namespace: null namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
- provider_id: chromadb
provider_type: remote::chromadb
config: {}
- provider_id: pgvector
provider_type: remote::pgvector
config: {}
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -47,6 +44,28 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack} service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
@ -64,42 +83,69 @@ providers:
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
config: {} config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store: metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-8B-Instruct provider_model_id: Meta-Llama-3.1-8B-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
model_type: llm
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-70B-Instruct provider_model_id: Meta-Llama-3.1-70B-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-405B-Instruct provider_model_id: Meta-Llama-3.1-405B-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-1B-Instruct provider_model_id: Meta-Llama-3.2-1B-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-3B-Instruct provider_model_id: Meta-Llama-3.2-3B-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-11B-Vision-Instruct provider_model_id: Llama-3.2-11B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: sambanova
provider_model_id: Llama-Guard-3-8B
model_type: llm
shields: shields:
- shield_id: meta-llama/Llama-Guard-3-8B - shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []
tool_groups: [] tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter