Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-21 12:09:40 +00:00)
Merge branch 'main' into nvidia-e2e-notebook
Commit 51b68b4be6
234 changed files with 21943 additions and 7540 deletions
@@ -46,7 +46,7 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY
 ```
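For reference, the renamed flag slots into the full command from this template as follows. This is a minimal sketch, not part of the diff: the image tag instantiates {{ name }} for the Cerebras distribution, and 8321 is the default server port used elsewhere in this commit.

```bash
# Sketch: launch the Cerebras distribution with the renamed --config flag.
export LLAMA_STACK_PORT=8321
docker run \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-cerebras \
  --config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY
```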
@@ -1,43 +0,0 @@
-# Report for cerebras distribution
-
-## Supported Models
-| Model Descriptor | cerebras |
-|:---|:---|
-| meta-llama/Llama-3-8B-Instruct | ❌ |
-| meta-llama/Llama-3-70B-Instruct | ❌ |
-| meta-llama/Llama-3.1-8B-Instruct | ✅ |
-| meta-llama/Llama-3.1-70B-Instruct | ❌ |
-| meta-llama/Llama-3.1-405B-Instruct-FP8 | ❌ |
-| meta-llama/Llama-3.2-1B-Instruct | ❌ |
-| meta-llama/Llama-3.2-3B-Instruct | ❌ |
-| meta-llama/Llama-3.2-11B-Vision-Instruct | ❌ |
-| meta-llama/Llama-3.2-90B-Vision-Instruct | ❌ |
-| meta-llama/Llama-3.3-70B-Instruct | ✅ |
-| meta-llama/Llama-Guard-3-11B-Vision | ❌ |
-| meta-llama/Llama-Guard-3-1B | ❌ |
-| meta-llama/Llama-Guard-3-8B | ❌ |
-| meta-llama/Llama-Guard-2-8B | ❌ |
-
-## Inference
-| Model | API | Capability | Test | Status |
-|:----- |:-----|:-----|:-----|:-----|
-| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ |
-| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ❌ |
-
-## Vector IO
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /retrieve | | test_vector_db_retrieve | ✅ |
-
-## Agents
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /create_agent_turn | rag | test_rag_agent | ✅ |
-| /create_agent_turn | custom_tool | test_custom_tool | ❌ |
@@ -143,7 +143,7 @@ docker run \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env DEH_URL=$DEH_URL \
@@ -152,46 +152,6 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "dev": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "fireworks-ai",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
   "fireworks": [
     "aiosqlite",
     "autoevals",
@@ -481,6 +441,7 @@
     "opentelemetry-exporter-otlp-proto-http",
     "opentelemetry-sdk",
     "pandas",
+    "peft",
     "pillow",
     "psycopg2-binary",
     "pymongo",
@@ -491,9 +452,11 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
+    "torch",
     "tqdm",
     "transformers",
     "tree_sitter",
+    "trl",
     "uvicorn"
   ],
   "open-benchmark": [
@@ -619,10 +582,11 @@
     "fastapi",
     "fire",
     "httpx",
     "litellm",
     "matplotlib",
     "mcp",
     "nltk",
     "numpy",
     "openai",
     "opentelemetry-exporter-otlp-proto-http",
     "opentelemetry-sdk",
     "pandas",
@@ -637,7 +601,49 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn"
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "starter": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fastapi",
+    "fire",
+    "fireworks-ai",
+    "httpx",
+    "langdetect",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "tgi": [
     "aiohttp",
@@ -830,6 +836,8 @@
     "tqdm",
     "transformers",
     "tree_sitter",
-    "uvicorn"
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ]
 }
@@ -13,9 +13,10 @@ distribution_spec:
     - inline::basic
     - inline::braintrust
     post_training:
-    - inline::torchtune
+    - inline::huggingface
    datasetio:
    - inline::localfs
+    - remote::huggingface
    telemetry:
    - inline::meta-reference
    agents:
@@ -49,16 +49,24 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/huggingface}/huggingface_datasetio.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
   post_training:
-  - provider_id: torchtune-post-training
-    provider_type: inline::torchtune
-    config: {}
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -1,45 +0,0 @@
-# Report for fireworks distribution
-
-## Supported Models
-| Model Descriptor | fireworks |
-|:---|:---|
-| Llama-3-8B-Instruct | ❌ |
-| Llama-3-70B-Instruct | ❌ |
-| Llama3.1-8B-Instruct | ✅ |
-| Llama3.1-70B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-11B-Vision-Instruct | ✅ |
-| Llama3.2-90B-Vision-Instruct | ✅ |
-| Llama3.3-70B-Instruct | ✅ |
-| Llama-Guard-3-11B-Vision | ✅ |
-| Llama-Guard-3-1B | ❌ |
-| Llama-Guard-3-8B | ✅ |
-| Llama-Guard-2-8B | ❌ |
-
-## Inference
-| Model | API | Capability | Test | Status |
-|:----- |:-----|:-----|:-----|:-----|
-| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
-
-## Vector IO
-| Provider | API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|:-----|
-| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |
-
-## Agents
-| Provider | API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|:-----|
-| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
-| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
@@ -116,7 +116,7 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
@@ -23,6 +23,8 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    post_training:
+    - inline::huggingface
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
@@ -86,7 +86,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env SAFETY_MODEL=$SAFETY_MODEL \
@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
+from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -28,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "post_training": ["inline::huggingface"],
         "tool_runtime": [
             "remote::brave-search",
             "remote::tavily-search",
@@ -47,7 +49,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::faiss",
         config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
 
+    posttraining_provider = Provider(
+        provider_id="huggingface",
+        provider_type="inline::huggingface",
+        config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="ollama",
@@ -92,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "post_training": [posttraining_provider],
             },
             default_models=[inference_model, embedding_model],
             default_tool_groups=default_tool_groups,
@@ -100,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_overrides={
                 "inference": [inference_provider],
                 "vector_io": [vector_io_provider_faiss],
+                "post_training": [posttraining_provider],
                 "safety": [
                     Provider(
                         provider_id="llama-guard",
@@ -1,43 +0,0 @@
-# Report for ollama distribution
-
-## Supported Models
-| Model Descriptor | ollama |
-|:---|:---|
-| Llama-3-8B-Instruct | ❌ |
-| Llama-3-70B-Instruct | ❌ |
-| Llama3.1-8B-Instruct | ✅ |
-| Llama3.1-70B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-11B-Vision-Instruct | ✅ |
-| Llama3.2-90B-Vision-Instruct | ✅ |
-| Llama3.3-70B-Instruct | ✅ |
-| Llama-Guard-3-11B-Vision | ❌ |
-| Llama-Guard-3-1B | ✅ |
-| Llama-Guard-3-8B | ✅ |
-| Llama-Guard-2-8B | ❌ |
-
-## Inference
-| Model | API | Capability | Test | Status |
-|:----- |:-----|:-----|:-----|:-----|
-| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ |
-| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
-
-## Vector IO
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /retrieve | | test_vector_db_retrieve | ✅ |
-
-## Agents
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /create_agent_turn | rag | test_rag_agent | ✅ |
-| /create_agent_turn | custom_tool | test_custom_tool | ✅ |
@@ -5,6 +5,7 @@ apis:
 - datasetio
 - eval
 - inference
+- post_training
 - safety
 - scoring
 - telemetry
@@ -80,6 +81,13 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -5,6 +5,7 @@ apis:
 - datasetio
 - eval
 - inference
+- post_training
 - safety
 - scoring
 - telemetry
@@ -78,6 +79,13 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -220,7 +220,7 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1
@@ -242,7 +242,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \
@@ -1,9 +1,10 @@
 version: '2'
 distribution_spec:
-  description: Use SambaNova.AI for running LLM inference
+  description: Use SambaNova for running LLM inference
   providers:
     inference:
     - remote::sambanova
+    - inline::sentence-transformers
    vector_io:
    - inline::faiss
    - remote::chromadb
@@ -18,4 +19,6 @@ distribution_spec:
     - remote::brave-search
     - remote::tavily-search
     - inline::rag-runtime
+    - remote::model-context-protocol
+    - remote::wolfram-alpha
   image_type: conda
|
@ -14,6 +14,9 @@ providers:
|
|||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
@@ -68,110 +71,122 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
-  model_id: Meta-Llama-3.1-8B-Instruct
+  model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.1-70B-Instruct
-  provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-70B-Instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-70B-Instruct
-  model_type: llm
-- metadata: {}
-  model_id: Meta-Llama-3.1-405B-Instruct
+  model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.2-1B-Instruct
+  model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.2-3B-Instruct
+  model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.3-70B-Instruct
+  model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: sambanova
-  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Llama-3.2-11B-Vision-Instruct
+  model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Llama-3.2-90B-Vision-Instruct
+  model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-Guard-3-8B
-  provider_id: sambanova
-  provider_model_id: Meta-Llama-Guard-3-8B
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-Guard-3-8B
-  provider_id: sambanova
-  provider_model_id: Meta-Llama-Guard-3-8B
-  model_type: llm
-- metadata: {}
-  model_id: Llama-4-Scout-17B-16E-Instruct
+  model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-4-Scout-17B-16E-Instruct
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
   provider_id: sambanova
-  provider_model_id: Llama-4-Scout-17B-16E-Instruct
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
 vector_dbs: []
@@ -183,5 +198,7 @@ tool_groups:
   provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 server:
   port: 8321
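Once a server built from this config is running, the new tool group should be visible from the client side. A sketch, assuming the llama-stack-client CLI is installed; the subcommand comes from the client tooling, not from this diff:

```bash
# Sketch: confirm builtin::wolfram_alpha is registered on the running server.
llama-stack-client toolgroups list
```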
@@ -6,7 +6,16 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import Provider, ShieldInput, ToolGroupInput
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
@@ -23,7 +32,7 @@ from llama_stack.templates.template import (
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::sambanova"],
+        "inference": ["remote::sambanova", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -32,16 +41,29 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::brave-search",
             "remote::tavily-search",
             "inline::rag-runtime",
+            "remote::model-context-protocol",
+            "remote::wolfram-alpha",
         ],
     }
     name = "sambanova"
 
     inference_provider = Provider(
         provider_id=name,
         provider_type=f"remote::{name}",
         config=SambaNovaImplConfig.sample_run_config(),
     )
 
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
     vector_io_providers = [
         Provider(
             provider_id="faiss",
@@ -79,23 +101,27 @@ def get_distribution_template() -> DistributionTemplate:
             toolgroup_id="builtin::rag",
             provider_id="rag-runtime",
         ),
+        ToolGroupInput(
+            toolgroup_id="builtin::wolfram_alpha",
+            provider_id="wolfram-alpha",
+        ),
     ]
 
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
-        description="Use SambaNova.AI for running LLM inference",
-        docker_image=None,
+        description="Use SambaNova for running LLM inference",
+        container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider],
+                    "inference": [inference_provider, embedding_provider],
                     "vector_io": vector_io_providers,
                 },
-                default_models=default_models,
+                default_models=default_models + [embedding_model],
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
                 default_tool_groups=default_tool_groups,
             ),
@@ -107,7 +133,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
             "SAMBANOVA_API_KEY": (
                 "",
-                "SambaNova.AI API Key",
+                "SambaNova API Key",
             ),
         },
     )
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .dev import get_distribution_template  # noqa: F401
+from .starter import get_distribution_template  # noqa: F401
@@ -1,6 +1,6 @@
 version: '2'
 distribution_spec:
-  description: Distribution for running e2e tests in CI
+  description: Quick start template for running Llama Stack with several popular providers
   providers:
     inference:
     - remote::openai
@@ -8,6 +8,7 @@ distribution_spec:
     - remote::anthropic
     - remote::gemini
     - remote::groq
+    - remote::sambanova
     - inline::sentence-transformers
    vector_io:
    - inline::sqlite-vec
@@ -1,5 +1,5 @@
 version: '2'
-image_name: dev
+image_name: starter
 apis:
 - agents
 - datasetio
@@ -34,6 +34,11 @@ providers:
     config:
       url: https://api.groq.com
       api_key: ${env.GROQ_API_KEY:}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
@@ -41,7 +46,7 @@ providers:
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db
   - provider_id: ${env.ENABLE_CHROMADB+chromadb}
     provider_type: remote::chromadb
     config:
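The `${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}` references above use the stack's `${env.VAR:default}` substitution, so the default path applies unless the variable is set. A sketch of overriding it at launch; the directory and config path are illustrative:

```bash
# Sketch: point all starter SQLite stores at a custom directory.
export SQLITE_STORE_DIR=/data/llama-stack/starter
llama stack run ./run.yaml --port 8321
```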
@@ -66,14 +71,14 @@ providers:
     persistence_store:
       type: sqlite
       namespace: null
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       service_name: ${env.OTEL_SERVICE_NAME:}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/trace_store.db
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -81,7 +86,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
@@ -89,14 +94,14 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -127,7 +132,7 @@ providers:
     config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db
 models:
 - metadata: {}
   model_id: openai/gpt-4o
@@ -144,6 +149,76 @@ models:
   provider_id: openai
   provider_model_id: openai/chatgpt-4o-latest
   model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo-0125
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo-0125
+  model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo
+  model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo-instruct
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo-instruct
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4
+  provider_id: openai
+  provider_model_id: gpt-4
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4-turbo
+  provider_id: openai
+  provider_model_id: gpt-4-turbo
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o
+  provider_id: openai
+  provider_model_id: gpt-4o
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-2024-08-06
+  provider_id: openai
+  provider_model_id: gpt-4o-2024-08-06
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-mini
+  provider_id: openai
+  provider_model_id: gpt-4o-mini
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-audio-preview
+  provider_id: openai
+  provider_model_id: gpt-4o-audio-preview
+  model_type: llm
+- metadata: {}
+  model_id: chatgpt-4o-latest
+  provider_id: openai
+  provider_model_id: chatgpt-4o-latest
+  model_type: llm
+- metadata: {}
+  model_id: o1
+  provider_id: openai
+  provider_model_id: o1
+  model_type: llm
+- metadata: {}
+  model_id: o1-mini
+  provider_id: openai
+  provider_model_id: o1-mini
+  model_type: llm
+- metadata: {}
+  model_id: o3-mini
+  provider_id: openai
+  provider_model_id: o3-mini
+  model_type: llm
+- metadata: {}
+  model_id: o4-mini
+  provider_id: openai
+  provider_model_id: o4-mini
+  model_type: llm
 - metadata:
     embedding_dimension: 1536
     context_length: 8192
@@ -158,6 +233,20 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata:
+    embedding_dimension: 1536
+    context_length: 8192
+  model_id: text-embedding-3-small
+  provider_id: openai
+  provider_model_id: text-embedding-3-small
+  model_type: embedding
+- metadata:
+    embedding_dimension: 3072
+    context_length: 8192
+  model_id: text-embedding-3-large
+  provider_id: openai
+  provider_model_id: text-embedding-3-large
+  model_type: embedding
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   provider_id: fireworks
@@ -413,6 +502,106 @@ models:
   provider_id: groq
   provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-3.1-8B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-3.1-405B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-3.2-1B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-1B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-3.2-3B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-3.2-11B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-3.2-90B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -38,10 +38,15 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import (
     MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
 )
+from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImplConfig
+from llama_stack.providers.remote.inference.sambanova.models import (
+    MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.templates.template import (
     DistributionTemplate,
     RunConfigSettings,
@@ -49,7 +54,7 @@ from llama_stack.templates.template import (
 )
 
 
-def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]:
+def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
     # in this template, we allow each API key to be optional
     providers = [
         (
@@ -77,6 +82,11 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]:
             GROQ_MODEL_ENTRIES,
             GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
         ),
+        (
+            "sambanova",
+            SAMBANOVA_MODEL_ENTRIES,
+            SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
+        ),
     ]
     inference_providers = []
     available_models = {}
@@ -110,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::model-context-protocol",
         ],
     }
-    name = "dev"
+    name = "starter"
 
     vector_io_providers = [
         Provider(
@@ -162,7 +172,7 @@ def get_distribution_template() -> DistributionTemplate:
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
-        description="Distribution for running e2e tests in CI",
+        description="Quick start template for running Llama Stack with several popular providers",
         container_image=None,
         template_path=None,
         providers=providers,
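With the template renamed from dev to starter, builds reference the new name. A sketch assuming the standard template workflow; the image type shown is one of several supported options:

```bash
# Sketch: build and run the renamed starter template (formerly "dev").
llama stack build --template starter --image-type venv
llama stack run starter --port 8321
```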
@@ -105,7 +105,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT \
@@ -1,44 +0,0 @@
-# Report for tgi distribution
-
-## Supported Models
-| Model Descriptor | tgi |
-|:---|:---|
-| Llama-3-8B-Instruct | ✅ |
-| Llama-3-70B-Instruct | ✅ |
-| Llama3.1-8B-Instruct | ✅ |
-| Llama3.1-70B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-11B-Vision-Instruct | ✅ |
-| Llama3.2-90B-Vision-Instruct | ✅ |
-| Llama3.3-70B-Instruct | ✅ |
-| Llama-Guard-3-11B-Vision | ✅ |
-| Llama-Guard-3-1B | ✅ |
-| Llama-Guard-3-8B | ✅ |
-| Llama-Guard-2-8B | ✅ |
-
-## Inference
-| Model | API | Capability | Test | Status |
-|:----- |:-----|:-----|:-----|:-----|
-| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ |
-| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
-
-## Vector IO
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /retrieve | | test_vector_db_retrieve | ✅ |
-
-## Agents
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /create_agent_turn | rag | test_rag_agent | ✅ |
-| /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
@@ -1,45 +0,0 @@
-# Report for together distribution
-
-## Supported Models
-| Model Descriptor | together |
-|:---|:---|
-| Llama-3-8B-Instruct | ❌ |
-| Llama-3-70B-Instruct | ❌ |
-| Llama3.1-8B-Instruct | ✅ |
-| Llama3.1-70B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ❌ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-11B-Vision-Instruct | ✅ |
-| Llama3.2-90B-Vision-Instruct | ✅ |
-| Llama3.3-70B-Instruct | ✅ |
-| Llama-Guard-3-11B-Vision | ✅ |
-| Llama-Guard-3-1B | ❌ |
-| Llama-Guard-3-8B | ✅ |
-| Llama-Guard-2-8B | ❌ |
-
-## Inference
-| Model | API | Capability | Test | Status |
-|:----- |:-----|:-----|:-----|:-----|
-| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
-| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
-| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
-
-## Vector IO
-| Provider | API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|:-----|
-| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |
-
-## Agents
-| Provider | API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|:-----|
-| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
-| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
@@ -151,6 +151,76 @@ models:
   provider_id: openai
   provider_model_id: openai/chatgpt-4o-latest
   model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo-0125
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo-0125
+  model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo
+  model_type: llm
+- metadata: {}
+  model_id: gpt-3.5-turbo-instruct
+  provider_id: openai
+  provider_model_id: gpt-3.5-turbo-instruct
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4
+  provider_id: openai
+  provider_model_id: gpt-4
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4-turbo
+  provider_id: openai
+  provider_model_id: gpt-4-turbo
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o
+  provider_id: openai
+  provider_model_id: gpt-4o
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-2024-08-06
+  provider_id: openai
+  provider_model_id: gpt-4o-2024-08-06
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-mini
+  provider_id: openai
+  provider_model_id: gpt-4o-mini
+  model_type: llm
+- metadata: {}
+  model_id: gpt-4o-audio-preview
+  provider_id: openai
+  provider_model_id: gpt-4o-audio-preview
+  model_type: llm
+- metadata: {}
+  model_id: chatgpt-4o-latest
+  provider_id: openai
+  provider_model_id: chatgpt-4o-latest
+  model_type: llm
+- metadata: {}
+  model_id: o1
+  provider_id: openai
+  provider_model_id: o1
+  model_type: llm
+- metadata: {}
+  model_id: o1-mini
+  provider_id: openai
+  provider_model_id: o1-mini
+  model_type: llm
+- metadata: {}
+  model_id: o3-mini
+  provider_id: openai
+  provider_model_id: o3-mini
+  model_type: llm
+- metadata: {}
+  model_id: o4-mini
+  provider_id: openai
+  provider_model_id: o4-mini
+  model_type: llm
 - metadata:
     embedding_dimension: 1536
     context_length: 8192
@@ -165,6 +235,20 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata:
+    embedding_dimension: 1536
+    context_length: 8192
+  model_id: text-embedding-3-small
+  provider_id: openai
+  provider_model_id: text-embedding-3-small
+  model_type: embedding
+- metadata:
+    embedding_dimension: 3072
+    context_length: 8192
+  model_id: text-embedding-3-large
+  provider_id: openai
+  provider_model_id: text-embedding-3-large
+  model_type: embedding
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   provider_id: fireworks-openai-compat
@@ -502,104 +586,104 @@ models:
   provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.1-8B-Instruct
+  model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.1-70B-Instruct
-  provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-70B-Instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-70B-Instruct
-  model_type: llm
-- metadata: {}
-  model_id: Meta-Llama-3.1-405B-Instruct
+  model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.2-1B-Instruct
+  model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.2-3B-Instruct
+  model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-3.3-70B-Instruct
+  model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Llama-3.2-11B-Vision-Instruct
+  model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Llama-3.2-90B-Vision-Instruct
+  model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
   model_type: llm
 - metadata: {}
-  model_id: Meta-Llama-Guard-3-8B
-  provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-Guard-3-8B
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-Guard-3-8B
-  provider_id: sambanova-openai-compat
-  provider_model_id: Meta-Llama-Guard-3-8B
-  model_type: llm
-- metadata: {}
-  model_id: Llama-4-Scout-17B-16E-Instruct
+  model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-4-Scout-17B-16E-Instruct
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
   provider_id: sambanova-openai-compat
-  provider_model_id: Llama-4-Scout-17B-16E-Instruct
+  provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
   model_type: llm
+- metadata: {}
+  model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova-openai-compat
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: sambanova-openai-compat
+  provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: sambanova/Meta-Llama-Guard-3-8B
+  provider_id: sambanova-openai-compat
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-8B
+  provider_id: sambanova-openai-compat
+  provider_model_id: sambanova/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: llama3.1-8b
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::watsonx
+    - inline::sentence-transformers
    vector_io:
    - inline::faiss
    safety:
@@ -56,7 +56,7 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --yaml-config /root/my-run.yaml \
+  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env WATSONX_API_KEY=$WATSONX_API_KEY \
   --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
@@ -18,6 +18,9 @@ providers:
       url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:}
       project_id: ${env.WATSONX_PROJECT_ID:}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -191,6 +194,11 @@ models:
   provider_id: watsonx
   provider_model_id: meta-llama/llama-guard-3-11b-vision
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
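After a watsonx stack starts with this config, the newly registered embedding model should be listed alongside the LLM entries. A sketch, assuming the llama-stack-client CLI is available:

```bash
# Sketch: all-MiniLM-L6-v2 (model_type: embedding) should appear here.
llama-stack-client models list
```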
@@ -6,7 +6,11 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import Provider, ToolGroupInput
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
 from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
 from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
@@ -14,7 +18,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::watsonx"],
+        "inference": ["remote::watsonx", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -36,6 +40,12 @@ def get_distribution_template() -> DistributionTemplate:
         config=WatsonXConfig.sample_run_config(),
     )
 
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+
     available_models = {
         "watsonx": MODEL_ENTRIES,
     }
@@ -50,6 +60,15 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+
     default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name="watsonx",
@@ -62,9 +81,9 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider],
+                    "inference": [inference_provider, embedding_provider],
                 },
-                default_models=default_models,
+                default_models=default_models + [embedding_model],
                 default_tool_groups=default_tool_groups,
             ),
         },