forked from phoenix-oss/llama-stack-mirror
parent
7ea14ae62e
commit
a7b929f17e
9 changed files with 53 additions and 56 deletions
|
@ -250,7 +250,9 @@ class ContentBuilder:
|
||||||
value = sample_transformer(object_to_json(example))
|
value = sample_transformer(object_to_json(example))
|
||||||
|
|
||||||
hash_string = (
|
hash_string = (
|
||||||
hashlib.md5(json_dump_string(value).encode("utf-8")).digest().hex()
|
hashlib.sha256(json_dump_string(value).encode("utf-8"))
|
||||||
|
.digest()
|
||||||
|
.hex()[:16]
|
||||||
)
|
)
|
||||||
name = f"ex-{hash_string}"
|
name = f"ex-{hash_string}"
|
||||||
|
|
||||||
|
|
|
@ -38,13 +38,15 @@ The following models are available by default:
|
||||||
- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
|
- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
|
||||||
- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
|
- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
|
||||||
- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
|
- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
|
||||||
|
- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
|
||||||
- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
|
- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
|
||||||
- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
|
- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
|
||||||
|
- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
|
||||||
|
|
||||||
|
|
||||||
### Prerequisite: API Keys
|
### Prerequisite: API Keys
|
||||||
|
|
||||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://cloud.sambanova.ai/).
|
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
|
||||||
|
|
||||||
|
|
||||||
## Running Llama Stack with SambaNova
|
## Running Llama Stack with SambaNova
|
||||||
|
|
|
@ -50,7 +50,10 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
|
||||||
|
|
||||||
|
|
||||||
def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
|
def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
|
||||||
md5_hash = hashlib.md5()
|
# NOTE: MD5 is used here only for download integrity verification,
|
||||||
|
# not for security purposes
|
||||||
|
# TODO: switch to SHA256
|
||||||
|
md5_hash = hashlib.md5(usedforsecurity=False)
|
||||||
with open(filepath, "rb") as f:
|
with open(filepath, "rb") as f:
|
||||||
for chunk in iter(lambda: f.read(chunk_size), b""):
|
for chunk in iter(lambda: f.read(chunk_size), b""):
|
||||||
md5_hash.update(chunk)
|
md5_hash.update(chunk)
|
||||||
|
|
|
@ -52,6 +52,6 @@ class NVIDIAConfig(BaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
|
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"url": "https://integrate.api.nvidia.com",
|
"url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
|
||||||
"api_key": "${env.NVIDIA_API_KEY}",
|
"api_key": "${env.NVIDIA_API_KEY:}",
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,19 +14,9 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
eval:
|
|
||||||
- inline::meta-reference
|
|
||||||
datasetio:
|
|
||||||
- remote::huggingface
|
|
||||||
- inline::localfs
|
|
||||||
scoring:
|
|
||||||
- inline::basic
|
|
||||||
- inline::llm-as-judge
|
|
||||||
- inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- remote::brave-search
|
- remote::brave-search
|
||||||
- remote::tavily-search
|
- remote::tavily-search
|
||||||
- inline::code-interpreter
|
- inline::code-interpreter
|
||||||
- inline::rag-runtime
|
- inline::rag-runtime
|
||||||
- remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -37,7 +37,7 @@ The following models are available by default:
|
||||||
|
|
||||||
### Prerequisite: API Keys
|
### Prerequisite: API Keys
|
||||||
|
|
||||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaBova.ai](https://sambanova.ai/).
|
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
|
||||||
|
|
||||||
|
|
||||||
## Running Llama Stack with SambaNova
|
## Running Llama Stack with SambaNova
|
||||||
|
|
|
@ -2,11 +2,8 @@ version: '2'
|
||||||
image_name: sambanova
|
image_name: sambanova
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- datasetio
|
|
||||||
- eval
|
|
||||||
- inference
|
- inference
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
- vector_io
|
- vector_io
|
||||||
|
@ -25,6 +22,12 @@ providers:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
namespace: null
|
namespace: null
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
|
||||||
|
- provider_id: chromadb
|
||||||
|
provider_type: remote::chromadb
|
||||||
|
config: {}
|
||||||
|
- provider_id: pgvector
|
||||||
|
provider_type: remote::pgvector
|
||||||
|
config: {}
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
@ -44,28 +47,6 @@ providers:
|
||||||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
|
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
|
||||||
eval:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config: {}
|
|
||||||
datasetio:
|
|
||||||
- provider_id: huggingface
|
|
||||||
provider_type: remote::huggingface
|
|
||||||
config: {}
|
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
config: {}
|
|
||||||
scoring:
|
|
||||||
- provider_id: basic
|
|
||||||
provider_type: inline::basic
|
|
||||||
config: {}
|
|
||||||
- provider_id: llm-as-judge
|
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
config: {}
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
config:
|
|
||||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_id: brave-search
|
||||||
provider_type: remote::brave-search
|
provider_type: remote::brave-search
|
||||||
|
@ -83,9 +64,6 @@ providers:
|
||||||
- provider_id: rag-runtime
|
- provider_id: rag-runtime
|
||||||
provider_type: inline::rag-runtime
|
provider_type: inline::rag-runtime
|
||||||
config: {}
|
config: {}
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
config: {}
|
|
||||||
metadata_store:
|
metadata_store:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
|
||||||
|
@ -97,7 +75,6 @@ models:
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||||
model_type: llm
|
|
||||||
provider_id: sambanova
|
provider_id: sambanova
|
||||||
provider_model_id: Meta-Llama-3.1-70B-Instruct
|
provider_model_id: Meta-Llama-3.1-70B-Instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
@ -131,15 +108,10 @@ models:
|
||||||
provider_id: sambanova
|
provider_id: sambanova
|
||||||
provider_model_id: Llama-3.2-90B-Vision-Instruct
|
provider_model_id: Llama-3.2-90B-Vision-Instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata: {}
|
|
||||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
|
||||||
provider_id: sambanova
|
|
||||||
provider_model_id: Llama-3.2-90B-Vision-Instruct
|
|
||||||
model_type: llm
|
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-Guard-3-8B
|
model_id: meta-llama/Llama-Guard-3-8B
|
||||||
provider_id: sambanova
|
provider_id: sambanova
|
||||||
provider_model_id: Llama-Guard-3-8B
|
provider_model_id: Meta-Llama-Guard-3-8B
|
||||||
model_type: llm
|
model_type: llm
|
||||||
shields:
|
shields:
|
||||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||||
|
|
|
@ -8,7 +8,12 @@ from pathlib import Path
|
||||||
|
|
||||||
from llama_models.sku_list import all_registered_models
|
from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import (
|
||||||
|
ModelInput,
|
||||||
|
Provider,
|
||||||
|
ShieldInput,
|
||||||
|
ToolGroupInput,
|
||||||
|
)
|
||||||
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
|
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
|
||||||
from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
|
||||||
|
|
||||||
|
@ -29,10 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"inline::rag-runtime",
|
"inline::rag-runtime",
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
name = "sambanova"
|
||||||
|
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="sambanova",
|
provider_id=name,
|
||||||
provider_type="remote::sambanova",
|
provider_type=f"remote::{name}",
|
||||||
config=SambaNovaImplConfig.sample_run_config(),
|
config=SambaNovaImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -43,12 +49,28 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
ModelInput(
|
ModelInput(
|
||||||
model_id=core_model_to_hf_repo[m.llama_model],
|
model_id=core_model_to_hf_repo[m.llama_model],
|
||||||
provider_model_id=m.provider_model_id,
|
provider_model_id=m.provider_model_id,
|
||||||
|
provider_id=name,
|
||||||
)
|
)
|
||||||
for m in MODEL_ALIASES
|
for m in MODEL_ALIASES
|
||||||
]
|
]
|
||||||
|
|
||||||
|
default_tool_groups = [
|
||||||
|
ToolGroupInput(
|
||||||
|
toolgroup_id="builtin::websearch",
|
||||||
|
provider_id="tavily-search",
|
||||||
|
),
|
||||||
|
ToolGroupInput(
|
||||||
|
toolgroup_id="builtin::rag",
|
||||||
|
provider_id="rag-runtime",
|
||||||
|
),
|
||||||
|
ToolGroupInput(
|
||||||
|
toolgroup_id="builtin::code_interpreter",
|
||||||
|
provider_id="code-interpreter",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="sambanova",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use SambaNova.AI for running LLM inference",
|
description="Use SambaNova.AI for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -62,6 +84,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
},
|
},
|
||||||
default_models=default_models,
|
default_models=default_models,
|
||||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||||
|
default_tool_groups=default_tool_groups,
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
run_config_env_vars={
|
run_config_env_vars={
|
||||||
|
|
|
@ -137,7 +137,12 @@ class DistributionTemplate(BaseModel):
|
||||||
|
|
||||||
template = self.template_path.read_text()
|
template = self.template_path.read_text()
|
||||||
# Render template with rich-generated table
|
# Render template with rich-generated table
|
||||||
env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
|
env = jinja2.Environment(
|
||||||
|
trim_blocks=True,
|
||||||
|
lstrip_blocks=True,
|
||||||
|
# NOTE: autoescape is required to prevent XSS attacks
|
||||||
|
autoescape=True,
|
||||||
|
)
|
||||||
template = env.from_string(template)
|
template = env.from_string(template)
|
||||||
return template.render(
|
return template.render(
|
||||||
name=self.name,
|
name=self.name,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue