diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 05c609867..d3de0427f 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -250,7 +250,9 @@ class ContentBuilder:
             value = sample_transformer(object_to_json(example))

             hash_string = (
-                hashlib.md5(json_dump_string(value).encode("utf-8")).digest().hex()
+                hashlib.sha256(json_dump_string(value).encode("utf-8"))
+                .digest()
+                .hex()[:16]
             )
             name = f"ex-{hash_string}"

diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index 6dbc0e94e..e428e085a 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -38,13 +38,15 @@ The following models are available by default:
 - `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
 - `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
 - `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
 - `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
 - `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`


 ### Prerequisite: API Keys

-Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://cloud.sambanova.ai/).
+Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).


 ## Running Llama Stack with SambaNova
diff --git a/llama_stack/cli/verify_download.py b/llama_stack/cli/verify_download.py
index f86bed6af..68158243b 100644
--- a/llama_stack/cli/verify_download.py
+++ b/llama_stack/cli/verify_download.py
@@ -50,7 +50,10 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:


 def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
-    md5_hash = hashlib.md5()
+    # NOTE: MD5 is used here only for download integrity verification,
+    # not for security purposes
+    # TODO: switch to SHA256
+    md5_hash = hashlib.md5(usedforsecurity=False)
     with open(filepath, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):
             md5_hash.update(chunk)
diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py
index d062e65d2..d31fa9d25 100644
--- a/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/llama_stack/providers/remote/inference/nvidia/config.py
@@ -52,6 +52,6 @@ class NVIDIAConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {
-            "url": "https://integrate.api.nvidia.com",
-            "api_key": "${env.NVIDIA_API_KEY}",
+            "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
+            "api_key": "${env.NVIDIA_API_KEY:}",
         }
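Both hashing changes above follow the same pattern: truncated SHA-256 where only a short, stable identifier is needed, and `usedforsecurity=False` where MD5 must stay for compatibility with published checksums. A minimal standalone sketch of the two idioms (not project code; the `usedforsecurity` keyword requires Python 3.9+):

```python
import hashlib

# Stable short name from a SHA-256 digest, as in the generator change;
# .digest().hex()[:16] is equivalent to .hexdigest()[:16].
payload = b'{"example": 1}'
name = f"ex-{hashlib.sha256(payload).digest().hex()[:16]}"

# usedforsecurity=False (Python 3.9+) marks this MD5 as a plain integrity
# check, so FIPS-restricted builds will not reject the call.
md5_hash = hashlib.md5(usedforsecurity=False)
md5_hash.update(b"downloaded bytes")
print(name, md5_hash.hexdigest())
```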
diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml
index 0966bfdd9..ca5ffe618 100644
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@@ -14,19 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
-    eval:
-    - inline::meta-reference
-    datasetio:
-    - remote::huggingface
-    - inline::localfs
-    scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
     tool_runtime:
     - remote::brave-search
     - remote::tavily-search
     - inline::code-interpreter
     - inline::rag-runtime
-    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md
index 4af4718e5..4b18aa756 100644
--- a/llama_stack/templates/sambanova/doc_template.md
+++ b/llama_stack/templates/sambanova/doc_template.md
@@ -37,7 +37,7 @@ The following models are available by default:


 ### Prerequisite: API Keys

-Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaBova.ai](https://sambanova.ai/).
+Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).

 ## Running Llama Stack with SambaNova
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 36f07dc73..6cec51824 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -2,11 +2,8 @@ version: '2'
 image_name: sambanova
 apis:
 - agents
-- datasetio
-- eval
 - inference
 - safety
-- scoring
 - telemetry
 - tool_runtime
 - vector_io
@@ -25,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -44,28 +47,6 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config: {}
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config: {}
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
@@ -83,9 +64,6 @@ providers:
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
@@ -97,7 +75,6 @@ models:
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  model_type: llm
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-70B-Instruct
   model_type: llm
@@ -131,15 +108,10 @@ models:
   provider_id: sambanova
   provider_model_id: Llama-3.2-90B-Vision-Instruct
   model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: sambanova
-  provider_model_id: Llama-3.2-90B-Vision-Instruct
-  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: sambanova
-  provider_model_id: Llama-Guard-3-8B
+  provider_model_id: Meta-Llama-Guard-3-8B
   model_type: llm
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
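The `${env.VAR:default}` values introduced in run.yaml and in `NVIDIAConfig.sample_run_config` resolve to the environment variable when it is set, and to the text after the colon otherwise; an empty default such as `${env.NVIDIA_API_KEY:}` resolves to an empty string. A sketch of that substitution using a hypothetical `resolve_env` helper (llama-stack's actual resolver may differ in details):

```python
import os
import re

# Matches ${env.VAR} and ${env.VAR:default}; "default" may be empty.
_ENV_VAR = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env(value: str) -> str:
    def substitute(match: re.Match) -> str:
        var, default = match.group(1), match.group(2)
        # Fall back to the default after ':' when the variable is unset.
        return os.environ.get(var, default if default is not None else "")
    return _ENV_VAR.sub(substitute, value)

print(resolve_env("${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}"))
# -> https://integrate.api.nvidia.com  (when NVIDIA_BASE_URL is unset)
print(repr(resolve_env("${env.NVIDIA_API_KEY:}")))
# -> ''  (empty default after the colon)
```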
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 389e2a6c5..9c0b87e3c 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -8,7 +8,12 @@ from pathlib import Path

 from llama_models.sku_list import all_registered_models

-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES

@@ -29,10 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             "inline::rag-runtime",
         ],
     }
+    name = "sambanova"

     inference_provider = Provider(
-        provider_id="sambanova",
-        provider_type="remote::sambanova",
+        provider_id=name,
+        provider_type=f"remote::{name}",
         config=SambaNovaImplConfig.sample_run_config(),
     )

@@ -43,12 +49,28 @@ def get_distribution_template() -> DistributionTemplate:
         ModelInput(
             model_id=core_model_to_hf_repo[m.llama_model],
             provider_model_id=m.provider_model_id,
+            provider_id=name,
         )
         for m in MODEL_ALIASES
     ]

+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
+
     return DistributionTemplate(
-        name="sambanova",
+        name=name,
         distro_type="self_hosted",
         description="Use SambaNova.AI for running LLM inference",
         docker_image=None,
@@ -62,6 +84,7 @@ def get_distribution_template() -> DistributionTemplate:
             },
             default_models=default_models,
             default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+            default_tool_groups=default_tool_groups,
         ),
     },
     run_config_env_vars={
diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py
index 78f57b795..d87830bca 100644
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@@ -137,7 +137,12 @@ class DistributionTemplate(BaseModel):
         template = self.template_path.read_text()

         # Render template with rich-generated table
-        env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
+        env = jinja2.Environment(
+            trim_blocks=True,
+            lstrip_blocks=True,
+            # NOTE: autoescape is required to prevent XSS attacks
+            autoescape=True,
+        )
         template = env.from_string(template)
         return template.render(
             name=self.name,
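With `autoescape=True`, Jinja2 escapes HTML-significant characters in rendered variables instead of passing them through verbatim, which is the protection the NOTE comment above refers to. A minimal sketch of the behavior, using the same environment settings as the change:

```python
import jinja2

# autoescape=True HTML-escapes substituted variables at render time.
env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True, autoescape=True)
template = env.from_string("Provider: {{ name }}")
print(template.render(name="<script>alert(1)</script>"))
# -> Provider: &lt;script&gt;alert(1)&lt;/script&gt;
```

One trade-off to keep in mind: these templates render Markdown docs, so autoescaping will also convert legitimate `<`, `>`, and `&` characters in template variables into HTML entities.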