From 4412694018ec1ac80edb99c1ff5f810e897a10b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 30 Apr 2025 17:56:46 +0200 Subject: [PATCH 1/5] chore: Remove zero-width space characters from OTEL service name env var defaults (#2060) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Replaced `${env.OTEL_SERVICE_NAME:\u200B}` and similar variants with properly formatted `${env.OTEL_SERVICE_NAME:}` across all YAML templates and TelemetryConfig. This prevents silent parsing issues and ensures consistent environment variable resolution. Slipped in https://github.com/meta-llama/llama-stack/pull/2058 Signed-off-by: Sébastien Han --- .../providers/inline/telemetry/meta_reference/config.py | 4 ++-- llama_stack/templates/bedrock/run.yaml | 2 +- llama_stack/templates/cerebras/run.yaml | 2 +- llama_stack/templates/ci-tests/run.yaml | 2 +- llama_stack/templates/dell/run-with-safety.yaml | 2 +- llama_stack/templates/dell/run.yaml | 2 +- llama_stack/templates/dev/run.yaml | 2 +- llama_stack/templates/fireworks/run-with-safety.yaml | 2 +- llama_stack/templates/fireworks/run.yaml | 2 +- llama_stack/templates/groq/run.yaml | 2 +- llama_stack/templates/hf-endpoint/run-with-safety.yaml | 2 +- llama_stack/templates/hf-endpoint/run.yaml | 2 +- llama_stack/templates/hf-serverless/run-with-safety.yaml | 2 +- llama_stack/templates/hf-serverless/run.yaml | 2 +- llama_stack/templates/llama_api/run.yaml | 2 +- llama_stack/templates/meta-reference-gpu/run-with-safety.yaml | 2 +- llama_stack/templates/meta-reference-gpu/run.yaml | 2 +- llama_stack/templates/nvidia/run-with-safety.yaml | 2 +- llama_stack/templates/nvidia/run.yaml | 2 +- llama_stack/templates/ollama/run-with-safety.yaml | 2 +- llama_stack/templates/ollama/run.yaml | 2 +- llama_stack/templates/open-benchmark/run.yaml | 2 +- llama_stack/templates/passthrough/run-with-safety.yaml | 2 +- llama_stack/templates/passthrough/run.yaml | 2 +- llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 +- llama_stack/templates/remote-vllm/run.yaml | 2 +- llama_stack/templates/sambanova/run.yaml | 2 +- llama_stack/templates/tgi/run-with-safety.yaml | 2 +- llama_stack/templates/tgi/run.yaml | 2 +- llama_stack/templates/together/run-with-safety.yaml | 2 +- llama_stack/templates/together/run.yaml | 2 +- llama_stack/templates/verification/run.yaml | 2 +- llama_stack/templates/vllm-gpu/run.yaml | 2 +- llama_stack/templates/watsonx/run.yaml | 2 +- 34 files changed, 35 insertions(+), 35 deletions(-) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index dfbc8a683..54bdc083c 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel): ) service_name: str = Field( # service name is always the same, use zero-width space to avoid clutter - default="​", + default="", description="The service name to use for telemetry", ) sinks: List[TelemetrySink] = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> Dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:​}", + "service_name": "${env.OTEL_SERVICE_NAME:}", "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, } diff --git 
a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index fcfd7dfb9..eaa1989ee 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -39,7 +39,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db eval: diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index adc86d19f..bade3d24c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -79,7 +79,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db tool_runtime: diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index b66ef3054..1b8698c64 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -42,7 +42,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db eval: diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 6c144ea24..d9693c8e3 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db eval: diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 61d849d83..2147724b9 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -41,7 +41,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db eval: diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index d799efaec..7a33892b1 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -71,7 +71,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/trace_store.db eval: diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 3ea8afff5..3c1b613f5 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ 
b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db eval: diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 993c50449..7ad98a62d 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db eval: diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index a75870c4b..1566cb9ca 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db eval: diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 11fcae875..94d280a2a 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db eval: diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 66272b2dd..86ee5c7e1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db eval: diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index 05cafc364..74617925b 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db eval: diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 712a9b829..03484c455 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ 
-45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db eval: diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index b077318b7..36c54db6c 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -54,7 +54,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db eval: diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index ca836665b..f1e3a67be 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -60,7 +60,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ebf280be..ac1058373 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index d9d9bf447..3cdb8e3d2 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db eval: diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index c83a4350c..3337b7942 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db eval: diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 38a09390d..e84e46a0b 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ 
b/llama_stack/templates/ollama/run-with-safety.yaml @@ -43,7 +43,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db eval: diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 3531cc581..66410a1e0 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -41,7 +41,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db eval: diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index dfa5e7280..9acfd0e5c 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -68,7 +68,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db eval: diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 80cbf20e4..927f808ab 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db eval: diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 4bebd91cc..40ce959ac 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db eval: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 4b060c591..e01bca127 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -88,7 +88,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 48afa64dd..222097f05 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ 
b/llama_stack/templates/remote-vllm/run.yaml @@ -81,7 +81,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index f88d4171d..dcf56b483 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -51,7 +51,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db tool_runtime: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 970b01c42..629389f6c 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db eval: diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index bc836d46e..ad1826695 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -44,7 +44,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db eval: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 39ac71c78..34985a8a3 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -50,7 +50,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db eval: diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index fa28482b7..c107d6f3f 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -45,7 +45,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db eval: diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml index d27fad540..d59d0cd8e 100644 --- a/llama_stack/templates/verification/run.yaml +++ b/llama_stack/templates/verification/run.yaml @@ -78,7 +78,7 @@ providers: - provider_id: meta-reference provider_type: 
inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db eval: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 269a6b278..dfe64de4a 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -49,7 +49,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db eval: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 29922b188..f4c986d6d 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -43,7 +43,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + service_name: ${env.OTEL_SERVICE_NAME:} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db eval: From eab550f7d2409ec6fc487948295466f41cf0f6e6 Mon Sep 17 00:00:00 2001 From: Jash Gulabrai <37194352+JashG@users.noreply.github.com> Date: Wed, 30 Apr 2025 12:01:28 -0400 Subject: [PATCH 2/5] fix: Fix messages format in NVIDIA safety check request body (#2063) # What does this PR do? When running a Llama Stack server and invoking the `/v1/safety/run-shield` endpoint, the NVIDIA Guardrails endpoint in some cases errors with a `422: Unprocessable Entity` due to malformed input. For example, given a request body like: ``` { "model": "test", "messages": [ { "role": "user", "content": "You are stupid." } ] } ``` `convert_pydantic_to_json_value` converts the message to: ``` { "role": "user", "content": "You are stupid.", "context": null } ``` This causes NVIDIA Guardrails to return an error `HTTPError: 422 Client Error: Unprocessable Entity for url: http://nemo.test/v1/guardrail/checks`, because `context` shouldn't be included in the body. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan I ran the Llama Stack server locally and manually verified that the endpoint now succeeds.
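For reference, the updated unit tests expect the messages to be serialized as plain OpenAI-style dicts, so the body posted to the Guardrails `checks` endpoint no longer carries the `context` key. An illustrative sketch based on the example above (remaining request fields elided):

```
{
  "model": "test",
  "messages": [
    {"role": "user", "content": "You are stupid."}
  ],
  "temperature": 1.0,
  "top_p": 1,
  "frequency_penalty": 0,
  ...
}
```

The manual check against the locally running server used the following call: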
``` message = {"role": "user", "content": "You are stupid."} response = client.safety.run_shield(messages=[message], shield_id=shield_id, params={}) ``` Server logs: ``` 14:29:09.656 [START] /v1/safety/run-shield INFO: 127.0.0.1:54616 - "POST /v1/safety/run-shield HTTP/1.1" 200 OK 14:29:09.918 [END] /v1/safety/run-shield [StatusCode.OK] (262.26ms ``` [//]: # (## Documentation) Co-authored-by: Jash Gulabrai --- .../providers/remote/safety/nvidia/nvidia.py | 6 +++--- tests/unit/providers/nvidia/test_safety.py | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 1ff4a6ad9..13bc212a1 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -12,8 +12,8 @@ import requests from llama_stack.apis.inference import Message from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.shields import Shield -from llama_stack.distribution.library_client import convert_pydantic_to_json_value from llama_stack.providers.datatypes import ShieldsProtocolPrivate +from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from .config import NVIDIASafetyConfig @@ -28,7 +28,6 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): Args: config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID. """ - print(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...") self.config = config async def initialize(self) -> None: @@ -127,9 +126,10 @@ class NeMoGuardrails: Raises: requests.HTTPError: If the POST request fails. """ + request_messages = [await convert_message_to_openai_dict_new(message) for message in messages] request_data = { "model": self.model, - "messages": convert_pydantic_to_json_value(messages), + "messages": request_messages, "temperature": self.temperature, "top_p": 1, "frequency_penalty": 0, diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index e7e1cb3dc..8c74f178b 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json import os import unittest from typing import Any @@ -139,8 +138,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1, @@ -193,8 +192,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1, @@ -269,8 +268,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1, From 2c7aba415837f6f56ffa37547732da6bb257df60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 30 Apr 2025 18:05:27 +0200 Subject: [PATCH 3/5] fix: enforce stricter ASCII lint rules in Ruff (#2062) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - Added new Ruff lint rules to detect ambiguous or non-ASCII characters. - Added per-file ignores where Unicode usage is still required. - Fixed the existing violations flagged by the new rules Signed-off-by: Sébastien Han --- llama_stack/cli/stack/run.py | 2 +- llama_stack/distribution/build.py | 3 +-- pyproject.toml | 36 +++++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index d8234bb46..2eee20883 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -119,7 +119,7 @@ class StackRun(Subcommand): if not config_file.is_file(): self.parser.error( - f"Config file must be a valid file path, '{config_file}’ is not a file: type={type(config_file)}" + f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}" ) logger.info(f"Using run configuration: {config_file}") diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 9664449f3..1d39063f0 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -47,14 +47,13 @@ def get_provider_dependencies( providers = config.distribution_spec.providers deps = [] registry = get_provider_registry(config) - for api_str, provider_or_providers in providers.items(): providers_for_api = registry[Api(api_str)] providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers] for provider in providers: - # Providers from BuildConfig and RunConfig are subtly different – not great + # Providers from BuildConfig and RunConfig are subtly different - not great provider_type = provider if isinstance(provider, str) else provider.provider_type if provider_type not in providers_for_api: diff --git a/pyproject.toml b/pyproject.toml index 36af789ef..f1f65be90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,15 +144,25 @@ exclude = [ [tool.ruff.lint] select = [ - "B", # flake8-bugbear - "B9", # flake8-bugbear subset - "C", # comprehensions - "E",
# pycodestyle - "F", # Pyflakes - "N", # Naming - "W", # Warnings - "DTZ", # datetime rules - "I", # isort (imports order) + "B", # flake8-bugbear + "B9", # flake8-bugbear subset + "C", # comprehensions + "E", # pycodestyle + "F", # Pyflakes + "N", # Naming + "W", # Warnings + "DTZ", # datetime rules + "I", # isort (imports order) + "RUF001", # Checks for ambiguous Unicode characters in strings + "RUF002", # Checks for ambiguous Unicode characters in docstrings + "RUF003", # Checks for ambiguous Unicode characters in comments + "PLC2401", # Checks for the use of non-ASCII characters in variable names + "PLC2403", # Checks for the use of non-ASCII characters in import statements + "PLE2510", # Checks for strings that contain the control character BS. + "PLE2512", # Checks for strings that contain the raw control character SUB. + "PLE2513", # Checks for strings that contain the raw control character ESC. + "PLE2514", # Checks for strings that contain the raw control character NUL (0 byte). + "PLE2515", # Checks for strings that contain the zero width space character. ] ignore = [ # The following ignores are desired by the project maintainers. @@ -165,10 +175,18 @@ ignore = [ # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. "C901", # Complexity of the function is too high ] +unfixable = [ + "PLE2515", +] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually # Ignore the following errors for the following files [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests +"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"] +"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [ + "RUF001", + "PLE2515", +] [tool.mypy] mypy_path = ["llama_stack"] From d897313e0b62c6d3f993d72a653443b5e2cce6fe Mon Sep 17 00:00:00 2001 From: Nathan Weinberg <31703736+nathan-weinberg@users.noreply.github.com> Date: Wed, 30 Apr 2025 14:06:24 -0400 Subject: [PATCH 4/5] feat: add additional logging to llama stack build (#1689) # What does this PR do? Partial revert of fa68ded07c5a6469f113b016a335f355a94ed504 this commit ensures users know where their new templates are generated and how to run the newly built distro locally discussion on Discord: https://discordapp.com/channels/1257833999603335178/1257834000190275586/1351652390113378415 ## Test Plan Did a local run - let me know if we want any unit testing covering this ![Screenshot from 2025-03-18 22-38-18](https://github.com/user-attachments/assets/6d5dac52-edad-4a84-992f-a3c23cda10c8) ## Documentation Updated "Zero to Hero" guide with new output --------- Signed-off-by: Nathan Weinberg --- docs/zero_to_hero_guide/README.md | 8 ++++---- llama_stack/cli/stack/_build.py | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 9f756de26..96f9768de 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -86,11 +86,11 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next llama stack build --template ollama --image-type conda ``` **Expected Output:** - ``` + ```bash ... - Build Successful! Next steps: - 1. Set the environment variables: LLAMA_STACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL - 2. 
`llama stack run /Users//.llama/distributions/llamastack-ollama/ollama-run.yaml + Build Successful! + You can find the newly-built template here: ~/.llama/distributions/ollama/ollama-run.yaml + You can run the new Llama Stack Distro via: llama stack run ~/.llama/distributions/ollama/ollama-run.yaml --image-type conda ``` 3. **Set the ENV variables by exporting them to the terminal**: diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 2787a93d5..f3a29b947 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -19,7 +19,7 @@ import yaml from prompt_toolkit import prompt from prompt_toolkit.completion import WordCompleter from prompt_toolkit.validation import Validator -from termcolor import cprint +from termcolor import colored, cprint from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.table import print_table @@ -389,6 +389,11 @@ def _run_stack_build_command_from_build_config( shutil.copy(path, run_config_file) cprint("Build Successful!", color="green") + cprint("You can find the newly-built template here: " + colored(template_path, "light_blue")) + cprint( + "You can run the new Llama Stack distro via: " + + colored(f"llama stack run {template_path} --image-type {build_config.image_type}", "light_blue") + ) return template_path else: return _generate_run_config(build_config, build_dir, image_name) From dc9443307299b8e98cc64883f21fee202ff1d1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 30 Apr 2025 20:35:49 +0200 Subject: [PATCH 5/5] feat(pre-commit): enhance pre-commit hooks with additional checks (#2014) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Add several new pre-commit hooks to improve code quality and security: - no-commit-to-branch: prevent direct commits to protected branches like `main` - check-yaml: validate YAML files - detect-private-key: prevent accidental commit of private keys - requirements-txt-fixer: maintain consistent requirements.txt format and sorting - mixed-line-ending: enforce LF line endings to avoid mixed line endings - check-executables-have-shebangs: ensure executable scripts have shebangs - check-json: validate JSON files - check-shebang-scripts-are-executable: verify shebang scripts are executable - check-symlinks: validate symlinks and report broken ones - check-toml: validate TOML files mainly for pyproject.toml The respective fixes have been included. 
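The new hooks can also be exercised locally before pushing; a minimal sketch, assuming the standard pre-commit CLI is already installed in the development environment:

```
# Register the hooks with git so they run on every commit
pre-commit install

# Run all configured hooks once against the entire repository
pre-commit run --all-files
```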
Signed-off-by: Sébastien Han --- .pre-commit-config.yaml | 12 + CHANGELOG.md | 562 +++++++++--------- docs/make.bat | 70 +-- docs/requirements.txt | 12 +- llama_stack/distribution/common.sh | 2 + llama_stack/distribution/ui/requirements.txt | 8 +- .../models/llama/llama4/tokenizer.model | 0 .../remote/inference/bedrock/__init__.py | 36 +- .../remote/inference/bedrock/config.py | 22 +- tests/verifications/generate_report.py | 2 + 10 files changed, 371 insertions(+), 355 deletions(-) mode change 100755 => 100644 llama_stack/models/llama/llama4/tokenizer.model diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ff3bc1250..42228d828 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,18 @@ repos: args: ['--maxkb=1000'] - id: end-of-file-fixer exclude: '^(.*\.svg)$' + - id: no-commit-to-branch + - id: check-yaml + args: ["--unsafe"] + - id: detect-private-key + - id: requirements-txt-fixer + - id: mixed-line-ending + args: [--fix=lf] # Forces to replace line ending by LF (line feed) + - id: check-executables-have-shebangs + - id: check-json + - id: check-shebang-scripts-are-executable + - id: check-symlinks + - id: check-toml - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 373e6b4fb..d6fc12c2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,10 +38,10 @@ Published on: 2025-04-05T23:13:00Z # v0.2.0 Published on: 2025-04-05T19:04:29Z -## Llama 4 Support - -Checkout more at https://www.llama.com - +## Llama 4 Support + +Checkout more at https://www.llama.com + --- @@ -49,58 +49,58 @@ Checkout more at https://www.llama.com # v0.1.9 Published on: 2025-03-29T00:52:23Z -### Build and Test Agents -* Agents: Entire document context with attachments -* RAG: Documentation with sqlite-vec faiss comparison -* Getting started: Fixes to getting started notebook. - -### Agent Evals and Model Customization -* (**New**) Post-training: Add nemo customizer - -### Better Engineering -* Moved sqlite-vec to non-blocking calls -* Don't return a payload on file delete - - +### Build and Test Agents +* Agents: Entire document context with attachments +* RAG: Documentation with sqlite-vec faiss comparison +* Getting started: Fixes to getting started notebook. + +### Agent Evals and Model Customization +* (**New**) Post-training: Add nemo customizer + +### Better Engineering +* Moved sqlite-vec to non-blocking calls +* Don't return a payload on file delete + + --- # v0.1.8 Published on: 2025-03-24T01:28:50Z -# v0.1.8 Release Notes - -### Build and Test Agents -* Safety: Integrated NVIDIA as a safety provider. -* VectorDB: Added Qdrant as an inline provider. -* Agents: Added support for multiple tool groups in agents. -* Agents: Simplified imports for Agents in client package - - -### Agent Evals and Model Customization -* Introduced DocVQA and IfEval benchmarks. - -### Deploying and Monitoring Agents -* Introduced a Containerfile and image workflow for the Playground. -* Implemented support for Bearer (API Key) authentication. -* Added attribute-based access control for resources. -* Fixes on docker deployments: use --pull always and standardized the default port to 8321 -* Deprecated: /v1/inspect/providers use /v1/providers/ instead - -### Better Engineering -* Consolidated scripts under the ./scripts directory. -* Addressed mypy violations in various modules. -* Added Dependabot scans for Python dependencies. -* Implemented a scheduled workflow to update the changelog automatically. 
-* Enforced concurrency to reduce CI loads. - - -### New Contributors -* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 -* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 -* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 -* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 - +# v0.1.8 Release Notes + +### Build and Test Agents +* Safety: Integrated NVIDIA as a safety provider. +* VectorDB: Added Qdrant as an inline provider. +* Agents: Added support for multiple tool groups in agents. +* Agents: Simplified imports for Agents in client package + + +### Agent Evals and Model Customization +* Introduced DocVQA and IfEval benchmarks. + +### Deploying and Monitoring Agents +* Introduced a Containerfile and image workflow for the Playground. +* Implemented support for Bearer (API Key) authentication. +* Added attribute-based access control for resources. +* Fixes on docker deployments: use --pull always and standardized the default port to 8321 +* Deprecated: /v1/inspect/providers use /v1/providers/ instead + +### Better Engineering +* Consolidated scripts under the ./scripts directory. +* Addressed mypy violations in various modules. +* Added Dependabot scans for Python dependencies. +* Implemented a scheduled workflow to update the changelog automatically. +* Enforced concurrency to reduce CI loads. + + +### New Contributors +* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 +* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 +* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 +* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 + **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8 --- @@ -108,73 +108,73 @@ Published on: 2025-03-24T01:28:50Z # v0.1.7 Published on: 2025-03-14T22:30:51Z -## 0.1.7 Release Notes - -### Build and Test Agents -* Inference: ImageType is now refactored to LlamaStackImageType -* Inference: Added tests to measure TTFT -* Inference: Bring back usage metrics -* Agents: Added endpoint for get agent, list agents and list sessions -* Agents: Automated conversion of type hints in client tool for lite llm format -* Agents: Deprecated ToolResponseMessage in agent.resume API -* Added Provider API for listing and inspecting provider info - -### Agent Evals and Model Customization -* Eval: Added new eval benchmarks Math 500 and BFCL v3 -* Deploy and Monitoring of Agents -* Telemetry: Fix tracing to work across coroutines - -### Better Engineering -* Display code coverage for unit tests -* Updated call sites (inference, tool calls, agents) to move to async non blocking calls -* Unit tests also run on Python 3.11, 3.12, and 3.13 -* Added ollama inference to Integration tests CI -* Improved documentation across examples, testing, CLI, updated providers table ) - - - +## 0.1.7 Release Notes + +### Build and Test Agents +* Inference: ImageType is now refactored to LlamaStackImageType +* Inference: Added tests to measure TTFT +* Inference: Bring back usage metrics +* Agents: Added endpoint for get agent, list agents and list sessions +* Agents: Automated conversion of type hints in client tool for lite llm format +* Agents: Deprecated ToolResponseMessage in agent.resume 
API +* Added Provider API for listing and inspecting provider info + +### Agent Evals and Model Customization +* Eval: Added new eval benchmarks Math 500 and BFCL v3 +* Deploy and Monitoring of Agents +* Telemetry: Fix tracing to work across coroutines + +### Better Engineering +* Display code coverage for unit tests +* Updated call sites (inference, tool calls, agents) to move to async non blocking calls +* Unit tests also run on Python 3.11, 3.12, and 3.13 +* Added ollama inference to Integration tests CI +* Improved documentation across examples, testing, CLI, updated providers table ) + + + --- # v0.1.6 Published on: 2025-03-08T04:35:08Z -## 0.1.6 Release Notes - -### Build and Test Agents -* Inference: Fixed support for inline vllm provider -* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) -* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples -* Agent: Unify tools and Python SDK Agents API -* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls -* Agent: Support python functions without @client_tool decorator as client tools -* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format -* VectorIO: MilvusDB support added - -### Agent Evals and Model Customization -* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) -* Eval: Documentation for eval, scoring, adding new benchmarks -* Eval: Distribution template to run benchmarks on llama & non-llama models -* Eval: Ability to register new custom LLM-as-judge scoring functions -* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. 
- -### Deploy and Monitoring of Agents -* Better support for different log levels across all components for better monitoring - -### Better Engineering -* Enhance OpenAPI spec to include Error types across all APIs -* Moved all tests to /tests and created unit tests to run on each PR -* Removed all dependencies on llama-models repo - +## 0.1.6 Release Notes + +### Build and Test Agents +* Inference: Fixed support for inline vllm provider +* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) +* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples +* Agent: Unify tools and Python SDK Agents API +* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls +* Agent: Support python functions without @client_tool decorator as client tools +* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format +* VectorIO: MilvusDB support added + +### Agent Evals and Model Customization +* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) +* Eval: Documentation for eval, scoring, adding new benchmarks +* Eval: Distribution template to run benchmarks on llama & non-llama models +* Eval: Ability to register new custom LLM-as-judge scoring functions +* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. + +### Deploy and Monitoring of Agents +* Better support for different log levels across all components for better monitoring + +### Better Engineering +* Enhance OpenAPI spec to include Error types across all APIs +* Moved all tests to /tests and created unit tests to run on each PR +* Removed all dependencies on llama-models repo + --- # v0.1.5.1 Published on: 2025-02-28T22:37:44Z -## 0.1.5.1 Release Notes -* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 - +## 0.1.5.1 Release Notes +* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 + **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1 --- @@ -182,176 +182,176 @@ Published on: 2025-02-28T22:37:44Z # v0.1.5 Published on: 2025-02-28T18:14:01Z -## 0.1.5 Release Notes -### Build Agents -* Inference: Support more non-llama models (openai, anthropic, gemini) -* Inference: Can use the provider's model name in addition to the HF alias -* Inference: Fixed issues with calling tools that weren't specified in the prompt -* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling -* Embeddings: Added support for Nemo retriever embedding models -* Tools: Added support for MCP tools in Ollama Distribution -* Distributions: Added new Groq distribution - -### Customize Models -* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model - -### Monitor agents -* More comprehensive logging of agent steps including client tools -* Telemetry inputs/outputs are now structured and queryable -* Ability to retrieve agents session, 
turn, step by ids - -### Better Engineering -* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin -* Move most logging to use logger instead of prints -* Completed text /chat-completion and /completion tests - +## 0.1.5 Release Notes +### Build Agents +* Inference: Support more non-llama models (openai, anthropic, gemini) +* Inference: Can use the provider's model name in addition to the HF alias +* Inference: Fixed issues with calling tools that weren't specified in the prompt +* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling +* Embeddings: Added support for Nemo retriever embedding models +* Tools: Added support for MCP tools in Ollama Distribution +* Distributions: Added new Groq distribution + +### Customize Models +* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model + +### Monitor agents +* More comprehensive logging of agent steps including client tools +* Telemetry inputs/outputs are now structured and queryable +* Ability to retrieve agents session, turn, step by ids + +### Better Engineering +* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin +* Move most logging to use logger instead of prints +* Completed text /chat-completion and /completion tests + --- # v0.1.4 Published on: 2025-02-25T00:02:43Z -## v0.1.4 Release Notes -Here are the key changes coming as part of this release: - -### Build and Test Agents -* Inference: Added support for non-llama models -* Inference: Added option to list all downloaded models and remove models -* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn -* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides -* Agent: Added logging for agent step start and completion times -* Agent: Added support for logging for tool execution metadata -* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs -* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults -* VectorIO: Improved performance of sqlite-vec using chunked writes -### Agent Evals and Model Customization -* Deprecated api /eval-tasks. 
Use /eval/benchmark instead -* Added CPU training support for TorchTune -### Deploy and Monitoring of Agents -* Consistent view of client and server tool calls in telemetry -### Better Engineering -* Made tests more data-driven for consistent evaluation -* Fixed documentation links and improved API reference generation -* Various small fixes for build scripts and system reliability - - +## v0.1.4 Release Notes +Here are the key changes coming as part of this release: + +### Build and Test Agents +* Inference: Added support for non-llama models +* Inference: Added option to list all downloaded models and remove models +* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn +* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides +* Agent: Added logging for agent step start and completion times +* Agent: Added support for logging for tool execution metadata +* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs +* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults +* VectorIO: Improved performance of sqlite-vec using chunked writes +### Agent Evals and Model Customization +* Deprecated api /eval-tasks. Use /eval/benchmark instead +* Added CPU training support for TorchTune +### Deploy and Monitoring of Agents +* Consistent view of client and server tool calls in telemetry +### Better Engineering +* Made tests more data-driven for consistent evaluation +* Fixed documentation links and improved API reference generation +* Various small fixes for build scripts and system reliability + + --- # v0.1.3 Published on: 2025-02-14T20:24:32Z -## v0.1.3 Release - -Here are some key changes that are coming as part of this release. - -### Build and Test Agents -Streamlined the initial development experience -- Added support for llama stack run --image-type venv -- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration -- vLLM improvements for tool calling and logprobs -- Better handling of sporadic code_interpreter tool calls - -### Agent Evals -Better benchmarking and Agent performance assessment -- Renamed eval API /eval-task to /benchmarks -- Improved documentation and notebooks for RAG and evals - -### Deploy and Monitoring of Agents -Improved production readiness -- Added usage metrics collection for chat completions -- CLI improvements for provider information -- Improved error handling and system reliability -- Better model endpoint handling and accessibility -- Improved signal handling on distro server - -### Better Engineering -Infrastructure and code quality improvements -- Faster text-based chat completion tests -- Improved testing for non-streaming agent apis -- Standardized import formatting with ruff linter -- Added conventional commits standard -- Fixed documentation parsing issues - +## v0.1.3 Release + +Here are some key changes that are coming as part of this release. 
+
+### Build and Test Agents
+Streamlined the initial development experience
+- Added support for llama stack run --image-type venv
+- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration
+- vLLM improvements for tool calling and logprobs
+- Better handling of sporadic code_interpreter tool calls
+
+### Agent Evals
+Better benchmarking and Agent performance assessment
+- Renamed eval API /eval-task to /benchmarks
+- Improved documentation and notebooks for RAG and evals
+
+### Deploy and Monitoring of Agents
+Improved production readiness
+- Added usage metrics collection for chat completions
+- CLI improvements for provider information
+- Improved error handling and system reliability
+- Better model endpoint handling and accessibility
+- Improved signal handling on distro server
+
+### Better Engineering
+Infrastructure and code quality improvements
+- Faster text-based chat completion tests
+- Improved testing for non-streaming agent apis
+- Standardized import formatting with ruff linter
+- Added conventional commits standard
+- Fixed documentation parsing issues
+
 ---
 # v0.1.2
 Published on: 2025-02-07T22:06:49Z
 
-# TL;DR
-- Several stabilizations to development flows after the switch to `uv`
-- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops)
-- Added automated rebuilds for ReadTheDocs
-- Llama Stack server supports HTTPS
-- Added system prompt overrides support
-- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan )
-
+# TL;DR
+- Several stabilizations to development flows after the switch to `uv`
+- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops)
+- Added automated rebuilds for ReadTheDocs
+- Llama Stack server supports HTTPS
+- Added system prompt overrides support
+- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan )
+
 ---
 # v0.1.1
 Published on: 2025-02-02T02:29:24Z
 
-A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements.
-
+A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements.
+
 ---
 # v0.1.0
 Published on: 2025-01-24T17:47:47Z
 
-We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions.
-
-## Context
-GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open.
-
-Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models.
-
-With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.
-
-## Release
-After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.
-
-There are example standalone apps in llama-stack-apps.
-
-
-## Key Features of this release
-
-- **Unified API Layer**
-  - Inference: Run LLM models
-  - RAG: Store and retrieve knowledge for RAG
-  - Agents: Build multi-step agentic workflows
-  - Tools: Register tools that can be called by the agent
-  - Safety: Apply content filtering and safety policies
-  - Evaluation: Test model and agent quality
-  - Telemetry: Collect and analyze usage data and complex agentic traces
-  - Post Training ( Coming Soon ): Fine tune models for specific use cases
-
-- **Rich Provider Ecosystem**
-  - Local Development: Meta's Reference, Ollama
-  - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras
-  - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI
-  - On-device: iOS and Android support
-
-- **Built for Production**
-  - Pre-packaged distributions for common deployment scenarios
-  - Backwards compatibility across model versions
-  - Comprehensive evaluation capabilities
-  - Full observability and monitoring
-
-- **Multiple developer interfaces**
-  - CLI: Command line interface
-  - Python SDK
-  - Swift iOS SDK
-  - Kotlin Android SDK
-
-- **Sample llama stack applications**
-  - Python
-  - iOS
-  - Android
-
-
+We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions.
+
+## Context
+GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open.
+
+Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models.
+
+With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.
+
+## Release
+After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.
+
+There are example standalone apps in llama-stack-apps.
+
+
+## Key Features of this release
+
+- **Unified API Layer**
+  - Inference: Run LLM models
+  - RAG: Store and retrieve knowledge for RAG
+  - Agents: Build multi-step agentic workflows
+  - Tools: Register tools that can be called by the agent
+  - Safety: Apply content filtering and safety policies
+  - Evaluation: Test model and agent quality
+  - Telemetry: Collect and analyze usage data and complex agentic traces
+  - Post Training ( Coming Soon ): Fine tune models for specific use cases
+
+- **Rich Provider Ecosystem**
+  - Local Development: Meta's Reference, Ollama
+  - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras
+  - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI
+  - On-device: iOS and Android support
+
+- **Built for Production**
+  - Pre-packaged distributions for common deployment scenarios
+  - Backwards compatibility across model versions
+  - Comprehensive evaluation capabilities
+  - Full observability and monitoring
+
+- **Multiple developer interfaces**
+  - CLI: Command line interface
+  - Python SDK
+  - Swift iOS SDK
+  - Kotlin Android SDK
+
+- **Sample llama stack applications**
+  - Python
+  - iOS
+  - Android
+
+
 ---
@@ -365,8 +365,8 @@ Published on: 2025-01-22T22:24:01Z
 # v0.0.63
 Published on: 2024-12-18T07:17:43Z
 
-A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially.
-
+A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially.
+
 **Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63
 
 ---
@@ -402,39 +402,39 @@ Published on: 2024-11-22T00:36:09Z
 # v0.0.53
 Published on: 2024-11-20T22:18:00Z
 
-🚀 Initial Release Notes for Llama Stack!
-
-### Added
-- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
-- Persistence for registered objects with distribution
-- Ability to persist memory banks created for FAISS
-- PostgreSQL KVStore implementation
-- Environment variable placeholder support in run.yaml files
-- Comprehensive Zero-to-Hero notebooks and quickstart guides
-- Support for quantized models in Ollama
-- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
-- Bedrock distribution with safety shields support
-- Evals API with task registration and scoring functions
-- MMLU and SimpleQA benchmark scoring functions
-- Huggingface dataset provider integration for benchmarks
-- Support for custom dataset registration from local paths
-- Benchmark evaluation CLI tools with visualization tables
-- RAG evaluation scoring functions and metrics
-- Local persistence for datasets and eval tasks
-
-### Changed
-- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
-- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
-- Updated API signatures for dataset and eval task registration
-- Restructured folder organization for providers
-- Enhanced Docker build configuration
-- Added version prefixing for REST API routes
-- Enhanced evaluation task registration workflow
-- Improved benchmark evaluation output formatting
-- Restructured evals folder organization for better modularity
-
-### Removed
-- `llama stack configure` command
-
+🚀 Initial Release Notes for Llama Stack!
+
+### Added
+- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
+- Persistence for registered objects with distribution
+- Ability to persist memory banks created for FAISS
+- PostgreSQL KVStore implementation
+- Environment variable placeholder support in run.yaml files
+- Comprehensive Zero-to-Hero notebooks and quickstart guides
+- Support for quantized models in Ollama
+- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
+- Bedrock distribution with safety shields support
+- Evals API with task registration and scoring functions
+- MMLU and SimpleQA benchmark scoring functions
+- Huggingface dataset provider integration for benchmarks
+- Support for custom dataset registration from local paths
+- Benchmark evaluation CLI tools with visualization tables
+- RAG evaluation scoring functions and metrics
+- Local persistence for datasets and eval tasks
+
+### Changed
+- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
+- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
+- Updated API signatures for dataset and eval task registration
+- Restructured folder organization for providers
+- Enhanced Docker build configuration
+- Added version prefixing for REST API routes
+- Enhanced evaluation task registration workflow
+- Improved benchmark evaluation output formatting
+- Restructured evals folder organization for better modularity
+
+### Removed
+- `llama stack configure` command
+
 ---
diff --git a/docs/make.bat b/docs/make.bat
index 32bb24529..954237b9b 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,35 +1,35 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.https://www.sphinx-doc.org/
-	exit /b 1
-)
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
index e31d08ff1..6cd45c33b 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,16 +1,16 @@
-sphinx==8.1.3
-myst-parser
 linkify
+myst-parser
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
-sphinx-rtd-theme>=1.0.0
-sphinx_autobuild
+sphinx==8.1.3
 sphinx-copybutton
 sphinx-design
 sphinx-pdj-theme
-sphinx_rtd_dark_mode
+sphinx-rtd-theme>=1.0.0
 sphinx-tabs
+sphinx_autobuild
+sphinx_rtd_dark_mode
+sphinxcontrib-mermaid
 sphinxcontrib-openapi
 sphinxcontrib-redoc
-sphinxcontrib-mermaid
 sphinxcontrib-video
 tomli
diff --git a/llama_stack/distribution/common.sh b/llama_stack/distribution/common.sh
index 15220048b..5f764bcca 100755
--- a/llama_stack/distribution/common.sh
+++ b/llama_stack/distribution/common.sh
@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt
index 61d42768d..53a1e7bf3 100644
--- a/llama_stack/distribution/ui/requirements.txt
+++ b/llama_stack/distribution/ui/requirements.txt
@@ -1,5 +1,5 @@
-streamlit
-pandas
-llama-stack-client>=0.2.1
-streamlit-option-menu
 llama-stack>=0.2.1
+llama-stack-client>=0.2.1
+pandas
+streamlit
+streamlit-option-menu
diff --git a/llama_stack/models/llama/llama4/tokenizer.model b/llama_stack/models/llama/llama4/tokenizer.model
old mode 100755
new mode 100644
diff --git a/llama_stack/providers/remote/inference/bedrock/__init__.py b/llama_stack/providers/remote/inference/bedrock/__init__.py
index e72c6ada9..4d98f4999 100644
--- a/llama_stack/providers/remote/inference/bedrock/__init__.py
+++ b/llama_stack/providers/remote/inference/bedrock/__init__.py
@@ -1,18 +1,18 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from .config import BedrockConfig
-
-
-async def get_adapter_impl(config: BedrockConfig, _deps):
-    from .bedrock import BedrockInferenceAdapter
-
-    assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
-
-    impl = BedrockInferenceAdapter(config)
-
-    await impl.initialize()
-
-    return impl
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .config import BedrockConfig
+
+
+async def get_adapter_impl(config: BedrockConfig, _deps):
+    from .bedrock import BedrockInferenceAdapter
+
+    assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
+
+    impl = BedrockInferenceAdapter(config)
+
+    await impl.initialize()
+
+    return impl
diff --git a/llama_stack/providers/remote/inference/bedrock/config.py b/llama_stack/providers/remote/inference/bedrock/config.py
index f2e8930be..5961a2f15 100644
--- a/llama_stack/providers/remote/inference/bedrock/config.py
+++ b/llama_stack/providers/remote/inference/bedrock/config.py
@@ -1,11 +1,11 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
-
-
-class BedrockConfig(BedrockBaseConfig):
-    pass
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+
+
+class BedrockConfig(BedrockBaseConfig):
+    pass
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index bdaea3ebf..54d46ef41 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #