From b1d119466e02b8cfe7ffc2181814bf20d5a2be4e Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Sun, 17 Nov 2024 19:33:48 -0800
Subject: [PATCH] Allow setting environment variables from llama stack run and
 fix ollama

---
 distributions/ollama/run-with-safety.yaml     | 12 ++++-----
 distributions/ollama/run.yaml                 | 12 ++++-----
 .../remote-vllm/run-with-safety.yaml          |  8 +++---
 distributions/remote-vllm/run.yaml            |  8 +++---
 distributions/tgi/run-with-safety.yaml        |  8 +++---
 distributions/tgi/run.yaml                    |  8 +++---
 .../self_hosted_distro/ollama.md              |  2 +-
 llama_stack/cli/stack/build.py                | 20 +++++++++-----
 llama_stack/cli/stack/run.py                  | 19 +++++++++++++
 llama_stack/distribution/build_container.sh   |  2 ++
 llama_stack/distribution/start_conda_env.sh   | 25 ++++++++++++++++-
 llama_stack/distribution/start_container.sh   | 27 +++++++++++++++++--
 .../remote/inference/ollama/config.py         | 12 ++++-----
 .../remote/inference/ollama/ollama.py         |  2 +-
 .../providers/remote/inference/tgi/config.py  |  2 +-
 .../providers/remote/inference/vllm/config.py |  1 +
 llama_stack/templates/ollama/build.yaml       |  2 +-
 llama_stack/templates/ollama/ollama.py        | 12 ++++-----
 llama_stack/templates/template.py             |  2 +-
 19 files changed, 129 insertions(+), 55 deletions(-)

diff --git a/distributions/ollama/run-with-safety.yaml b/distributions/ollama/run-with-safety.yaml
index 4582ab800..32eb30eea 100644
--- a/distributions/ollama/run-with-safety.yaml
+++ b/distributions/ollama/run-with-safety.yaml
@@ -1,20 +1,20 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.405618
+built_at: 2024-11-17 19:33:00
 image_name: ollama
-docker_image: llamastack/distribution-ollama:test-0.0.52rc3
+docker_image: null
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      port: ${env.OLLAMA_PORT}
+      url: ${env.OLLAMA_URL:http://localhost:11434}
   memory:
   - provider_id: faiss
     provider_type: inline::faiss
diff --git a/distributions/ollama/run.yaml b/distributions/ollama/run.yaml
index b6d411614..110b63b0c 100644
--- a/distributions/ollama/run.yaml
+++ b/distributions/ollama/run.yaml
@@ -1,20 +1,20 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.395495
+built_at: 2024-11-17 19:33:00
 image_name: ollama
-docker_image: llamastack/distribution-ollama:test-0.0.52rc3
+docker_image: null
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      port: ${env.OLLAMA_PORT}
+      url: ${env.OLLAMA_URL:http://localhost:11434}
   memory:
   - provider_id: faiss
     provider_type: inline::faiss
diff --git a/distributions/remote-vllm/run-with-safety.yaml b/distributions/remote-vllm/run-with-safety.yaml
index 43eb955d7..4f4cce415 100644
--- a/distributions/remote-vllm/run-with-safety.yaml
+++ b/distributions/remote-vllm/run-with-safety.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.405727
+built_at: 2024-11-17 19:33:00
 image_name: remote-vllm
 docker_image: llamastack/distribution-remote-vllm:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: vllm-inference
diff --git a/distributions/remote-vllm/run.yaml b/distributions/remote-vllm/run.yaml
index 4552e6571..4617a48d2 100644
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.395327
+built_at: 2024-11-17 19:33:00
 image_name: remote-vllm
 docker_image: llamastack/distribution-remote-vllm:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: vllm-inference
diff --git a/distributions/tgi/run-with-safety.yaml b/distributions/tgi/run-with-safety.yaml
index d8a4619f6..35828a37a 100644
--- a/distributions/tgi/run-with-safety.yaml
+++ b/distributions/tgi/run-with-safety.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:09.184709
+built_at: 2024-11-17 19:33:00
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: tgi-inference
diff --git a/distributions/tgi/run.yaml b/distributions/tgi/run.yaml
index 1d01c8ea8..fea2f5da8 100644
--- a/distributions/tgi/run.yaml
+++ b/distributions/tgi/run.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:09.156305
+built_at: 2024-11-17 19:33:00
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: tgi-inference
diff --git a/docs/source/getting_started/distributions/self_hosted_distro/ollama.md b/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
index 37c5851ab..9f3757301 100644
--- a/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
-- `OLLAMA_PORT`: Port of the Ollama server (default: `14343`)
+- `OLLAMA_URL`: URL of the Ollama server (default: `http://host.docker.internal:11434`)
 - `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
 
 ### Models
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index 94d41cfab..f7631bca3 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -217,15 +217,23 @@ class StackBuild(Subcommand):
                 provider_types = [provider_types]
 
             for i, provider_type in enumerate(provider_types):
-                p_spec = Provider(
-                    provider_id=f"{provider_type}-{i}",
-                    provider_type=provider_type,
-                    config={},
-                )
+                pid = provider_type.split("::")[-1]
+
                 config_type = instantiate_class_type(
                     provider_registry[Api(api)][provider_type].config_class
                 )
-                p_spec.config = config_type()
+                if hasattr(config_type, "sample_run_config"):
+                    config = config_type.sample_run_config(
+                        __distro_dir__=f"distributions/{build_config.name}"
+                    )
+                else:
+                    config = {}
+
+                p_spec = Provider(
+                    provider_id=f"{pid}-{i}" if len(provider_types) > 1 else pid,
+                    provider_type=provider_type,
+                    config=config,
+                )
                 run_config.providers[api].append(p_spec)
 
         os.makedirs(build_dir, exist_ok=True)
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 842703d4c..961d02d5b 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -39,6 +39,13 @@
             help="Disable IPv6 support",
             default=False,
         )
+        self.parser.add_argument(
+            "--env",
+            action="append",
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", + default=[], + metavar="KEY=VALUE", + ) def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: from pathlib import Path @@ -97,4 +104,16 @@ class StackRun(Subcommand): if args.disable_ipv6: run_args.append("--disable-ipv6") + for env_var in args.env: + if "=" not in env_var: + self.parser.error( + f"Environment variable '{env_var}' must be in KEY=VALUE format" + ) + return + key, value = env_var.split("=", 1) # split on first = only + if not key: + self.parser.error(f"Environment variable '{env_var}' has empty key") + return + run_args.extend(["--env", f"{key}={value}"]) + run_with_pty(run_args) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 0764fee62..139883618 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -146,6 +146,8 @@ fi # Set version tag based on PyPI version if [ -n "$TEST_PYPI_VERSION" ]; then version_tag="test-$TEST_PYPI_VERSION" +elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_MODELS_DIR" ]]; then + version_tag="dev" else URL="https://pypi.org/pypi/llama-stack/json" version_tag=$(curl -s $URL | jq -r '.info.version') diff --git a/llama_stack/distribution/start_conda_env.sh b/llama_stack/distribution/start_conda_env.sh index 3d91564b8..56e921d13 100755 --- a/llama_stack/distribution/start_conda_env.sh +++ b/llama_stack/distribution/start_conda_env.sh @@ -33,10 +33,33 @@ shift port="$1" shift +# Process environment variables from --env arguments +env_vars="" +while [[ $# -gt 0 ]]; do + case "$1" in + --env) + + if [[ -n "$2" ]]; then + # collect environment variables so we can set them after activating the conda env + env_vars="$env_vars $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + shift + ;; + esac +done + eval "$(conda shell.bash hook)" conda deactivate && conda activate "$env_name" -$CONDA_PREFIX/bin/python \ +set -x +$env_vars \ + $CONDA_PREFIX/bin/python \ -m llama_stack.distribution.server.server \ --yaml_config "$yaml_config" \ --port "$port" "$@" diff --git a/llama_stack/distribution/start_container.sh b/llama_stack/distribution/start_container.sh index 1efb76fb9..c56606826 100755 --- a/llama_stack/distribution/start_container.sh +++ b/llama_stack/distribution/start_container.sh @@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then fi build_name="$1" -docker_image="distribution-$build_name" +docker_image="localhost/distribution-$build_name" shift yaml_config="$1" @@ -40,6 +40,26 @@ shift port="$1" shift +# Process environment variables from --env arguments +env_vars="" +while [[ $# -gt 0 ]]; do + case "$1" in + --env) + echo "env = $2" + if [[ -n "$2" ]]; then + env_vars="$env_vars -e $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + shift + ;; + esac +done + set -x if command -v selinuxenabled &> /dev/null && selinuxenabled; then @@ -59,15 +79,18 @@ fi version_tag="latest" if [ -n "$PYPI_VERSION" ]; then version_tag="$PYPI_VERSION" +elif [ -n "$LLAMA_STACK_DIR" ]; then + version_tag="dev" elif [ -n "$TEST_PYPI_VERSION" ]; then version_tag="test-$TEST_PYPI_VERSION" fi $DOCKER_BINARY run $DOCKER_OPTS -it \ -p $port:$port \ + $env_vars \ -v "$yaml_config:/app/config.yaml" \ $mounts \ $docker_image:$version_tag \ python -m llama_stack.distribution.server.server \ --yaml_config /app/config.yaml \ - --port $port "$@" + 
--port "$port" diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 09900ecf2..ad16cac62 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -6,17 +6,17 @@ from typing import Any, Dict -from llama_stack.distribution.datatypes import RemoteProviderConfig +from pydantic import BaseModel -DEFAULT_OLLAMA_PORT = 11434 +DEFAULT_OLLAMA_URL = "http://localhost:11434" -class OllamaImplConfig(RemoteProviderConfig): - port: int +class OllamaImplConfig(BaseModel): + url: str = DEFAULT_OLLAMA_URL @classmethod def sample_run_config( - cls, port_str: str = str(DEFAULT_OLLAMA_PORT) + cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs ) -> Dict[str, Any]: - return {"port": port_str} + return {"url": url} diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 3b3f3868b..27bf0088e 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -82,7 +82,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): return AsyncClient(host=self.url) async def initialize(self) -> None: - print("Initializing Ollama, checking connectivity to server...") + print(f"checking connectivity to Ollama at `{self.url}`...") try: await self.client.ps() except httpx.ConnectError as e: diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py index 4441b1352..55bda4179 100644 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ b/llama_stack/providers/remote/inference/tgi/config.py @@ -21,7 +21,7 @@ class TGIImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, url: str = "${env.TGI_URL}"): + def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs): return { "url": url, } diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index e1d932c87..a3a4c6930 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -29,6 +29,7 @@ class VLLMInferenceAdapterConfig(BaseModel): def sample_run_config( cls, url: str = "${env.VLLM_URL}", + **kwargs, ): return { "url": url, diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 45ab2a6e5..106449309 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -2,7 +2,7 @@ version: '2' name: ollama distribution_spec: description: Use (an external) Ollama server for running LLM inference - docker_image: llamastack/distribution-ollama:test-0.0.52rc3 + docker_image: null providers: inference: - remote::ollama diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 0c45f8dc1..deb254c80 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -23,9 +23,7 @@ def get_distribution_template() -> DistributionTemplate: inference_provider = Provider( provider_id="ollama", provider_type="remote::ollama", - config=OllamaImplConfig.sample_run_config( - port_str="${env.OLLAMA_PORT}", - ), + config=OllamaImplConfig.sample_run_config(), ) inference_model = ModelInput( @@ -41,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate: name="ollama", 
distro_type="self_hosted", description="Use (an external) Ollama server for running LLM inference", - docker_image="llamastack/distribution-ollama:test-0.0.52rc3", + docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=[inference_model, safety_model], @@ -74,9 +72,9 @@ def get_distribution_template() -> DistributionTemplate: "meta-llama/Llama-3.2-3B-Instruct", "Inference model loaded into the TGI server", ), - "OLLAMA_PORT": ( - "14343", - "Port of the Ollama server", + "OLLAMA_URL": ( + "http://host.docker.internal:11434", + "URL of the Ollama server", ), "SAFETY_MODEL": ( "meta-llama/Llama-Guard-3-1B", diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 2074f19c3..feddadb9a 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -87,7 +87,7 @@ class RunConfigSettings(BaseModel): return StackRunConfig( image_name=name, docker_image=docker_image, - built_at=datetime.now(), + built_at=datetime.now().strftime("%Y-%m-%d %H:%M"), apis=list(apis), providers=provider_configs, metadata_store=SqliteKVStoreConfig.sample_run_config(