diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index 0dbabf8aa..521071cc6 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -260,7 +260,41 @@ Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM pyth
 You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
 ```
 
-After this step is successful, you should be able to find the built container image and test it with `llama stack run `.
+Now set some environment variables for the inference model ID and the Llama Stack port, and create a local directory to mount into the container's file system.
+```
+export INFERENCE_MODEL="llama3.2:3b"
+export LLAMA_STACK_PORT=8321
+mkdir -p ~/.llama
+```
+
+After this step is successful, you should be able to find the built container image and test it with the Docker command below:
+
+```
+docker run -d \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  localhost/distribution-ollama:dev \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env OLLAMA_URL=http://host.docker.internal:11434
+```
+
+Here is what each flag and argument does:
+
+* `-d`: Runs the container in detached mode, as a background process
+
+* `-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT`: Maps the container port to the host port so you can reach the server
+
+* `-v ~/.llama:/root/.llama`: Mounts the local `~/.llama` directory into the container to persist configuration and data
+
+* `localhost/distribution-ollama:dev`: The name and tag of the container image to run
+
+* `--port $LLAMA_STACK_PORT`: The port number for the server to listen on
+
+* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference
+
+* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service
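+
+If you are using Podman instead of Docker, a similar command should work. Podman typically exposes the host as `host.containers.internal` rather than `host.docker.internal`, so adjust the Ollama URL accordingly. The sketch below assumes the same image tag and environment variables as above:
+
+```
+# Sketch: Podman equivalent of the Docker command above
+# (assumes Podman resolves host.containers.internal to the host)
+podman run -d \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  localhost/distribution-ollama:dev \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env OLLAMA_URL=http://host.containers.internal:11434
+```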
+
 :::
 
 ::::
diff --git a/llama_stack/distribution/start_stack.sh b/llama_stack/distribution/start_stack.sh
index 74a3a89ac..85bfceec4 100755
--- a/llama_stack/distribution/start_stack.sh
+++ b/llama_stack/distribution/start_stack.sh
@@ -7,10 +7,6 @@
 # the root directory of this source tree.
 
-CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
-CONTAINER_OPTS=${CONTAINER_OPTS:-}
-LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
-LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
 PYPI_VERSION=${PYPI_VERSION:-}
 VIRTUAL_ENV=${VIRTUAL_ENV:-}
 
@@ -132,31 +128,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
     $env_vars \
     $other_args
 elif [[ "$env_type" == "container" ]]; then
-  # Determine the internal container address based on container runtime
-  if [ "$CONTAINER_BINARY" = "docker" ]; then
-    internal_host="host.docker.internal"
-  elif [ "$CONTAINER_BINARY" = "podman" ]; then
-    internal_host="host.containers.internal"
-  else
-    internal_host="localhost"
-  fi
-  echo -e "${RED}Warning: Llama Stack no longer supports running Container.${NC}"
-  echo -e "Please use one of the following alternatives:"
-  echo -e "1. Use venv or conda environments"
-  echo -e "2. Run the container directly with Docker/Podman"
-  echo -e "\nExample $CONTAINER_BINARY command for ollama distribution:"
-  echo -e "$CONTAINER_BINARY run \\"
-  echo -e "  -it \\"
-  echo -e "  --network host \\"
-  echo -e "  -p $port:$port \\"
-  echo -e "  -v :/app/run.yaml \\"
-  echo -e "  --entrypoint python \\"
-  echo -e "  localhost/distribution-ollama: \\"
-  echo -e "  -m llama_stack.distribution.server.server \\"
-  echo -e "  --config /app/run.yaml \\"
-  echo -e "  --env INFERENCE_MODEL=\"llama3.2:3b\" \\"
-  echo -e "  --env LLAMA_STACK_PORT= \\"
-  echo -e "  --env OLLAMA_URL=\"http://$internal_host:11434\""
-  echo -e "\nExiting..."
+  echo -e "${RED}Warning: Llama Stack no longer supports running containers via the 'llama stack run' command.${NC}"
+  echo -e "Please refer to the documentation for more information: https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html#llama-stack-build"
   exit 1
 fi