Mirror of https://github.com/meta-llama/llama-stack.git

Merge 79805c607f into 09abdb0a37

commit d82aa39336
10 changed files with 38 additions and 25 deletions
@@ -278,7 +278,7 @@ After this step is successful, you should be able to find the built container im
 ```
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   localhost/distribution-ollama:dev \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
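The hunk's context line mentions finding the built container image after the build step; one quick check before running the command above is to list it by repository name. A minimal sketch — the repository name is taken from the hunk itself, nothing else is assumed:

```
# Sketch: confirm the locally built image exists before running it.
docker images localhost/distribution-ollama
```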
@@ -291,7 +291,7 @@ Here are the docker flags and their uses:

 * `-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT`: Maps the container port to the host port for accessing the server

-* `-v ~/.llama:/root/.llama`: Mounts the local .llama directory to persist configurations and data
+* `-v ~/.llama:/.llama`: Mounts the local .llama directory to persist configurations and data

 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run

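Since this change moves the bind-mount target from `/root/.llama` to `/.llama`, a quick sanity check is to list that path inside a running container and confirm it mirrors the host's `~/.llama`. A minimal sketch, assuming the container started from the command above is still running; the `ancestor` filter and single-match assumption are illustrative:

```
# Sketch: compare host and container views of the mounted directory.
ls ~/.llama
docker exec "$(docker ps -q --filter 'ancestor=localhost/distribution-ollama:dev')" ls -la /.llama
```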
@@ -68,9 +68,9 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-watsonx \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env WATSONX_API_KEY=$WATSONX_API_KEY \
   --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
@@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 #### Start Llama Stack server pointing to TGI server

 ```
-docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/.llama/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /.llama/my-run.yaml
 ```

 Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
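The context line above reminds you to point the inference provider in `run.yaml` at the correct TGI endpoint. A rough pre-flight check from the host, using `$DEH_URL` (the same endpoint variable the Dell commands below pass via `--env`) as a stand-in for whatever URL you put in `run.yaml`; any HTTP status in the output just means the host and port are reachable:

```
# Sketch: confirm the TGI endpoint answers before starting Llama Stack.
# DEH_URL is assumed to hold the same base URL you set in run.yaml.
curl -s -o /dev/null -w 'TGI endpoint returned HTTP %{http_code}\n' "$DEH_URL"
```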
@@ -125,7 +125,7 @@ docker run -it \
   --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
+  -v $HOME/.llama:/.llama \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
   -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
@@ -152,10 +152,10 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -v $HOME/.llama:/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/.llama/my-run.yaml \
   llamastack/distribution-dell \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env DEH_URL=$DEH_URL \
@@ -83,7 +83,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -97,7 +97,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
@@ -145,9 +145,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
@@ -89,7 +89,7 @@ following command:
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
   --env OLLAMA_URL=http://host.docker.internal:11434
@@ -109,7 +109,7 @@ Linux users having issues running the above command should instead try the follo
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   --network=host \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
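For context on this Linux variant: `--network=host` puts the container in the host's network namespace, so Ollama can usually be reached at plain `localhost` instead of `host.docker.internal`, and the `-p` mapping has no effect. A sketch of the full command under that assumption (port 11434 is Ollama's default; this is illustrative, not a quote from the file):

```
# Sketch: host-networking variant; -p is omitted because it is ignored
# with --network=host, and OLLAMA_URL points at the host's loopback.
docker run -it \
  --pull always \
  -v ~/.llama:/.llama \
  --network=host \
  llamastack/distribution-starter \
  --port $LLAMA_STACK_PORT \
  --env OLLAMA_URL=http://localhost:11434
```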
@@ -333,6 +333,25 @@ fi
 RUN pip uninstall -y uv
 EOF

+# Add non-root user setup before entrypoint
+add_to_container << EOF
+
+# Create group with GID 1001 and user with UID 1001
+RUN groupadd -g 1001 appgroup && useradd -u 1001 -g appgroup -M appuser
+
+# Create necessary directories with appropriate permissions for UID 1001
+RUN mkdir -p /.llama /.cache && chown -R 1001:1001 /.llama /.cache && chmod -R 775 /.llama /.cache && chmod -R g+w /app
+
+# Set the Llama Stack config directory environment variable to use /.llama
+ENV LLAMA_STACK_CONFIG_DIR=/.llama
+
+# This prevents dual storage while keeping /app as working directory for CI compatibility
+ENV HOME=/
+
+# Switch to non-root user (UID 1001 directly)
+USER 1001
+EOF
+
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
 add_to_container << EOF
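A cheap way to verify that an image produced with this block actually drops root is to override the entrypoint and print the effective UID/GID plus a write test against `/.llama`. A minimal sketch, assuming the base image ships a POSIX `sh` and using the `localhost/distribution-ollama:dev` tag from the first hunk as a stand-in for whatever image you build:

```
# Expect uid=1001(appuser) gid=1001(appgroup) and a writable /.llama.
docker run --rm --entrypoint sh localhost/distribution-ollama:dev \
  -c 'id && touch /.llama/.write-test && echo "/.llama is writable"'
```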
@@ -345,12 +364,6 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat
 EOF
 fi

-# Add other require item commands genearic to all containers
-add_to_container << EOF
-
-RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
-EOF
-
 printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
 cat "$TEMP_DIR"/Containerfile
 printf "\n"
@@ -71,7 +71,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -85,7 +85,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
@@ -117,9 +117,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
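Since every `--config` example in this commit now mounts the run config at `/.llama/my-run.yaml` instead of `/root/my-run.yaml`, a final pre-flight check is to make sure the bind-mounted file is visible at the new path before starting the server. A sketch against the NVIDIA image named in an earlier hunk; any of the distributions above would behave the same way:

```
# Sketch: confirm the mounted config resolves at the new non-root path.
docker run --rm \
  -v ./run.yaml:/.llama/my-run.yaml \
  --entrypoint ls \
  llamastack/distribution-nvidia -l /.llama/my-run.yaml
```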