Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-21 20:18:52 +00:00)
update distributions compose/readme (#338)
* readme updates
* quantized compose
* dell tgi
* config update
parent 985ff4d6ce · commit a70a4706fc
14 changed files with 219 additions and 31 deletions
@@ -50,7 +50,7 @@ compose.yaml run.yaml
 $ docker compose up
 ```

-### (Alternative) ollama run + llama stack Run
+### (Alternative) ollama run + llama stack run

 If you wish to separately spin up a Ollama server, and connect with Llama Stack, you may use the following commands.
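For orientation, the alternative flow the renamed heading describes is a two-step sequence: start Ollama on its own, then start Llama Stack against it. A minimal sketch follows; the model tag and the run.yaml path are illustrative assumptions, not taken from this commit.

```
# Step 1: start an Ollama server with a model (example tag; substitute your own)
ollama run llama3.1:8b-instruct-fp16

# Step 2: in another terminal, start the Llama Stack server from a local install,
# pointing it at a run.yaml whose inference provider targets the Ollama server
llama stack run ./gpu/run.yaml
```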
@@ -71,7 +71,7 @@ ollama run <model_id>

 **Via Docker**
 ```
-docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./gpu/run.yaml:/root/llamastack-run-ollama.yaml --gpus=all distribution-ollama --yaml_config /root/llamastack-run-ollama.yaml
+docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./gpu/run.yaml:/root/llamastack-run-ollama.yaml --gpus=all llamastack/distribution-ollama --yaml_config /root/llamastack-run-ollama.yaml
 ```

 Make sure in you `run.yaml` file, you inference provider is pointing to the correct Ollama endpoint. E.g.
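The "E.g." at the end of that context line is cut off by the hunk boundary. Purely as an illustration of the kind of entry it refers to, here is a sketch of an inference provider block pointing at a local Ollama endpoint; the provider_id, provider_type, and config field names are assumptions that may differ between llama-stack versions, though 11434 is Ollama's default port.

```
# Illustrative run.yaml fragment (not part of this commit):
# the inference provider should point at the running Ollama server.
providers:
  inference:
    - provider_id: ollama0            # assumed name
      provider_type: remote::ollama   # assumed provider type
      config:
        url: http://127.0.0.1:11434   # Ollama's default listen address
```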
@@ -10,7 +10,7 @@ services:
   llamastack:
     depends_on:
       - ollama
-    image: llamastack/llamastack-local-cpu
+    image: llamastack/distribution-ollama
     network_mode: "host"
     volumes:
       - ~/.llama:/root/.llama
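Pieced together from the context lines, this (presumably CPU) compose.yaml's llamastack service would read roughly as follows after the image swap; the ollama service body is elided and its image name is an assumption.

```
# Approximate shape after this hunk is applied (sketch, not the full file)
services:
  ollama:
    image: ollama/ollama:latest        # assumption: upstream Ollama image
  llamastack:
    depends_on:
      - ollama
    image: llamastack/distribution-ollama
    network_mode: "host"
    volumes:
      - ~/.llama:/root/.llama
```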
@@ -25,10 +25,10 @@ services:
               # reservation.
               capabilities: [gpu]
     runtime: nvidia
-  llamastack-local-cpu:
+  llamastack:
     depends_on:
       - ollama
-    image: llamastack/llamastack-local-cpu
+    image: llamastack/distribution-ollama
     network_mode: "host"
     volumes:
       - ~/.llama:/root/.llama
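For readers unfamiliar with Compose GPU reservations, the context lines above sit inside a deploy/resources/reservations block on the ollama service, with runtime: nvidia alongside it, and the renamed llamastack service follows. The nesting below is a sketch; the driver and image values outside the hunk are assumptions.

```
# Rough nesting around this hunk (sketch; only relevant fields shown)
services:
  ollama:
    image: ollama/ollama:latest          # assumption: upstream Ollama image
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia             # illustrative GPU reservation
              capabilities: [gpu]        # context line from the hunk
    runtime: nvidia                      # context line from the hunk
  llamastack:                            # renamed from llamastack-local-cpu
    depends_on:
      - ollama
    image: llamastack/distribution-ollama
    network_mode: "host"
    volumes:
      - ~/.llama:/root/.llama
```

With this pair of services defined, bringing the stack up is the single `docker compose up` shown in the README hunk at the top of this commit.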