forked from phoenix-oss/llama-stack-mirror
update distributions compose/readme (#338)
* readme updates
* quantized compose
* dell tgi
* config update

parent 985ff4d6ce
commit a70a4706fc

14 changed files with 219 additions and 31 deletions

@@ -73,7 +73,7 @@ docker run --rm -it -v $HOME/.cache/huggingface:/data -p 5009:5009 --gpus all gh
 #### Start Llama Stack server pointing to TGI server

 ```
-docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack-local-cpu --yaml_config /root/my-run.yaml
+docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
 ```

 Make sure in your `run.yaml` file, your inference provider is pointing to the correct TGI server endpoint, e.g.
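The hunk cuts off just before the README's sample config. As a point of reference, here is a minimal sketch of the `inference` provider entry in `run.yaml` pointed at the TGI container mapped to port 5009 above; the provider id and exact field names are assumptions and may differ across llama-stack versions:

```yaml
# Sketch of the inference section of run.yaml (provider id and field
# names are assumptions; check the schema of your llama-stack version).
inference:
  - provider_id: tgi0              # arbitrary local name for the provider
    provider_type: remote::tgi     # remote adapter that proxies to TGI
    config:
      url: http://127.0.0.1:5009   # TGI server started above (-p 5009:5009)
```

Since the server container runs with `--network host`, it can reach a TGI endpoint on `127.0.0.1` directly.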

@@ -38,7 +38,7 @@ services:
     depends_on:
       text-generation-inference:
         condition: service_healthy
-    image: llamastack/llamastack-local-cpu
+    image: llamastack/distribution-tgi
     network_mode: "host"
     volumes:
       - ~/.llama:/root/.llama
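Reassembled from the hunk context, the edited service block looks roughly like the sketch below; the sibling `text-generation-inference` service and the exact indentation are assumptions, not part of the diff:

```yaml
# Sketch of the llamastack service after this change. The surrounding
# file, including the text-generation-inference service it depends on,
# is assumed from the hunk context.
services:
  llamastack:
    depends_on:
      text-generation-inference:
        condition: service_healthy   # wait for TGI's healthcheck to pass
    image: llamastack/distribution-tgi
    network_mode: "host"
    volumes:
      - ~/.llama:/root/.llama        # host dir with model checkpoints/config
```

With `condition: service_healthy`, `docker compose up` starts the Llama Stack container only after TGI's healthcheck passes.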