forked from phoenix-oss/llama-stack-mirror
update distributions compose/readme (#338)
* readme updates
* quantized compose
* dell tgi
* config update

parent 985ff4d6ce
commit a70a4706fc

14 changed files with 219 additions and 31 deletions

@@ -73,7 +73,7 @@ docker run --rm -it -v $HOME/.cache/huggingface:/data -p 5009:5009 --gpus all gh
 #### Start Llama Stack server pointing to TGI server

 ```
-docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack-local-cpu --yaml_config /root/my-run.yaml
+docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
 ```

 Make sure in your `run.yaml` file, your inference provider is pointing to the correct TGI server endpoint, e.g.
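The hunk cuts off just before the README's sample config. As a point of reference, here is a minimal sketch of the `inference` provider entry in `run.yaml` pointed at the TGI container mapped to port 5009 above; the provider id and exact field names are assumptions and may differ across llama-stack versions:

```yaml
# Sketch of the inference section of run.yaml (provider id and field
# names are assumptions; check the schema of your llama-stack version).
inference:
  - provider_id: tgi0              # arbitrary local name for the provider
    provider_type: remote::tgi     # remote adapter that proxies to TGI
    config:
      url: http://127.0.0.1:5009   # TGI server started above (-p 5009:5009)
```

Since the server container runs with `--network host`, it can reach a TGI endpoint on `127.0.0.1` directly.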

@@ -38,7 +38,7 @@ services:
     depends_on:
       text-generation-inference:
         condition: service_healthy
-    image: llamastack/llamastack-local-cpu
+    image: llamastack/distribution-tgi
     network_mode: "host"
     volumes:
       - ~/.llama:/root/.llama
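Reassembled from the hunk context, the edited service block looks roughly like the sketch below; the sibling `text-generation-inference` service and the exact indentation are assumptions, not part of the diff:

```yaml
# Sketch of the llamastack service after this change. The surrounding
# file, including the text-generation-inference service it depends on,
# is assumed from the hunk context.
services:
  llamastack:
    depends_on:
      text-generation-inference:
        condition: service_healthy   # wait for TGI's healthcheck to pass
    image: llamastack/distribution-tgi
    network_mode: "host"
    volumes:
      - ~/.llama:/root/.llama        # host dir with model checkpoints/config
```

With `condition: service_healthy`, `docker compose up` starts the Llama Stack container only after TGI's healthcheck passes.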