distro readmes with model serving instructions (#339)

* readme updates

* quantized compose

* dell tgi

* config update

* readme

* update model serving readmes

* update

* update

* config
This commit is contained in:
Xi Yan 2024-10-28 17:47:14 -07:00 committed by GitHub
parent a70a4706fc
commit ae671eaf7a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 136 additions and 4 deletions

@@ -84,3 +84,19 @@ memory:
```
3. Run `docker compose up` with the updated `run.yaml` file.
### Serving a new model
You may change `config.model` in `run.yaml` to update the model served by the distribution. Make sure the corresponding model checkpoint has been downloaded under `~/.llama`.
```
inference:
- provider_id: meta0
provider_type: meta-reference
config:
model: Llama3.2-11B-Vision-Instruct
quantization: null
torch_seed: null
max_seq_len: 4096
max_batch_size: 1
```
Run `llama model list` to see the models available for download, and `llama model download` to fetch the checkpoints.
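The `config.model` swap above amounts to rewriting one key under the inference provider's `config`. A minimal sketch of that structure as a plain Python dict (the `serve_model` helper is hypothetical, for illustration only; in practice you edit `run.yaml` directly):

```python
# Mirrors the inference section of run.yaml shown above.
inference_config = {
    "provider_id": "meta0",
    "provider_type": "meta-reference",
    "config": {
        "model": "Llama3.2-11B-Vision-Instruct",
        "quantization": None,
        "torch_seed": None,
        "max_seq_len": 4096,
        "max_batch_size": 1,
    },
}

def serve_model(cfg: dict, model_id: str) -> dict:
    """Return a copy of cfg pointing config.model at a different checkpoint.

    Hypothetical helper: only the nested "model" key changes; all other
    provider settings are preserved.
    """
    return {**cfg, "config": {**cfg["config"], "model": model_id}}
```

Only the `model` value changes; `quantization`, `max_seq_len`, and the rest carry over untouched, which is exactly what editing the single line in `run.yaml` does.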

@@ -36,6 +36,15 @@ providers:
- provider_id: meta0
provider_type: meta-reference
config: {}
# Uncomment to use pgvector
# - provider_id: pgvector
# provider_type: remote::pgvector
# config:
# host: 127.0.0.1
# port: 5432
# db: postgres
# user: postgres
# password: mysecretpassword
agents:
- provider_id: meta0
provider_type: meta-reference
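Uncommenting the pgvector block above assumes a Postgres instance with the pgvector extension is reachable at the configured host and port. A minimal way to provision one locally, matching the credentials in the snippet (the `pgvector/pgvector` community image and `pg16` tag are assumptions; use whatever Postgres-with-pgvector image you prefer):

```shell
# Start a local Postgres with pgvector, matching the commented config above.
docker run -d --name pgvector \
  -e POSTGRES_USER=postgres \
  -e POSTGRES_PASSWORD=mysecretpassword \
  -e POSTGRES_DB=postgres \
  -p 5432:5432 \
  pgvector/pgvector:pg16
```

With the container running, the commented provider block can be uncommented as-is, since host, port, db, user, and password all line up.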