mirror of https://github.com/meta-llama/llama-stack.git
add more distro templates (#279)
* verify dockers
* together distro verified
* readme
* fireworks distro
* fireworks compose up
* fireworks verified
This commit is contained in: parent cf27d19dd5, commit 4d2bd2d39e
18 changed files with 265 additions and 42 deletions
@@ -71,10 +71,10 @@ ollama run <model_id>

 **Via Docker**
 ```
-docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./ollama-run.yaml:/root/llamastack-run-ollama.yaml --gpus=all llamastack-local-cpu --yaml_config /root/llamastack-run-ollama.yaml
+docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./gpu/run.yaml:/root/llamastack-run-ollama.yaml --gpus=all distribution-ollama --yaml_config /root/llamastack-run-ollama.yaml
 ```

-Make sure in your `ollama-run.yaml` file, your inference provider is pointing to the correct Ollama endpoint. E.g.
+Make sure in your `run.yaml` file, your inference provider is pointing to the correct Ollama endpoint. E.g.
 ```
 inference:
   - provider_id: ollama0
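The hunk above cuts off after the first line of the `inference` example. As a rough sketch only (not part of this diff), the relevant section of `run.yaml` pointed at a local Ollama server might look like the following; the `provider_type` value and the `url` config field are assumptions and should be checked against the template shipped with the distribution.

```
inference:
  - provider_id: ollama0
    provider_type: remote::ollama     # assumed provider type name
    config:
      url: http://127.0.0.1:11434     # assumed field; point at your running Ollama endpoint
```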
@@ -1,4 +1,4 @@
-name: distribution-ollama
+name: ollama
 distribution_spec:
   description: Use ollama for running LLM inference
   providers:

@@ -10,4 +10,4 @@ distribution_spec:
     safety: meta-reference
     agents: meta-reference
     telemetry: meta-reference
-image_type: conda
+image_type: docker
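Purely as a hedged reconstruction of the build config that the two hunks above touch (the `inference` and `memory` provider entries below are assumptions, since they are not visible in this excerpt), the file after the change would read roughly:

```
name: ollama
distribution_spec:
  description: Use ollama for running LLM inference
  providers:
    inference: remote::ollama   # assumed; not shown in the hunks above
    memory: meta-reference      # assumed; not shown in the hunks above
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: docker
```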
@@ -33,7 +33,7 @@ services:
     volumes:
       - ~/.llama:/root/.llama
       # Link to ollama run.yaml file
-      - ./ollama-run.yaml:/root/llamastack-run-ollama.yaml
+      - ./run.yaml:/root/llamastack-run-ollama.yaml
     ports:
       - "5000:5000"
     # Hack: wait for ollama server to start before starting docker
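For reference, this compose file is normally brought up from the directory that contains both it and the `run.yaml` it mounts; the directory layout below is an assumption, not something stated in this diff.

```
# run from the distribution directory holding compose.yaml and run.yaml (layout assumed)
docker compose up
```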