diff --git a/distributions/fireworks/README.md b/distributions/fireworks/README.md
new file mode 100644
index 000000000..fcf74d809
--- /dev/null
+++ b/distributions/fireworks/README.md
@@ -0,0 +1,55 @@
+# Fireworks Distribution
+
+The `llamastack/distribution-fireworks` distribution consists of the following provider configurations.
+
+
+| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
+|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- |
+| **Provider(s)** | remote::fireworks | meta-reference | meta-reference | meta-reference | meta-reference |
+
+
+### Start the Distribution (Single Node CPU)
+
+> [!NOTE]
+> This assumes you have a hosted endpoint at Fireworks with an API key.
+
+```
+$ cd llama-stack/distribution/fireworks
+$ ls
+compose.yaml run.yaml
+$ docker compose up
+```
+
+Make sure that in your `run.yaml` file, your inference provider is pointing to the correct Fireworks server endpoint, e.g.
+```
+inference:
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference
+      api_key:
+```
+
+### (Alternative) llama stack run (Single Node GPU)
+
+```
+docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-fireworks --yaml_config /root/my-run.yaml
+```
+
+Make sure that in your `run.yaml` file, your inference provider is pointing to the correct Fireworks server endpoint, e.g.
+```
+inference:
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference
+      api_key:
+```
+
+**Via Conda**
+
+```bash
+llama stack build --config ./build.yaml
+# -- modify run.yaml to a valid Fireworks server endpoint
+llama stack run ./run.yaml
+```
diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml
index 831643ff1..2e5cf0753 100644
--- a/distributions/fireworks/build.yaml
+++ b/distributions/fireworks/build.yaml
@@ -7,4 +7,4 @@ distribution_spec:
     safety: meta-reference
     agents: meta-reference
     telemetry: meta-reference
-image_type: conda
+image_type: docker
diff --git a/distributions/fireworks/compose.yaml b/distributions/fireworks/compose.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/distributions/fireworks/run.yaml b/distributions/fireworks/run.yaml
new file mode 100644
index 000000000..c48b0cb7b
--- /dev/null
+++ b/distributions/fireworks/run.yaml
@@ -0,0 +1,46 @@
+version: '2'
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
+apis:
+- shields
+- agents
+- models
+- memory
+- memory_banks
+- inference
+- safety
+providers:
+  inference:
+  - provider_id: fireworks0
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference
+  safety:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  memory:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
+  agents:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: ~/.llama/runtime/kvstore.db
+  telemetry:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
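Before pointing the `url` and `api_key` fields above at Fireworks, it can help to confirm that the endpoint and key actually respond. Below is a minimal sketch of such a check against Fireworks' OpenAI-compatible chat completions route; `FIREWORKS_API_KEY` and the model ID are placeholders chosen for illustration, not values taken from this distribution.

```bash
# Sanity-check the Fireworks endpoint and API key before wiring them into run.yaml.
# Assumes FIREWORKS_API_KEY is exported and that your account has access to the example model.
curl https://api.fireworks.ai/inference/v1/chat/completions \
  -H "Authorization: Bearer $FIREWORKS_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
        "messages": [{"role": "user", "content": "Hello"}]
      }'
```

If this returns a JSON completion, the same base URL (https://api.fireworks.ai/inference) and key should work when dropped into `run.yaml`.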
diff --git a/distributions/together/README.md b/distributions/together/README.md
index f5e591004..227c7a450 100644
--- a/distributions/together/README.md
+++ b/distributions/together/README.md
@@ -23,13 +23,14 @@ compose.yaml run.yaml
 $ docker compose up
 ```
 
-Replace in `run.yaml` file with your TGI endpoint.
+Make sure that in your `run.yaml` file, your inference provider is pointing to the correct Together server endpoint, e.g.
 ```
 inference:
-  - provider_id: tgi0
-    provider_type: remote::tgi
+  - provider_id: together
+    provider_type: remote::together
     config:
-      url:
+      url: https://api.together.xyz/v1
+      api_key:
 ```
 
 ### (Alternative) TGI server + llama stack run (Single Node GPU)
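The same kind of check applies to the Together endpoint configured above. A minimal sketch, again assuming a placeholder key variable (`TOGETHER_API_KEY`) and an illustrative model name:

```bash
# Verify the Together endpoint and key before starting the stack.
# TOGETHER_API_KEY and the model ID below are placeholders for illustration.
curl https://api.together.xyz/v1/chat/completions \
  -H "Authorization: Bearer $TOGETHER_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "messages": [{"role": "user", "content": "Hello"}]
      }'
```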