Fix issue when generating distros (#755)

Addresses the review comment at
https://github.com/meta-llama/llama-stack/pull/723#issuecomment-2581902075.

cc @yanxi0830 

I am not 100% sure the diff is correct, but it is the result of running
`python llama_stack/scripts/distro_codegen.py`.
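
For context, the codegen step conceptually works like the sketch below. This is a simplified, hypothetical illustration, not the actual `distro_codegen.py`: the `TEMPLATE_DEPS` data, the `write_dependencies` helper, and the output path are made-up stand-ins for this example.

```python
# Hypothetical sketch of a distro-codegen pass; NOT the actual
# llama_stack/scripts/distro_codegen.py. TEMPLATE_DEPS and
# write_dependencies() are illustrative stand-ins.
import json
from pathlib import Path

# Assumed input: each distro template contributes a list of pip packages.
TEMPLATE_DEPS = {
    "bedrock": ["aiosqlite", "autoevals", "blobfile", "boto3"],
    "hf-serverless": ["aiohttp", "aiosqlite", "huggingface_hub"],
}


def write_dependencies(out_path: Path) -> None:
    # Dump with stable formatting so reruns yield deterministic diffs.
    # The key order is whatever order the templates are discovered in,
    # which is why a regeneration can reorder the whole file.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(TEMPLATE_DEPS, indent=2) + "\n")


if __name__ == "__main__":
    write_dependencies(Path("distributions/dependencies.json"))
```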

---------

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
Commit: 300e6e2702 (parent: 52a21ce78f)
Author: Yuan Tang
Date: 2025-01-15 08:34:08 -05:00
Committed by: GitHub
3 changed files with 133 additions and 143 deletions

distributions/dependencies.json

@@ -1,9 +1,9 @@
 {
-  "hf-serverless": [
-    "aiohttp",
+  "bedrock": [
     "aiosqlite",
     "autoevals",
     "blobfile",
+    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -11,103 +11,6 @@
     "fastapi",
     "fire",
     "httpx",
-    "huggingface_hub",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "together": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "together",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "vllm-gpu": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "vllm",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "remote-vllm": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
     "matplotlib",
     "nltk",
     "numpy",
@@ -162,7 +65,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "tgi": [
+  "hf-endpoint": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -196,11 +99,11 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "bedrock": [
+  "hf-serverless": [
+    "aiohttp",
     "aiosqlite",
     "autoevals",
     "blobfile",
-    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -208,6 +111,7 @@
     "fastapi",
     "fire",
     "httpx",
+    "huggingface_hub",
     "matplotlib",
     "nltk",
     "numpy",
@@ -309,35 +213,6 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "cerebras": [
-    "aiosqlite",
-    "blobfile",
-    "cerebras_cloud_sdk",
-    "chardet",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
@@ -372,7 +247,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "hf-endpoint": [
+  "tgi": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -405,5 +280,130 @@
     "uvicorn",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "together": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "cerebras": [
+    "aiosqlite",
+    "blobfile",
+    "cerebras_cloud_sdk",
+    "chardet",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "remote-vllm": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "vllm-gpu": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "vllm",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
   ]
 }
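
If it helps review, the regenerated file can be sanity-checked with a short script like this (a minimal sketch, assuming the JSON above lives at `distributions/dependencies.json` in the repo root):

```python
# Hedged sanity check over the regenerated dependencies file; the path
# is an assumption based on the diff above.
import json

with open("distributions/dependencies.json") as f:
    deps = json.load(f)

# Every distro entry should be a non-empty list of pip requirement strings.
for distro, packages in deps.items():
    assert isinstance(packages, list) and packages, f"{distro} has no dependencies"

# Spot-check two of the entries reordered by this diff.
assert "boto3" in deps["bedrock"]
assert "aiohttp" in deps["hf-serverless"]
print(f"{len(deps)} distros validated")
```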

docs/source/distributions/self_hosted_distro/cerebras.md

@@ -1,15 +1,5 @@
----
-orphan: true
----
 # Cerebras Distribution
-
-```{toctree}
-:maxdepth: 2
-:hidden:
-
-self
-```
 
 The `llamastack/distribution-cerebras` distribution consists of the following provider configurations.
 
 | API | Provider(s) |

llama_stack/templates/remote-vllm/vllm.py

@@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
             "Inference model loaded into the vLLM server",
         ),
         "VLLM_URL": (
-            "http://host.docker.internal:5100}/v1",
+            "http://host.docker.internal:5100/v1",
             "URL of the vLLM server with the main inference model",
         ),
         "MAX_TOKENS": (