Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-17 09:32:36 +00:00
more progress on auto-generation

parent cfa913fdd5
commit e4509cb568

10 changed files with 309 additions and 73 deletions
@@ -14,9 +14,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
     persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
-            "persistence_store": SqliteKVStoreConfig.sample_dict(
+            "persistence_store": SqliteKVStoreConfig.sample_run_config(
                 db_name="agents_store.db"
            ),
         }
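Both sides of this hunk compose: the agents sample delegates to the kvstore sample it embeds. A minimal illustration of the call, assuming both config classes are imported from their provider modules:

# Illustrative only: the agents sample nests the sqlite kvstore sample.
sample = MetaReferenceAgentsImplConfig.sample_run_config()
print(sample["persistence_store"]["type"])  # -> "sqlite"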
@@ -35,7 +35,7 @@ class VLLMConfig(BaseModel):
     )
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
             "tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",
@@ -11,9 +11,3 @@ from pydantic import BaseModel
 
 class LlamaGuardConfig(BaseModel):
     excluded_categories: List[str] = []
-
-    @classmethod
-    def sample_dict(cls):
-        return {
-            "excluded_categories": [],
-        }
@@ -4,11 +4,34 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from typing import Optional
+
 from llama_stack.distribution.datatypes import RemoteProviderConfig
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_OLLAMA_PORT = 11434
 
 
 class OllamaImplConfig(RemoteProviderConfig):
-    port: int = 11434
+    port: int = DEFAULT_OLLAMA_PORT
+
+    @classmethod
+    def sample_docker_compose_config(cls) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="ollama/ollama:latest",
+            volumes=["$HOME/.ollama:/root/.ollama"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
+        )
 
 
 async def get_adapter_impl(config: RemoteProviderConfig, _deps):
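sample_docker_compose_config describes how to run the Ollama server container alongside the stack. Rendered into a compose document it would look roughly like this; a sketch assuming pydantic v2's model_dump and PyYAML, since the commit's actual compose generator is not shown in this diff:

import yaml  # PyYAML assumed available

svc = OllamaImplConfig.sample_docker_compose_config()
# exclude_none drops the optional fields left unset above.
doc = {"services": {"ollama": svc.model_dump(exclude_none=True)}}
print(yaml.safe_dump(doc, sort_keys=False))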
@@ -9,6 +9,11 @@ from typing import Optional
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_VLLM_PORT = 8000
+
 
 @json_schema_type
 class VLLMInferenceAdapterConfig(BaseModel):
@@ -26,10 +31,50 @@ class VLLMInferenceAdapterConfig(BaseModel):
     )
 
     @classmethod
-    def sample_dict(cls):
+    # TODO: we may need two modes, one for conda and one for docker
+    def sample_run_config(
+        cls,
+        url: str = "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+    ):
         return {
-            "url": "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+            "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
         }
+
+    @classmethod
+    def sample_docker_compose_config(
+        cls,
+        port: int = DEFAULT_VLLM_PORT,
+        cuda_visible_devices: str = "0",
+        model: str = "meta-llama/Llama-3.2-3B-Instruct",
+    ) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="vllm/vllm-openai:latest",
+            volumes=["$HOME/.cache/huggingface:/root/.cache/huggingface"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{port}:{port}"],
+            environment={
+                "CUDA_VISIBLE_DEVICES": cuda_visible_devices,
+                "HUGGING_FACE_HUB_TOKEN": "$HF_TOKEN",
+            },
+            command=(
+                " ".join(
+                    [
+                        "--gpu-memory-utilization 0.75",
+                        f"--model {model}",
+                        "--enforce-eager",
+                        "--max-model-len 8192",
+                        "--max-num-seqs 16",
+                        f"--port {port}",
+                    ]
+                )
+            ),
+        )
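The new url parameter is what the TODO is about: the default points at host.docker.internal for the docker mode, while a conda/local mode can pass a localhost URL instead. An illustrative call, where the URL value is an example rather than a project default:

# Conda-style override; max_tokens and api_token keep their env placeholders.
sample = VLLMInferenceAdapterConfig.sample_run_config(
    url="${env.VLLM_URL:http://localhost:8000/v1}"
)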
llama_stack/providers/utils/docker/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
llama_stack/providers/utils/docker/service_config.py (new file, 29 lines)

@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class LiteralString(str):
+    pass  # Marker class for strings we want to format with >
+
+
+class DockerComposeServiceConfig(BaseModel):
+    """Configuration for a single service in docker-compose."""
+
+    image: str
+    volumes: Optional[List[str]] = None
+    network_mode: str = "bridge"
+    ports: Optional[List[str]] = None
+    devices: Optional[List[str]] = None
+    environment: Optional[Dict[str, str]] = None
+    command: Optional[str] = None
+    depends_on: Optional[List[str]] = None
+    deploy: Optional[Dict[str, Any]] = None
+    runtime: Optional[str] = None
+    entrypoint: Optional[str] = None
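DockerComposeServiceConfig gives every provider a common shape for the compose auto-generation; LiteralString presumably exists so that long values such as command can later be emitted in YAML block or folded style. A hypothetical assembler showing how per-provider service configs could be merged into one compose file; the function name, structure, and pydantic v2 model_dump are all assumptions of this sketch:

from typing import Dict

import yaml  # PyYAML assumed

from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig

def build_compose_file(services: Dict[str, DockerComposeServiceConfig]) -> str:
    # Collect each provider's service entry under "services", dropping
    # unset optional fields so the output stays compact.
    doc = {
        "services": {
            name: cfg.model_dump(exclude_none=True)
            for name, cfg in services.items()
        }
    }
    return yaml.safe_dump(doc, sort_keys=False)

# Example wiring of the two providers shown above:
# build_compose_file({
#     "ollama": OllamaImplConfig.sample_docker_compose_config(),
#     "vllm": VLLMInferenceAdapterConfig.sample_docker_compose_config(port=5100),
# })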
@@ -37,7 +37,7 @@ class RedisKVStoreConfig(CommonConfig):
         return f"redis://{self.host}:{self.port}"
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "type": "redis",
             "namespace": None,
@@ -54,7 +54,7 @@ class SqliteKVStoreConfig(CommonConfig):
     )
 
     @classmethod
-    def sample_dict(cls, db_name: str = "kvstore.db"):
+    def sample_run_config(cls, db_name: str = "kvstore.db"):
         return {
             "type": "sqlite",
             "namespace": None,
@@ -72,7 +72,7 @@ class PostgresKVStoreConfig(CommonConfig):
     table_name: str = "llamastack_kvstore"
 
     @classmethod
-    def sample_dict(cls, table_name: str = "llamastack_kvstore"):
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
         return {
             "type": "postgres",
             "namespace": None,