Convert ollama to the new model

2025-12-17 11:32:36 +00:00 · 2024-11-17 15:19:55 -08:00 · 2024-11-17 15:19:55 -08:00 · a061f3f8c1
commit a061f3f8c1
parent 028530546f
14 changed files with 379 additions and 113 deletions
--- a/llama_stack/providers/remote/inference/ollama/config.py
+++ b/llama_stack/providers/remote/inference/ollama/config.py
@@ -4,62 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import List
+from typing import Any, Dict

 from llama_stack.distribution.datatypes import RemoteProviderConfig
-from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig


 DEFAULT_OLLAMA_PORT = 11434


 class OllamaImplConfig(RemoteProviderConfig):
-    port: int = DEFAULT_OLLAMA_PORT
+    port: int

    @classmethod
-    def sample_docker_compose_services(cls) -> List[DockerComposeServiceConfig]:
-        return [
-            DockerComposeServiceConfig(
-                service_name="ollama",
-                image="ollama/ollama:latest",
-                volumes=["$HOME/.ollama:/root/.ollama"],
-                devices=["nvidia.com/gpu=all"],
-                deploy={
-                    "resources": {
-                        "reservations": {
-                            "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
-                        }
-                    }
-                },
-                runtime="nvidia",
-                ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
-                healthcheck={
-                    "test": ["CMD", "curl", "-f", "http://ollama:11434"],
-                    "interval": "10s",
-                    "timeout": "5s",
-                    "retries": 5,
-                },
-            ),
-            DockerComposeServiceConfig(
-                service_name="ollama-init",
-                image="ollama/ollama",
-                depends_on={"ollama": {"condition": "service_healthy"}},
-                environment={
-                    "OLLAMA_HOST": "ollama",
-                    "OLLAMA_MODELS": "${OLLAMA_MODELS}",
-                },
-                volumes=["ollama_data:/root/.ollama"],
-                entrypoint=(
-                    'sh -c \'max_attempts=30;attempt=0;echo "Waiting for Ollama server...";'
-                    "until curl -s http://ollama:11434 > /dev/null; do"
-                    "attempt=$((attempt + 1));"
-                    "if [ $attempt -ge $max_attempts ]; then"
-                    'echo "Timeout waiting for Ollama server";'
-                    "exit 1;"
-                    "fi;"
-                    'echo "Attempt $attempt: Server not ready yet...";'
-                    "sleep 5;"
-                    "done'"
-                ),
-            ),
-        ]
+    def sample_run_config(
+        cls, port_str: str = str(DEFAULT_OLLAMA_PORT)
+    ) -> Dict[str, Any]:
+        return {"port": port_str}