mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 13:52:38 +00:00
Convert ollama to the new model
This commit is contained in:
parent
028530546f
commit
a061f3f8c1
14 changed files with 379 additions and 113 deletions
|
|
@ -4,62 +4,19 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import List
|
||||
from typing import Any, Dict
|
||||
|
||||
from llama_stack.distribution.datatypes import RemoteProviderConfig
|
||||
from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
|
||||
|
||||
|
||||
DEFAULT_OLLAMA_PORT = 11434
|
||||
|
||||
|
||||
class OllamaImplConfig(RemoteProviderConfig):
|
||||
port: int = DEFAULT_OLLAMA_PORT
|
||||
port: int
|
||||
|
||||
@classmethod
|
||||
def sample_docker_compose_services(cls) -> List[DockerComposeServiceConfig]:
|
||||
return [
|
||||
DockerComposeServiceConfig(
|
||||
service_name="ollama",
|
||||
image="ollama/ollama:latest",
|
||||
volumes=["$HOME/.ollama:/root/.ollama"],
|
||||
devices=["nvidia.com/gpu=all"],
|
||||
deploy={
|
||||
"resources": {
|
||||
"reservations": {
|
||||
"devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
|
||||
}
|
||||
}
|
||||
},
|
||||
runtime="nvidia",
|
||||
ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
|
||||
healthcheck={
|
||||
"test": ["CMD", "curl", "-f", "http://ollama:11434"],
|
||||
"interval": "10s",
|
||||
"timeout": "5s",
|
||||
"retries": 5,
|
||||
},
|
||||
),
|
||||
DockerComposeServiceConfig(
|
||||
service_name="ollama-init",
|
||||
image="ollama/ollama",
|
||||
depends_on={"ollama": {"condition": "service_healthy"}},
|
||||
environment={
|
||||
"OLLAMA_HOST": "ollama",
|
||||
"OLLAMA_MODELS": "${OLLAMA_MODELS}",
|
||||
},
|
||||
volumes=["ollama_data:/root/.ollama"],
|
||||
entrypoint=(
|
||||
'sh -c \'max_attempts=30;attempt=0;echo "Waiting for Ollama server...";'
|
||||
"until curl -s http://ollama:11434 > /dev/null; do"
|
||||
"attempt=$((attempt + 1));"
|
||||
"if [ $attempt -ge $max_attempts ]; then"
|
||||
'echo "Timeout waiting for Ollama server";'
|
||||
"exit 1;"
|
||||
"fi;"
|
||||
'echo "Attempt $attempt: Server not ready yet...";'
|
||||
"sleep 5;"
|
||||
"done'"
|
||||
),
|
||||
),
|
||||
]
|
||||
def sample_run_config(
|
||||
cls, port_str: str = str(DEFAULT_OLLAMA_PORT)
|
||||
) -> Dict[str, Any]:
|
||||
return {"port": port_str}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue