Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-17 09:32:36 +00:00
more progress on auto-generation

parent cfa913fdd5
commit e4509cb568

10 changed files with 309 additions and 73 deletions
@@ -14,9 +14,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
     persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
-            "persistence_store": SqliteKVStoreConfig.sample_dict(
+            "persistence_store": SqliteKVStoreConfig.sample_run_config(
                 db_name="agents_store.db"
            ),
         }
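Both sides of this hunk compose: the agents sample delegates to the kvstore sample it embeds. A minimal illustration of the call, assuming both config classes are imported from their provider modules:

# Illustrative only: the agents sample nests the sqlite kvstore sample.
sample = MetaReferenceAgentsImplConfig.sample_run_config()
print(sample["persistence_store"]["type"])  # -> "sqlite"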
@@ -35,7 +35,7 @@ class VLLMConfig(BaseModel):
     )
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
             "tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",
@@ -11,9 +11,3 @@ from pydantic import BaseModel
 
 class LlamaGuardConfig(BaseModel):
     excluded_categories: List[str] = []
-
-    @classmethod
-    def sample_dict(cls):
-        return {
-            "excluded_categories": [],
-        }
@@ -4,11 +4,34 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from typing import Optional
+
 from llama_stack.distribution.datatypes import RemoteProviderConfig
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_OLLAMA_PORT = 11434
 
 
 class OllamaImplConfig(RemoteProviderConfig):
-    port: int = 11434
+    port: int = DEFAULT_OLLAMA_PORT
+
+    @classmethod
+    def sample_docker_compose_config(cls) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="ollama/ollama:latest",
+            volumes=["$HOME/.ollama:/root/.ollama"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
+        )
 
 
 async def get_adapter_impl(config: RemoteProviderConfig, _deps):
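sample_docker_compose_config describes how to run the Ollama server container alongside the stack. Rendered into a compose document it would look roughly like this; a sketch assuming pydantic v2's model_dump and PyYAML, since the commit's actual compose generator is not shown in this diff:

import yaml  # PyYAML assumed available

svc = OllamaImplConfig.sample_docker_compose_config()
# exclude_none drops the optional fields left unset above.
doc = {"services": {"ollama": svc.model_dump(exclude_none=True)}}
print(yaml.safe_dump(doc, sort_keys=False))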
@@ -9,6 +9,11 @@ from typing import Optional
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_VLLM_PORT = 8000
+
 
 @json_schema_type
 class VLLMInferenceAdapterConfig(BaseModel):
@@ -26,10 +31,50 @@ class VLLMInferenceAdapterConfig(BaseModel):
     )
 
     @classmethod
-    def sample_dict(cls):
+    # TODO: we may need two modes, one for conda and one for docker
+    def sample_run_config(
+        cls,
+        url: str = "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+    ):
         return {
-            "url": "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+            "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
         }
+
+    @classmethod
+    def sample_docker_compose_config(
+        cls,
+        port: int = DEFAULT_VLLM_PORT,
+        cuda_visible_devices: str = "0",
+        model: str = "meta-llama/Llama-3.2-3B-Instruct",
+    ) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="vllm/vllm-openai:latest",
+            volumes=["$HOME/.cache/huggingface:/root/.cache/huggingface"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{port}:{port}"],
+            environment={
+                "CUDA_VISIBLE_DEVICES": cuda_visible_devices,
+                "HUGGING_FACE_HUB_TOKEN": "$HF_TOKEN",
+            },
+            command=(
+                " ".join(
+                    [
+                        "--gpu-memory-utilization 0.75",
+                        f"--model {model}",
+                        "--enforce-eager",
+                        "--max-model-len 8192",
+                        "--max-num-seqs 16",
+                        f"--port {port}",
+                    ]
+                )
+            ),
+        )
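The new url parameter is what the TODO is about: the default points at host.docker.internal for the docker mode, while a conda/local mode can pass a localhost URL instead. An illustrative call, where the URL value is an example rather than a project default:

# Conda-style override; max_tokens and api_token keep their env placeholders.
sample = VLLMInferenceAdapterConfig.sample_run_config(
    url="${env.VLLM_URL:http://localhost:8000/v1}"
)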
llama_stack/providers/utils/docker/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
llama_stack/providers/utils/docker/service_config.py (new file, 29 lines)

@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class LiteralString(str):
+    pass  # Marker class for strings we want to format with >
+
+
+class DockerComposeServiceConfig(BaseModel):
+    """Configuration for a single service in docker-compose."""
+
+    image: str
+    volumes: Optional[List[str]] = None
+    network_mode: str = "bridge"
+    ports: Optional[List[str]] = None
+    devices: Optional[List[str]] = None
+    environment: Optional[Dict[str, str]] = None
+    command: Optional[str] = None
+    depends_on: Optional[List[str]] = None
+    deploy: Optional[Dict[str, Any]] = None
+    runtime: Optional[str] = None
+    entrypoint: Optional[str] = None
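DockerComposeServiceConfig gives every provider a common shape for the compose auto-generation; LiteralString presumably exists so that long values such as command can later be emitted in YAML block or folded style. A hypothetical assembler showing how per-provider service configs could be merged into one compose file; the function name, structure, and pydantic v2 model_dump are all assumptions of this sketch:

from typing import Dict

import yaml  # PyYAML assumed

from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig

def build_compose_file(services: Dict[str, DockerComposeServiceConfig]) -> str:
    # Collect each provider's service entry under "services", dropping
    # unset optional fields so the output stays compact.
    doc = {
        "services": {
            name: cfg.model_dump(exclude_none=True)
            for name, cfg in services.items()
        }
    }
    return yaml.safe_dump(doc, sort_keys=False)

# Example wiring of the two providers shown above:
# build_compose_file({
#     "ollama": OllamaImplConfig.sample_docker_compose_config(),
#     "vllm": VLLMInferenceAdapterConfig.sample_docker_compose_config(port=5100),
# })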
@@ -37,7 +37,7 @@ class RedisKVStoreConfig(CommonConfig):
         return f"redis://{self.host}:{self.port}"
 
     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "type": "redis",
             "namespace": None,
@@ -54,7 +54,7 @@ class SqliteKVStoreConfig(CommonConfig):
     )
 
     @classmethod
-    def sample_dict(cls, db_name: str = "kvstore.db"):
+    def sample_run_config(cls, db_name: str = "kvstore.db"):
         return {
             "type": "sqlite",
             "namespace": None,
@@ -72,7 +72,7 @@ class PostgresKVStoreConfig(CommonConfig):
     table_name: str = "llamastack_kvstore"
 
     @classmethod
-    def sample_dict(cls, table_name: str = "llamastack_kvstore"):
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
         return {
             "type": "postgres",
             "namespace": None,