example config

Xi Yan 2024-09-20 11:22:58 -07:00
parent 942cb87a3c
commit 9bb6ce54ff
2 changed files with 106 additions and 5 deletions

@@ -35,9 +35,6 @@ from fastapi import Body, FastAPI, HTTPException, Request, Response
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
 from fastapi.routing import APIRoute
-from pydantic import BaseModel, ValidationError
-from termcolor import cprint
-from typing_extensions import Annotated
 
 from llama_stack.providers.utils.telemetry.tracing import (
     end_trace,
@@ -45,6 +42,9 @@ from llama_stack.providers.utils.telemetry.tracing import (
     SpanStatus,
     start_trace,
 )
+from pydantic import BaseModel, ValidationError
+from termcolor import cprint
+from typing_extensions import Annotated
 
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from llama_stack.distribution.distribution import api_endpoints, api_providers
@@ -276,14 +276,21 @@ def snake_to_camel(snake_str):
     return "".join(word.capitalize() for word in snake_str.split("_"))
 
 
+async def resolve_impls_with_routing(
+    stack_run_config: StackRunConfig,
+) -> Dict[Api, Any]:
+    raise NotImplementedError("This is not implemented yet")
+
+
 async def resolve_impls(
-    provider_map: Dict[str, ProviderMapEntry],
+    stack_run_config: StackRunConfig,
 ) -> Dict[Api, Any]:
     """
     Does two things:
     - flatmaps, sorts and resolves the providers in dependency order
     - for each API, produces either a (local, passthrough or router) implementation
     """
+    provider_map = stack_run_config.provider_map
     all_providers = api_providers()
     specs = {}
 
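The docstring above says resolve_impls flat-maps, sorts, and resolves providers in dependency order, then builds one implementation per API. The body of that resolution is not part of this diff; purely as a rough sketch (the PROVIDER_DEPS table and sorted_deps helper are made up for illustration, not llama_stack code), dependency ordering can be done with a depth-first topological sort:

from typing import Dict, List

# Hypothetical, simplified dependency table: each API lists the APIs whose
# implementations must exist before it can be constructed.
PROVIDER_DEPS: Dict[str, List[str]] = {
    "inference": [],
    "memory": [],
    "safety": ["inference"],                     # e.g. a shield that calls inference
    "agents": ["inference", "memory", "safety"],
}


def sorted_deps(deps: Dict[str, List[str]]) -> List[str]:
    """Return APIs in dependency order (dependencies before dependents)."""
    order: List[str] = []
    state: Dict[str, bool] = {}  # False = being visited, True = finished

    def visit(api: str) -> None:
        if api in state:
            if state[api] is False:
                raise ValueError(f"dependency cycle involving {api!r}")
            return
        state[api] = False
        for dep in deps.get(api, []):
            visit(dep)
        state[api] = True
        order.append(api)

    for api in deps:
        visit(api)
    return order


print(sorted_deps(PROVIDER_DEPS))  # e.g. ['inference', 'memory', 'safety', 'agents']

The cycle check matters because a wiring in which two APIs depend on each other can never be resolved.
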
@@ -333,7 +340,7 @@ def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
     app = FastAPI()
 
-    impls, specs = asyncio.run(resolve_impls(config.provider_map))
+    impls, specs = asyncio.run(resolve_impls(config))
 
     if Api.telemetry in impls:
         setup_logger(impls[Api.telemetry])
 

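The new resolve_impls_with_routing is only a NotImplementedError stub in this commit, so the routing behaviour itself is not defined here. Purely as a hedged sketch of the idea the routing_table in the new config below suggests (RoutingEntry, Router, and get_provider are made-up names, not llama_stack APIs), a router could map each routing_key to an already-instantiated provider:

from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class RoutingEntry:
    routing_key: str
    provider_id: str
    config: Dict[str, Any]


class Router:
    """Dispatch to the provider registered for a request's routing_key."""

    def __init__(self, entries: List[RoutingEntry], impls: Dict[str, Any]) -> None:
        # impls maps provider_id -> an already-instantiated provider object
        self.table = {e.routing_key: impls[e.provider_id] for e in entries}

    def get_provider(self, routing_key: str) -> Any:
        if routing_key not in self.table:
            raise ValueError(f"no provider registered for routing_key {routing_key!r}")
        return self.table[routing_key]


# Toy usage mirroring the inference section of the config below; the impls
# dict holds stand-in objects rather than real provider implementations.
entries = [
    RoutingEntry("Meta-Llama3.1-8B-Instruct", "meta-reference",
                 {"model": "Meta-Llama3.1-8B-Instruct"}),
    RoutingEntry("Meta-Llama3.1-8B", "remote::ollama",
                 {"url": "http:ollama-url-1.com"}),
]
impls = {"meta-reference": object(), "remote::ollama": object()}
router = Router(entries, impls)
assert router.get_provider("Meta-Llama3.1-8B") is impls["remote::ollama"]
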
@@ -0,0 +1,94 @@
+built_at: '2024-09-18T13:41:17.656743'
+image_name: local
+docker_image: null
+conda_env: local
+apis_to_serve:
+- inference
+- memory
+provider_map:
+  # use builtin-router as dummy field
+  memory: builtin-router
+  inference: builtin-router
+routing_table:
+  inference:
+  - routing_key: Meta-Llama3.1-8B-Instruct
+    provider_id: meta-reference
+    config:
+      model: Meta-Llama3.1-8B-Instruct
+      quantization: null
+      torch_seed: null
+      max_seq_len: 4096
+      max_batch_size: 1
+  - routing_key: Meta-Llama3.1-8B
+    provider_id: remote::ollama
+    config:
+      url: http:ollama-url-1.com
+  memory:
+  - routing_key: keyvalue
+    provider_id: remote::pgvector
+    config:
+      host: localhost
+      port: 5432
+      db: vectordb
+      user: vectoruser
+  - routing_key: vector
+    provider_id: meta-reference
+    config: {}
+# safety:
+#   provider_id: meta-reference
+#   config:
+#     llama_guard_shield:
+#       model: Llama-Guard-3-8B
+#       excluded_categories: []
+#       disable_input_check: false
+#       disable_output_check: false
+#     prompt_guard_shield:
+#       model: Prompt-Guard-86M
+# telemetry:
+#   provider_id: meta-reference
+#   config: {}
+# agents:
+#   provider_id: meta-reference
+#   config: {}
+# memory:
+#   provider_id: meta-reference
+#   config: {}
+# models:
+#   provider_id: builtin
+#   config:
+#     models_config:
+#     - core_model_id: Meta-Llama3.1-8B-Instruct
+#       provider_id: meta-reference
+#       api: inference
+#       config:
+#         model: Meta-Llama3.1-8B-Instruct
+#         quantization: null
+#         torch_seed: null
+#         max_seq_len: 4096
+#         max_batch_size: 1
+#     - core_model_id: Meta-Llama3.1-8B
+#       provider_id: meta-reference
+#       api: inference
+#       config:
+#         model: Meta-Llama3.1-8B
+#         quantization: null
+#         torch_seed: null
+#         max_seq_len: 4096
+#         max_batch_size: 1
+#     - core_model_id: Llama-Guard-3-8B
+#       provider_id: meta-reference
+#       api: safety
+#       config:
+#         model: Llama-Guard-3-8B
+#         excluded_categories: []
+#         disable_input_check: false
+#         disable_output_check: false
+#     - core_model_id: Prompt-Guard-86M
+#       provider_id: meta-reference
+#       api: safety
+#       config:
+#         model: Prompt-Guard-86M
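
One hedged way to sanity-check the example config above is to load it with PyYAML and list which provider serves each routing_key; the file path below is an assumption, since this view does not show the new file's name:

import yaml  # third-party: pyyaml

# Hypothetical path; save the config above wherever suits your setup.
with open("router_table_run.yaml") as f:
    cfg = yaml.safe_load(f)

print("apis_to_serve:", cfg["apis_to_serve"])  # ['inference', 'memory']
print("provider_map:", cfg["provider_map"])    # both APIs map to builtin-router

# For each API, show which provider handles which routing_key.
for api, entries in cfg.get("routing_table", {}).items():
    for entry in entries:
        print(f"{api}: {entry['routing_key']} -> {entry['provider_id']}")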