stage tmp changes

Xi Yan 2024-09-20 15:33:31 -07:00
parent abe312c092
commit 2dc14cba2c
2 changed files with 99 additions and 3 deletions

View file

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 import asyncio
+import importlib
 import inspect
 import json
 import signal
@@ -35,9 +36,6 @@ from fastapi import Body, FastAPI, HTTPException, Request, Response
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
 from fastapi.routing import APIRoute
-from pydantic import BaseModel, ValidationError
-from termcolor import cprint
-from typing_extensions import Annotated
 
 from llama_stack.providers.utils.telemetry.tracing import (
     end_trace,
@@ -45,6 +43,9 @@ from llama_stack.providers.utils.telemetry.tracing import (
     SpanStatus,
     start_trace,
 )
+from pydantic import BaseModel, ValidationError
+from termcolor import cprint
+from typing_extensions import Annotated
 
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from llama_stack.distribution.distribution import api_endpoints, api_providers

View file

@@ -0,0 +1,95 @@
+built_at: '2024-09-18T13:41:17.656743'
+image_name: local
+docker_image: null
+conda_env: local
+apis_to_serve:
+- inference
+# - memory
+# - telemetry
+provider_map:
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+provider_routing_table:
+  inference:
+  - routing_key: Meta-Llama3.1-8B-Instruct
+    provider_id: meta-reference
+    config:
+      model: Meta-Llama3.1-8B-Instruct
+      quantization: null
+      torch_seed: null
+      max_seq_len: 4096
+      max_batch_size: 1
+  - routing_key: Meta-Llama3.1-8B
+    provider_id: remote::ollama
+    config:
+      url: http://ollama-url-1.com
+  memory:
+  - routing_key: keyvalue
+    provider_id: remote::pgvector
+    config:
+      host: localhost
+      port: 5432
+      db: vectordb
+      user: vectoruser
+      password: xxxx
+  - routing_key: vector
+    provider_id: meta-reference
+    config: {}
+# safety:
+#   provider_id: meta-reference
+#   config:
+#     llama_guard_shield:
+#       model: Llama-Guard-3-8B
+#       excluded_categories: []
+#       disable_input_check: false
+#       disable_output_check: false
+#     prompt_guard_shield:
+#       model: Prompt-Guard-86M
+# telemetry:
+#   provider_id: meta-reference
+#   config: {}
+# agents:
+#   provider_id: meta-reference
+#   config: {}
+# memory:
+#   provider_id: meta-reference
+#   config: {}
+# models:
+#   provider_id: builtin
+#   config:
+#     models_config:
+#     - core_model_id: Meta-Llama3.1-8B-Instruct
+#       provider_id: meta-reference
+#       api: inference
+#       config:
+#         model: Meta-Llama3.1-8B-Instruct
+#         quantization: null
+#         torch_seed: null
+#         max_seq_len: 4096
+#         max_batch_size: 1
+#     - core_model_id: Meta-Llama3.1-8B
+#       provider_id: meta-reference
+#       api: inference
+#       config:
+#         model: Meta-Llama3.1-8B
+#         quantization: null
+#         torch_seed: null
+#         max_seq_len: 4096
+#         max_batch_size: 1
+#     - core_model_id: Llama-Guard-3-8B
+#       provider_id: meta-reference
+#       api: safety
+#       config:
+#         model: Llama-Guard-3-8B
+#         excluded_categories: []
+#         disable_input_check: false
+#         disable_output_check: false
+#     - core_model_id: Prompt-Guard-86M
+#       provider_id: meta-reference
+#       api: safety
+#       config:
+#         model: Prompt-Guard-86M
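
For reference, a minimal sketch (not part of the commit) of how a server could read this run configuration and resolve a provider for each routing key; the file name run.yaml is illustrative:

import yaml  # requires PyYAML

with open("run.yaml") as f:
    run_config = yaml.safe_load(f)

# Only APIs listed under apis_to_serve are exposed; in this config,
# that is just inference.
for api in run_config.get("apis_to_serve", []):
    for entry in run_config.get("provider_routing_table", {}).get(api, []):
        print(f"{api}: {entry['routing_key']} -> {entry['provider_id']}")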