Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-10 05:24:39 +00:00
chore: refactor server.main
# What does this PR do?

Refactor `main` to split out the app construction so that we can use `uvicorn --workers` to enable a multi-process stack.

## Test Plan

CI.

> uv run --with llama-stack python -m llama_stack.core.server.server benchmarking/k8s-benchmark/stack_run_config.yaml

works.

> LLAMA_STACK_CONFIG=benchmarking/k8s-benchmark/stack_run_config.yaml uv run uvicorn llama_stack.core.server.server:create_app --port 8321 --workers 4

works.
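Splitting construction into an app factory is what makes multiple worker processes possible: uvicorn imports the factory in each worker and calls it to build a fresh application instance per process, instead of sharing one prebuilt app object. A minimal sketch of the pattern, assuming a FastAPI app; the module name `example_server` and the route below are illustrative, not the actual llama-stack code:

```python
# Illustrative app-factory sketch; the real llama-stack server also wires
# config parsing, providers, and routes that are omitted here.
from fastapi import FastAPI


def create_app() -> FastAPI:
    """App factory: uvicorn calls this once in each worker process."""
    app = FastAPI()

    @app.get("/health")
    def health() -> dict:
        # Placeholder route so the sketch is runnable end to end.
        return {"status": "ok"}

    return app


def main() -> None:
    """Single-process entry point, kept for direct invocation."""
    import uvicorn

    # Passing an import string plus factory=True (rather than an app
    # instance) is what lets uvicorn spawn N workers, each of which
    # imports the module and calls create_app() in its own process.
    uvicorn.run("example_server:create_app", factory=True, port=8321, workers=4)


if __name__ == "__main__":
    main()
```

This mirrors the second test-plan command above, where the CLI form `uvicorn llama_stack.core.server.server:create_app --port 8321 --workers 4` points the workers at the factory rather than at a prebuilt app.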
parent ac1414b571
commit a285f9c95f
7 changed files with 233 additions and 146 deletions
```diff
@@ -17,11 +17,8 @@ export POSTGRES_PASSWORD=llamastack
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
-
 export MOCK_INFERENCE_MODEL=mock-inference
-
 export MOCK_INFERENCE_URL=openai-mock-service:8080
-
 export BENCHMARK_INFERENCE_MODEL=$INFERENCE_MODEL
+export LLAMA_STACK_WORKERS=4
-
 set -euo pipefail
 set -x
```
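The hunk itself only shows the new `LLAMA_STACK_WORKERS=4` export; presumably (an inference from the test plan above, not something this hunk shows) the benchmark's startup path reads that variable and forwards it to uvicorn, along the lines of `uvicorn llama_stack.core.server.server:create_app --workers $LLAMA_STACK_WORKERS`.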