mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
feat: implement get chat completions APIs (#2200)
# What does this PR do? * Provide sqlite implementation of the APIs introduced in https://github.com/meta-llama/llama-stack/pull/2145. * Introduced a SqlStore API: llama_stack/providers/utils/sqlstore/api.py and the first Sqlite implementation * Pagination support will be added in a future PR. ## Test Plan Unit test on sql store: <img width="1005" alt="image" src="https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29" /> Integration test: ``` INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run ``` ``` LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai' ```
This commit is contained in:
parent
633bb9c5b3
commit
549812f51e
71 changed files with 1111 additions and 10 deletions
|
@ -29,3 +29,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -96,6 +96,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta.llama3-1-8b-instruct-v1:0
|
||||
|
|
|
@ -29,3 +29,5 @@ distribution_spec:
|
|||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -99,6 +99,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: llama3.1-8b
|
||||
|
|
|
@ -30,3 +30,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -99,6 +99,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
|
|
|
@ -30,3 +30,6 @@ distribution_spec:
|
|||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -99,6 +99,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -95,6 +95,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -67,6 +68,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -105,6 +107,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"sqlite-vec",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -145,6 +148,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -184,6 +188,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -221,6 +226,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -259,6 +265,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -297,6 +304,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -335,6 +343,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"sqlite-vec",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -379,6 +388,7 @@
|
|||
"scipy",
|
||||
"sentence-transformers",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"torch",
|
||||
"torchao==0.8.0",
|
||||
"torchvision",
|
||||
|
@ -414,6 +424,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn"
|
||||
|
@ -452,6 +463,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"torch",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -490,6 +502,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"sqlite-vec",
|
||||
"together",
|
||||
"tqdm",
|
||||
|
@ -528,6 +541,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -566,6 +580,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -599,6 +614,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
|
@ -637,6 +653,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"sqlite-vec",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -678,6 +695,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -716,6 +734,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"together",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -755,6 +774,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"sqlite-vec",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
|
@ -794,6 +814,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
@ -833,6 +854,7 @@
|
|||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"sqlalchemy[asyncio]",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"tree_sitter",
|
||||
|
|
|
@ -31,3 +31,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -111,6 +111,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
|
|
|
@ -106,6 +106,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
|
|
|
@ -26,3 +26,5 @@ distribution_spec:
|
|||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -99,6 +99,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: groq/llama3-8b-8192
|
||||
|
|
|
@ -29,3 +29,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -107,6 +107,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -102,6 +102,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -30,3 +30,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -107,6 +107,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -102,6 +102,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -30,3 +30,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -111,6 +111,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: Llama-3.3-70B-Instruct
|
||||
|
|
|
@ -29,3 +29,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -117,6 +117,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -107,6 +107,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -24,3 +24,6 @@ distribution_spec:
|
|||
tool_runtime:
|
||||
- inline::rag-runtime
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -92,6 +92,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -80,6 +80,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta/llama3-8b-instruct
|
||||
|
|
|
@ -32,3 +32,6 @@ distribution_spec:
|
|||
- remote::model-context-protocol
|
||||
- remote::wolfram-alpha
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -112,6 +112,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -110,6 +110,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -33,3 +33,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -125,6 +125,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: openai/gpt-4o
|
||||
|
|
|
@ -31,3 +31,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -111,6 +111,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
|
|
|
@ -106,6 +106,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
|
|
|
@ -31,3 +31,6 @@ distribution_spec:
|
|||
- remote::model-context-protocol
|
||||
- remote::wolfram-alpha
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -115,6 +115,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -108,6 +108,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -22,3 +22,5 @@ distribution_spec:
|
|||
- remote::model-context-protocol
|
||||
- remote::wolfram-alpha
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -82,6 +82,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
|
|
|
@ -35,3 +35,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -133,6 +133,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: openai/gpt-4o
|
||||
|
|
|
@ -29,6 +29,7 @@ from llama_stack.distribution.distribution import get_provider_registry
|
|||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
||||
|
||||
|
||||
def get_model_registry(
|
||||
|
@ -117,6 +118,10 @@ class RunConfigSettings(BaseModel):
|
|||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="registry.db",
|
||||
),
|
||||
inference_store=SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="inference_store.db",
|
||||
),
|
||||
models=self.default_models or [],
|
||||
shields=self.default_shields or [],
|
||||
tool_groups=self.default_tool_groups or [],
|
||||
|
@ -146,14 +151,20 @@ class DistributionTemplate(BaseModel):
|
|||
available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
|
||||
|
||||
def build_config(self) -> BuildConfig:
|
||||
additional_pip_packages: list[str] = []
|
||||
for run_config in self.run_configs.values():
|
||||
run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
|
||||
if run_config_.inference_store:
|
||||
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
|
||||
|
||||
return BuildConfig(
|
||||
name=self.name,
|
||||
distribution_spec=DistributionSpec(
|
||||
description=self.description,
|
||||
container_image=self.container_image,
|
||||
providers=self.providers,
|
||||
),
|
||||
image_type="conda", # default to conda, can be overridden
|
||||
additional_pip_packages=additional_pip_packages,
|
||||
)
|
||||
|
||||
def generate_markdown_docs(self) -> str:
|
||||
|
|
|
@ -30,3 +30,6 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -102,6 +102,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -101,6 +101,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -31,3 +31,6 @@ distribution_spec:
|
|||
- remote::model-context-protocol
|
||||
- remote::wolfram-alpha
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -111,6 +111,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
|
|
|
@ -106,6 +106,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
|
|
|
@ -35,3 +35,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -135,6 +135,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: openai/gpt-4o
|
||||
|
|
|
@ -30,3 +30,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -106,6 +106,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
|
@ -28,3 +28,5 @@ distribution_spec:
|
|||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -103,6 +103,9 @@ providers:
|
|||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-3-70b-instruct
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue