feat: implement get chat completions APIs (#2200)

# What does this PR do?
* Provide a SQLite implementation of the APIs introduced in
https://github.com/meta-llama/llama-stack/pull/2145.
* Introduce a SqlStore API (llama_stack/providers/utils/sqlstore/api.py)
and its first implementation, backed by SQLite.
* Pagination support will be added in a future PR.

## Test Plan
Unit tests for the SQL store:
<img width="1005" alt="image"
src="https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29"
/>


Integration test (first build and run the stack, then run the tests against it):
```
INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run
```
```
LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai'
```
This commit is contained in:
ehhuang 2025-05-21 22:21:52 -07:00 committed by GitHub
parent 633bb9c5b3
commit 549812f51e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
71 changed files with 1111 additions and 10 deletions

View file

@ -29,3 +29,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -96,6 +96,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db
models:
- metadata: {}
model_id: meta.llama3-1-8b-instruct-v1:0

View file

@ -29,3 +29,5 @@ distribution_spec:
- remote::tavily-search
- inline::rag-runtime
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -99,6 +99,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db
models:
- metadata: {}
model_id: llama3.1-8b

View file

@ -30,3 +30,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -99,6 +99,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db
models:
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct

View file

@ -30,3 +30,6 @@ distribution_spec:
- remote::tavily-search
- inline::rag-runtime
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -99,6 +99,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -95,6 +95,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -31,6 +31,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -67,6 +68,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -105,6 +107,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -145,6 +148,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -184,6 +188,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -221,6 +226,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -259,6 +265,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -297,6 +304,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -335,6 +343,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -379,6 +388,7 @@
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchao==0.8.0",
"torchvision",
@ -414,6 +424,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn"
@ -452,6 +463,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"tqdm",
"transformers",
@ -490,6 +502,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"tqdm",
@ -528,6 +541,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -566,6 +580,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -599,6 +614,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn",
@ -637,6 +653,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -678,6 +695,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -716,6 +734,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"together",
"tqdm",
"transformers",
@ -755,6 +774,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -794,6 +814,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -833,6 +854,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",

View file

@ -31,3 +31,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -111,6 +111,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
models:
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct

View file

@ -106,6 +106,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
models:
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct

View file

@ -26,3 +26,5 @@ distribution_spec:
- remote::tavily-search
- inline::rag-runtime
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -99,6 +99,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db
models:
- metadata: {}
model_id: groq/llama3-8b-8192

View file

@ -29,3 +29,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -107,6 +107,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -102,6 +102,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -30,3 +30,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -107,6 +107,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -102,6 +102,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -30,3 +30,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -111,6 +111,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db
models:
- metadata: {}
model_id: Llama-3.3-70B-Instruct

View file

@ -29,3 +29,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -117,6 +117,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -107,6 +107,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -24,3 +24,6 @@ distribution_spec:
tool_runtime:
- inline::rag-runtime
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -92,6 +92,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -80,6 +80,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
models:
- metadata: {}
model_id: meta/llama3-8b-instruct

View file

@ -32,3 +32,6 @@ distribution_spec:
- remote::model-context-protocol
- remote::wolfram-alpha
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -112,6 +112,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -110,6 +110,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -33,3 +33,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -125,6 +125,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db
models:
- metadata: {}
model_id: openai/gpt-4o

View file

@ -31,3 +31,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -111,6 +111,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct

View file

@ -106,6 +106,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct

View file

@ -31,3 +31,6 @@ distribution_spec:
- remote::model-context-protocol
- remote::wolfram-alpha
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -115,6 +115,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -108,6 +108,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -22,3 +22,5 @@ distribution_spec:
- remote::model-context-protocol
- remote::wolfram-alpha
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -82,6 +82,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db
models:
- metadata: {}
model_id: sambanova/Meta-Llama-3.1-8B-Instruct

View file

@ -35,3 +35,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -133,6 +133,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db
models:
- metadata: {}
model_id: openai/gpt-4o

View file

@ -29,6 +29,7 @@ from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
def get_model_registry(
@ -117,6 +118,10 @@ class RunConfigSettings(BaseModel):
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="registry.db",
),
inference_store=SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="inference_store.db",
),
models=self.default_models or [],
shields=self.default_shields or [],
tool_groups=self.default_tool_groups or [],
@ -146,14 +151,20 @@ class DistributionTemplate(BaseModel):
available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
def build_config(self) -> BuildConfig:
    """Assemble the ``BuildConfig`` for this distribution template.

    Besides the template's name, description, container image and providers,
    the build config carries the extra pip packages required by the
    ``inference_store`` of every run config defined on the template.

    Several run configs commonly use the same store backend, so the same
    requirement can be reported more than once. Each package is emitted
    exactly once, in first-seen order, so the generated build file stays
    stable and free of duplicate entries.

    Returns:
        A ``BuildConfig`` with ``image_type`` set to ``"conda"`` and the
        de-duplicated ``additional_pip_packages`` list.
    """
    # A dict is used as an insertion-ordered set: keys keep first-seen order
    # and repeated requirements collapse into a single entry.
    unique_pip_packages: dict[str, None] = {}
    for run_config in self.run_configs.values():
        run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
        if run_config_.inference_store:
            # NOTE(review): assumes pip_packages is an iterable of requirement strings.
            unique_pip_packages.update(dict.fromkeys(run_config_.inference_store.pip_packages))

    return BuildConfig(
        name=self.name,
        distribution_spec=DistributionSpec(
            description=self.description,
            container_image=self.container_image,
            providers=self.providers,
        ),
        image_type="conda",  # default to conda, can be overridden
        additional_pip_packages=list(unique_pip_packages),
    )
def generate_markdown_docs(self) -> str:

View file

@ -30,3 +30,6 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -102,6 +102,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -101,6 +101,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -31,3 +31,6 @@ distribution_spec:
- remote::model-context-protocol
- remote::wolfram-alpha
image_type: conda
# Extra pip packages for this distribution build; each requirement listed once.
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -111,6 +111,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo

View file

@ -106,6 +106,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo

View file

@ -35,3 +35,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -135,6 +135,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
models:
- metadata: {}
model_id: openai/gpt-4o

View file

@ -30,3 +30,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -106,6 +106,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -28,3 +28,5 @@ distribution_spec:
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

View file

@ -103,6 +103,9 @@ providers:
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/llama-3-3-70b-instruct