feat: implement get chat completions APIs (#2200)

# What does this PR do?
* Provide a SQLite implementation of the APIs introduced in
https://github.com/meta-llama/llama-stack/pull/2145.
* Introduce a SqlStore API (llama_stack/providers/utils/sqlstore/api.py)
and its first implementation, backed by SQLite.
* Pagination support will be added in a future PR.

## Test Plan
Unit test for the SQL store:
<img width="1005" alt="image"
src="https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29"
/>


Integration test:
```
INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run
```
```
LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai'
```
This commit is contained in:
ehhuang 2025-05-21 22:21:52 -07:00 committed by GitHub
parent 633bb9c5b3
commit 549812f51e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
71 changed files with 1111 additions and 10 deletions

View file

@ -31,6 +31,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -67,6 +68,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -105,6 +107,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -145,6 +148,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -184,6 +188,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -221,6 +226,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -259,6 +265,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -297,6 +304,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -335,6 +343,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -379,6 +388,7 @@
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchao==0.8.0",
"torchvision",
@ -414,6 +424,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn"
@ -452,6 +463,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"tqdm",
"transformers",
@ -490,6 +502,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"tqdm",
@ -528,6 +541,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -566,6 +580,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -599,6 +614,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn",
@ -637,6 +653,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -678,6 +695,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -716,6 +734,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"together",
"tqdm",
"transformers",
@ -755,6 +774,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
@ -794,6 +814,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
@ -833,6 +854,7 @@
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",