feat: implement get chat completions APIs (#2200)

# What does this PR do?
* Provide a SQLite implementation of the APIs introduced in
https://github.com/meta-llama/llama-stack/pull/2145.
* Introduce a SqlStore API (llama_stack/providers/utils/sqlstore/api.py)
and its first SQLite implementation.
* Pagination support will be added in a future PR.

## Test Plan
Unit tests for the SQL store:
<img width="1005" alt="image"
src="https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29"
/>


Integration test:
```
INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run
```
```
LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai'
```
This commit is contained in:
ehhuang 2025-05-21 22:21:52 -07:00 committed by GitHub
parent 633bb9c5b3
commit 549812f51e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
71 changed files with 1111 additions and 10 deletions

View file

@ -140,7 +140,7 @@ async def resolve_impls(
sorted_providers = sort_providers_by_deps(providers_with_specs, run_config)
return await instantiate_providers(sorted_providers, router_apis, dist_registry)
return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config)
def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:
@ -243,7 +243,10 @@ def sort_providers_by_deps(
async def instantiate_providers(
sorted_providers: list[tuple[str, ProviderWithSpec]], router_apis: set[Api], dist_registry: DistributionRegistry
sorted_providers: list[tuple[str, ProviderWithSpec]],
router_apis: set[Api],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
) -> dict:
"""Instantiates providers asynchronously while managing dependencies."""
impls: dict[Api, Any] = {}
@ -258,7 +261,7 @@ async def instantiate_providers(
if isinstance(provider.spec, RoutingTableProviderSpec):
inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"]
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry)
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config)
if api_str.startswith("inner-"):
inner_impls_by_provider_id[api_str][provider.provider_id] = impl
@ -308,6 +311,7 @@ async def instantiate_provider(
deps: dict[Api, Any],
inner_impls: dict[str, Any],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
):
provider_spec = provider.spec
if not hasattr(provider_spec, "module"):
@ -327,7 +331,7 @@ async def instantiate_provider(
method = "get_auto_router_impl"
config = None
args = [provider_spec.api, deps[provider_spec.routing_table_api], deps]
args = [provider_spec.api, deps[provider_spec.routing_table_api], deps, run_config]
elif isinstance(provider_spec, RoutingTableProviderSpec):
method = "get_routing_table_impl"