feat: implement get chat completions APIs (#2200)

# What does this PR do? * Provide sqlite implementation of the APIs introduced in https://github.com/meta-llama/llama-stack/pull/2145. * Introduced a SqlStore API: llama_stack/providers/utils/sqlstore/api.py and the first Sqlite implementation * Pagination support will be added in a future PR. ## Test Plan Unit test on sql store: <img width="1005" alt="image" src="https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29" /> Integration test: ``` INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run ``` ``` LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai' ```
2025-05-21 22:21:52 -07:00 · 2025-05-21 22:21:52 -07:00 · 549812f51e
commit 549812f51e
parent 633bb9c5b3
71 changed files with 1111 additions and 10 deletions
--- a/llama_stack/distribution/routers/init.py
+++ b/llama_stack/distribution/routers/init.py
@ -7,8 +7,10 @@
 from typing import Any

 from llama_stack.distribution.datatypes import RoutedProtocol
+from llama_stack.distribution.stack import StackRunConfig
 from llama_stack.distribution.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable
+from llama_stack.providers.utils.inference.inference_store import InferenceStore

 from .routing_tables import (
    BenchmarksRoutingTable,
@ -45,7 +47,9 @@ async def get_routing_table_impl(
    return impl


-async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: dict[str, Any]) -> Any:
+async def get_auto_router_impl(
+    api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig
+) -> Any:
    from .routers import (
        DatasetIORouter,
        EvalRouter,
@ -76,6 +80,12 @@ async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: dict
        if dep_api in deps:
            api_to_dep_impl[dep_name] = deps[dep_api]

+    # TODO: move pass configs to routers instead
+    if api == Api.inference and run_config.inference_store:
+        inference_store = InferenceStore(run_config.inference_store)
+        await inference_store.initialize()
+        api_to_dep_impl["store"] = inference_store
+
    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
    await impl.initialize()
    return impl