feat: implement get chat completions APIs (#2200)

# What does this PR do?
* Provides a sqlite implementation of the APIs introduced in
https://github.com/meta-llama/llama-stack/pull/2145.
* Introduces a SqlStore API (llama_stack/providers/utils/sqlstore/api.py)
and its first sqlite implementation; a rough sketch of the API surface follows below.
* Pagination support will be added in a future PR.
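
For orientation, here is a sketch of the SqlStore surface as exercised by the unit test in this PR. The method names, `ColumnType` values, and argument shapes come straight from the tests; everything else (the `Protocol` framing, optional-parameter defaults, return types) is an assumption, not the actual contents of api.py:

```
# Sketch of the SqlStore API, inferred from the unit test below.
# Signatures beyond what the tests exercise are assumptions.
from enum import Enum
from typing import Any, Protocol


class ColumnType(Enum):
    INTEGER = "INTEGER"
    STRING = "STRING"


class SqlStore(Protocol):
    async def create_table(self, table: str, schema: dict[str, ColumnType]) -> None: ...

    async def insert(self, table: str, data: dict[str, Any]) -> None: ...

    async def fetch_all(
        self,
        table: str,
        where: dict[str, Any] | None = None,
        limit: int | None = None,
        order_by: list[tuple[str, str]] | None = None,
    ) -> list[dict[str, Any]]: ...

    async def fetch_one(self, table: str, where: dict[str, Any] | None = None) -> dict[str, Any] | None: ...

    async def update(self, table: str, data: dict[str, Any], where: dict[str, Any]) -> None: ...

    async def delete(self, table: str, where: dict[str, Any]) -> None: ...
```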

## Test Plan
Unit test on sql store:
![Unit test results for the sql store](https://github.com/user-attachments/assets/9b8b7ec8-632b-4667-8127-5583426b2e29)


Integration test:
```
INFERENCE_MODEL="llama3.2:3b-instruct-fp16" llama stack build --template ollama --image-type conda --run
```
```
LLAMA_STACK_CONFIG=http://localhost:5001 INFERENCE_MODEL="llama3.2:3b-instruct-fp16" python -m pytest -v tests/integration/inference/test_openai_completion.py --text-model "llama3.2:3b-instruct-fp16" -k 'inference_store and openai'
```
Commit 549812f51e by ehhuang, 2025-05-21 22:21:52 -07:00 (parent 633bb9c5b3), committed via GitHub.
71 changed files with 1111 additions and 10 deletions

tests/integration/inference/test_openai_completion.py

@@ -222,3 +222,105 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
            streamed_content.append(chunk.choices[0].delta.content.lower().strip())
    assert len(streamed_content) > 0
    assert expected.lower() in "".join(streamed_content)


@pytest.mark.parametrize(
    "stream",
    [
        True,
        False,
    ],
)
def test_inference_store(openai_client, client_with_models, text_model_id, stream):
    skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
    client = openai_client

    # make a chat completion
    message = "Hello, world!"
    response = client.chat.completions.create(
        model=text_model_id,
        messages=[
            {
                "role": "user",
                "content": message,
            }
        ],
        stream=stream,
    )
    if stream:
        # accumulate the streamed content
        content = ""
        response_id = None
        for chunk in response:
            if response_id is None:
                response_id = chunk.id
            # delta.content can be None on some chunks (e.g. the final one)
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
    else:
        response_id = response.id
        content = response.choices[0].message.content

    # the completion should now be listable and retrievable from the store
    responses = client.chat.completions.list()
    assert response_id in [r.id for r in responses.data]

    retrieved_response = client.chat.completions.retrieve(response_id)
    assert retrieved_response.id == response_id
    assert retrieved_response.input_messages[0]["content"] == message
    assert retrieved_response.choices[0].message.content == content


@pytest.mark.parametrize(
    "stream",
    [
        True,
        False,
    ],
)
def test_inference_store_tool_calls(openai_client, client_with_models, text_model_id, stream):
    skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
    client = openai_client

    # make a chat completion
    message = "What's the weather in Tokyo? Use the get_weather function to get the weather."
    response = client.chat.completions.create(
        model=text_model_id,
        messages=[
            {
                "role": "user",
                "content": message,
            }
        ],
        stream=stream,
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the weather in a given city",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "city": {"type": "string", "description": "The city to get the weather for"},
                        },
                    },
                },
            }
        ],
    )
    if stream:
        # accumulate the streamed content
        content = ""
        response_id = None
        for chunk in response:
            if response_id is None:
                response_id = chunk.id
            # delta.content can be None on tool-call chunks and the final chunk
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
    else:
        response_id = response.id
        content = response.choices[0].message.content

    # the completion, including its tool calls, should be stored and retrievable
    responses = client.chat.completions.list()
    assert response_id in [r.id for r in responses.data]

    retrieved_response = client.chat.completions.retrieve(response_id)
    assert retrieved_response.id == response_id
    assert retrieved_response.input_messages[0]["content"] == message
    assert retrieved_response.choices[0].message.tool_calls[0].function.name == "get_weather"
    assert retrieved_response.choices[0].message.tool_calls[0].function.arguments == '{"city":"Tokyo"}'
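
The assertions in these two tests pin down what a stored chat completion looks like when read back: the retrieved object carries the original `input_messages` alongside the standard OpenAI `choices`. As a rough illustration (the `id` value and overall framing are made up; only the asserted fields are implied by the tests):

```
# Illustrative shape of a retrieved stored completion, based only on the
# fields asserted above; the id and any unshown fields are hypothetical.
retrieved = {
    "id": "chatcmpl-123",  # hypothetical
    "input_messages": [{"role": "user", "content": "What's the weather in Tokyo? ..."}],
    "choices": [
        {
            "message": {
                "tool_calls": [
                    {"function": {"name": "get_weather", "arguments": '{"city":"Tokyo"}'}}
                ]
            }
        }
    ],
}
```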

New file: unit test for the sqlite SqlStore implementation.

@@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from tempfile import TemporaryDirectory

import pytest

from llama_stack.providers.utils.sqlstore.api import ColumnType
from llama_stack.providers.utils.sqlstore.sqlite.sqlite import SqliteSqlStoreImpl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig


@pytest.mark.asyncio
async def test_sqlite_sqlstore():
    with TemporaryDirectory() as tmp_dir:
        db_name = "test.db"
        sqlstore = SqliteSqlStoreImpl(
            SqliteSqlStoreConfig(
                db_path=tmp_dir + "/" + db_name,
            )
        )
        await sqlstore.create_table(
            table="test",
            schema={
                "id": ColumnType.INTEGER,
                "name": ColumnType.STRING,
            },
        )

        await sqlstore.insert("test", {"id": 1, "name": "test"})
        await sqlstore.insert("test", {"id": 12, "name": "test12"})

        rows = await sqlstore.fetch_all("test")
        assert rows == [{"id": 1, "name": "test"}, {"id": 12, "name": "test12"}]

        row = await sqlstore.fetch_one("test", {"id": 1})
        assert row == {"id": 1, "name": "test"}

        row = await sqlstore.fetch_one("test", {"name": "test12"})
        assert row == {"id": 12, "name": "test12"}

        # order by
        rows = await sqlstore.fetch_all("test", order_by=[("id", "asc")])
        assert rows == [{"id": 1, "name": "test"}, {"id": 12, "name": "test12"}]

        rows = await sqlstore.fetch_all("test", order_by=[("id", "desc")])
        assert rows == [{"id": 12, "name": "test12"}, {"id": 1, "name": "test"}]

        # limit
        rows = await sqlstore.fetch_all("test", limit=1)
        assert rows == [{"id": 1, "name": "test"}]

        # update
        await sqlstore.update("test", {"name": "test123"}, {"id": 1})
        row = await sqlstore.fetch_one("test", {"id": 1})
        assert row == {"id": 1, "name": "test123"}

        # delete
        await sqlstore.delete("test", {"id": 1})
        rows = await sqlstore.fetch_all("test")
        assert rows == [{"id": 12, "name": "test12"}]
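
Putting the two halves of this PR together: the chat-completions list/retrieve endpoints can be served from a table managed through the SqlStore API above. The sketch below is not the PR's actual implementation; the table name, column layout, and function names are assumptions that only reuse the SqlStore calls demonstrated by the unit test:

```
# Hypothetical sketch of persisting chat completions via the SqlStore API.
# Table name, columns, and helper names are illustrative, not the PR's code.
import json

from llama_stack.providers.utils.sqlstore.api import ColumnType


async def setup(sqlstore) -> None:
    await sqlstore.create_table(
        table="chat_completions",  # assumed table name
        schema={
            "id": ColumnType.STRING,
            "created": ColumnType.INTEGER,
            "model": ColumnType.STRING,
            "data": ColumnType.STRING,  # full record serialized as JSON
        },
    )


async def store_chat_completion(sqlstore, response_id: str, created: int, model: str, payload: dict) -> None:
    # persist the request's input messages together with the response choices,
    # so retrieve() can return both (as the integration tests assert)
    await sqlstore.insert(
        "chat_completions",
        {"id": response_id, "created": created, "model": model, "data": json.dumps(payload)},
    )


async def get_chat_completion(sqlstore, response_id: str) -> dict | None:
    row = await sqlstore.fetch_one("chat_completions", {"id": response_id})
    return json.loads(row["data"]) if row else None
```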