feat: support postgresql inference store (#2310)

# What does this PR do?
* Added support for a PostgreSQL inference store
* Added an 'oracle' template that demos how to configure PostgreSQL stores
(except for telemetry, which is not currently supported)
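
For context, the inference store persists OpenAI-compatible chat completions so they can be retrieved by id later. Below is a minimal sketch of the round trip the updated tests exercise, assuming a server at the test plan's address; the `/v1/openai/v1` compat prefix and the dummy `api_key` are assumptions, not part of this PR:

```python
# Sketch of the round trip exercised by the tests in this PR: a chat
# completion is created, persisted by the inference store (PostgreSQL with
# this change), and retrieved back by id.
# Assumptions: server at http://localhost:8321 (from the test plan below)
# and the /v1/openai/v1 compat prefix; adjust to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

response = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p3-70b-instruct",
    messages=[{"role": "user", "content": "Hello, world!"}],
)

# The store keeps both the input messages and the generated choices,
# so a completion can be fetched back by its id.
stored = client.chat.completions.retrieve(response.id)
assert stored.id == response.id
```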


## Test Plan

```shell
llama stack build --template oracle --image-type conda --run

LLAMA_STACK_CONFIG=http://localhost:8321 pytest -s -v tests/integration/ \
  --text-model accounts/fireworks/models/llama-v3p3-70b-instruct \
  -k 'inference_store'
```
commit fbc8fc6eb5 (parent 6af13bbbf0)
Author: ehhuang
Date: 2025-05-29 14:33:09 -07:00
Committed by: Sumit Jaiswal
32 changed files with 516 additions and 53 deletions


```diff
@@ -268,9 +268,9 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models
         False,
     ],
 )
-def test_inference_store(openai_client, client_with_models, text_model_id, stream):
+def test_inference_store(compat_client, client_with_models, text_model_id, stream):
     skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    client = openai_client
+    client = compat_client
     # make a chat completion
     message = "Hello, world!"
     response = client.chat.completions.create(
```
```diff
@@ -301,9 +301,14 @@ def test_inference_store(openai_client, client_with_models, text_model_id, stream):
     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id
-    assert retrieved_response.input_messages[0]["content"] == message, retrieved_response
     assert retrieved_response.choices[0].message.content == content, retrieved_response
+    input_content = (
+        getattr(retrieved_response.input_messages[0], "content", None)
+        or retrieved_response.input_messages[0]["content"]
+    )
+    assert input_content == message, retrieved_response


 @pytest.mark.parametrize(
     "stream",
```
```diff
@@ -312,9 +317,9 @@ def test_inference_store(openai_client, client_with_models, text_model_id, stream):
         False,
     ],
 )
-def test_inference_store_tool_calls(openai_client, client_with_models, text_model_id, stream):
+def test_inference_store_tool_calls(compat_client, client_with_models, text_model_id, stream):
     skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    client = openai_client
+    client = compat_client
     # make a chat completion
     message = "What's the weather in Tokyo? Use the get_weather function to get the weather."
     response = client.chat.completions.create(
```
```diff
@@ -361,7 +366,11 @@ def test_inference_store_tool_calls(openai_client, client_with_models, text_model_id, stream):
     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id
-    assert retrieved_response.input_messages[0]["content"] == message
+    input_content = (
+        getattr(retrieved_response.input_messages[0], "content", None)
+        or retrieved_response.input_messages[0]["content"]
+    )
+    assert input_content == message, retrieved_response
     tool_calls = retrieved_response.choices[0].message.tool_calls
     # sometimes the model doesn't output tool calls, but we still want to test that the tool was called
     if tool_calls:
```