Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-23 21:04:29 +00:00)

Merge branch 'embeddings' of https://github.com/hardikjshah/llama-stack into embeddings

commit 535e55d7dd

37 changed files with 605 additions and 66 deletions
@@ -268,9 +268,9 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models
         False,
     ],
 )
-def test_inference_store(openai_client, client_with_models, text_model_id, stream):
+def test_inference_store(compat_client, client_with_models, text_model_id, stream):
     skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    client = openai_client
+    client = compat_client
     # make a chat completion
     message = "Hello, world!"
     response = client.chat.completions.create(
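The openai_client → compat_client rename points at a parametrized fixture that can hand the same tests either an OpenAI client or a Llama Stack client. A minimal sketch of what such a fixture could look like; the fixture body, parameter names, and URLs here are assumptions, not taken from the repository:

# Hypothetical compat_client-style fixture; params and base_urls are illustrative.
import pytest
from openai import OpenAI
from llama_stack_client import LlamaStackClient

@pytest.fixture(params=["openai", "llama_stack"])
def compat_client(request):
    if request.param == "openai":
        # Any OpenAI-compatible endpoint works; this base_url is an assumption.
        return OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
    # Assumed to expose the same chat.completions surface as the OpenAI client.
    return LlamaStackClient(base_url="http://localhost:8321")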

@@ -301,9 +301,14 @@ def test_inference_store(openai_client, client_with_models, text_model_id, stream):

     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id
-    assert retrieved_response.input_messages[0]["content"] == message, retrieved_response
     assert retrieved_response.choices[0].message.content == content, retrieved_response
+
+    input_content = (
+        getattr(retrieved_response.input_messages[0], "content", None)
+        or retrieved_response.input_messages[0]["content"]
+    )
+    assert input_content == message, retrieved_response


 @pytest.mark.parametrize(
     "stream",
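The getattr/or construct added above accepts input messages stored either as attribute-style objects or as plain dicts. A self-contained illustration of the pattern; the Message class is a stand-in, not a llama-stack type:

# Stand-in message type; llama-stack's actual classes differ.
class Message:
    def __init__(self, content):
        self.content = content

def extract_content(msg):
    # Object-style messages expose .content; dict-style ones support [].
    # Note: falsy content (e.g. "") falls through to the dict lookup.
    return getattr(msg, "content", None) or msg["content"]

assert extract_content(Message("Hello, world!")) == "Hello, world!"
assert extract_content({"content": "Hello, world!"}) == "Hello, world!"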
@@ -312,9 +317,9 @@ def test_inference_store(openai_client, client_with_models, text_model_id, stream):
         False,
     ],
 )
-def test_inference_store_tool_calls(openai_client, client_with_models, text_model_id, stream):
+def test_inference_store_tool_calls(compat_client, client_with_models, text_model_id, stream):
     skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    client = openai_client
+    client = compat_client
     # make a chat completion
     message = "What's the weather in Tokyo? Use the get_weather function to get the weather."
     response = client.chat.completions.create(
@@ -361,7 +366,11 @@ def test_inference_store_tool_calls(openai_client, client_with_models, text_model_id, stream):

     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id
-    assert retrieved_response.input_messages[0]["content"] == message
+    input_content = (
+        getattr(retrieved_response.input_messages[0], "content", None)
+        or retrieved_response.input_messages[0]["content"]
+    )
+    assert input_content == message, retrieved_response
     tool_calls = retrieved_response.choices[0].message.tool_calls
     # sometimes model doesn't output tool calls, but we still want to test that the tool was called
     if tool_calls:

@@ -628,3 +628,69 @@ async def test_responses_store_list_input_items_logic():
     result = await responses_store.list_response_input_items("resp_123", limit=0, order=Order.asc)
     assert result.object == "list"
     assert len(result.data) == 0  # Should return no items
+
+
+@pytest.mark.asyncio
+async def test_store_response_uses_rehydrated_input_with_previous_response(
+    openai_responses_impl, mock_responses_store, mock_inference_api
+):
+    """Test that _store_response uses the full re-hydrated input (including previous responses)
+    rather than just the original input when previous_response_id is provided."""
+
+    # Setup - Create a previous response that should be included in the stored input
+    previous_response = OpenAIResponseObjectWithInput(
+        id="resp-previous-123",
+        object="response",
+        created_at=1234567890,
+        model="meta-llama/Llama-3.1-8B-Instruct",
+        status="completed",
+        input=[
+            OpenAIResponseMessage(
+                id="msg-prev-user", role="user", content=[OpenAIResponseInputMessageContentText(text="What is 2+2?")]
+            )
+        ],
+        output=[
+            OpenAIResponseMessage(
+                id="msg-prev-assistant",
+                role="assistant",
+                content=[OpenAIResponseOutputMessageContentOutputText(text="2+2 equals 4.")],
+            )
+        ],
+    )
+
+    mock_responses_store.get_response_object.return_value = previous_response
+
+    current_input = "Now what is 3+3?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute - Create response with previous_response_id
+    result = await openai_responses_impl.create_openai_response(
+        input=current_input,
+        model=model,
+        previous_response_id="resp-previous-123",
+        store=True,
+    )
+
+    store_call_args = mock_responses_store.store_response_object.call_args
+    stored_input = store_call_args.kwargs["input"]
+
+    # Verify that the stored input contains the full re-hydrated conversation:
+    # 1. Previous user message
+    # 2. Previous assistant response
+    # 3. Current user message
+    assert len(stored_input) == 3
+
+    assert stored_input[0].role == "user"
+    assert stored_input[0].content[0].text == "What is 2+2?"
+
+    assert stored_input[1].role == "assistant"
+    assert stored_input[1].content[0].text == "2+2 equals 4."
+
+    assert stored_input[2].role == "user"
+    assert stored_input[2].content == "Now what is 3+3?"
+
+    # Verify the response itself is correct
+    assert result.model == model
+    assert result.status == "completed"
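What the new test pins down is the "re-hydration" order: when previous_response_id is set, the stored input must be the previous turn's input, then its output, then the new user message. A schematic sketch of that concatenation, with plain dicts standing in for the real response types (the storage layer is mocked in the test):

# Schematic only; not llama-stack source.
def rehydrate_input(previous_input, previous_output, current_input):
    # Previous user/assistant messages come first, then the new turn.
    return [*previous_input, *previous_output, {"role": "user", "content": current_input}]

stored = rehydrate_input(
    [{"role": "user", "content": "What is 2+2?"}],
    [{"role": "assistant", "content": "2+2 equals 4."}],
    "Now what is 3+3?",
)
assert [m["role"] for m in stored] == ["user", "assistant", "user"]
assert len(stored) == 3  # matches the test's expectation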

@@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory

 import pytest

 from llama_stack.providers.utils.sqlstore.api import ColumnType
-from llama_stack.providers.utils.sqlstore.sqlite.sqlite import SqliteSqlStoreImpl
+from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

@@ -17,7 +17,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 async def test_sqlite_sqlstore():
     with TemporaryDirectory() as tmp_dir:
         db_name = "test.db"
-        sqlstore = SqliteSqlStoreImpl(
+        sqlstore = SqlAlchemySqlStoreImpl(
             SqliteSqlStoreConfig(
                 db_path=tmp_dir + "/" + db_name,
             )
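Both hunks above swap the SQLite-specific SqliteSqlStoreImpl for the SQLAlchemy-backed SqlAlchemySqlStoreImpl while keeping SqliteSqlStoreConfig as the configuration object. A minimal usage sketch based only on the imports and the construction pattern shown in this diff:

from tempfile import TemporaryDirectory

from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

# Same construction as the updated test; whether other engines plug in via
# different config classes is an assumption, not shown in this diff.
with TemporaryDirectory() as tmp_dir:
    store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=f"{tmp_dir}/test.db"))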