Fixes; make inference tests pass with newer tool call types

2025-01-13 23:16:16 -08:00 · 2025-01-13 23:16:16 -08:00 · 2c2969f331
commit 2c2969f331
parent d9d34433fc
5 changed files with 24 additions and 25 deletions
--- a/llama_stack/distribution/store/registry.py
+++ b/llama_stack/distribution/store/registry.py
@@ -35,7 +35,7 @@ class DistributionRegistry(Protocol):


 REGISTER_PREFIX = "distributions:registry"
-KEY_VERSION = "v4"
+KEY_VERSION = "v5"
 KEY_FORMAT = f"{REGISTER_PREFIX}:{KEY_VERSION}::" + "{type}:{identifier}"


--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -142,7 +142,7 @@ async def process_completion_stream_response(
            text = ""
            continue
        yield CompletionResponseStreamChunk(
-            delta=TextDelta(text=text),
+            delta=text,
            stop_reason=stop_reason,
        )
        if finish_reason:
@@ -153,7 +153,7 @@ async def process_completion_stream_response(
            break

    yield CompletionResponseStreamChunk(
-        delta=TextDelta(text=""),
+        delta="",
        stop_reason=stop_reason,
    )

--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -265,6 +265,7 @@ def chat_completion_request_to_messages(
    For eg. for llama_3_1, add system message with the appropriate tools or
    add user messsage for custom tools, etc.
    """
+    assert llama_model is not None, "llama_model is required"
    model = resolve_model(llama_model)
    if model is None:
        log.error(f"Could not resolve model {llama_model}")
--- a/tests/client-sdk/conftest.py
+++ b/tests/client-sdk/conftest.py
@@ -12,6 +12,11 @@ from llama_stack.providers.tests.env import get_env_or_fail
 from llama_stack_client import LlamaStackClient


+def pytest_configure(config):
+    config.option.tbstyle = "short"
+    config.option.disable_warnings = True
+
+
 @pytest.fixture(scope="session")
 def provider_data():
    # check env for tavily secret, brave secret and inject all into provider data
@@ -29,6 +34,7 @@ def llama_stack_client(provider_data):
        client = LlamaStackAsLibraryClient(
            get_env_or_fail("LLAMA_STACK_CONFIG"),
            provider_data=provider_data,
+            skip_logger_removal=True,
        )
        client.initialize()
    elif os.environ.get("LLAMA_STACK_BASE_URL"):
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -6,9 +6,9 @@

 import pytest

-from llama_stack_client.lib.inference.event_logger import EventLogger
 from pydantic import BaseModel

+
 PROVIDER_TOOL_PROMPT_FORMAT = {
    "remote::ollama": "python_list",
    "remote::together": "json",
@@ -39,7 +39,7 @@ def text_model_id(llama_stack_client):
    available_models = [
        model.identifier
        for model in llama_stack_client.models.list()
-        if model.identifier.startswith("meta-llama")
+        if model.identifier.startswith("meta-llama") and "405" not in model.identifier
    ]
    assert len(available_models) > 0
    return available_models[0]
@@ -208,12 +208,9 @@ def test_text_chat_completion_streaming(
        stream=True,
    )
    streamed_content = [
-        str(log.content.lower().strip())
-        for log in EventLogger().log(response)
-        if log is not None
+        str(chunk.event.delta.text.lower().strip()) for chunk in response
    ]
    assert len(streamed_content) > 0
-    assert "assistant>" in streamed_content[0]
    assert expected.lower() in "".join(streamed_content)


@@ -250,17 +247,16 @@ def test_text_chat_completion_with_tool_calling_and_non_streaming(
 def extract_tool_invocation_content(response):
    text_content: str = ""
    tool_invocation_content: str = ""
-    for log in EventLogger().log(response):
-        if log is None:
-            continue
-        if isinstance(log.content, str):
-            text_content += log.content
-        elif isinstance(log.content, object):
-            if isinstance(log.content.content, str):
-                continue
-            elif isinstance(log.content.content, object):
-                tool_invocation_content += f"[{log.content.content.tool_name}, {log.content.content.arguments}]"
-
+    for chunk in response:
+        delta = chunk.event.delta
+        if delta.type == "text":
+            text_content += delta.text
+        elif delta.type == "tool_call":
+            if isinstance(delta.content, str):
+                tool_invocation_content += delta.content
+            else:
+                call = delta.content
+                tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
    return text_content, tool_invocation_content


@@ -280,7 +276,6 @@ def test_text_chat_completion_with_tool_calling_and_streaming(
    )
    text_content, tool_invocation_content = extract_tool_invocation_content(response)

-    assert "Assistant>" in text_content
    assert tool_invocation_content == "[get_weather, {'location': 'San Francisco, CA'}]"


@@ -368,10 +363,7 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
        stream=True,
    )
    streamed_content = [
-        str(log.content.lower().strip())
-        for log in EventLogger().log(response)
-        if log is not None
+        str(chunk.event.delta.text.lower().strip()) for chunk in response
    ]
    assert len(streamed_content) > 0
-    assert "assistant>" in streamed_content[0]
    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})