diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py
index d26b4447c..010d137ec 100644
--- a/llama_stack/distribution/store/registry.py
+++ b/llama_stack/distribution/store/registry.py
@@ -35,7 +35,7 @@ class DistributionRegistry(Protocol):
 
 
 REGISTER_PREFIX = "distributions:registry"
-KEY_VERSION = "v4"
+KEY_VERSION = "v5"
 KEY_FORMAT = f"{REGISTER_PREFIX}:{KEY_VERSION}::" + "{type}:{identifier}"
 
 
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 82e01c364..4c46954cf 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -142,7 +142,7 @@ async def process_completion_stream_response(
             text = ""
             continue
         yield CompletionResponseStreamChunk(
-            delta=TextDelta(text=text),
+            delta=text,
             stop_reason=stop_reason,
         )
         if finish_reason:
@@ -153,7 +153,7 @@
             break
 
     yield CompletionResponseStreamChunk(
-        delta=TextDelta(text=""),
+        delta="",
         stop_reason=stop_reason,
     )
 
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py
index 2d66dc60b..de4918f5c 100644
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -265,6 +265,7 @@ def chat_completion_request_to_messages(
     For eg. for llama_3_1, add system message with the appropriate tools or
     add user messsage for custom tools, etc.
     """
+    assert llama_model is not None, "llama_model is required"
     model = resolve_model(llama_model)
     if model is None:
         log.error(f"Could not resolve model {llama_model}")
diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py
index 16e6d1bbd..b40d54ee5 100644
--- a/tests/client-sdk/conftest.py
+++ b/tests/client-sdk/conftest.py
@@ -12,6 +12,11 @@ from llama_stack.providers.tests.env import get_env_or_fail
 
 from llama_stack_client import LlamaStackClient
 
+def pytest_configure(config):
+    config.option.tbstyle = "short"
+    config.option.disable_warnings = True
+
+
 @pytest.fixture(scope="session")
 def provider_data():
     # check env for tavily secret, brave secret and inject all into provider data
@@ -29,6 +34,7 @@ def llama_stack_client(provider_data):
         client = LlamaStackAsLibraryClient(
             get_env_or_fail("LLAMA_STACK_CONFIG"),
             provider_data=provider_data,
+            skip_logger_removal=True,
         )
         client.initialize()
     elif os.environ.get("LLAMA_STACK_BASE_URL"):
diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py
index ef6219389..a50dba3a0 100644
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -6,9 +6,9 @@
 
 import pytest
-from llama_stack_client.lib.inference.event_logger import EventLogger
 from pydantic import BaseModel
 
 
+
 PROVIDER_TOOL_PROMPT_FORMAT = {
     "remote::ollama": "python_list",
     "remote::together": "json",
@@ -39,7 +39,7 @@ def text_model_id(llama_stack_client):
     available_models = [
         model.identifier
         for model in llama_stack_client.models.list()
-        if model.identifier.startswith("meta-llama")
+        if model.identifier.startswith("meta-llama") and "405" not in model.identifier
     ]
     assert len(available_models) > 0
     return available_models[0]
@@ -208,12 +208,9 @@ def test_text_chat_completion_streaming(
         stream=True,
     )
     streamed_content = [
-        str(log.content.lower().strip())
-        for log in EventLogger().log(response)
-        if log is not None
+        str(chunk.event.delta.text.lower().strip()) for chunk in response
     ]
     assert len(streamed_content) > 0
-    assert "assistant>" in streamed_content[0]
     assert expected.lower() in "".join(streamed_content)
 
 
@@ -250,17 +247,16 @@ def test_text_chat_completion_with_tool_calling_and_non_streaming(
 def extract_tool_invocation_content(response):
     text_content: str = ""
     tool_invocation_content: str = ""
-    for log in EventLogger().log(response):
-        if log is None:
-            continue
-        if isinstance(log.content, str):
-            text_content += log.content
-        elif isinstance(log.content, object):
-            if isinstance(log.content.content, str):
-                continue
-            elif isinstance(log.content.content, object):
-                tool_invocation_content += f"[{log.content.content.tool_name}, {log.content.content.arguments}]"
-
+    for chunk in response:
+        delta = chunk.event.delta
+        if delta.type == "text":
+            text_content += delta.text
+        elif delta.type == "tool_call":
+            if isinstance(delta.content, str):
+                tool_invocation_content += delta.content
+            else:
+                call = delta.content
+                tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
     return text_content, tool_invocation_content
 
 
@@ -280,7 +276,6 @@ def test_text_chat_completion_with_tool_calling_and_streaming(
     )
 
     text_content, tool_invocation_content = extract_tool_invocation_content(response)
-    assert "Assistant>" in text_content
     assert tool_invocation_content == "[get_weather, {'location': 'San Francisco, CA'}]"
 
 
@@ -368,10 +363,7 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
         stream=True,
     )
     streamed_content = [
-        str(log.content.lower().strip())
-        for log in EventLogger().log(response)
-        if log is not None
+        str(chunk.event.delta.text.lower().strip()) for chunk in response
    ]
     assert len(streamed_content) > 0
-    assert "assistant>" in streamed_content[0]
     assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
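
Note for reviewers (not part of the patch): a minimal sketch of the streaming consumption pattern the updated tests now use, iterating raw chunks instead of EventLogger. The chat_completion call and the llama_stack_client / text_model_id names mirror the existing test fixtures and are illustrative only; the delta handling follows extract_tool_invocation_content above.

    # Illustrative sketch, assuming the client-sdk test fixtures are in scope.
    response = llama_stack_client.inference.chat_completion(
        model_id=text_model_id,
        messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
        stream=True,
    )
    for chunk in response:
        delta = chunk.event.delta
        if delta.type == "text":
            # Plain text pieces arrive as delta.text.
            print(delta.text, end="")
        elif delta.type == "tool_call":
            # delta.content is either a partial string or a parsed call
            # exposing .tool_name and .arguments, as the tests assert.
            print(delta.content)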