Ashwin Bharambe 2024-12-16 22:30:10 -08:00
parent 1bcc26ccd1
commit 7a1b60fccf
2 changed files with 4 additions and 16 deletions

@@ -25,7 +25,6 @@ from llama_stack_client import (
     AsyncStream,
     LlamaStackClient,
     NOT_GIVEN,
-    Stream,
 )
 from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
@@ -370,8 +369,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 json=options.json_data,
             ),
         )
-        origin = get_origin(stream_cls)
-        assert origin is Stream
+        # we use asynchronous impl always internally and channel all requests to AsyncLlamaStackClient
+        # however, the top-level caller may be a SyncAPIClient -- so its stream_cls might be a Stream (SyncStream)
+        # so we need to convert it to AsyncStream
         args = get_args(stream_cls)
+        stream_cls = AsyncStream[args[0]]
         response = AsyncAPIResponse(
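
The added comment describes the key mechanism: typing.get_origin and
typing.get_args take apart a parametrized generic such as
Stream[ChatCompletionChunk], so the same item type can be re-wrapped in
AsyncStream. A minimal, runnable sketch with stand-in classes (not the real
llama-stack-client types) showing the conversion:

# Sketch only: Stream/AsyncStream here are stand-ins for illustration,
# not the real llama-stack-client classes.
from typing import Generic, TypeVar, get_args, get_origin

T = TypeVar("T")

class Stream(Generic[T]):       # stand-in for the synchronous Stream
    pass

class AsyncStream(Generic[T]):  # stand-in for AsyncStream
    pass

stream_cls = Stream[int]                 # what a SyncAPIClient caller passes
assert get_origin(stream_cls) is Stream  # what the removed assert verified
args = get_args(stream_cls)              # (int,) -- the stream's item type
stream_cls = AsyncStream[args[0]]        # same item type, async container
print(stream_cls)                        # __main__.AsyncStream[int]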

@@ -4,23 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import sys
-import traceback
-import warnings
-
 import pytest
 from llama_stack_client.lib.inference.event_logger import EventLogger
 
 
-def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
-    log = file if hasattr(file, "write") else sys.stderr
-    traceback.print_stack(file=log)
-    log.write(warnings.formatwarning(message, category, filename, lineno, line))
-
-
-warnings.showwarning = warn_with_traceback
-
-
 def test_text_chat_completion(llama_stack_client):
     # non-streaming
     available_models = [
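
For reference, the deleted block is a stock Python debugging hook: assigning
to warnings.showwarning makes every warning print the call stack that raised
it, which helps locate noisy warnings during test runs. Reassembled as a
standalone script (the final warnings.warn call is added here purely to
demonstrate the output):

import sys
import traceback
import warnings

def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    # Write the current call stack, then the formatted warning, to the
    # warning's target file (stderr by default).
    log = file if hasattr(file, "write") else sys.stderr
    traceback.print_stack(file=log)
    log.write(warnings.formatwarning(message, category, filename, lineno, line))

warnings.showwarning = warn_with_traceback

warnings.warn("example", DeprecationWarning)  # now preceded by a full stack trace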