add NVIDIA NIM inference adapter

This commit is contained in:
Matthew Farrellee 2024-10-22 14:31:11 -04:00
parent ac93dd89cf
commit 2dd8c4bcb6
12 changed files with 1115 additions and 0 deletions

View file

@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack.apis.inference import Inference
from pytest_httpx import HTTPXMock
pytestmark = pytest.mark.asyncio
async def test_chat_completion(
    mock_health: HTTPXMock,
    mock_chat_completion: HTTPXMock,
    client: Inference,
    base_url: str,
) -> None:
    """
    Calling chat_completion must hit the mocked health endpoint first.

    The ``mock_health`` and ``mock_chat_completion`` fixtures register the
    HTTPX responses; the test passes when the adapter issues the expected
    requests against them.
    """
    # The fixture yields an awaitable; resolve it to the usable client.
    inference = await client
    # Non-streaming request with a throwaway prompt — only the request
    # flow (health check then chat completion) is under test here.
    await inference.chat_completion(
        model="Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "BOGUS"}],
        stream=False,
    )
# TODO(mf): test stream=True for each case
# TODO(mf): test completion
# TODO(mf): test embedding