add NVIDIA NIM inference adapter

This commit is contained in:
Matthew Farrellee 2024-10-22 14:31:11 -04:00
parent ac93dd89cf
commit 2dd8c4bcb6
12 changed files with 1115 additions and 0 deletions

View file

@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack.apis.inference import Inference
from pytest_httpx import HTTPXMock
pytestmark = pytest.mark.asyncio
async def test_chat_completion(
    mock_health: HTTPXMock,
    mock_chat_completion: HTTPXMock,
    client: Inference,
    base_url: str,
) -> None:
    """
    Calling chat_completion must hit the mocked health endpoint first.

    The ``mock_health`` and ``mock_chat_completion`` fixtures register the
    HTTPX responses; the test passes when the adapter issues the expected
    requests against them.
    """
    # The fixture yields an awaitable; resolve it to the usable client.
    inference = await client
    # Non-streaming request with a throwaway prompt — only the request
    # flow (health check then chat completion) is under test here.
    await inference.chat_completion(
        model="Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "BOGUS"}],
        stream=False,
    )
# TODO(mf): test stream=True for each case
# TODO(mf): test completion
# TODO(mf): test embedding