llama-stack-mirror/tests/nvidia/unit/test_health.py
2024-11-04 10:23:31 -05:00

35 lines
860 B
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack.apis.inference import Inference
from pytest_httpx import HTTPXMock
pytestmark = pytest.mark.asyncio
async def test_chat_completion(
    mock_health: HTTPXMock,
    mock_chat_completion: HTTPXMock,
    client: Inference,
    base_url: str,
) -> None:
    """
    Verify that the provider's health endpoint is hit when a
    non-streaming chat_completion request is issued.

    The ``mock_health`` and ``mock_chat_completion`` fixtures install the
    HTTPX mocks; the assertion that the health route was called lives in
    those fixtures, so this test only needs to drive one request through.
    """
    # The ``client`` fixture yields an awaitable; resolve it before use.
    inference = await client
    await inference.chat_completion(
        model="Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "BOGUS"}],
        stream=False,
    )
# TODO(mf): test stream=True for each case
# TODO(mf): test completion
# TODO(mf): test embedding