Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 17:22:38 +00:00)
Merge branch 'main' into aidand-391-guided-decoding-vllm_3
Commit 44bb23ebc8
177 changed files with 5725 additions and 594 deletions
@@ -94,6 +94,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::cerebras",
         ):
             pytest.skip("Other inference providers don't support completion() yet")
@@ -140,6 +141,8 @@ class TestInference:
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
+            "remote::cerebras",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
             )
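
Both hunks above extend a skip list inside a provider-type guard whose opening `if` sits outside the visible context. Below is a minimal, self-contained sketch of that gating pattern, assuming a hypothetical `provider_type` fixture (the real suite derives the provider type from the configured inference stack, and the visible hunks start mid-list, so the full set of allowed providers may be longer):

    import pytest

    # Providers assumed to support plain completion(); anything else is skipped.
    # Only the providers visible in the hunks above are listed here.
    COMPLETION_PROVIDERS = {
        "remote::tgi",
        "remote::together",
        "remote::fireworks",
        "remote::cerebras",
    }


    @pytest.fixture
    def provider_type() -> str:
        # Hypothetical stand-in; the real fixture resolves the active provider.
        return "remote::vllm"


    def test_completion_gating(provider_type):
        if provider_type not in COMPLETION_PROVIDERS:
            pytest.skip("Other inference providers don't support completion() yet")
        # ... the actual completion() assertions would run here ...

Running with `pytest -rs` reports the skip reason for providers outside the set.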
@@ -213,7 +216,15 @@ class TestInference:
         response = await inference_impl.chat_completion(
             model_id=inference_model,
             messages=[
-                SystemMessage(content="You are a helpful assistant."),
+                # we include context about Michael Jordan in the prompt so that the test is
+                # focused on the functionality of the model and not on the information embedded
+                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
+                SystemMessage(
+                    content=(
+                        "You are a helpful assistant.\n\n"
+                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+                    )
+                ),
                 UserMessage(content="Please give me information about Michael Jordan."),
             ],
             stream=False,
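
The last hunk grounds the test by moving the relevant facts into the system prompt instead of relying on what the model memorized during training. Below is a standalone sketch of that prompt-construction idea, using plain dicts rather than llama-stack's SystemMessage/UserMessage types (the call site, model id, and any assertions are outside this hunk and are not reproduced here):

    # Standalone sketch: compose a grounded system prompt so the test checks
    # instruction following rather than memorized knowledge. Message shapes
    # here are illustrative, not the llama-stack types used in the diff.
    FACTS = (
        "Michael Jordan was born in 1963. "
        "He played basketball for the Chicago Bulls for 15 seasons."
    )


    def build_messages(question: str) -> list[dict]:
        system_prompt = "You are a helpful assistant.\n\n" + FACTS
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ]


    if __name__ == "__main__":
        for message in build_messages("Please give me information about Michael Jordan."):
            print(f"{message['role']}: {message['content'][:60]}...")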