forked from phoenix-oss/llama-stack-mirror
Merge branch 'main' into eval_api_final
This commit is contained in:
commit bc0cd07008
79 changed files with 3257 additions and 2358 deletions
@@ -99,6 +99,33 @@ def test_text_completion_streaming(client_with_models, text_model_id, test_case)
    assert len(content_str) > 10


@pytest.mark.parametrize(
    "test_case",
    [
        "inference:completion:stop_sequence",
    ],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    # This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
    if inference_provider_type != "remote::vllm":
        pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
    tc = TestCase(test_case)

    response = client_with_models.inference.completion(
        content=tc["content"],
        stream=True,
        model_id=text_model_id,
        sampling_params={
            "max_tokens": 50,
            "stop": ["1963"],
        },
    )
    streamed_content = [chunk.delta for chunk in response]
    content_str = "".join(streamed_content).lower().strip()
    assert "1963" not in content_str


@pytest.mark.parametrize(
    "test_case",
    [
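The same check could also be written against a non-streaming completion. Below is a minimal sketch, not part of the diff, reusing the call shape from the test above and assuming the non-streaming response exposes the generated text as a `.content` string:

    # Sketch only: non-streaming variant of the stop-sequence check.
    # Assumes the non-streaming completion response carries the text in `.content`.
    response = client_with_models.inference.completion(
        content=tc["content"],
        stream=False,
        model_id=text_model_id,
        sampling_params={
            "max_tokens": 50,
            "stop": ["1963"],
        },
    )
    assert "1963" not in response.content.lower()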
@@ -10,6 +10,11 @@
            "expected": "1963"
        }
    },
    "stop_sequence": {
        "data": {
            "content": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963"
        }
    },
    "streaming": {
        "data": {
            "content": "Roses are red,"