add test for completion logprobs (#532)

# What does this PR do? adds a test for the completion api's logprobs parameter tbd which providers pass this test ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Ran pre-commit to handle lint / formatting issues. - [x] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [x] Wrote necessary unit or integration tests.
2024-12-12 15:19:48 -05:00 · 2024-12-12 15:19:48 -05:00 · 2a9b13dd52
commit 2a9b13dd52
parent 96e158eaac
1 changed files with 55 additions and 0 deletions
--- a/llama_stack/providers/tests/inference/test_text_inference.py
+++ b/llama_stack/providers/tests/inference/test_text_inference.py
@ -128,6 +128,61 @@ class TestInference:
        last = chunks[-1]
        assert last.stop_reason == StopReason.out_of_tokens

+    @pytest.mark.asyncio
+    async def test_completion_logprobs(self, inference_model, inference_stack):
+        inference_impl, _ = inference_stack
+
+        provider = inference_impl.routing_table.get_provider_impl(inference_model)
+        if provider.__provider_spec__.provider_type not in (
+            # "remote::nvidia", -- provider doesn't provide all logprobs
+        ):
+            pytest.skip("Other inference providers don't support completion() yet")
+
+        response = await inference_impl.completion(
+            content="Micheael Jordan is born in ",
+            stream=False,
+            model_id=inference_model,
+            sampling_params=SamplingParams(
+                max_tokens=5,
+            ),
+            logprobs=LogProbConfig(
+                top_k=3,
+            ),
+        )
+
+        assert isinstance(response, CompletionResponse)
+        assert 1 <= len(response.logprobs) <= 5
+        assert response.logprobs, "Logprobs should not be empty"
+        assert all(len(logprob.logprobs_by_token) == 3 for logprob in response.logprobs)
+
+        chunks = [
+            r
+            async for r in await inference_impl.completion(
+                content="Roses are red,",
+                stream=True,
+                model_id=inference_model,
+                sampling_params=SamplingParams(
+                    max_tokens=5,
+                ),
+                logprobs=LogProbConfig(
+                    top_k=3,
+                ),
+            )
+        ]
+
+        assert all(isinstance(chunk, CompletionResponseStreamChunk) for chunk in chunks)
+        assert (
+            1 <= len(chunks) <= 6
+        )  # why 6 and not 5? the response may have an extra closing chunk, e.g. for usage or stop_reason
+        for chunk in chunks:
+            if chunk.delta:  # if there's a token, we expect logprobs
+                assert chunk.logprobs, "Logprobs should not be empty"
+                assert all(
+                    len(logprob.logprobs_by_token) == 3 for logprob in chunk.logprobs
+                )
+            else:  # no token, no logprobs
+                assert not chunk.logprobs, "Logprobs should be empty"
+
    @pytest.mark.asyncio
    @pytest.mark.skip("This test is not quite robust")
    async def test_completion_structured_output(self, inference_model, inference_stack):