feat: Add /v1/embeddings endpoint to batches API

This PR extends the Llama Stack Batches API to support the /v1/embeddings endpoint, enabling efficient batch processing of embedding requests alongside the existing /v1/chat/completions and /v1/completions support.

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
2025-10-04 04:04:14 +00:00 · 2025-09-08 16:55:17 -07:00 · 2025-09-08 16:55:17 -07:00 · 531b1451dc
commit 531b1451dc
parent aab22dc759
3 changed files with 122 additions and 6 deletions
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -213,7 +213,6 @@ class TestReferenceBatchesImpl:
    @pytest.mark.parametrize(
        "endpoint",
        [
-            "/v1/embeddings",
            "/v1/invalid/endpoint",
            "",
        ],
@@ -765,3 +764,12 @@ class TestReferenceBatchesImpl:
        await asyncio.sleep(0.042)  # let tasks start

        assert active_batches == 2, f"Expected 2 active batches, got {active_batches}"
+
+    async def test_create_batch_embeddings_endpoint(self, provider):
+        """Test that create_batch accepts /v1/embeddings and returns a batch with that endpoint."""
+        batch = await provider.create_batch(
+            input_file_id="file_123",
+            endpoint="/v1/embeddings",
+            completion_window="24h",
+        )
+        assert batch.endpoint == "/v1/embeddings"