feat: Add /v1/embeddings endpoint to batches API (#3384)

# What does this PR do?
This PR extends the Llama Stack Batches API to support the
/v1/embeddings endpoint, enabling efficient batch processing of
embedding requests alongside the existing /v1/chat/completions and
/v1/completions support.

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->
Closes: https://github.com/llamastack/llama-stack/issues/3145

## Test Plan
<!-- Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.* -->
```
(stack-client) ➜  llama-stack git:(support/embeddings-api) conda activate stack-client && python -m pytest tests/unit/providers/batches/test_reference.py -v                             
============================================================================================================================================ test session starts =============================================================================================================================================
platform darwin -- Python 3.12.11, pytest-7.4.4, pluggy-1.5.0 -- /Users/vnarsing/miniconda3/envs/stack-client/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.11', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '7.4.4', 'pluggy': '1.5.0'}, 'Plugins': {'asyncio': '0.23.8', 'cov': '6.0.0', 'timeout': '2.2.0', 'socket': '0.7.0', 'xdist': '3.8.0', 'html': '3.1.1', 'langsmith': '0.3.39', 'anyio': '4.8.0', 'metadata': '3.0.0'}}
rootdir: /Users/vnarsing/go/src/github/meta-llama/llama-stack
configfile: pyproject.toml
plugins: asyncio-0.23.8, cov-6.0.0, timeout-2.2.0, socket-0.7.0, xdist-3.8.0, html-3.1.1, langsmith-0.3.39, anyio-4.8.0, metadata-3.0.0
asyncio: mode=Mode.AUTO
collected 46 items                                                                                                                                                                                                                                                                                           

tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_and_retrieve_batch_success PASSED                                                                                                                                                                                [  2%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_without_metadata PASSED                                                                                                                                                                                    [  4%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_completion_window PASSED                                                                                                                                                                                   [  6%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_endpoints[/v1/invalid/endpoint] PASSED                                                                                                                                                             [  8%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_endpoints[] PASSED                                                                                                                                                                                 [ 10%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_metadata PASSED                                                                                                                                                                                    [ 13%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_retrieve_batch_not_found PASSED                                                                                                                                                                                         [ 15%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_success PASSED                                                                                                                                                                                             [ 17%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[failed] PASSED                                                                                                                                                                            [ 19%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[expired] PASSED                                                                                                                                                                           [ 21%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[completed] PASSED                                                                                                                                                                         [ 23%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_not_found PASSED                                                                                                                                                                                           [ 26%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_empty PASSED                                                                                                                                                                                               [ 28%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_single_batch PASSED                                                                                                                                                                                        [ 30%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_multiple_batches PASSED                                                                                                                                                                                    [ 32%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_with_limit PASSED                                                                                                                                                                                          [ 34%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_with_pagination PASSED                                                                                                                                                                                     [ 36%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_invalid_after PASSED                                                                                                                                                                                       [ 39%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_kvstore_persistence PASSED                                                                                                                                                                                              [ 41%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_not_found PASSED                                                                                                                                                                                    [ 43%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_exists_empty_content PASSED                                                                                                                                                                         [ 45%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_mixed_valid_invalid_json PASSED                                                                                                                                                                     [ 47%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_model PASSED                                                                                                                                                                                     [ 50%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[custom_id-custom_id-missing_required_parameter-Missing required parameter: custom_id] PASSED                                                                         [ 52%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[method-method-missing_required_parameter-Missing required parameter: method] PASSED                                                                                  [ 54%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[url-url-missing_required_parameter-Missing required parameter: url] PASSED                                                                                           [ 56%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[body-body-missing_required_parameter-Missing required parameter: body] PASSED                                                                                        [ 58%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[model-body.model-invalid_request-Model parameter is required] PASSED                                                                                                 [ 60%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[messages-body.messages-invalid_request-Messages parameter is required] PASSED                                                                                        [ 63%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[custom_id-custom_id-missing_required_parameter-Missing required parameter: custom_id] PASSED                                                                              [ 65%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[method-method-missing_required_parameter-Missing required parameter: method] PASSED                                                                                       [ 67%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[url-url-missing_required_parameter-Missing required parameter: url] PASSED                                                                                                [ 69%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[body-body-missing_required_parameter-Missing required parameter: body] PASSED                                                                                             [ 71%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[model-body.model-invalid_request-Model parameter is required] PASSED                                                                                                      [ 73%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[prompt-body.prompt-invalid_request-Prompt parameter is required] PASSED                                                                                                   [ 76%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_url_mismatch PASSED                                                                                                                                                                                      [ 78%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_multiple_errors_per_request PASSED                                                                                                                                                                       [ 80%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_request_format PASSED                                                                                                                                                                            [ 82%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[custom_id-custom_id-12345-Custom_id must be a string] PASSED                                                                                                                     [ 84%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[url-url-123-URL must be a string] PASSED                                                                                                                                         [ 86%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[method-method-invalid_value2-Method must be a string] PASSED                                                                                                                     [ 89%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[body-body-invalid_value3-Body must be a JSON dictionary object] PASSED                                                                                                           [ 91%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[model-body.model-123-Model must be a string] PASSED                                                                                                                              [ 93%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[messages-body.messages-invalid messages format-Messages must be an array] PASSED                                                                                                 [ 95%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_max_concurrent_batches PASSED                                                                                                                                                                                           [ 97%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_embeddings_endpoint PASSED                                                                                                                                                                                 [100%]

```

---------

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Varsha 2025-10-10 13:25:58 -07:00 committed by GitHub
parent 1394403360
commit 32fde8d9a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 1755 additions and 28 deletions

View file

@ -0,0 +1,422 @@
{
"test_id": "tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_embeddings[emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello world",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.034477483,
0.030899182,
0.0066526434,
0.026075281,
-0.039411988,
-0.16037956,
0.06692074,
-0.006511468,
-0.047467157,
0.014774274,
0.07094562,
0.055527706,
0.019183245,
-0.026297163,
-0.010018651,
-0.02694715,
0.0223884,
-0.02220693,
-0.14977267,
-0.017530814,
0.0075938613,
0.054253556,
0.0032258728,
0.031724673,
-0.08466085,
-0.029342307,
0.05155048,
0.048105717,
-0.0032670307,
-0.05822795,
0.041971523,
0.022229431,
0.1281518,
-0.022270948,
-0.011725874,
0.06294936,
-0.032847952,
-0.09124354,
-0.031128692,
0.05274829,
0.047067728,
-0.08414196,
-0.029979317,
-0.020692566,
0.00949804,
-0.0035992558,
0.0074442336,
0.03928378,
0.09326073,
-0.0037437282,
-0.052663893,
-0.058101393,
-0.006925679,
0.0052269334,
0.08290669,
0.019312402,
0.0062818974,
-0.010331665,
0.008930684,
-0.037712026,
-0.045175705,
0.023950849,
-0.006926045,
0.013429504,
0.100098,
-0.0715888,
-0.021700105,
0.031693522,
-0.05161389,
-0.08224763,
-0.06577986,
-0.009853981,
0.005808086,
0.07364217,
-0.034008067,
0.024907362,
0.014441484,
0.02645124,
0.009659713,
0.030284341,
0.052878983,
-0.07536944,
0.009890014,
0.029907802,
0.017498897,
0.02313779,
0.0018918256,
0.0013156217,
-0.047173936,
-0.011251131,
-0.11422648,
-0.019960148,
0.040278148,
0.0022633963,
-0.07986738,
-0.025357265,
0.094500035,
-0.029062947,
-0.14495483,
0.2309815,
0.027703581,
0.03208736,
0.031073036,
0.042917974,
0.064246915,
0.032118786,
-0.004844535,
0.055775862,
-0.03756279,
-0.021487191,
-0.028432492,
-0.028887685,
0.03842892,
-0.017359573,
0.052465834,
-0.07493626,
-0.031175744,
0.021936033,
-0.039823197,
-0.008681939,
0.026978256,
-0.048551314,
0.011414809,
0.029628372,
-0.020587107,
0.013077965,
0.028824588,
-3.1978743e-33,
0.06475607,
-0.018065408,
0.05190019,
0.12193858,
0.028755108,
0.008794777,
-0.07044016,
-0.016856866,
0.040675826,
0.04222898,
0.025450956,
0.035772353,
-0.049134083,
0.0021395232,
-0.015527445,
0.05065655,
-0.04814189,
0.03586998,
-0.004134139,
0.10165314,
-0.055980552,
-0.010677752,
0.011231545,
0.09068785,
0.004311188,
0.035094332,
-0.009658399,
-0.09383056,
0.092755266,
0.00799794,
-0.0077075018,
-0.052119244,
-0.01259255,
0.0032277475,
0.005989667,
0.0075889886,
0.010571857,
-0.08629758,
-0.06985891,
-0.002511263,
-0.091053724,
0.0468712,
0.05203361,
0.0072902967,
0.010906411,
-0.0052922186,
0.013883815,
0.021929385,
0.0341257,
0.060227357,
0.00018942523,
0.0146624865,
-0.07000342,
0.028425341,
-0.027542787,
0.01082086,
0.03491755,
-0.022430921,
0.0096813915,
0.07725412,
0.021618832,
0.114911504,
-0.06805403,
0.023872944,
-0.015999107,
-0.017794114,
0.06442477,
0.03206309,
0.050293576,
-0.005988605,
-0.03376946,
0.017821673,
0.016567992,
0.063335925,
0.034753703,
0.046586752,
0.09789875,
-0.006560692,
0.025039855,
-0.07780643,
0.016878096,
-0.0010056288,
0.02257608,
-0.0382721,
0.09572481,
-0.005296001,
0.010567662,
-0.11538674,
-0.013233586,
-0.010786205,
-0.083147496,
0.073254965,
0.049377624,
-0.009025328,
-0.0957893,
3.3687185e-33,
0.12494067,
0.019226579,
-0.058172084,
-0.035952393,
-0.050862074,
-0.045700952,
-0.0826631,
0.14819908,
-0.088347495,
0.060315337,
0.05109269,
0.010308115,
0.1411753,
0.030833788,
0.06101746,
-0.052806143,
0.13661332,
0.00917483,
-0.017295862,
-0.0128495265,
-0.007851698,
-0.051084496,
-0.05235087,
0.0076632234,
-0.015217299,
0.017015414,
0.021324545,
0.020506723,
-0.12004153,
0.014523494,
0.026743378,
0.025221687,
-0.04270567,
0.00676352,
-0.014453511,
0.045142446,
-0.091383636,
-0.019459482,
-0.017806036,
-0.055010412,
-0.05270923,
-0.010370778,
-0.052053526,
0.020918628,
-0.080037735,
-0.012147244,
-0.057777684,
0.023249507,
-0.007838778,
-0.025807643,
-0.07987164,
-0.020683115,
0.04888083,
-0.020459235,
-0.049192864,
0.01407799,
-0.063744746,
-0.0077936463,
0.016429903,
-0.025707569,
0.013326097,
0.026210392,
0.009855086,
0.06317218,
0.0026150644,
-0.0065879063,
0.0166049,
0.032400407,
0.038005095,
-0.036269873,
-0.0069020875,
0.00019545198,
-0.0017537851,
-0.027427403,
-0.02801922,
0.049696837,
-0.028842367,
-0.0023814398,
0.01481421,
0.00976869,
0.0057697925,
0.01341087,
0.00551593,
0.037237898,
0.007291808,
0.040068958,
0.08141818,
0.07197348,
-0.013163506,
-0.042782705,
-0.010938265,
0.0049547236,
-0.00923014,
0.035068717,
-0.051007,
-1.5708556e-08,
-0.088558294,
0.02391312,
-0.016132735,
0.03169382,
0.027184812,
0.052484553,
-0.047118798,
-0.058789898,
-0.063239954,
0.040775288,
0.049807984,
0.106462926,
-0.07448737,
-0.012401869,
0.018361589,
0.039486438,
-0.024830224,
0.014500051,
-0.03712332,
0.020043189,
8.399218e-05,
0.009852795,
0.024823224,
-0.05252818,
0.02932855,
-0.0871494,
-0.01447227,
0.025996566,
-0.018731978,
-0.07618361,
0.03505914,
0.10363578,
-0.0280213,
0.012769872,
-0.076482065,
-0.018743375,
0.024961015,
0.08152011,
0.06866303,
-0.06411612,
-0.08387694,
0.061479986,
-0.03345598,
-0.10615398,
-0.040166635,
0.032536518,
0.076652974,
-0.07297006,
0.00039833272,
-0.0409393,
-0.07580284,
0.027465926,
0.07468789,
0.017779494,
0.09106629,
0.11033428,
0.00065298256,
0.051472265,
-0.01461242,
0.033237122,
0.023671487,
-0.022980422,
0.038988944,
0.030206418
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 2,
"total_tokens": 2
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -323,3 +323,92 @@ class TestBatchesIntegration:
if final_batch.error_file_id is not None:
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
assert deleted_error_file.deleted
def test_batch_e2e_embeddings(self, openai_client, batch_helper, embedding_model_id):
    """Run an end-to-end embeddings batch covering both string and list inputs.

    Two requests are submitted against ``/v1/embeddings``: ``success-1`` sends a
    single string and ``success-2`` sends a list of three strings. Once the batch
    completes, every output line is matched to its request through ``custom_id``
    rather than through its position in the output file, so the per-request
    checks hold regardless of the order in which results were written.
    """
    # custom_id -> number of embeddings the response must contain.
    expected_embedding_counts = {"success-1": 1, "success-2": 3}

    batch_requests = [
        {
            "custom_id": "success-1",
            "method": "POST",
            "url": "/v1/embeddings",
            "body": {"model": embedding_model_id, "input": "Hello world", "encoding_format": "float"},
        },
        {
            "custom_id": "success-2",
            "method": "POST",
            "url": "/v1/embeddings",
            "body": {
                "model": embedding_model_id,
                "input": ["How are you?", "Good morning", "Have a great day"],
                "encoding_format": "float",
            },
        },
    ]

    # Keep the uploaded input file alive until the batch has finished.
    with batch_helper.create_file(batch_requests) as uploaded_file:
        batch = openai_client.batches.create(
            input_file_id=uploaded_file.id,
            endpoint="/v1/embeddings",
            completion_window="24h",
            metadata={"test": "e2e_embeddings_success"},
        )

        final_batch = batch_helper.wait_for(
            batch.id,
            max_wait_time=3 * 60,
            expected_statuses={"completed"},
            timeout_action="skip",
        )

    assert final_batch.status == "completed"
    assert final_batch.request_counts is not None
    assert final_batch.request_counts.total == 2
    assert final_batch.request_counts.completed == 2
    assert final_batch.output_file_id is not None

    # The files client may hand back either plain text or a binary response wrapper.
    output_content = openai_client.files.content(final_batch.output_file_id)
    if isinstance(output_content, str):
        output_text = output_content
    else:
        output_text = output_content.content.decode("utf-8")

    output_lines = output_text.strip().split("\n")
    assert len(output_lines) == 2

    # Index results by custom_id so the checks below do not depend on line order.
    results_by_id = {}
    for line in output_lines:
        result = json.loads(line)
        results_by_id[result["custom_id"]] = result
    assert set(results_by_id) == set(expected_embedding_counts)

    for custom_id, expected_count in expected_embedding_counts.items():
        result = results_by_id[custom_id]
        assert "response" in result
        assert result["response"]["status_code"] == 200

        # The response body is stored as a JSON-encoded string.
        response_body = json.loads(result["response"]["body"])
        assert response_body["object"] == "list"
        assert "data" in response_body
        assert len(response_body["data"]) == expected_count
        for i, embedding_data in enumerate(response_body["data"]):
            assert "embedding" in embedding_data
            assert "index" in embedding_data
            assert embedding_data["index"] == i

    # Clean up the files produced by the batch.
    deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
    assert deleted_output_file.deleted

    if final_batch.error_file_id is not None:
        deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
        assert deleted_error_file.deleted

View file

@ -12,28 +12,10 @@
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2-vision:11b",
"name": "llama3.2-vision:11b",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"expires_at": "2025-10-08T12:40:47.430429-07:00",
"size": 11765236384,
"size_vram": 11765236384,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"families": [
"mllama"
],
"parameter_size": "10.7B",
"quantization_level": "Q4_K_M"
}
}
]
"models": []
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@ -93,5 +93,6 @@
}
],
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@ -213,7 +213,6 @@ class TestReferenceBatchesImpl:
@pytest.mark.parametrize(
"endpoint",
[
"/v1/embeddings",
"/v1/invalid/endpoint",
"",
],
@ -765,3 +764,12 @@ class TestReferenceBatchesImpl:
await asyncio.sleep(0.042) # let tasks start
assert active_batches == 2, f"Expected 2 active batches, got {active_batches}"
async def test_create_batch_embeddings_endpoint(self, provider):
    """Check that create_batch accepts the embeddings endpoint and echoes it back."""
    embeddings_endpoint = "/v1/embeddings"
    request_kwargs = {
        "input_file_id": "file_123",
        "endpoint": embeddings_endpoint,
        "completion_window": "24h",
    }

    created_batch = await provider.create_batch(**request_kwargs)

    assert created_batch.endpoint == embeddings_endpoint