mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat: add embedding and dynamic model support to Together inference adapter (#3458)
# What does this PR do? adds embedding and dynamic model support to Together inference adapter - updated to use OpenAIMixin - workarounds for Together api quirks - recordings for together suite when subdirs=inference,pattern=openai ## Test Plan ``` $ TOGETHER_API_KEY=_NONE_ ./scripts/integration-tests.sh --stack-config server:ci-tests --setup together --subdirs inference --pattern openai ... tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:sanity] instantiating llama_stack_client Port 8321 is already in use, assuming server is already running... llama_stack_client instantiated in 0.121s PASSED [ 2%] tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:suffix] SKIPPED [ 4%] tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:sanity] PASSED [ 6%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-1] SKIPPED [ 8%] tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free] SKIPPED [ 10%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_01] PASSED [ 12%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] PASSED [ 14%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] SKIPPED [ 17%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 19%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 21%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free] SKIPPED [ 23%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 25%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 27%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 29%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 31%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 34%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 36%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 38%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 40%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 42%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 44%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-0] SKIPPED [ 46%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_02] PASSED [ 48%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] PASSED [ 51%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] SKIPPED [ 53%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 55%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 57%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 59%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 61%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 63%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 65%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 68%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 70%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 72%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 74%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 76%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 78%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_01] PASSED [ 80%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] PASSED [ 82%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] SKIPPED [ 85%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 87%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 89%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_02] PASSED [ 91%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] PASSED [ 93%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] SKIPPED [ 95%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 97%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [100%] ============================================ 30 passed, 17 skipped, 50 deselected, 4 warnings in 21.96s ============================================= ```
This commit is contained in:
parent
3defdf7d3a
commit
49d4a5cc84
20 changed files with 9229 additions and 180 deletions
|
@ -29,9 +29,35 @@ def provider_from_model(client_with_models, model_id):
|
|||
return providers[provider_id]
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_variable_dimensions(model_id):
|
||||
if "text-embedding-3" not in model_id:
|
||||
pytest.skip("{model_id} does not support variable output embedding dimensions")
|
||||
def skip_if_model_doesnt_support_user_param(client, model_id):
|
||||
provider = provider_from_model(client, model_id)
|
||||
if provider.provider_type in (
|
||||
"remote::together", # service returns 400
|
||||
):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support user param.")
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_encoding_format_base64(client, model_id):
|
||||
provider = provider_from_model(client, model_id)
|
||||
if provider.provider_type in (
|
||||
"remote::together", # param silently ignored, always returns floats
|
||||
):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support encoding_format='base64'.")
|
||||
|
||||
|
||||
def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_id):
|
||||
provider = provider_from_model(client_with_models, model_id)
|
||||
if provider.provider_type in (
|
||||
"remote::together", # returns 400
|
||||
"inline::sentence-transformers",
|
||||
):
|
||||
pytest.skip(
|
||||
f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
|
||||
)
|
||||
if provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id:
|
||||
pytest.skip(
|
||||
f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=["openai_client", "llama_stack_client"])
|
||||
|
@ -92,6 +118,7 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
|
|||
response = compat_client.embeddings.create(
|
||||
model=embedding_model_id,
|
||||
input=input_texts,
|
||||
encoding_format="float",
|
||||
)
|
||||
|
||||
assert response.object == "list"
|
||||
|
@ -127,7 +154,7 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with
|
|||
def test_openai_embeddings_with_dimensions(compat_client, client_with_models, embedding_model_id):
|
||||
"""Test OpenAI embeddings endpoint with custom dimensions parameter."""
|
||||
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
|
||||
skip_if_model_doesnt_support_variable_dimensions(embedding_model_id)
|
||||
skip_if_model_doesnt_support_variable_dimensions(client_with_models, embedding_model_id)
|
||||
|
||||
input_text = "Test dimensions parameter"
|
||||
dimensions = 16
|
||||
|
@ -148,6 +175,7 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em
|
|||
def test_openai_embeddings_with_user_parameter(compat_client, client_with_models, embedding_model_id):
|
||||
"""Test OpenAI embeddings endpoint with user parameter."""
|
||||
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
|
||||
skip_if_model_doesnt_support_user_param(client_with_models, embedding_model_id)
|
||||
|
||||
input_text = "Test user parameter"
|
||||
user_id = "test-user-123"
|
||||
|
@ -196,11 +224,13 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
|
|||
response1 = compat_client.embeddings.create(
|
||||
model=embedding_model_id,
|
||||
input=input_text1,
|
||||
encoding_format="float",
|
||||
)
|
||||
|
||||
response2 = compat_client.embeddings.create(
|
||||
model=embedding_model_id,
|
||||
input=input_text2,
|
||||
encoding_format="float",
|
||||
)
|
||||
|
||||
embedding1 = response1.data[0].embedding
|
||||
|
@ -214,7 +244,8 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
|
|||
def test_openai_embeddings_with_encoding_format_base64(compat_client, client_with_models, embedding_model_id):
|
||||
"""Test OpenAI embeddings endpoint with base64 encoding format."""
|
||||
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
|
||||
skip_if_model_doesnt_support_variable_dimensions(embedding_model_id)
|
||||
skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
|
||||
skip_if_model_doesnt_support_variable_dimensions(client_with_models, embedding_model_id)
|
||||
|
||||
input_text = "Test base64 encoding format"
|
||||
dimensions = 12
|
||||
|
@ -247,6 +278,7 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit
|
|||
def test_openai_embeddings_base64_batch_processing(compat_client, client_with_models, embedding_model_id):
|
||||
"""Test OpenAI embeddings endpoint with base64 encoding for batch processing."""
|
||||
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
|
||||
skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
|
||||
|
||||
input_texts = ["First text for base64", "Second text for base64", "Third text for base64"]
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue