mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
# What does this PR do? adds embedding and dynamic model support to Together inference adapter - updated to use OpenAIMixin - workarounds for Together api quirks - recordings for together suite when subdirs=inference,pattern=openai ## Test Plan ``` $ TOGETHER_API_KEY=_NONE_ ./scripts/integration-tests.sh --stack-config server:ci-tests --setup together --subdirs inference --pattern openai ... tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:sanity] instantiating llama_stack_client Port 8321 is already in use, assuming server is already running... llama_stack_client instantiated in 0.121s PASSED [ 2%] tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:suffix] SKIPPED [ 4%] tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:completion:sanity] PASSED [ 6%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-1] SKIPPED [ 8%] tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free] SKIPPED [ 10%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_01] PASSED [ 12%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] PASSED [ 14%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] SKIPPED [ 17%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 19%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 21%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free] SKIPPED [ 23%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 25%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 27%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 29%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 31%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 34%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 36%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 38%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 40%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 42%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 44%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-0] SKIPPED [ 46%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_02] PASSED [ 48%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] PASSED [ 51%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] SKIPPED [ 53%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 55%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 57%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 59%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 61%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 63%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 65%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 68%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 70%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 72%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] PASSED [ 74%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 76%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=together/togethercomputer/m2-bert-80M-32k-retrieval] SKIPPED [ 78%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_01] PASSED [ 80%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] PASSED [ 82%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_01] SKIPPED [ 85%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 87%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-True] PASSED [ 89%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:non_streaming_02] PASSED [ 91%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] PASSED [ 93%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-inference:chat_completion:streaming_02] SKIPPED [ 95%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [ 97%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free-False] PASSED [100%] ============================================ 30 passed, 17 skipped, 50 deselected, 4 warnings in 21.96s ============================================= ```
350 lines
10 KiB
JSON
350 lines
10 KiB
JSON
{
|
|
"request": {
|
|
"method": "POST",
|
|
"url": "https://api.together.xyz/v1/v1/chat/completions",
|
|
"headers": {},
|
|
"body": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "What's the name of the Sun in latin?"
|
|
}
|
|
],
|
|
"stream": true
|
|
},
|
|
"endpoint": "/v1/chat/completions",
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
|
|
},
|
|
"response": {
|
|
"body": [
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "The",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 791
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": "The",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " Latin",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 20023
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " Latin",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " name",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 836
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " name",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " for",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 369
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " for",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " the",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 279
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " the",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " Sun",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 8219
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " Sun",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " is",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 374
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " is",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " \"",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 330
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": " \"",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "Sol",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 49912
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": "Sol",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "\".",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 3343
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": "\".",
|
|
"seed": null
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": null
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null,
|
|
"token_id": 128009
|
|
},
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"logprobs": null,
|
|
"text": "",
|
|
"seed": 10870795372179526000
|
|
}
|
|
],
|
|
"created": 1758039001,
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": null,
|
|
"system_fingerprint": null,
|
|
"usage": {
|
|
"completion_tokens": 11,
|
|
"prompt_tokens": 45,
|
|
"total_tokens": 56,
|
|
"completion_tokens_details": null,
|
|
"prompt_tokens_details": null,
|
|
"cached_tokens": 0
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"is_streaming": true
|
|
}
|
|
}
|