feat: add Azure OpenAI inference provider support
Llama-stack now supports a new OpenAI-compatible endpoint with Azure OpenAI. The starter distro has been updated to add the new remote inference provider. A few tests have been modified and improved.

Test plan: deploy a model in the Azure portal, then:

```
$ AZURE_API_KEY=... AZURE_API_BASE=... uv run llama stack build --image-type venv --providers inference=remote::azure --run
...
$ LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -v -ra --text-model azure/gpt-4.1 tests/integration/inference/test_openai_completion.py
...
```

Results:

```
============================================= test session starts ==============================================
platform darwin -- Python 3.12.8, pytest-8.4.1, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3
cachedir: .pytest_cache
metadata: {'Python': '3.12.8', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.1', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0', 'hydra-core': '1.3.2'}}
rootdir: /Users/leseb/Documents/AI/llama-stack
configfile: pyproject.toml
plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0, hydra-core-1.3.2
asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collected 27 items

tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=azure/gpt-5-mini-inference:completion:sanity] SKIPPED [  3%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=azure/gpt-5-mini-inference:completion:suffix] SKIPPED [  7%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=azure/gpt-5-mini-inference:completion:sanity] SKIPPED [ 11%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=azure/gpt-5-mini-1] SKIPPED [ 14%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=azure/gpt-5-mini] SKIPPED [ 18%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_01] PASSED [ 22%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 25%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 29%]
tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=azure/gpt-5-mini-True] PASSED [ 33%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=azure/gpt-5-mini-True] PASSED [ 37%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=azure/gpt-5-mini] SKIPPED (Model azure/gpt-5-mini hosted by remote::azure doesn't support chat completion calls with base64 encoded files.) [ 40%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=azure/gpt-5-mini-0] SKIPPED [ 44%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_02] PASSED [ 48%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 51%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 55%]
tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=azure/gpt-5-mini-False] PASSED [ 59%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=azure/gpt-5-mini-False] PASSED [ 62%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_01] PASSED [ 66%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 70%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 74%]
tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=azure/gpt-5-mini-True] PASSED [ 77%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=azure/gpt-5-mini-True] PASSED [ 81%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_02] PASSED [ 85%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 88%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 92%]
tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=azure/gpt-5-mini-False] PASSED [ 96%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=azure/gpt-5-mini-False] PASSED [100%]

=========================================== short test summary info ============================================
SKIPPED [3] tests/integration/inference/test_openai_completion.py:63: Model azure/gpt-5-mini hosted by remote::azure doesn't support OpenAI completions.
SKIPPED [3] tests/integration/inference/test_openai_completion.py:118: Model azure/gpt-5-mini hosted by remote::azure doesn't support vllm extra_body parameters.
SKIPPED [1] tests/integration/inference/test_openai_completion.py:124: Model azure/gpt-5-mini hosted by remote::azure doesn't support chat completion calls with base64 encoded files.
================================== 20 passed, 7 skipped, 2 warnings in 51.77s ==================================
```

Signed-off-by: Sébastien Han <seb@redhat.com>
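Once the server from the test plan is running, the new provider can also be exercised with the plain `openai` client. A minimal sketch, assuming the stack exposes its OpenAI-compatible API under `/v1/openai/v1` on the same port; the base URL path and the dummy API key are assumptions about the local deployment, not something this commit defines:

```python
# Minimal sketch: query a llama-stack server that routes to remote::azure.
# base_url path and api_key value are assumptions about the local setup.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

resp = client.chat.completions.create(
    model="azure/gpt-5-mini",  # the Azure deployment registered with the stack
    messages=[{"role": "user", "content": "What's the name of the Sun in latin?"}],
)
print(resp.choices[0].message.content)
```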
This commit is contained in: parent 8e05c68d15, commit ee3df99de4
26 changed files with 6403 additions and 13 deletions
tests/integration/inference/test_openai_completion.py

@@ -6,12 +6,25 @@
 
 import time
+import unicodedata
 
 import pytest
 
 from ..test_cases.test_case import TestCase
 
 
+def _normalize_text(text: str) -> str:
+    """
+    Normalize Unicode text by removing diacritical marks for comparison.
+
+    The test case streaming_01 expects the answer "Sol" for the question "What's the name of the Sun
+    in latin?", but the model is returning "sōl" (with a macron over the 'o'), which is the correct
+    Latin spelling. The test is failing because it's doing a simple case-insensitive string search
+    for "sol" but the actual response contains the diacritical mark.
+    """
+    return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()
+
+
 def provider_from_model(client_with_models, model_id):
     models = {m.identifier: m for m in client_with_models.models.list()}
     models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
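For reference, a standalone illustration (not part of the diff) of what `_normalize_text` does to the answer that motivated it: NFD decomposition splits "ō" into "o" plus U+0304 (combining macron), and the ASCII encode with `errors="ignore"` then drops the combining mark.

```python
import unicodedata

def _normalize_text(text: str) -> str:
    # Decompose accented characters, drop the combining marks, lowercase.
    return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()

assert _normalize_text("sōl") == "sol"
assert "sol" in _normalize_text("The Latin name of the Sun is sōl.")
```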
@@ -42,6 +55,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
         "remote::groq",
         "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
         "remote::anthropic",  # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
+        "remote::azure",  # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
+        # does not work with the specified model, gpt-5-mini. Please choose different model and try
+        # again. You can learn more about which models can be used with each operation here:
+        # https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
 
@@ -157,7 +174,8 @@ def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_models, text_model_id, test_case):
     assert len(response.choices) > 0
     choice = response.choices[0]
     assert len(choice.text) > 5
-    assert "france" in choice.text.lower()
+    normalized_text = _normalize_text(choice.text)
+    assert "france" in normalized_text
 
 
 @pytest.mark.parametrize(
@@ -248,7 +266,9 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models, text_model_id, test_case):
     )
     message_content = response.choices[0].message.content.lower().strip()
     assert len(message_content) > 0
-    assert expected.lower() in message_content
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text(message_content)
+    assert normalized_expected in normalized_content
 
 
 @pytest.mark.parametrize(
@@ -272,10 +292,13 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, text_model_id, test_case):
     )
     streamed_content = []
     for chunk in response:
-        if chunk.choices[0].delta.content:
+        # On some providers like Azure, the choices are empty on the first chunk, so we need to check for that
+        if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
             streamed_content.append(chunk.choices[0].delta.content.lower().strip())
     assert len(streamed_content) > 0
-    assert expected.lower() in "".join(streamed_content)
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text("".join(streamed_content))
+    assert normalized_expected in normalized_content
 
 
 @pytest.mark.parametrize(
@@ -308,8 +331,12 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models, text_model_id, test_case):
                 streamed_content.get(choice.index, "") + choice.delta.content.lower().strip()
             )
     assert len(streamed_content) == 2
+    normalized_expected = _normalize_text(expected)
     for i, content in streamed_content.items():
-        assert expected.lower() in content, f"Choice {i}: Expected {expected.lower()} in {content}"
+        normalized_content = _normalize_text(content)
+        assert normalized_expected in normalized_content, (
+            f"Choice {i}: Expected {normalized_expected} in {normalized_content}"
+        )
 
 
 @pytest.mark.parametrize(
@@ -339,9 +366,9 @@ def test_inference_store(compat_client, client_with_models, text_model_id, stream):
         content = ""
         response_id = None
         for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                 response_id = chunk.id
-            if chunk.choices[0].delta.content:
+            if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                 content += chunk.choices[0].delta.content
     else:
         response_id = response.id
@@ -410,11 +437,12 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_model_id):
         content = ""
         response_id = None
         for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                 response_id = chunk.id
-            if delta := chunk.choices[0].delta:
-                if delta.content:
-                    content += delta.content
+            if chunk.choices and len(chunk.choices) > 0:
+                if delta := chunk.choices[0].delta:
+                    if delta.content:
+                        content += delta.content
     else:
         response_id = response.id
         content = response.choices[0].message.content
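The same defensive pattern, tolerating an empty `choices` list on the first chunk and capturing the response id once, now appears in all three streaming loops. A standalone sketch of that shape, with illustrative names not taken from the diff:

```python
def accumulate_stream(response):
    # Collect the response id and text content from a streamed chat completion.
    # Some providers (e.g. Azure) send a first chunk whose `choices` is empty,
    # so every access to choices[0] is guarded.
    response_id = None
    content = ""
    for chunk in response:
        if response_id is None and chunk.id:
            response_id = chunk.id
        if chunk.choices and (delta := chunk.choices[0].delta) and delta.content:
            content += delta.content
    return response_id, content
```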
@@ -484,4 +512,5 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
         stream=False,
     )
     message_content = response.choices[0].message.content.lower().strip()
-    assert "hello world" in message_content
+    normalized_content = _normalize_text(message_content)
+    assert "hello world" in normalized_content