mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 17:11:12 +00:00 
			
		
		
		
	# What does this PR do? Llama-stack now supports a new OpenAI-compatible endpoint with Azure OpenAI. The starter distro has been updated to add the new remote inference provider. A few tests have been modified and improved. ## Test Plan Deploy a model in the Azure portal then: ``` $ AZURE_API_KEY=... AZURE_API_BASE=... uv run llama stack build --image-type venv --providers inference=remote::azure --run ... $ LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -v -ra --text-model azure/gpt-4.1 tests/integration/inference/test_openai_completion.py ... Results: ``` ============================================= test session starts ============================================== platform darwin -- Python 3.12.8, pytest-8.4.1, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3 cachedir: .pytest_cache metadata: {'Python': '3.12.8', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.1', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0', 'hydra-core': '1.3.2'}} rootdir: /Users/leseb/Documents/AI/llama-stack configfile: pyproject.toml plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0, hydra-core-1.3.2 asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function collected 27 items tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=azure/gpt-5-mini-inference:completion:sanity] SKIPPED [ 3%] tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=azure/gpt-5-mini-inference:completion:suffix] SKIPPED [ 7%] tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=azure/gpt-5-mini-inference:completion:sanity] SKIPPED 
[ 11%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=azure/gpt-5-mini-1] SKIPPED [ 14%] tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=azure/gpt-5-mini] SKIPPED [ 18%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_01] PASSED [ 22%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 25%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 29%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=azure/gpt-5-mini-True] PASSED [ 33%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=azure/gpt-5-mini-True] PASSED [ 37%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=azure/gpt-5-mini] SKIPPED 
[ 40%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=azure/gpt-5-mini-0] SKIPPED [ 44%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_02] PASSED [ 48%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 51%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 55%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=azure/gpt-5-mini-False] PASSED [ 59%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=azure/gpt-5-mini-False] PASSED [ 62%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_01] PASSED [ 66%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 70%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_01] PASSED [ 74%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=azure/gpt-5-mini-True] PASSED [ 77%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=azure/gpt-5-mini-True] PASSED [ 81%] 
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:non_streaming_02] PASSED [ 85%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 88%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=azure/gpt-5-mini-inference:chat_completion:streaming_02] PASSED [ 92%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=azure/gpt-5-mini-False] PASSED [ 96%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=azure/gpt-5-mini-False] PASSED [100%] =========================================== short test summary info ============================================ SKIPPED [3] tests/integration/inference/test_openai_completion.py:63: Model azure/gpt-5-mini hosted by remote::azure doesn't support OpenAI completions. SKIPPED [3] tests/integration/inference/test_openai_completion.py:118: Model azure/gpt-5-mini hosted by remote::azure doesn't support vllm extra_body parameters. SKIPPED [1] tests/integration/inference/test_openai_completion.py:124: Model azure/gpt-5-mini hosted by remote::azure doesn't support chat completion calls with base64 encoded files. ================================== 20 passed, 7 skipped, 2 warnings in 51.77s ================================== ``` Signed-off-by: Sébastien Han <seb@redhat.com>
		
			
				
	
	
		
			60 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
| version: 2
 | |
| distribution_spec:
 | |
|   description: Quick start template for running Llama Stack with several popular providers.
 | |
|     This distribution is intended for CPU-only environments.
 | |
|   providers:
 | |
|     inference:
 | |
|     - provider_type: remote::cerebras
 | |
|     - provider_type: remote::ollama
 | |
|     - provider_type: remote::vllm
 | |
|     - provider_type: remote::tgi
 | |
|     - provider_type: remote::fireworks
 | |
|     - provider_type: remote::together
 | |
|     - provider_type: remote::bedrock
 | |
|     - provider_type: remote::nvidia
 | |
|     - provider_type: remote::openai
 | |
|     - provider_type: remote::anthropic
 | |
|     - provider_type: remote::gemini
 | |
|     - provider_type: remote::vertexai
 | |
|     - provider_type: remote::groq
 | |
|     - provider_type: remote::sambanova
 | |
|     - provider_type: remote::azure
 | |
|     - provider_type: inline::sentence-transformers
 | |
|     vector_io:
 | |
|     - provider_type: inline::faiss
 | |
|     - provider_type: inline::sqlite-vec
 | |
|     - provider_type: inline::milvus
 | |
|     - provider_type: remote::chromadb
 | |
|     - provider_type: remote::pgvector
 | |
|     files:
 | |
|     - provider_type: inline::localfs
 | |
|     safety:
 | |
|     - provider_type: inline::llama-guard
 | |
|     - provider_type: inline::code-scanner
 | |
|     agents:
 | |
|     - provider_type: inline::meta-reference
 | |
|     telemetry:
 | |
|     - provider_type: inline::meta-reference
 | |
|     post_training:
 | |
|     - provider_type: inline::torchtune-cpu
 | |
|     eval:
 | |
|     - provider_type: inline::meta-reference
 | |
|     datasetio:
 | |
|     - provider_type: remote::huggingface
 | |
|     - provider_type: inline::localfs
 | |
|     scoring:
 | |
|     - provider_type: inline::basic
 | |
|     - provider_type: inline::llm-as-judge
 | |
|     - provider_type: inline::braintrust
 | |
|     tool_runtime:
 | |
|     - provider_type: remote::brave-search
 | |
|     - provider_type: remote::tavily-search
 | |
|     - provider_type: inline::rag-runtime
 | |
|     - provider_type: remote::model-context-protocol
 | |
|     batches:
 | |
|     - provider_type: inline::reference
 | |
| image_type: venv
 | |
| additional_pip_packages:
 | |
| - aiosqlite
 | |
| - asyncpg
 | |
| - sqlalchemy[asyncio]
 |