Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-05 10:23:44 +00:00)
feat!: standardize base_url for inference
Completes #3732 by removing runtime URL transformations and requiring users to provide full URLs in configuration. All providers now use 'base_url' consistently and respect the exact URL provided, without appending paths like /v1 or /openai/v1 at runtime.

Adds a unit test to enforce URL standardization across remote inference providers (verifies that all use a 'base_url' field typed 'HttpUrl | None').

BREAKING CHANGE: Users must update configs to include full URL paths (e.g., http://localhost:11434/v1 instead of http://localhost:11434).

Signed-off-by: Charlie Doern <cdoern@redhat.com>
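The breaking change is easiest to see as a before/after of how an adapter resolves its endpoint. The sketch below is illustrative only, not the actual provider code, and uses the /v1 example from the message above: previously a provider could rewrite the configured URL at runtime, whereas now the configured base_url is used verbatim.

# Illustrative sketch only -- not the adapter implementation.

def old_endpoint(url: str) -> str:
    # Old behavior: the adapter normalized the configured URL at runtime,
    # e.g. by appending an API prefix such as /v1.
    return url.rstrip("/") + "/v1"

def new_endpoint(base_url: str) -> str:
    # New behavior: the configured base_url is respected exactly as given,
    # so it must already include the full path (e.g. http://localhost:11434/v1).
    return base_url

assert old_endpoint("http://localhost:11434") == "http://localhost:11434/v1"
assert new_endpoint("http://localhost:11434/v1") == "http://localhost:11434/v1"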
parent 7093978754
commit 7a9c32f737

67 changed files with 282 additions and 227 deletions
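The commit message above also mentions a unit test that enforces this standardization across remote inference provider configs. A minimal sketch of what such a check could look like, assuming Pydantic-based config classes (the class and function names here are hypothetical, not the actual test):

# Hedged sketch of a 'base_url' standardization check; names are hypothetical.
from typing import get_args

from pydantic import BaseModel, HttpUrl

class ExampleAdapterConfig(BaseModel):
    # Stand-in for a remote inference provider config class.
    base_url: HttpUrl | None = None

def assert_uses_base_url(config_cls: type[BaseModel]) -> None:
    # The config must declare a 'base_url' field typed HttpUrl | None.
    field = config_cls.model_fields.get("base_url")
    assert field is not None, f"{config_cls.__name__} is missing 'base_url'"
    assert HttpUrl in get_args(field.annotation), (
        f"{config_cls.__name__}.base_url should be typed HttpUrl | None"
    )

assert_uses_base_url(ExampleAdapterConfig)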
@@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere
         VLLMInferenceAdapter,
         "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
         {
-            "url": "http://fake",
+            "base_url": "http://fake",
         },
     ),
 ],
@@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid
     """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the
     assumption that there is an OpenAI-compatible client object."""

-    inference_adapter = adapter_cls(config=config_cls())
+    inference_adapter = adapter_cls(config=config_cls(base_url="http://fake"))

     inference_adapter.__provider_spec__ = MagicMock()
     inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
@@ -40,7 +40,7 @@ from llama_stack_api import (

 @pytest.fixture(scope="function")
 async def vllm_inference_adapter():
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
     inference_adapter = VLLMInferenceAdapter(config=config)
     inference_adapter.model_store = AsyncMock()
     await inference_adapter.initialize()
@@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body():
     via extra_body to the underlying OpenAI client through the InferenceRouter.
     """
     # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
     vllm_adapter = VLLMInferenceAdapter(config=config)
     vllm_adapter.__provider_id__ = "vllm"
     await vllm_adapter.initialize()
@@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body():
     via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion.
     """
     # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
     vllm_adapter = VLLMInferenceAdapter(config=config)
     vllm_adapter.__provider_id__ = "vllm"
     await vllm_adapter.initialize()