Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-09 05:08:37 +00:00)
feat: add refresh_models support to inference adapters (default: false) (#3719)
# What does this PR do?

Inference adapters can now configure `refresh_models: bool` to control periodic model listing from their providers.

BREAKING CHANGE: the Together inference adapter default changed. It previously always refreshed; it now follows the config.

Addresses "models: refresh" on #3517.

## Test Plan

CI with new tests.
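To make the new flag concrete, here is a minimal sketch of opting a vLLM adapter into model refresh. The class names, constructor arguments, and the async `should_refresh_models()` hook are taken from the test code in the diff below; the import path and the endpoint URL are assumptions for illustration only, and the `False` default comes from the PR title.

```python
import asyncio

# Import path is an assumption; adjust to wherever the vLLM remote
# inference provider lives in your llama-stack checkout.
from llama_stack.providers.remote.inference.vllm import (
    VLLMInferenceAdapter,
    VLLMInferenceAdapterConfig,
)


async def main() -> None:
    # Per this PR, refresh_models defaults to False, so periodic model
    # listing from the provider must be opted into explicitly.
    config = VLLMInferenceAdapterConfig(
        url="http://localhost:8000/v1",  # placeholder endpoint
        api_token="fake",
        refresh_models=True,
    )
    adapter = VLLMInferenceAdapter(config=config)

    # With refresh_models=True the hook reports True, regardless of
    # api_token (as the removed test below documents).
    assert await adapter.should_refresh_models() is True


if __name__ == "__main__":
    asyncio.run(main())
```

With `refresh_models=False` (the new default) the same call returns `False`; for the Together adapter, which previously always refreshed, this is the behavior change called out above.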
This commit is contained in: parent 8b9af03a1b, commit e892a3f7f4
31 changed files with 33 additions and 67 deletions
```diff
@@ -186,43 +186,3 @@ async def test_openai_chat_completion_is_async(vllm_inference_adapter):
     assert mock_create_client.call_count == 4  # no cheating
     assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
 
-
-async def test_should_refresh_models():
-    """
-    Test the should_refresh_models method with different refresh_models configurations.
-
-    This test verifies that:
-    1. When refresh_models is True, should_refresh_models returns True regardless of api_token
-    2. When refresh_models is False, should_refresh_models returns False regardless of api_token
-    """
-
-    # Test case 1: refresh_models is True, api_token is None
-    config1 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token=None, refresh_models=True)
-    adapter1 = VLLMInferenceAdapter(config=config1)
-    result1 = await adapter1.should_refresh_models()
-    assert result1 is True, "should_refresh_models should return True when refresh_models is True"
-
-    # Test case 2: refresh_models is True, api_token is empty string
-    config2 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="", refresh_models=True)
-    adapter2 = VLLMInferenceAdapter(config=config2)
-    result2 = await adapter2.should_refresh_models()
-    assert result2 is True, "should_refresh_models should return True when refresh_models is True"
-
-    # Test case 3: refresh_models is True, api_token is "fake" (default)
-    config3 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="fake", refresh_models=True)
-    adapter3 = VLLMInferenceAdapter(config=config3)
-    result3 = await adapter3.should_refresh_models()
-    assert result3 is True, "should_refresh_models should return True when refresh_models is True"
-
-    # Test case 4: refresh_models is True, api_token is real token
-    config4 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-123", refresh_models=True)
-    adapter4 = VLLMInferenceAdapter(config=config4)
-    result4 = await adapter4.should_refresh_models()
-    assert result4 is True, "should_refresh_models should return True when refresh_models is True"
-
-    # Test case 5: refresh_models is False, api_token is real token
-    config5 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-456", refresh_models=False)
-    adapter5 = VLLMInferenceAdapter(config=config5)
-    result5 = await adapter5.should_refresh_models()
-    assert result5 is False, "should_refresh_models should return False when refresh_models is False"
-
```
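The removed test above documents that the decision depends only on the config flag, not on `api_token`. A minimal sketch of an adapter-side hook with that behavior, using illustrative names only (this is not the actual `VLLMInferenceAdapter` code):

```python
from dataclasses import dataclass


@dataclass
class ExampleAdapterConfig:
    """Illustrative config; the real adapter config also carries url, api_token, etc."""

    refresh_models: bool = False  # default per this PR's title


class ExampleAdapter:
    """Illustrative adapter; not the actual VLLMInferenceAdapter implementation."""

    def __init__(self, config: ExampleAdapterConfig) -> None:
        self.config = config

    async def should_refresh_models(self) -> bool:
        # Per the removed test's docstring, the result follows the config flag
        # alone; the api_token value plays no role in the decision.
        return self.config.refresh_models
```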