mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-29 15:14:44 +00:00
Merge branch 'main' into register_custom_model
This commit is contained in:
commit
afb792b9c1
69 changed files with 8875 additions and 890 deletions
|
|
@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
|
|||
)
|
||||
from openai.types.model import Model as OpenAIModel
|
||||
|
||||
from llama_stack.apis.inference import ToolChoice, ToolConfig
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
UserMessage,
|
||||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.models.llama.datatypes import StopReason
|
||||
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
|
||||
|
|
@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
|
|||
# above.
|
||||
asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
|
||||
assert not asyncio_warnings
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_params_empty_tools(vllm_inference_adapter):
|
||||
request = ChatCompletionRequest(
|
||||
tools=[],
|
||||
model="test_model",
|
||||
messages=[UserMessage(content="test")],
|
||||
)
|
||||
params = await vllm_inference_adapter._get_params(request)
|
||||
assert "tools" not in params
|
||||
|
|
|
|||
120
tests/unit/providers/utils/test_scheduler.py
Normal file
120
tests/unit/providers/utils/test_scheduler.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.providers.utils.scheduler import JobStatus, Scheduler
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_unknown_backend():
|
||||
with pytest.raises(ValueError):
|
||||
Scheduler(backend="unknown")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_naive():
|
||||
sched = Scheduler()
|
||||
|
||||
# make sure the scheduler starts empty
|
||||
with pytest.raises(ValueError):
|
||||
sched.get_job("unknown")
|
||||
assert sched.get_jobs() == []
|
||||
|
||||
called = False
|
||||
|
||||
# schedule a job that will exercise the handlers
|
||||
async def job_handler(on_log, on_status, on_artifact):
|
||||
nonlocal called
|
||||
called = True
|
||||
# exercise the handlers
|
||||
on_log("test log1")
|
||||
on_log("test log2")
|
||||
on_artifact({"type": "type1", "path": "path1"})
|
||||
on_artifact({"type": "type2", "path": "path2"})
|
||||
on_status(JobStatus.completed)
|
||||
|
||||
job_id = "test_job_id"
|
||||
job_type = "test_job_type"
|
||||
sched.schedule(job_type, job_id, job_handler)
|
||||
|
||||
# make sure the job was properly registered
|
||||
with pytest.raises(ValueError):
|
||||
sched.get_job("unknown")
|
||||
assert sched.get_job(job_id) is not None
|
||||
assert sched.get_jobs() == [sched.get_job(job_id)]
|
||||
|
||||
assert sched.get_jobs("unknown") == []
|
||||
assert sched.get_jobs(job_type) == [sched.get_job(job_id)]
|
||||
|
||||
# now shut the scheduler down and make sure the job ran
|
||||
await sched.shutdown()
|
||||
|
||||
assert called
|
||||
|
||||
job = sched.get_job(job_id)
|
||||
assert job is not None
|
||||
|
||||
assert job.status == JobStatus.completed
|
||||
|
||||
assert job.scheduled_at is not None
|
||||
assert job.started_at is not None
|
||||
assert job.completed_at is not None
|
||||
assert job.scheduled_at < job.started_at < job.completed_at
|
||||
|
||||
assert job.artifacts == [
|
||||
{"type": "type1", "path": "path1"},
|
||||
{"type": "type2", "path": "path2"},
|
||||
]
|
||||
assert [msg[1] for msg in job.logs] == ["test log1", "test log2"]
|
||||
assert job.logs[0][0] < job.logs[1][0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_naive_handler_raises():
|
||||
sched = Scheduler()
|
||||
|
||||
async def failing_job_handler(on_log, on_status, on_artifact):
|
||||
on_status(JobStatus.running)
|
||||
raise ValueError("test error")
|
||||
|
||||
job_id = "test_job_id1"
|
||||
job_type = "test_job_type"
|
||||
sched.schedule(job_type, job_id, failing_job_handler)
|
||||
|
||||
job = sched.get_job(job_id)
|
||||
assert job is not None
|
||||
|
||||
# confirm the exception made the job transition to failed state, even
|
||||
# though it was set to `running` before the error
|
||||
for _ in range(10):
|
||||
if job.status == JobStatus.failed:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
assert job.status == JobStatus.failed
|
||||
|
||||
# confirm that the raised error got registered in log
|
||||
assert job.logs[0][1] == "test error"
|
||||
|
||||
# even after failed job, we can schedule another one
|
||||
called = False
|
||||
|
||||
async def successful_job_handler(on_log, on_status, on_artifact):
|
||||
nonlocal called
|
||||
called = True
|
||||
on_status(JobStatus.completed)
|
||||
|
||||
job_id = "test_job_id2"
|
||||
sched.schedule(job_type, job_id, successful_job_handler)
|
||||
|
||||
await sched.shutdown()
|
||||
|
||||
assert called
|
||||
job = sched.get_job(job_id)
|
||||
assert job is not None
|
||||
assert job.status == JobStatus.completed
|
||||
Loading…
Add table
Add a link
Reference in a new issue