Merge branch 'main' into feat/litellm_sambanova_usage

2025-12-28 01:01:59 +00:00 · 2025-04-14 12:15:44 -05:00 · 2025-04-14 12:15:44 -05:00 · 488eb8f249
commit 488eb8f249
parent dd808a8c1e cf158f2cb9
39 changed files with 2102 additions and 164 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -115,7 +115,7 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod
        stream=True,
        max_tokens=50,
    )
-    streamed_content = [chunk.choices[0].text for chunk in response]
+    streamed_content = [chunk.choices[0].text or "" for chunk in response]
    content_str = "".join(streamed_content).lower().strip()
    assert len(content_str) > 10

--- a/tests/unit/providers/utils/test_scheduler.py
+++ b/tests/unit/providers/utils/test_scheduler.py
@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+
+import pytest
+
+from llama_stack.providers.utils.scheduler import JobStatus, Scheduler
+
+
+@pytest.mark.asyncio
+async def test_scheduler_unknown_backend():
+    with pytest.raises(ValueError):
+        Scheduler(backend="unknown")
+
+
+@pytest.mark.asyncio
+async def test_scheduler_naive():
+    sched = Scheduler()
+
+    # make sure the scheduler starts empty
+    with pytest.raises(ValueError):
+        sched.get_job("unknown")
+    assert sched.get_jobs() == []
+
+    called = False
+
+    # schedule a job that will exercise the handlers
+    async def job_handler(on_log, on_status, on_artifact):
+        nonlocal called
+        called = True
+        # exercise the handlers
+        on_log("test log1")
+        on_log("test log2")
+        on_artifact({"type": "type1", "path": "path1"})
+        on_artifact({"type": "type2", "path": "path2"})
+        on_status(JobStatus.completed)
+
+    job_id = "test_job_id"
+    job_type = "test_job_type"
+    sched.schedule(job_type, job_id, job_handler)
+
+    # make sure the job was properly registered
+    with pytest.raises(ValueError):
+        sched.get_job("unknown")
+    assert sched.get_job(job_id) is not None
+    assert sched.get_jobs() == [sched.get_job(job_id)]
+
+    assert sched.get_jobs("unknown") == []
+    assert sched.get_jobs(job_type) == [sched.get_job(job_id)]
+
+    # now shut the scheduler down and make sure the job ran
+    await sched.shutdown()
+
+    assert called
+
+    job = sched.get_job(job_id)
+    assert job is not None
+
+    assert job.status == JobStatus.completed
+
+    assert job.scheduled_at is not None
+    assert job.started_at is not None
+    assert job.completed_at is not None
+    assert job.scheduled_at < job.started_at < job.completed_at
+
+    assert job.artifacts == [
+        {"type": "type1", "path": "path1"},
+        {"type": "type2", "path": "path2"},
+    ]
+    assert [msg[1] for msg in job.logs] == ["test log1", "test log2"]
+    assert job.logs[0][0] < job.logs[1][0]
+
+
+@pytest.mark.asyncio
+async def test_scheduler_naive_handler_raises():
+    sched = Scheduler()
+
+    async def failing_job_handler(on_log, on_status, on_artifact):
+        on_status(JobStatus.running)
+        raise ValueError("test error")
+
+    job_id = "test_job_id1"
+    job_type = "test_job_type"
+    sched.schedule(job_type, job_id, failing_job_handler)
+
+    job = sched.get_job(job_id)
+    assert job is not None
+
+    # confirm the exception made the job transition to failed state, even
+    # though it was set to `running` before the error
+    for _ in range(10):
+        if job.status == JobStatus.failed:
+            break
+        await asyncio.sleep(0.1)
+    assert job.status == JobStatus.failed
+
+    # confirm that the raised error got registered in log
+    assert job.logs[0][1] == "test error"
+
+    # even after failed job, we can schedule another one
+    called = False
+
+    async def successful_job_handler(on_log, on_status, on_artifact):
+        nonlocal called
+        called = True
+        on_status(JobStatus.completed)
+
+    job_id = "test_job_id2"
+    sched.schedule(job_type, job_id, successful_job_handler)
+
+    await sched.shutdown()
+
+    assert called
+    job = sched.get_job(job_id)
+    assert job is not None
+    assert job.status == JobStatus.completed
--- a/tests/verifications/conf/fireworks-llama-stack.yaml
+++ b/tests/verifications/conf/fireworks-llama-stack.yaml
@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: FIREWORKS_API_KEY
+models:
+- fireworks/llama-v3p3-70b-instruct
+- fireworks/llama4-scout-instruct-basic
+- fireworks/llama4-maverick-instruct-basic
+model_display_names:
+  fireworks/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct
+  fireworks/llama4-scout-instruct-basic: Llama-4-Scout-Instruct
+  fireworks/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct
+test_exclusions:
+  fireworks/llama-v3p3-70b-instruct:
+  - test_chat_non_streaming_image
+  - test_chat_streaming_image
--- a/tests/verifications/conf/groq-llama-stack.yaml
+++ b/tests/verifications/conf/groq-llama-stack.yaml
@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: GROQ_API_KEY
+models:
+- groq/llama-3.3-70b-versatile
+- groq/llama-4-scout-17b-16e-instruct
+- groq/llama-4-maverick-17b-128e-instruct
+model_display_names:
+  groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
+  groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+test_exclusions:
+  groq/llama-3.3-70b-versatile:
+  - test_chat_non_streaming_image
+  - test_chat_streaming_image
--- a/tests/verifications/conf/groq.yaml
+++ b/tests/verifications/conf/groq.yaml
@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
 api_key_var: GROQ_API_KEY
 models:
 - llama-3.3-70b-versatile
- llama-4-scout-17b-16e-instruct
- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
 model_display_names:
  llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
-  llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
-  llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+  meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
 test_exclusions:
  llama-3.3-70b-versatile:
  - test_chat_non_streaming_image
--- a/tests/verifications/conf/openai-llama-stack.yaml
+++ b/tests/verifications/conf/openai-llama-stack.yaml
@ -0,0 +1,9 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: OPENAI_API_KEY
+models:
+- openai/gpt-4o
+- openai/gpt-4o-mini
+model_display_names:
+  openai/gpt-4o: gpt-4o
+  openai/gpt-4o-mini: gpt-4o-mini
+test_exclusions: {}
--- a/tests/verifications/conf/together-llama-stack.yaml
+++ b/tests/verifications/conf/together-llama-stack.yaml
@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: TOGETHER_API_KEY
+models:
+- together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+- together/meta-llama/Llama-4-Scout-17B-16E-Instruct
+- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+model_display_names:
+  together/meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct
+  together/meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
+  together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct
+test_exclusions:
+  together/meta-llama/Llama-3.3-70B-Instruct-Turbo:
+  - test_chat_non_streaming_image
+  - test_chat_streaming_image
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@ -67,7 +67,17 @@ RESULTS_DIR.mkdir(exist_ok=True)
 # Maximum number of test result files to keep per provider
 MAX_RESULTS_PER_PROVIDER = 1

-PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"]
+PROVIDER_ORDER = [
+    "together",
+    "fireworks",
+    "groq",
+    "cerebras",
+    "openai",
+    "together-llama-stack",
+    "fireworks-llama-stack",
+    "groq-llama-stack",
+    "openai-llama-stack",
+]

 VERIFICATION_CONFIG = _load_all_verification_configs()

--- a/tests/verifications/openai-api-verification-run.yaml
+++ b/tests/verifications/openai-api-verification-run.yaml
@ -0,0 +1,146 @@
+version: '2'
+image_name: openai-api-verification
+apis:
+- inference
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      url: https://api.openai.com/v1
+      api_key: ${env.OPENAI_API_KEY:}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/faiss_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db
+models:
+- metadata: {}
+  model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
+- metadata: {}
+  model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: together
+  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: fireworks/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: fireworks/llama4-scout-instruct-basic
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
+  model_type: llm
+- metadata: {}
+  model_id: fireworks/llama4-maverick-instruct-basic
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: openai/gpt-4o
+  provider_id: openai
+  provider_model_id: openai/gpt-4o
+  model_type: llm
+- metadata: {}
+  model_id: openai/gpt-4o-mini
+  provider_id: openai
+  provider_model_id: openai/gpt-4o-mini
+  model_type: llm
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
+server:
+  port: 8321
--- a/tests/verifications/openai_api/fixtures/fixtures.py
+++ b/tests/verifications/openai_api/fixtures/fixtures.py
@ -99,6 +99,9 @@ def model_mapping(provider, providers_model_mapping):

@pytest.fixture
 def openai_client(base_url, api_key):
+    # Simplify running against a local Llama Stack
+    if "localhost" in base_url and not api_key:
+        api_key = "empty"
    return OpenAI(
        base_url=base_url,
        api_key=api_key,