Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-28 01:01:59 +00:00)

Commit 488eb8f249: Merge branch 'main' into feat/litellm_sambanova_usage
39 changed files with 2102 additions and 164 deletions
@@ -115,7 +115,7 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod
         stream=True,
         max_tokens=50,
     )
-    streamed_content = [chunk.choices[0].text for chunk in response]
+    streamed_content = [chunk.choices[0].text or "" for chunk in response]
     content_str = "".join(streamed_content).lower().strip()
     assert len(content_str) > 10
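The one-line change above guards the streaming path against chunks whose text payload is None (presumably a chunk carrying no text delta), which would otherwise make "".join(...) raise a TypeError. A minimal illustration with plain Python stand-ins, not the real OpenAI chunk objects:

    # text may be None on some streamed chunks; `or ""` substitutes an
    # empty string so the final join succeeds
    chunk_texts = ["Hello", None, " world"]
    streamed_content = [text or "" for text in chunk_texts]
    assert "".join(streamed_content) == "Hello world"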
tests/unit/providers/utils/test_scheduler.py (new file, +120)
@@ -0,0 +1,120 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio

import pytest

from llama_stack.providers.utils.scheduler import JobStatus, Scheduler


@pytest.mark.asyncio
async def test_scheduler_unknown_backend():
    with pytest.raises(ValueError):
        Scheduler(backend="unknown")


@pytest.mark.asyncio
async def test_scheduler_naive():
    sched = Scheduler()

    # make sure the scheduler starts empty
    with pytest.raises(ValueError):
        sched.get_job("unknown")
    assert sched.get_jobs() == []

    called = False

    # schedule a job that will exercise the handlers
    async def job_handler(on_log, on_status, on_artifact):
        nonlocal called
        called = True
        # exercise the handlers
        on_log("test log1")
        on_log("test log2")
        on_artifact({"type": "type1", "path": "path1"})
        on_artifact({"type": "type2", "path": "path2"})
        on_status(JobStatus.completed)

    job_id = "test_job_id"
    job_type = "test_job_type"
    sched.schedule(job_type, job_id, job_handler)

    # make sure the job was properly registered
    with pytest.raises(ValueError):
        sched.get_job("unknown")
    assert sched.get_job(job_id) is not None
    assert sched.get_jobs() == [sched.get_job(job_id)]

    assert sched.get_jobs("unknown") == []
    assert sched.get_jobs(job_type) == [sched.get_job(job_id)]

    # now shut the scheduler down and make sure the job ran
    await sched.shutdown()

    assert called

    job = sched.get_job(job_id)
    assert job is not None

    assert job.status == JobStatus.completed

    assert job.scheduled_at is not None
    assert job.started_at is not None
    assert job.completed_at is not None
    assert job.scheduled_at < job.started_at < job.completed_at

    assert job.artifacts == [
        {"type": "type1", "path": "path1"},
        {"type": "type2", "path": "path2"},
    ]
    assert [msg[1] for msg in job.logs] == ["test log1", "test log2"]
    assert job.logs[0][0] < job.logs[1][0]


@pytest.mark.asyncio
async def test_scheduler_naive_handler_raises():
    sched = Scheduler()

    async def failing_job_handler(on_log, on_status, on_artifact):
        on_status(JobStatus.running)
        raise ValueError("test error")

    job_id = "test_job_id1"
    job_type = "test_job_type"
    sched.schedule(job_type, job_id, failing_job_handler)

    job = sched.get_job(job_id)
    assert job is not None

    # confirm the exception made the job transition to failed state, even
    # though it was set to `running` before the error
    for _ in range(10):
        if job.status == JobStatus.failed:
            break
        await asyncio.sleep(0.1)
    assert job.status == JobStatus.failed

    # confirm that the raised error got registered in log
    assert job.logs[0][1] == "test error"

    # even after failed job, we can schedule another one
    called = False

    async def successful_job_handler(on_log, on_status, on_artifact):
        nonlocal called
        called = True
        on_status(JobStatus.completed)

    job_id = "test_job_id2"
    sched.schedule(job_type, job_id, successful_job_handler)

    await sched.shutdown()

    assert called
    job = sched.get_job(job_id)
    assert job is not None
    assert job.status == JobStatus.completed
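Taken together, these tests pin down the Scheduler surface: a handler coroutine receives on_log, on_status, and on_artifact callbacks; jobs are registered under a type and an id, looked up with get_job/get_jobs, and drained by shutdown(); logs are (timestamp, message) pairs. A condensed usage sketch inferred from the tests above (the job type, id, and artifact payload are illustrative):

    import asyncio

    from llama_stack.providers.utils.scheduler import JobStatus, Scheduler

    async def main():
        sched = Scheduler()  # default naive backend; unknown backends raise ValueError

        async def handler(on_log, on_status, on_artifact):
            on_log("doing work")  # appended to job.logs with a timestamp
            on_artifact({"type": "checkpoint", "path": "/tmp/ckpt"})
            on_status(JobStatus.completed)  # final status reported by the job

        sched.schedule("example_type", "example_id", handler)
        await sched.shutdown()  # waits for scheduled jobs to finish

        job = sched.get_job("example_id")
        print(job.status, [message for _, message in job.logs], job.artifacts)

    asyncio.run(main())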
tests/verifications/conf/fireworks-llama-stack.yaml (new file, +14)
@@ -0,0 +1,14 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: FIREWORKS_API_KEY
models:
- fireworks/llama-v3p3-70b-instruct
- fireworks/llama4-scout-instruct-basic
- fireworks/llama4-maverick-instruct-basic
model_display_names:
  fireworks/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct
  fireworks/llama4-scout-instruct-basic: Llama-4-Scout-Instruct
  fireworks/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct
test_exclusions:
  fireworks/llama-v3p3-70b-instruct:
  - test_chat_non_streaming_image
  - test_chat_streaming_image
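This file, and the similar *-llama-stack.yaml configurations below, mirror the direct provider configs but point base_url at a locally running Llama Stack server (port 8321, OpenAI-compatible endpoint), so the same verification suite can be exercised through the stack rather than against each provider's API directly; the server configuration itself appears to be the openai-api-verification-run.yaml added later in this commit.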
tests/verifications/conf/groq-llama-stack.yaml (new file, +14)
@@ -0,0 +1,14 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: GROQ_API_KEY
models:
- groq/llama-3.3-70b-versatile
- groq/llama-4-scout-17b-16e-instruct
- groq/llama-4-maverick-17b-128e-instruct
model_display_names:
  groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
  groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
  groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
test_exclusions:
  groq/llama-3.3-70b-versatile:
  - test_chat_non_streaming_image
  - test_chat_streaming_image
@@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
 api_key_var: GROQ_API_KEY
 models:
 - llama-3.3-70b-versatile
-- llama-4-scout-17b-16e-instruct
-- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
 model_display_names:
   llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
-  llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
-  llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+  meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
 test_exclusions:
   llama-3.3-70b-versatile:
   - test_chat_non_streaming_image
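The rename above tracks Groq's hosted model identifiers: the Llama 4 Scout and Maverick checkpoints are evidently now served under meta-llama/-prefixed IDs, while llama-3.3-70b-versatile keeps its original name, display mapping, and image-test exclusions.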
tests/verifications/conf/openai-llama-stack.yaml (new file, +9)
@@ -0,0 +1,9 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: OPENAI_API_KEY
models:
- openai/gpt-4o
- openai/gpt-4o-mini
model_display_names:
  openai/gpt-4o: gpt-4o
  openai/gpt-4o-mini: gpt-4o-mini
test_exclusions: {}
tests/verifications/conf/together-llama-stack.yaml (new file, +14)
@@ -0,0 +1,14 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: TOGETHER_API_KEY
models:
- together/meta-llama/Llama-3.3-70B-Instruct-Turbo
- together/meta-llama/Llama-4-Scout-17B-16E-Instruct
- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_display_names:
  together/meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct
  together/meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
  together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct
test_exclusions:
  together/meta-llama/Llama-3.3-70B-Instruct-Turbo:
  - test_chat_non_streaming_image
  - test_chat_streaming_image
@@ -67,7 +67,17 @@ RESULTS_DIR.mkdir(exist_ok=True)
 # Maximum number of test result files to keep per provider
 MAX_RESULTS_PER_PROVIDER = 1
 
-PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"]
+PROVIDER_ORDER = [
+    "together",
+    "fireworks",
+    "groq",
+    "cerebras",
+    "openai",
+    "together-llama-stack",
+    "fireworks-llama-stack",
+    "groq-llama-stack",
+    "openai-llama-stack",
+]
 
 VERIFICATION_CONFIG = _load_all_verification_configs()
 
tests/verifications/openai-api-verification-run.yaml (new file, +146)
@@ -0,0 +1,146 @@
version: '2'
image_name: openai-api-verification
apis:
- inference
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: together
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
      api_key: ${env.TOGETHER_API_KEY:}
  - provider_id: fireworks
    provider_type: remote::fireworks
    config:
      url: https://api.fireworks.ai/inference/v1
      api_key: ${env.FIREWORKS_API_KEY}
  - provider_id: groq
    provider_type: remote::groq
    config:
      url: https://api.groq.com
      api_key: ${env.GROQ_API_KEY}
  - provider_id: openai
    provider_type: remote::openai
    config:
      url: https://api.openai.com/v1
      api_key: ${env.OPENAI_API_KEY:}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/faiss_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db
models:
- metadata: {}
  model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
  provider_id: together
  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
  model_type: llm
- metadata: {}
  model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  model_type: llm
- metadata: {}
  model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: together
  provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: fireworks/llama-v3p3-70b-instruct
  provider_id: fireworks
  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: fireworks/llama4-scout-instruct-basic
  provider_id: fireworks
  provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
  model_type: llm
- metadata: {}
  model_id: fireworks/llama4-maverick-instruct-basic
  provider_id: fireworks
  provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
  model_type: llm
- metadata: {}
  model_id: groq/llama-3.3-70b-versatile
  provider_id: groq
  provider_model_id: groq/llama-3.3-70b-versatile
  model_type: llm
- metadata: {}
  model_id: groq/llama-4-scout-17b-16e-instruct
  provider_id: groq
  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
  model_type: llm
- metadata: {}
  model_id: groq/llama-4-maverick-17b-128e-instruct
  provider_id: groq
  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
  model_type: llm
- metadata: {}
  model_id: openai/gpt-4o
  provider_id: openai
  provider_model_id: openai/gpt-4o
  model_type: llm
- metadata: {}
  model_id: openai/gpt-4o-mini
  provider_id: openai
  provider_model_id: openai/gpt-4o-mini
  model_type: llm
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
server:
  port: 8321
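Throughout this file, values use the stack's ${env.VAR:default} convention: the text after the colon is the fallback when VAR is unset, so ${env.TOGETHER_API_KEY:} resolves to an empty string, ${env.FIREWORKS_API_KEY} (no colon) has no fallback, and the OTEL_SERVICE_NAME default is a literal zero-width space (\u200B). A minimal sketch of that substitution rule, not Llama Stack's actual resolver:

    import os
    import re

    def resolve_env(value: str) -> str:
        # Replace each ${env.VAR:default} occurrence; a missing colon means
        # no default (the real stack treats such a variable as required).
        def repl(match: re.Match) -> str:
            var, _, default = match.group(1).partition(":")
            return os.environ.get(var, default)
        return re.sub(r"\$\{env\.([^}]*)\}", repl, value)

    # With SQLITE_STORE_DIR unset, the default path wins:
    print(resolve_env("${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db"))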
@@ -99,6 +99,9 @@ def model_mapping(provider, providers_model_mapping):
 
 @pytest.fixture
 def openai_client(base_url, api_key):
+    # Simplify running against a local Llama Stack
+    if "localhost" in base_url and not api_key:
+        api_key = "empty"
     return OpenAI(
         base_url=base_url,
         api_key=api_key,
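For context on the fixture change: the OpenAI SDK rejects a missing api_key, so when the verification suite targets a local Llama Stack (reached via localhost in the new conf files) a placeholder key is substituted. A hypothetical direct equivalent, using the base_url from the configs above:

    from openai import OpenAI

    # Any non-empty string appears to satisfy the client when the local
    # stack does not check keys; the fixture uses "empty".
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="empty")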