diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml
index 91b9d2f3b..9eae291e9 100644
--- a/.github/workflows/gha_workflow_llama_stack_tests.yml
+++ b/.github/workflows/gha_workflow_llama_stack_tests.yml
@@ -320,7 +320,7 @@ jobs:
       - name: "PR - Update comment"
         id: pr_update_comment
         if: github.event_name == 'pull_request_target'
-        uses: thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74 # v3.0.1
+        uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1
         with:
           filePath: test-summary.md
diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml
new file mode 100644
index 000000000..2ead8f845
--- /dev/null
+++ b/.github/workflows/test-external-providers.yml
@@ -0,0 +1,93 @@
+name: Test External Providers
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  test-external-providers:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install Ollama
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sh
+
+      - name: Pull Ollama model
+        run: |
+          ollama pull llama3.2:3b-instruct-fp16
+
+      - name: Start Ollama in background
+        run: |
+          nohup ollama run llama3.2:3b-instruct-fp16 --keepalive=30m > ollama.log 2>&1 &
+
+      - name: Set Up Environment and Install Dependencies
+        run: |
+          uv sync --extra dev --extra test
+          uv pip install -e .
+
+      - name: Install Ollama custom provider
+        run: |
+          mkdir -p tests/external-provider/llama-stack-provider-ollama/src/
+          cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama
+          uv pip install tests/external-provider/llama-stack-provider-ollama
+
+      - name: Create provider configuration
+        run: |
+          mkdir -p /tmp/providers.d/remote/inference
+          cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml
+
+      - name: Wait for Ollama to start
+        run: |
+          echo "Waiting for Ollama..."
+          for i in {1..30}; do
+            if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
+              echo "Ollama is running!"
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Ollama failed to start"
+          ollama ps
+          cat ollama.log
+          exit 1
+
+      - name: Start Llama Stack server in background
+        env:
+          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+        run: |
+          source .venv/bin/activate
+          nohup uv run llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type venv > server.log 2>&1 &
+
+      - name: Wait for Llama Stack server to be ready
+        run: |
+          echo "Waiting for Llama Stack server..."
+          for i in {1..30}; do
+            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
+              echo "Llama Stack server is up!"
+              if grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+                echo "Llama Stack server is using custom Ollama provider"
+                exit 0
+              else
+                echo "Llama Stack server is not using custom Ollama provider"
+                exit 1
+              fi
+            fi
+            sleep 1
+          done
+          echo "Llama Stack server failed to start"
+          cat server.log
+          exit 1
+
+      - name: Run inference tests
+        run: |
+          uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config="http://localhost:8321" --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 953d04def..5086094ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,42 @@
 # Changelog
 
+# v0.2.1
+Published on: 2025-04-05T23:13:00Z
+
+
+
+---
+
+# v0.2.0
+Published on: 2025-04-05T19:04:29Z
+
+## Llama 4 Support
+
+Check out more at https://www.llama.com
+
+
+
+---
+
+# v0.1.9
+Published on: 2025-03-29T00:52:23Z
+
+### Build and Test Agents
+* Agents: Entire document context with attachments
+* RAG: Documentation with sqlite-vec faiss comparison
+* Getting started: Fixes to getting started notebook.
+
+### Agent Evals and Model Customization
+* (**New**) Post-training: Add nemo customizer
+
+### Better Engineering
+* Moved sqlite-vec to non-blocking calls
+* Don't return a payload on file delete
+
+
+
+---
+
 # v0.1.8
 Published on: 2025-03-24T01:28:50Z
diff --git a/docs/_static/js/detect_theme.js b/docs/_static/js/detect_theme.js
new file mode 100644
index 000000000..484b2bb8b
--- /dev/null
+++ b/docs/_static/js/detect_theme.js
@@ -0,0 +1,9 @@
+document.addEventListener("DOMContentLoaded", function () {
+  const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
+  const htmlElement = document.documentElement;
+  if (prefersDark) {
+    htmlElement.setAttribute("data-theme", "dark");
+  } else {
+    htmlElement.setAttribute("data-theme", "light");
+  }
+});
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 33654fe67..55c6383b2 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -112,6 +112,8 @@ html_theme_options = {
     # "style_nav_header_background": "#c3c9d4",
 }
 
+default_dark_mode = False
+
 html_static_path = ["../_static"]
 # html_logo = "../_static/llama-stack-logo.png"
 # html_style = "../_static/css/my_theme.css"
@@ -119,6 +121,7 @@ html_static_path = ["../_static"]
 def setup(app):
     app.add_css_file("css/my_theme.css")
+    app.add_js_file("js/detect_theme.js")
 
 def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
     url = f"https://hub.docker.com/r/llamastack/{text}"
diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md
index 8ff3f0408..2daf9d785 100644
--- a/docs/source/distributions/kubernetes_deployment.md
+++ b/docs/source/distributions/kubernetes_deployment.md
@@ -7,13 +7,13 @@ In this guide, we'll use a local [Kind](https://kind.sigs.k8s.io/) cluster and a
 
 First, create a local Kubernetes cluster via Kind:
 
-```bash
+```
 kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test
 ```
 
 First, create a Kubernetes PVC and Secret for downloading and storing Hugging Face model:
 
-```bash
+```
 cat </tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s < OllamaInferenceAdapter:
+    return OllamaInferenceAdapter(config)
+```
+
+### Inline Providers
+
+Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
+1. `config`: An instance of the provider's config class
+2.
`deps`: A dictionary of API dependencies + +Example: +```python +async def get_provider_impl( + config: VectorStoreConfig, deps: Dict[Api, Any] +) -> VectorStoreImpl: + impl = VectorStoreImpl(config, deps[Api.inference]) + await impl.initialize() + return impl +``` + +## Dependencies + +The provider package must be installed on the system. For example: + +```bash +$ uv pip show llama-stack-ollama-provider +Name: llama-stack-ollama-provider +Version: 0.1.0 +Location: /path/to/venv/lib/python3.10/site-packages +``` + +## Example: Custom Ollama Provider + +Here's a complete example of creating and using a custom Ollama provider: + +1. First, create the provider package: + +```bash +mkdir -p llama-stack-provider-ollama +cd llama-stack-provider-ollama +git init +uv init +``` + +2. Edit `pyproject.toml`: + +```toml +[project] +name = "llama-stack-provider-ollama" +version = "0.1.0" +description = "Ollama provider for Llama Stack" +requires-python = ">=3.10" +dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"] +``` + +3. Create the provider specification: + +```yaml +# /etc/llama-stack/providers.d/remote/inference/custom_ollama.yaml +adapter: + adapter_type: custom_ollama + pip_packages: ["ollama", "aiohttp"] + config_class: llama_stack_provider_ollama.config.OllamaImplConfig + module: llama_stack_provider_ollama +api_dependencies: [] +optional_api_dependencies: [] +``` + +4. Install the provider: + +```bash +uv pip install -e . +``` + +5. Configure Llama Stack to use external providers: + +```yaml +external_providers_dir: /etc/llama-stack/providers.d/ +``` + +The provider will now be available in Llama Stack with the type `remote::custom_ollama`. + +## Best Practices + +1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable. + +2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using. + +3. **Dependencies**: Only include the minimum required dependencies in your provider package. + +4. **Documentation**: Include clear documentation in your provider package about: + - Installation requirements + - Configuration options + - Usage examples + - Any limitations or known issues + +5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack. +You can refer to the [integration tests +guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more +information. Execute the test for the Provider type you are developing. + +## Troubleshooting + +If your external provider isn't being loaded: + +1. Check that the `external_providers_dir` path is correct and accessible. +2. Verify that the YAML files are properly formatted. +3. Ensure all required Python packages are installed. +4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more + information using `LLAMA_STACK_LOGGING=all=debug`. +5. Verify that the provider package is installed in your Python environment. diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index f8997a281..75faf7c00 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -11,6 +11,10 @@ Providers come in two flavors: Importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally. +## External Providers + +Llama Stack supports external providers that live outside of the main codebase. 
This allows you to create and maintain your own providers independently. See the [External Providers Guide](external) for details. + ## Agents Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc. @@ -50,6 +54,7 @@ The following providers (i.e., databases) are available for Vector IO: ```{toctree} :maxdepth: 1 +external vector_io/faiss vector_io/sqlite-vec vector_io/chromadb diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 48f1925dd..b24b0ec50 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -312,6 +312,11 @@ a default SQLite store will be used.""", description="Configuration for the HTTP(S) server", ) + external_providers_dir: Optional[str] = Field( + default=None, + description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", + ) + class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index ddb727663..d4447139c 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -4,12 +4,25 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import glob import importlib -from typing import Dict, List +import os +from typing import Any, Dict, List +import yaml from pydantic import BaseModel -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import StackRunConfig +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import ( + AdapterSpec, + Api, + InlineProviderSpec, + ProviderSpec, + remote_provider_spec, +) + +logger = get_logger(name=__name__, category="core") def stack_apis() -> List[Api]: @@ -59,11 +72,116 @@ def providable_apis() -> List[Api]: return [api for api in Api if api not in routing_table_apis and api != Api.inspect and api != Api.providers] -def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]: - ret = {} +def _load_remote_provider_spec(spec_data: Dict[str, Any], api: Api) -> ProviderSpec: + adapter = AdapterSpec(**spec_data["adapter"]) + spec = remote_provider_spec( + api=api, + adapter=adapter, + api_dependencies=[Api(dep) for dep in spec_data.get("api_dependencies", [])], + ) + return spec + + +def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_name: str) -> ProviderSpec: + spec = InlineProviderSpec( + api=api, + provider_type=f"inline::{provider_name}", + pip_packages=spec_data.get("pip_packages", []), + module=spec_data["module"], + config_class=spec_data["config_class"], + api_dependencies=[Api(dep) for dep in spec_data.get("api_dependencies", [])], + optional_api_dependencies=[Api(dep) for dep in spec_data.get("optional_api_dependencies", [])], + provider_data_validator=spec_data.get("provider_data_validator"), + container_image=spec_data.get("container_image"), + ) + return spec + + +def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dict[str, ProviderSpec]]: + """Get the provider registry, optionally including external providers. + + This function loads both built-in providers and external providers from YAML files. 
+ External providers are loaded from a directory structure like: + + providers.d/ + remote/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml + inline/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml + + Args: + config: Optional StackRunConfig containing the external providers directory path + + Returns: + A dictionary mapping APIs to their available providers + + Raises: + FileNotFoundError: If the external providers directory doesn't exist + ValueError: If any provider spec is invalid + """ + + ret: Dict[Api, Dict[str, ProviderSpec]] = {} for api in providable_apis(): name = api.name.lower() - module = importlib.import_module(f"llama_stack.providers.registry.{name}") - ret[api] = {a.provider_type: a for a in module.available_providers()} + logger.debug(f"Importing module {name}") + try: + module = importlib.import_module(f"llama_stack.providers.registry.{name}") + ret[api] = {a.provider_type: a for a in module.available_providers()} + except ImportError as e: + logger.warning(f"Failed to import module {name}: {e}") + if config and config.external_providers_dir: + external_providers_dir = os.path.abspath(config.external_providers_dir) + if not os.path.exists(external_providers_dir): + raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}") + logger.info(f"Loading external providers from {external_providers_dir}") + + for api in providable_apis(): + api_name = api.name.lower() + + # Process both remote and inline providers + for provider_type in ["remote", "inline"]: + api_dir = os.path.join(external_providers_dir, provider_type, api_name) + if not os.path.exists(api_dir): + logger.debug(f"No {provider_type} provider directory found for {api_name}") + continue + + # Look for provider spec files in the API directory + for spec_path in glob.glob(os.path.join(api_dir, "*.yaml")): + provider_name = os.path.splitext(os.path.basename(spec_path))[0] + logger.info(f"Loading {provider_type} provider spec from {spec_path}") + + try: + with open(spec_path) as f: + spec_data = yaml.safe_load(f) + + if provider_type == "remote": + spec = _load_remote_provider_spec(spec_data, api) + provider_type_key = f"remote::{provider_name}" + else: + spec = _load_inline_provider_spec(spec_data, api, provider_name) + provider_type_key = f"inline::{provider_name}" + + logger.info(f"Loaded {provider_type} provider spec for {provider_type_key} from {spec_path}") + if provider_type_key in ret[api]: + logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}") + ret[api][provider_type_key] = spec + except yaml.YAMLError as yaml_err: + logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}") + raise yaml_err + except Exception as e: + logger.error(f"Failed to load provider spec from {spec_path}: {e}") + raise e return ret diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 25fe3f184..33ad343ec 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -351,6 +351,7 @@ async def instantiate_provider( if not hasattr(provider_spec, "module"): raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute") + logger.debug(f"Instantiating provider {provider.provider_id} from {provider_spec.module}") module = importlib.import_module(provider_spec.module) args = [] if isinstance(provider_spec, RemoteProviderSpec): diff --git 
a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 557330df7..f6adae49d 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -608,8 +608,8 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): tool_group = await self.get_tool_group(toolgroup_id) if tool_group is None: raise ValueError(f"Tool group {toolgroup_id} not found") - tools = (await self.list_tools(toolgroup_id)).data - for tool in tools: + tools = await self.list_tools(toolgroup_id) + for tool in getattr(tools, "data", []): await self.unregister_object(tool) await self.unregister_object(tool_group) diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 9c9289a77..d70878db4 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -218,7 +218,7 @@ async def construct_stack( run_config: StackRunConfig, provider_registry: Optional[ProviderRegistry] = None ) -> Dict[Api, Any]: dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name) - impls = await resolve_impls(run_config, provider_registry or get_provider_registry(), dist_registry) + impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry) await register_resources(run_config, impls) return impls diff --git a/llama_stack/distribution/ui/Containerfile b/llama_stack/distribution/ui/Containerfile index a97f25753..0126d1867 100644 --- a/llama_stack/distribution/ui/Containerfile +++ b/llama_stack/distribution/ui/Containerfile @@ -1,7 +1,7 @@ # More info on playground configuration can be found here: # https://llama-stack.readthedocs.io/en/latest/playground -FROM python:3.9-slim +FROM python:3.12-slim WORKDIR /app COPY . /app/ RUN /usr/local/bin/python -m pip install --upgrade pip && \ diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md index fe660544f..51c2d2bc2 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/distribution/ui/README.md @@ -36,9 +36,7 @@ llama-stack-client benchmarks register \ 3. 
Start Streamlit UI ```bash -cd llama_stack/distribution/ui -pip install -r requirements.txt -streamlit run app.py +uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py ``` ## Environment Variables diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py index 045b07982..441f65d20 100644 --- a/llama_stack/distribution/ui/app.py +++ b/llama_stack/distribution/ui/app.py @@ -24,6 +24,7 @@ def main(): # Playground pages chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True) rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False) + tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False) # Distribution pages resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False) @@ -39,6 +40,7 @@ def main(): "Playground": [ chat_page, rag_page, + tool_page, application_evaluation_page, native_evaluation_page, ], diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 40caccda0..d5395c5b9 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -19,6 +19,7 @@ class LlamaStackApi: "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""), "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), + "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""), }, ) diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/distribution/ui/page/playground/tools.py new file mode 100644 index 000000000..e987f617b --- /dev/null +++ b/llama_stack/distribution/ui/page/playground/tools.py @@ -0,0 +1,116 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import uuid + +import streamlit as st +from llama_stack_client import Agent + +from llama_stack.distribution.ui.modules.api import llama_stack_api + + +def tool_chat_page(): + st.title("🛠 Tools") + + client = llama_stack_api.client + models = client.models.list() + model_list = [model.identifier for model in models if model.api_model_type == "llm"] + + tool_groups = client.toolgroups.list() + tool_groups_list = [tool_group.identifier for tool_group in tool_groups] + mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] + builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] + + def reset_agent(): + st.session_state.clear() + st.cache_resource.clear() + + with st.sidebar: + st.subheader("Model") + model = st.selectbox(label="models", options=model_list, on_change=reset_agent) + + st.subheader("Builtin Tools") + toolgroup_selection = st.pills( + label="Available ToolGroups", options=builtin_tools_list, selection_mode="multi", on_change=reset_agent + ) + + st.subheader("MCP Servers") + mcp_selection = st.pills( + label="Available MCP Servers", options=mcp_tools_list, selection_mode="multi", on_change=reset_agent + ) + + toolgroup_selection.extend(mcp_selection) + + active_tool_list = [] + for toolgroup_id in toolgroup_selection: + active_tool_list.extend( + [ + f"{''.join(toolgroup_id.split('::')[1:])}:{t.identifier}" + for t in client.tools.list(toolgroup_id=toolgroup_id) + ] + ) + + st.subheader(f"Active Tools: 🛠 {len(active_tool_list)}") + st.json(active_tool_list) + + @st.cache_resource + def create_agent(): + return Agent( + client, + model=model, + instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", + tools=toolgroup_selection, + sampling_params={ + "strategy": {"type": "greedy"}, + }, + ) + + agent = create_agent() + + if "agent_session_id" not in st.session_state: + st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}") + + session_id = st.session_state["agent_session_id"] + + if "messages" not in st.session_state: + st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}] + + for msg in st.session_state.messages: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + + if prompt := st.chat_input(placeholder=""): + with st.chat_message("user"): + st.markdown(prompt) + + st.session_state.messages.append({"role": "user", "content": prompt}) + + turn_response = agent.create_turn( + session_id=session_id, + messages=[{"role": "user", "content": prompt}], + stream=True, + ) + + def response_generator(turn_response): + for response in turn_response: + if hasattr(response.event, "payload"): + print(response.event.payload) + if response.event.payload.event_type == "step_progress": + if hasattr(response.event.payload.delta, "text"): + yield response.event.payload.delta.text + if response.event.payload.event_type == "step_complete": + if response.event.payload.step_details.step_type == "tool_execution": + yield " 🛠 " + else: + yield f"Error occurred in the Llama Stack Cluster: {response}" + + with st.chat_message("assistant"): + response = st.write_stream(response_generator(turn_response)) + + st.session_state.messages.append({"role": "assistant", "content": response}) + + +tool_chat_page() diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt index 39f2b3d27..61d42768d 100644 --- a/llama_stack/distribution/ui/requirements.txt +++ 
b/llama_stack/distribution/ui/requirements.txt @@ -1,4 +1,5 @@ streamlit pandas -llama-stack-client>=0.0.55 +llama-stack-client>=0.2.1 streamlit-option-menu +llama-stack>=0.2.1 diff --git a/llama_stack/distribution/utils/context.py b/llama_stack/distribution/utils/context.py index fcc72161d..c34079ac6 100644 --- a/llama_stack/distribution/utils/context.py +++ b/llama_stack/distribution/utils/context.py @@ -29,6 +29,11 @@ def preserve_contexts_async_generator( context_var.set(initial_context_values[context_var.name]) item = await gen.__anext__() + + # Update our tracked values with any changes made during this iteration + for context_var in context_vars: + initial_context_values[context_var.name] = context_var.get() + yield item except StopAsyncIteration: diff --git a/llama_stack/models/llama/llama3/generation.py b/llama_stack/models/llama/llama3/generation.py index ee99a07ba..8c6aa242b 100644 --- a/llama_stack/models/llama/llama3/generation.py +++ b/llama_stack/models/llama/llama3/generation.py @@ -119,17 +119,16 @@ class Llama3: torch.set_default_device(device) else: print(f"Setting default device to {device}") - torch.set_default_device(device) if device.type == "cuda": if torch.cuda.is_bf16_supported(): - torch.set_default_dtype(torch.bfloat16) + torch.set_default_tensor_type(torch.cuda.BFloat16Tensor) else: - torch.set_default_dtype(torch.half) + torch.set_default_tensor_type(torch.cuda.Float16Tensor) elif device.type == "xpu": if torch.xpu.is_bf16_supported(): - torch.set_default_dtype(torch.bfloat16) + torch.set_default_tensor_type(torch.xpu.BFloat16Tensor) else: - torch.set_default_dtype(torch.half) + torch.set_default_tensor_type(torch.xpu.Float16Tensor) model = build_model() print("Loading state dict...") diff --git a/llama_stack/models/llama/llama4/args.py b/llama_stack/models/llama/llama4/args.py index 6d7c1d409..dd5f7cbde 100644 --- a/llama_stack/models/llama/llama4/args.py +++ b/llama_stack/models/llama/llama4/args.py @@ -70,6 +70,9 @@ class ModelArgs(BaseModel): attention_chunk_size: Optional[int] = None rope_theta: float = 500000 use_scaled_rope: bool = False + rope_scaling_factor: Optional[float] = None + rope_high_freq_factor: Optional[float] = None + nope_layer_interval: Optional[int] = None # No position encoding in every n layers use_qk_norm: bool = False # Set to True to enable inference-time temperature tuning (useful for very long context) @@ -92,4 +95,14 @@ class ModelArgs(BaseModel): f"n_heads ({self.n_heads}) must be divisible by n_kv_heads ({self.n_kv_heads})" ) assert self.dim % self.n_heads == 0, f"dim ({self.dim}) must be divisible by n_heads ({self.n_heads})" + + if self.use_scaled_rope: + # NOTE: ideally these values should have come from params.json. However, we have + # shipped the models everywhere. Only Llama-4-Scout uses scaled rope and needs these + # specific values. 
+ if self.rope_scaling_factor is None: + self.rope_scaling_factor = 16 + if self.rope_high_freq_factor is None: + self.rope_high_freq_factor = 1 + return self diff --git a/llama_stack/models/llama/llama4/model.py b/llama_stack/models/llama/llama4/model.py index 08fac7714..2272b868d 100644 --- a/llama_stack/models/llama/llama4/model.py +++ b/llama_stack/models/llama/llama4/model.py @@ -23,37 +23,25 @@ from .ffn import FeedForward from .moe import MoE +def rmsnorm(x, eps): + def _norm(y): + return y * torch.rsqrt(y.pow(2).mean(-1, keepdim=True) + eps) + + return _norm(x.float()).type_as(x) + + class RMSNorm(torch.nn.Module): def __init__(self, dim: int, eps: float = 1e-6): super().__init__() self.eps = eps self.weight = nn.Parameter(torch.ones(dim)) - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * self.weight + return rmsnorm(x, self.eps) * self.weight -class L2Norm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - return self._norm(x.float()).type_as(x) - - -def apply_scaling(freqs: torch.Tensor): - # Values obtained from grid search - scale_factor = 8 +def apply_scaling(freqs: torch.Tensor, scale_factor: float, high_freq_factor: float): low_freq_factor = 1 - high_freq_factor = 4 old_context_len = 8192 # original llama3 length low_freq_wavelen = old_context_len / low_freq_factor @@ -72,11 +60,18 @@ def apply_scaling(freqs: torch.Tensor): return torch.tensor(new_freqs, dtype=freqs.dtype, device=freqs.device) -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, use_scaled: bool = False): +def precompute_freqs_cis( + dim: int, + end: int, + theta: float, + use_scaled: bool, + scale_factor: float, + high_freq_factor: float, +): freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) t = torch.arange(end, device=freqs.device, dtype=torch.float32) if use_scaled: - freqs = apply_scaling(freqs) + freqs = apply_scaling(freqs, scale_factor, high_freq_factor) freqs = torch.outer(t, freqs) freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 return freqs_cis @@ -174,9 +169,7 @@ class Attention(nn.Module): self.head_dim, ) ).cuda() - self.qk_norm = None - if self.use_qk_norm: - self.qk_norm = L2Norm(args.norm_eps) + self.norm_eps = args.norm_eps self._register_load_state_dict_pre_hook(self.load_hook) def load_hook( @@ -220,8 +213,8 @@ class Attention(nn.Module): xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) if self.use_qk_norm: - xq = self.qk_norm(xq) - xk = self.qk_norm(xk) + xq = rmsnorm(xq, self.norm_eps) + xk = rmsnorm(xk, self.norm_eps) # We are applying temperature tuning (https://arxiv.org/abs/2501.19399) to NoPE layers, where # the inference-time temperature tuning function is customized to not affect short context @@ -362,6 +355,8 @@ class Transformer(nn.Module): args.max_seq_len * 2, args.rope_theta, args.use_scaled_rope, + args.rope_scaling_factor, + args.rope_high_freq_factor, ) vision_args = self.args.vision_args if vision_args: diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index b50432896..f11d83c60 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -91,7 +91,7 @@ def 
convert_to_quantized_model( log_status(f"Rank {rank}: Quantizing int4 weights from bf16") def apply_quantization(_, weight): - return quantize_int4(weight, fp8_activation_scale_ub, output_device=torch.device("cuda")) + return quantize_int4(weight, output_device=torch.device("cuda")) else: fp8_scales_path = os.path.join(checkpoint_dir, f"fp8_scales_{rank}.pt") diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py index 4d271e5fd..8eabc3205 100644 --- a/llama_stack/models/llama/llama4/tokenizer.py +++ b/llama_stack/models/llama/llama4/tokenizer.py @@ -56,9 +56,11 @@ LLAMA4_TEXT_POST_TRAIN_SPECIAL_TOKENS = [ "<|text_post_train_reserved_special_token_3|>", "<|text_post_train_reserved_special_token_4|>", "<|text_post_train_reserved_special_token_5|>", + "<|text_post_train_reserved_special_token_6|>", + "<|text_post_train_reserved_special_token_7|>", "<|finetune_right_pad|>", ] + get_reserved_special_tokens( - "text_post_train", 61, 6 + "text_post_train", 61, 8 ) # <|text_post_train_reserved_special_token_6|>, ..., <|text_post_train_reserved_special_token_66|> # 200080, ..., 201133 diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index 6e1d15cf6..a5da01588 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -65,7 +65,7 @@ class Int4Weights( Int4ScaledWeights, collections.namedtuple( "Int4Weights", - ["weight", "scale", "zero_point", "shape", "activation_scale_ub"], + ["weight", "scale", "zero_point", "shape"], ), ): pass @@ -184,20 +184,13 @@ def quantize_fp8( @torch.inference_mode() def quantize_int4( w: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Quantize [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input high precision tensor to quantize. - fp8_activation_scale_ub (float): Upper bound for activation max. """ - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) if w.ndim >= 3: wq, scale, zero_point = zip(*[int4_row_quantize(i) for i in w], strict=False) wq = torch.stack([pack_int4(i) for i in wq], dim=0) @@ -212,7 +205,6 @@ def quantize_int4( scale=scale.to(output_device), zero_point=zero_point.to(output_device), shape=wq.shape, - activation_scale_ub=activation_scale_ub, ) @@ -247,26 +239,18 @@ def load_int4( w: Tensor, scale: Tensor, zero_point: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Load INT4 [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input INT4. - fp8_activation_scale_ub (float): Upper bound for activation max. 
""" - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) return Int4Weights( weight=w.to(torch.int8).to(device=output_device), scale=scale.to(device=output_device), zero_point=zero_point.to(device=output_device), shape=w.shape, - activation_scale_ub=activation_scale_ub, ) diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 6840da89f..f441d6eb6 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -89,7 +89,6 @@ class ChatAgent(ShieldRunnerMixin): self, agent_id: str, agent_config: AgentConfig, - tempdir: str, inference_api: Inference, safety_api: Safety, tool_runtime_api: ToolRuntime, @@ -99,7 +98,6 @@ class ChatAgent(ShieldRunnerMixin): ): self.agent_id = agent_id self.agent_config = agent_config - self.tempdir = tempdir self.inference_api = inference_api self.safety_api = safety_api self.vector_io_api = vector_io_api diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5ca123595..656178773 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -7,7 +7,6 @@ import json import logging import shutil -import tempfile import uuid from typing import AsyncGenerator, List, Optional, Union @@ -64,7 +63,6 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.in_memory_store = InmemoryKVStoreImpl() - self.tempdir = tempfile.mkdtemp() async def initialize(self) -> None: self.persistence_store = await kvstore_impl(self.config.persistence_store) @@ -107,7 +105,6 @@ class MetaReferenceAgentsImpl(Agents): return ChatAgent( agent_id=agent_id, agent_config=agent_config, - tempdir=self.tempdir, inference_api=self.inference_api, safety_api=self.safety_api, vector_io_api=self.vector_io_api, diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/llama_stack/providers/inline/inference/meta_reference/generators.py index 65bed4d8c..34dd58a9a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -259,7 +259,7 @@ class Llama3Generator: temperature, top_p = _infer_sampling_params(sampling_params) for result in self.inner_generator.generate( - llm_inputs=[self.formatter.encode_content(request.content)], + model_inputs=[self.formatter.encode_content(request.content)], max_gen_len=max_gen_len, temperature=temperature, top_p=top_p, @@ -284,7 +284,7 @@ class Llama3Generator: temperature, top_p = _infer_sampling_params(sampling_params) for result in self.inner_generator.generate( - llm_inputs=[self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request))], + model_inputs=[self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request))], max_gen_len=max_gen_len, temperature=temperature, top_p=top_p, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 5a78c07cc..12902996b 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -307,9 +307,10 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): 
         if model.model_type == ModelType.embedding:
             logger.info(f"Pulling embedding model `{model.provider_resource_id}` if necessary...")
             await self.client.pull(model.provider_resource_id)
-            response = await self.client.list()
-        else:
-            response = await self.client.ps()
+        # we use list() here instead of ps() -
+        # - ps() only lists running models, not available models
+        # - models not currently running are run by the ollama server as needed
+        response = await self.client.list()
         available_models = [m["model"] for m in response["models"]]
         if model.provider_resource_id not in available_models:
             raise ValueError(
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 57c9f116c..efcdb62c6 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -13,7 +13,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
 
 {{ providers_table }}
 
-You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.
+You can use this distribution if you want to run an independent vLLM server for inference.
 
 {% if run_config_env_vars %}
 ### Environment Variables
@@ -28,6 +28,83 @@ The following environment variables can be configured:
 
 ## Setting up vLLM server
 
+In the following sections, we'll use either AMD or NVIDIA GPUs to serve as hardware accelerators for the vLLM
+server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also
+[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and
+that we only use GPUs here for demonstration purposes.
+
+### Setting up vLLM server on AMD GPU
+
+AMD provides two main vLLM container options:
+- rocm/vllm: Production-ready container
+- rocm/vllm-dev: Development container with the latest vLLM features
+
+Please check the [Blog about ROCm vLLM Usage](https://rocm.blogs.amd.com/software-tools-optimization/vllm-container/README.html) to get more details.
+
+Here is a sample script to start a ROCm vLLM server locally via Docker:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+export CUDA_VISIBLE_DEVICES=0
+export VLLM_DIMG="rocm/vllm-dev:main"
+
+docker run \
+    --pull always \
+    --ipc=host \
+    --privileged \
+    --shm-size 16g \
+    --device=/dev/kfd \
+    --device=/dev/dri \
+    --group-add video \
+    --cap-add=SYS_PTRACE \
+    --cap-add=CAP_SYS_ADMIN \
+    --security-opt seccomp=unconfined \
+    --security-opt apparmor=unconfined \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \
+    -p $INFERENCE_PORT:$INFERENCE_PORT \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    $VLLM_DIMG \
+    python -m vllm.entrypoints.openai.api_server \
+    --model $INFERENCE_MODEL \
+    --port $INFERENCE_PORT
+```
+
+Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html).
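+
+Once tool calling is enabled, you can sanity-check the endpoint with any OpenAI-compatible client. Below is a minimal sketch using the `openai` Python package; the `get_weather` tool and its schema are purely illustrative, not something vLLM or this distribution ships:
+
+```python
+# Quick tool-calling smoke test against the vLLM server started above.
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")
+
+response = client.chat.completions.create(
+    model="meta-llama/Llama-3.2-3B-Instruct",
+    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
+    tools=[
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",  # illustrative tool definition
+                "description": "Get the current weather for a city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                    "required": ["city"],
+                },
+            },
+        }
+    ],
+)
+print(response.choices[0].message.tool_calls)
+```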
+
+If you are using Llama Stack Safety / Shield APIs, then you will need to also run a second vLLM instance with a corresponding safety model like `meta-llama/Llama-Guard-3-1B`, using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export CUDA_VISIBLE_DEVICES=1
+export VLLM_DIMG="rocm/vllm-dev:main"
+
+docker run \
+    --pull always \
+    --ipc=host \
+    --privileged \
+    --shm-size 16g \
+    --device=/dev/kfd \
+    --device=/dev/dri \
+    --group-add video \
+    --cap-add=SYS_PTRACE \
+    --cap-add=CAP_SYS_ADMIN \
+    --security-opt seccomp=unconfined \
+    --security-opt apparmor=unconfined \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \
+    -p $SAFETY_PORT:$SAFETY_PORT \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    $VLLM_DIMG \
+    python -m vllm.entrypoints.openai.api_server \
+    --model $SAFETY_MODEL \
+    --port $SAFETY_PORT
+```
+
+### Setting up vLLM server on NVIDIA GPU
+
 Please check the [vLLM Documentation](https://docs.vllm.ai/en/v0.5.5/serving/deploying_with_docker.html) to get a vLLM endpoint. Here is a sample script to start a vLLM server locally via Docker:
 
 ```bash
diff --git a/pyproject.toml b/pyproject.toml
index 8ae7ddbb6..83260b681 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -89,6 +89,12 @@ docs = [
   "tomli",
 ]
 codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
+ui = [
+  "streamlit",
+  "pandas",
+  "llama-stack-client>=0.2.1",
+  "streamlit-option-menu",
+]
 
 [project.urls]
 Homepage = "https://github.com/meta-llama/llama-stack"
diff --git a/tests/external-provider/llama-stack-provider-ollama/README.md b/tests/external-provider/llama-stack-provider-ollama/README.md
new file mode 100644
index 000000000..8bd2b6a87
--- /dev/null
+++ b/tests/external-provider/llama-stack-provider-ollama/README.md
@@ -0,0 +1,3 @@
+# Ollama external provider for Llama Stack
+
+Template code to create a new external provider for Llama Stack.
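+
+As the external providers guide above notes, a remote provider module only has to expose a `get_adapter_impl()` entry point. A minimal sketch of what this template's module boils down to (the file layout is illustrative; the class and config names follow `custom_ollama.yaml`):
+
+```python
+# llama_stack_provider_ollama/__init__.py -- sketch of the required entry point
+from typing import Any, Dict
+
+from .config import OllamaImplConfig
+from .ollama import OllamaInferenceAdapter
+
+
+async def get_adapter_impl(config: OllamaImplConfig, _deps: Dict[Any, Any]) -> OllamaInferenceAdapter:
+    # Llama Stack imports the module named in custom_ollama.yaml and calls this
+    # function with the parsed config; it must return the adapter instance.
+    return OllamaInferenceAdapter(config)
+```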
diff --git a/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml new file mode 100644 index 000000000..f0960b4d8 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml @@ -0,0 +1,7 @@ +adapter: + adapter_type: custom_ollama + pip_packages: ["ollama", "aiohttp"] + config_class: llama_stack_provider_ollama.config.OllamaImplConfig + module: llama_stack_provider_ollama +api_dependencies: [] +optional_api_dependencies: [] diff --git a/tests/external-provider/llama-stack-provider-ollama/pyproject.toml b/tests/external-provider/llama-stack-provider-ollama/pyproject.toml new file mode 100644 index 000000000..ddebc54b0 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/pyproject.toml @@ -0,0 +1,44 @@ +[project] +dependencies = [ + "llama-stack", + "pydantic", + "ollama", + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", +] + +name = "llama-stack-provider-ollama" +version = "0.1.0" +description = "External provider for Ollama using the Llama Stack API" +readme = "README.md" +requires-python = ">=3.10" diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml new file mode 100644 index 000000000..7a3636c4d --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -0,0 +1,135 @@ +version: '2' +image_name: ollama +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: custom_ollama + provider_type: remote::custom_ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + 
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: custom_ollama + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: custom_ollama + provider_model_id: all-minilm:latest + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 +external_providers_dir: /tmp/providers.d diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py new file mode 100644 index 000000000..e04b56652 --- /dev/null +++ b/tests/integration/tool_runtime/test_registration.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import socket +import threading +import time + +import httpx +import mcp.types as types +import pytest +import uvicorn +from llama_stack_client.types.shared_params.url import URL +from mcp.server.fastmcp import Context, FastMCP +from mcp.server.sse import SseServerTransport +from starlette.applications import Starlette +from starlette.routing import Mount, Route + + +@pytest.fixture(scope="module") +def mcp_server(): + server = FastMCP("FastMCP Test Server") + + @server.tool() + async def fetch(url: str, ctx: Context) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]: + headers = {"User-Agent": "MCP Test Server (github.com/modelcontextprotocol/python-sdk)"} + async with httpx.AsyncClient(follow_redirects=True, headers=headers) as client: + response = await client.get(url) + response.raise_for_status() + return [types.TextContent(type="text", text=response.text)] + + sse = SseServerTransport("/messages/") + + async def handle_sse(request): + async with sse.connect_sse(request.scope, request.receive, request._send) as streams: + await server._mcp_server.run( + streams[0], + streams[1], + server._mcp_server.create_initialization_options(), + ) + + app = Starlette( + debug=True, + routes=[ + Route("/sse", endpoint=handle_sse), + Mount("/messages/", app=sse.handle_post_message), + ], + ) + + def get_open_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind(("", 0)) + return sock.getsockname()[1] + + port = get_open_port() + + def run_server(): + uvicorn.run(app, host="0.0.0.0", port=port) + + # Start the server in a new thread + server_thread = threading.Thread(target=run_server, daemon=True) + server_thread.start() + + # Polling until the server is ready + timeout = 10 + start_time = time.time() + + while time.time() - start_time < timeout: + try: + response = httpx.get(f"http://localhost:{port}/sse") + if response.status_code == 200: + break + except (httpx.RequestError, httpx.HTTPStatusError): + pass + time.sleep(0.1) + + yield port + + +def test_register_and_unregister_toolgroup(llama_stack_client, mcp_server): + """ + Integration test for registering and unregistering a toolgroup using the ToolGroups API. 
+ """ + port = mcp_server + test_toolgroup_id = "remote::web-fetch" + provider_id = "model-context-protocol" + + # Cleanup before running the test + toolgroups = llama_stack_client.toolgroups.list() + for toolgroup in toolgroups: + if toolgroup.identifier == test_toolgroup_id: + llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id) + + # Register the toolgroup + llama_stack_client.toolgroups.register( + toolgroup_id=test_toolgroup_id, + provider_id=provider_id, + mcp_endpoint=URL(uri=f"http://localhost:{port}/sse"), + ) + + # Verify registration + registered_toolgroup = llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id) + assert registered_toolgroup is not None + assert registered_toolgroup.identifier == test_toolgroup_id + assert registered_toolgroup.provider_id == provider_id + + # Verify tools listing + tools_list_response = llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id) + assert isinstance(tools_list_response, list) + assert tools_list_response + + # Unregister the toolgroup + llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id) + + # Verify it is unregistered + with pytest.raises(ValueError, match=f"Tool group '{test_toolgroup_id}' not found"): + llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id) + + # Verify tools are also unregistered + unregister_tools_list_response = llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id) + assert isinstance(unregister_tools_list_response, list) + assert not unregister_tools_list_response diff --git a/llama_stack/distribution/utils/tests/test_context.py b/tests/unit/distribution/test_context.py similarity index 100% rename from llama_stack/distribution/utils/tests/test_context.py rename to tests/unit/distribution/test_context.py diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py new file mode 100644 index 000000000..a4daffb82 --- /dev/null +++ b/tests/unit/distribution/test_distribution.py @@ -0,0 +1,223 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, Dict +from unittest.mock import patch + +import pytest +import yaml +from pydantic import BaseModel, Field, ValidationError + +from llama_stack.distribution.datatypes import Api, Provider, StackRunConfig +from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.providers.datatypes import ProviderSpec + + +class SampleConfig(BaseModel): + foo: str = Field( + default="bar", + description="foo", + ) + + @classmethod + def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + return { + "foo": "baz", + } + + +@pytest.fixture +def mock_providers(): + """Mock the available_providers function to return test providers.""" + with patch("llama_stack.providers.registry.inference.available_providers") as mock: + mock.return_value = [ + ProviderSpec( + provider_type="test_provider", + api=Api.inference, + adapter_type="test_adapter", + config_class="test_provider.config.TestProviderConfig", + ) + ] + yield mock + + +@pytest.fixture +def base_config(tmp_path): + """Create a base StackRunConfig with common settings.""" + return StackRunConfig( + image_name="test_image", + providers={ + "inference": [ + Provider( + provider_id="sample_provider", + provider_type="sample", + config=SampleConfig.sample_run_config(), + ) + ] + }, + external_providers_dir=str(tmp_path), + ) + + +@pytest.fixture +def provider_spec_yaml(): + """Common provider spec YAML for testing.""" + return """ +adapter: + adapter_type: test_provider + config_class: test_provider.config.TestProviderConfig + module: test_provider +api_dependencies: + - safety +""" + + +@pytest.fixture +def inline_provider_spec_yaml(): + """Common inline provider spec YAML for testing.""" + return """ +module: test_provider +config_class: test_provider.config.TestProviderConfig +pip_packages: + - test-package +api_dependencies: + - safety +optional_api_dependencies: + - vector_io +provider_data_validator: test_provider.validator.TestValidator +container_image: test-image:latest +""" + + +@pytest.fixture +def api_directories(tmp_path): + """Create the API directory structure for testing.""" + # Create remote provider directory + remote_inference_dir = tmp_path / "remote" / "inference" + remote_inference_dir.mkdir(parents=True, exist_ok=True) + + # Create inline provider directory + inline_inference_dir = tmp_path / "inline" / "inference" + inline_inference_dir.mkdir(parents=True, exist_ok=True) + + return remote_inference_dir, inline_inference_dir + + +class TestProviderRegistry: + """Test suite for provider registry functionality.""" + + def test_builtin_providers(self, mock_providers): + """Test loading built-in providers.""" + registry = get_provider_registry(None) + + assert Api.inference in registry + assert "test_provider" in registry[Api.inference] + assert registry[Api.inference]["test_provider"].provider_type == "test_provider" + assert registry[Api.inference]["test_provider"].api == Api.inference + + def test_external_remote_providers(self, api_directories, mock_providers, base_config, provider_spec_yaml): + """Test loading external remote providers from YAML files.""" + remote_dir, _ = api_directories + with open(remote_dir / "test_provider.yaml", "w") as f: + f.write(provider_spec_yaml) + + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 2 + + assert Api.inference in registry + assert "remote::test_provider" in registry[Api.inference] + provider = registry[Api.inference]["remote::test_provider"] + assert provider.adapter.adapter_type == 
"test_provider" + assert provider.adapter.module == "test_provider" + assert provider.adapter.config_class == "test_provider.config.TestProviderConfig" + assert Api.safety in provider.api_dependencies + + def test_external_inline_providers(self, api_directories, mock_providers, base_config, inline_provider_spec_yaml): + """Test loading external inline providers from YAML files.""" + _, inline_dir = api_directories + with open(inline_dir / "test_provider.yaml", "w") as f: + f.write(inline_provider_spec_yaml) + + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 2 + + assert Api.inference in registry + assert "inline::test_provider" in registry[Api.inference] + provider = registry[Api.inference]["inline::test_provider"] + assert provider.provider_type == "inline::test_provider" + assert provider.module == "test_provider" + assert provider.config_class == "test_provider.config.TestProviderConfig" + assert provider.pip_packages == ["test-package"] + assert Api.safety in provider.api_dependencies + assert Api.vector_io in provider.optional_api_dependencies + assert provider.provider_data_validator == "test_provider.validator.TestValidator" + assert provider.container_image == "test-image:latest" + + def test_invalid_yaml(self, api_directories, mock_providers, base_config): + """Test handling of invalid YAML files.""" + remote_dir, inline_dir = api_directories + with open(remote_dir / "invalid.yaml", "w") as f: + f.write("invalid: yaml: content: -") + with open(inline_dir / "invalid.yaml", "w") as f: + f.write("invalid: yaml: content: -") + + with pytest.raises(yaml.YAMLError): + get_provider_registry(base_config) + + def test_missing_directory(self, mock_providers): + """Test handling of missing external providers directory.""" + config = StackRunConfig( + image_name="test_image", + providers={ + "inference": [ + Provider( + provider_id="sample_provider", + provider_type="sample", + config=SampleConfig.sample_run_config(), + ) + ] + }, + external_providers_dir="/nonexistent/dir", + ) + with pytest.raises(FileNotFoundError): + get_provider_registry(config) + + def test_empty_api_directory(self, api_directories, mock_providers, base_config): + """Test handling of empty API directory.""" + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 1 # Only built-in provider + + def test_malformed_remote_provider_spec(self, api_directories, mock_providers, base_config): + """Test handling of malformed remote provider spec (missing required fields).""" + remote_dir, _ = api_directories + malformed_spec = """ +adapter: + adapter_type: test_provider + # Missing required fields +api_dependencies: + - safety +""" + with open(remote_dir / "malformed.yaml", "w") as f: + f.write(malformed_spec) + + with pytest.raises(ValidationError): + get_provider_registry(base_config) + + def test_malformed_inline_provider_spec(self, api_directories, mock_providers, base_config): + """Test handling of malformed inline provider spec (missing required fields).""" + _, inline_dir = api_directories + malformed_spec = """ +module: test_provider +# Missing required config_class +pip_packages: + - test-package +""" + with open(inline_dir / "malformed.yaml", "w") as f: + f.write(malformed_spec) + + with pytest.raises(KeyError) as exc_info: + get_provider_registry(base_config) + assert "config_class" in str(exc_info.value) diff --git a/tests/verifications/README.md b/tests/verifications/README.md new file mode 100644 index 000000000..986ff1087 --- /dev/null +++ 
b/tests/verifications/README.md
@@ -0,0 +1,65 @@
+# Llama Stack Verifications
+
+Llama Stack Verifications provide standardized test suites to ensure API compatibility and behavior consistency across different LLM providers. These tests help verify that different models and providers implement the expected interfaces and behaviors correctly.
+
+## Overview
+
+This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.
+
+## Features
+
+The verification suite currently tests:
+
+- Basic chat completions (streaming and non-streaming)
+- Image input capabilities
+- Structured JSON output formatting
+- Tool calling functionality
+
+## Running Tests
+
+To run the verification tests, use pytest with the following parameters:
+
+```bash
+cd llama-stack
+pytest tests/verifications/openai --provider=<provider-name>
+```
+
+Example:
+```bash
+# Run all tests
+pytest tests/verifications/openai --provider=together
+
+# Only run tests with Llama 4 models
+pytest tests/verifications/openai --provider=together -k 'Llama-4'
+```
+
+### Parameters
+
+- `--provider`: The provider name (openai, fireworks, together, groq, cerebras, etc.)
+- `--base-url`: The base URL for the provider's API (optional - defaults to the standard URL for the specified provider)
+- `--api-key`: Your API key for the provider (optional - defaults to the provider's standard API key environment variable)
+
+## Supported Providers
+
+The verification suite currently supports:
+- OpenAI
+- Fireworks
+- Together
+- Groq
+- Cerebras
+
+## Adding New Test Cases
+
+To add new test cases, create appropriate YAML files in the `openai/fixtures/test_cases/` directory following the existing patterns; see the sketch below.
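+
+For example, a new case added under `test_chat_basic` might look like the following sketch, modeled on the existing `chat_completion.yaml` (the prompt, expected output, and model list here are illustrative, not part of the shipped fixtures):
+
+```yaml
+test_chat_basic:
+  test_name: test_chat_basic
+  test_params:
+    input_output:
+    # Each entry pairs a request with a substring expected in the response.
+    - input:
+        messages:
+        - content: What color is a clear daytime sky?
+          role: user
+      output: blue
+    model:
+    # Generic model names; the fixtures map these to provider-specific IDs.
+    - Llama-3.3-70B-Instruct
+    - gpt-4o
+```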
+ + +## Structure + +- `__init__.py` - Marks the directory as a Python package +- `conftest.py` - Global pytest configuration and fixtures +- `openai/` - Tests specific to OpenAI-compatible APIs + - `fixtures/` - Test fixtures and utilities + - `fixtures.py` - Provider-specific fixtures + - `load.py` - Utilities for loading test cases + - `test_cases/` - JSON test case definitions + - `test_chat_completion.py` - Tests for chat completion APIs diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md new file mode 100644 index 000000000..d5715ae21 --- /dev/null +++ b/tests/verifications/REPORT.md @@ -0,0 +1,88 @@ +# Test Results Report + +*Generated on: 2025-04-08 21:14:02* + +*This report was generated by running `python tests/verifications/generate_report.py`* + +## Legend + +- ✅ - Test passed +- ❌ - Test failed +- ⚪ - Test not applicable or not run for this model + + +## Summary + +| Provider | Pass Rate | Tests Passed | Total Tests | +| --- | --- | --- | --- | +| Together | 67.7% | 21 | 31 | +| Fireworks | 90.3% | 28 | 31 | +| Openai | 100.0% | 22 | 22 | + + + +## Together + +*Tests run on: 2025-04-08 16:19:59* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=together -v +``` + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | +| --- | --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (case 1) | ✅ | ❌ | ❌ | +| test_chat_streaming_image (case 0) | ⚪ | ❌ | ❌ | +| test_chat_streaming_structured_output (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_structured_output (case 1) | ✅ | ❌ | ❌ | + +## Fireworks + +*Tests run on: 2025-04-08 16:18:28* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=fireworks -v +``` + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | +| --- | --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_structured_output (case 1) | ❌ | ✅ | ✅ | + +## Openai + +*Tests run on: 2025-04-08 16:22:02* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=openai -v +``` + +| Test | gpt-4o | gpt-4o-mini | +| --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | +| test_chat_streaming_basic (case 
0) | ✅ | ✅ | +| test_chat_streaming_basic (case 1) | ✅ | ✅ | +| test_chat_streaming_image (case 0) | ✅ | ✅ | +| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | +| test_chat_streaming_structured_output (case 1) | ✅ | ✅ | diff --git a/tests/verifications/__init__.py b/tests/verifications/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/conftest.py b/tests/verifications/conftest.py new file mode 100644 index 000000000..08967e834 --- /dev/null +++ b/tests/verifications/conftest.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +def pytest_addoption(parser): + parser.addoption( + "--base-url", + action="store", + help="Base URL for OpenAI compatible API", + ) + parser.addoption( + "--api-key", + action="store", + help="API key", + ) + parser.addoption( + "--provider", + action="store", + help="Provider to use for testing", + ) + + +pytest_plugins = [ + "tests.verifications.openai.fixtures.fixtures", +] diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py new file mode 100755 index 000000000..98a5930da --- /dev/null +++ b/tests/verifications/generate_report.py @@ -0,0 +1,485 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +""" +Test Report Generator + +Requirements: + pip install pytest-json-report + +Usage: + # Generate a report using existing test results + python tests/verifications/generate_report.py + + # Run tests and generate a report + python tests/verifications/generate_report.py --run-tests + + # Run tests for specific providers + python tests/verifications/generate_report.py --run-tests --providers fireworks openai + + # Save the report to a custom location + python tests/verifications/generate_report.py --output custom_report.md + + # Clean up old test result files + python tests/verifications/generate_report.py --cleanup +""" + +import argparse +import json +import os +import re +import subprocess +import time +from collections import defaultdict +from pathlib import Path + +# Define the root directory for test results +RESULTS_DIR = Path(__file__).parent / "test_results" +RESULTS_DIR.mkdir(exist_ok=True) + +# Maximum number of test result files to keep per provider +MAX_RESULTS_PER_PROVIDER = 1 + +# Custom order of providers +PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"] + +# Dictionary to store providers and their models (will be populated dynamically) +PROVIDERS = defaultdict(set) + +# Tests will be dynamically extracted from results +ALL_TESTS = set() + + +def run_tests(provider): + """Run pytest for a specific provider and save results""" + print(f"Running tests for provider: {provider}") + + timestamp = int(time.time()) + result_file = RESULTS_DIR / f"{provider}_{timestamp}.json" + temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json" + + # Run pytest with JSON output + cmd = [ + "python", + "-m", + "pytest", + "tests/verifications/openai/test_chat_completion.py", + f"--provider={provider}", + "-v", + "--json-report", + f"--json-report-file={temp_json_file}", + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True) + print(f"Pytest exit code: {result.returncode}") + + # Check if the JSON file was created + if temp_json_file.exists(): + # Read the JSON file and save it to our results format + with open(temp_json_file, "r") as f: + test_results = json.load(f) + + # Save results to our own format with a trailing newline + with open(result_file, "w") as f: + json.dump(test_results, f, indent=2) + f.write("\n") # Add a trailing newline for precommit + + # Clean up temp file + temp_json_file.unlink() + + print(f"Test results saved to {result_file}") + return result_file + else: + print(f"Error: JSON report file not created for {provider}") + print(f"Command stdout: {result.stdout}") + print(f"Command stderr: {result.stderr}") + return None + except Exception as e: + print(f"Error running tests for {provider}: {e}") + return None + + +def parse_results(result_file): + """Parse the test results file and extract pass/fail by model and test""" + if not os.path.exists(result_file): + print(f"Results file does not exist: {result_file}") + return {} + + with open(result_file, "r") as f: + results = json.load(f) + + # Initialize results dictionary + parsed_results = defaultdict(lambda: defaultdict(dict)) + provider = os.path.basename(result_file).split("_")[0] + + # Debug: Print summary of test results + print(f"Test results summary for {provider}:") + print(f"Total tests: {results.get('summary', {}).get('total', 0)}") + print(f"Passed: {results.get('summary', {}).get('passed', 0)}") + print(f"Failed: {results.get('summary', {}).get('failed', 0)}") + print(f"Error: {results.get('summary', {}).get('error', 0)}") + print(f"Skipped: 
{results.get('summary', {}).get('skipped', 0)}") + + # Extract test results + if "tests" not in results or not results["tests"]: + print(f"No test results found in {result_file}") + return parsed_results + + # Map for normalizing model names + model_name_map = { + "Llama-3.3-8B-Instruct": "Llama-3.3-8B-Instruct", + "Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct", + "Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct", + "Llama-4-Scout-17B-16E": "Llama-4-Scout-17B-16E-Instruct", + "Llama-4-Scout-17B-16E-Instruct": "Llama-4-Scout-17B-16E-Instruct", + "Llama-4-Maverick-17B-128E": "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Maverick-17B-128E-Instruct": "Llama-4-Maverick-17B-128E-Instruct", + "gpt-4o": "gpt-4o", + "gpt-4o-mini": "gpt-4o-mini", + } + + # Keep track of all models found for this provider + provider_models = set() + + # Track all unique test cases for each base test + test_case_counts = defaultdict(int) + + # First pass: count the number of cases for each test + for test in results["tests"]: + test_id = test.get("nodeid", "") + + if "call" in test: + test_name = test_id.split("::")[1].split("[")[0] + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + if input_output_match: + test_case_counts[test_name] += 1 + + # Second pass: process the tests with case numbers only for tests with multiple cases + for test in results["tests"]: + test_id = test.get("nodeid", "") + outcome = test.get("outcome", "") + + # Only process tests that have been executed (not setup errors) + if "call" in test: + # Regular test that actually ran + test_name = test_id.split("::")[1].split("[")[0] + + # Extract input_output parameter to differentiate between test cases + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + input_output_index = input_output_match.group(1) if input_output_match else "" + + # Create a more detailed test name with case number only if there are multiple cases + detailed_test_name = test_name + if input_output_index and test_case_counts[test_name] > 1: + detailed_test_name = f"{test_name} (case {input_output_index})" + + # Track all unique test names + ALL_TESTS.add(detailed_test_name) + + # Extract model name from test_id using a more robust pattern + model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) + if model_match: + raw_model = model_match.group(1) + model = model_name_map.get(raw_model, raw_model) + + # Add to set of known models for this provider + provider_models.add(model) + + # Also update the global PROVIDERS dictionary + PROVIDERS[provider].add(model) + + # Store the result + if outcome == "passed": + parsed_results[provider][model][detailed_test_name] = True + else: + parsed_results[provider][model][detailed_test_name] = False + + print(f"Parsed test result: {detailed_test_name} for model {model}: {outcome}") + elif outcome == "error" and "setup" in test and test.get("setup", {}).get("outcome") == "failed": + # This is a setup failure, which likely means a configuration issue + # Extract the base test name and model name + parts = test_id.split("::") + if len(parts) > 1: + test_name = parts[1].split("[")[0] + + # Extract input_output parameter to differentiate between test cases + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + input_output_index = input_output_match.group(1) if input_output_match else "" + + # Create a more detailed test name with case number only if there are multiple cases + detailed_test_name = test_name + if input_output_index and test_case_counts[test_name] > 1: + 
detailed_test_name = f"{test_name} (case {input_output_index})" + + if detailed_test_name in ALL_TESTS: + # Use a more robust pattern for model extraction + model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) + if model_match: + raw_model = model_match.group(1) + model = model_name_map.get(raw_model, raw_model) + + # Add to set of known models for this provider + provider_models.add(model) + + # Also update the global PROVIDERS dictionary + PROVIDERS[provider].add(model) + + # Mark setup failures as false (failed) + parsed_results[provider][model][detailed_test_name] = False + print(f"Parsed setup failure: {detailed_test_name} for model {model}") + + # Debug: Print parsed results + if not parsed_results[provider]: + print(f"Warning: No test results parsed for provider {provider}") + else: + for model, tests in parsed_results[provider].items(): + print(f"Model {model}: {len(tests)} test results") + + return parsed_results + + +def cleanup_old_results(): + """Clean up old test result files, keeping only the newest N per provider""" + for provider in PROVIDERS.keys(): + # Get all result files for this provider + provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json")) + + # Sort by timestamp (newest first) + provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) + + # Remove old files beyond the max to keep + if len(provider_files) > MAX_RESULTS_PER_PROVIDER: + for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]: + try: + old_file.unlink() + print(f"Removed old result file: {old_file}") + except Exception as e: + print(f"Error removing file {old_file}: {e}") + + +def get_latest_results_by_provider(): + """Get the latest test result file for each provider""" + provider_results = {} + + # Get all result files + result_files = list(RESULTS_DIR.glob("*.json")) + + # Extract all provider names from filenames + all_providers = set() + for file in result_files: + # File format is provider_timestamp.json + parts = file.stem.split("_") + if len(parts) >= 2: + all_providers.add(parts[0]) + + # Group by provider + for provider in all_providers: + provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")] + + # Sort by timestamp (newest first) + provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) + + if provider_files: + provider_results[provider] = provider_files[0] + + return provider_results + + +def generate_report(results_dict, output_file=None): + """Generate the markdown report""" + if output_file is None: + # Default to creating the report in the same directory as this script + output_file = Path(__file__).parent / "REPORT.md" + else: + output_file = Path(output_file) + + # Get the timestamp from result files + provider_timestamps = {} + provider_results = get_latest_results_by_provider() + for provider, result_file in provider_results.items(): + # Extract timestamp from filename (format: provider_timestamp.json) + try: + timestamp_str = result_file.stem.split("_")[1] + timestamp = int(timestamp_str) + formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp)) + provider_timestamps[provider] = formatted_time + except (IndexError, ValueError): + provider_timestamps[provider] = "Unknown" + + # Convert provider model sets to sorted lists + for provider in PROVIDERS: + PROVIDERS[provider] = sorted(PROVIDERS[provider]) + + # Sort tests alphabetically + sorted_tests = sorted(ALL_TESTS) + + report = ["# Test Results Report\n"] + report.append(f"*Generated on: {time.strftime('%Y-%m-%d 
%H:%M:%S')}*\n") + report.append("*This report was generated by running `python tests/verifications/generate_report.py`*\n") + + # Icons for pass/fail + pass_icon = "✅" + fail_icon = "❌" + na_icon = "⚪" + + # Add emoji legend + report.append("## Legend\n") + report.append(f"- {pass_icon} - Test passed") + report.append(f"- {fail_icon} - Test failed") + report.append(f"- {na_icon} - Test not applicable or not run for this model") + report.append("\n") + + # Add a summary section + report.append("## Summary\n") + + # Count total tests and passes + total_tests = 0 + passed_tests = 0 + provider_totals = {} + + # Prepare summary data + for provider in PROVIDERS.keys(): + provider_passed = 0 + provider_total = 0 + + if provider in results_dict: + provider_models = PROVIDERS[provider] + for model in provider_models: + if model in results_dict[provider]: + model_results = results_dict[provider][model] + for test in sorted_tests: + if test in model_results: + provider_total += 1 + total_tests += 1 + if model_results[test]: + provider_passed += 1 + passed_tests += 1 + + provider_totals[provider] = (provider_passed, provider_total) + + # Add summary table + report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") + report.append("| --- | --- | --- | --- |") + + # Use the custom order for summary table + for provider in [p for p in PROVIDER_ORDER if p in PROVIDERS]: + passed, total = provider_totals.get(provider, (0, 0)) + pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" + report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") + + # Add providers not in the custom order + for provider in [p for p in PROVIDERS if p not in PROVIDER_ORDER]: + passed, total = provider_totals.get(provider, (0, 0)) + pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" + report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") + + report.append("\n") + + # Process each provider in the custom order, then any additional providers + for provider in sorted( + PROVIDERS.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) + ): + if not PROVIDERS[provider]: + # Skip providers with no models + continue + + report.append(f"\n## {provider.capitalize()}\n") + + # Add timestamp when test was run + if provider in provider_timestamps: + report.append(f"*Tests run on: {provider_timestamps[provider]}*\n") + + # Add test command for reproducing results + test_cmd = f"pytest tests/verifications/openai/test_chat_completion.py --provider={provider} -v" + report.append(f"```bash\n{test_cmd}\n```\n") + + # Get the relevant models for this provider + provider_models = PROVIDERS[provider] + + # Create table header with models as columns + header = "| Test | " + " | ".join(provider_models) + " |" + separator = "| --- | " + " | ".join(["---"] * len(provider_models)) + " |" + + report.append(header) + report.append(separator) + + # Get results for this provider + provider_results = results_dict.get(provider, {}) + + # Add rows for each test + for test in sorted_tests: + row = f"| {test} |" + + # Add results for each model in this test + for model in provider_models: + if model in provider_results and test in provider_results[model]: + result = pass_icon if provider_results[model][test] else fail_icon + else: + result = na_icon + row += f" {result} |" + + report.append(row) + + # Write to file + with open(output_file, "w") as f: + f.write("\n".join(report)) + f.write("\n") + + print(f"Report generated: 
{output_file}") + + +def main(): + parser = argparse.ArgumentParser(description="Generate test report") + parser.add_argument("--run-tests", action="store_true", help="Run tests before generating report") + parser.add_argument( + "--providers", + type=str, + nargs="+", + help="Specify providers to test (comma-separated or space-separated, default: all)", + ) + parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") + args = parser.parse_args() + + all_results = {} + + if args.run_tests: + # Get list of available providers from command line or use detected providers + if args.providers: + # Handle both comma-separated and space-separated lists + test_providers = [] + for provider_arg in args.providers: + # Split by comma if commas are present + if "," in provider_arg: + test_providers.extend(provider_arg.split(",")) + else: + test_providers.append(provider_arg) + else: + # Default providers to test + test_providers = PROVIDER_ORDER + + for provider in test_providers: + provider = provider.strip() # Remove any whitespace + result_file = run_tests(provider) + if result_file: + provider_results = parse_results(result_file) + all_results.update(provider_results) + else: + # Use existing results + provider_result_files = get_latest_results_by_provider() + + for result_file in provider_result_files.values(): + provider_results = parse_results(result_file) + all_results.update(provider_results) + + # Generate the report + generate_report(all_results, args.output) + + cleanup_old_results() + + +if __name__ == "__main__": + main() diff --git a/tests/verifications/openai/__init__.py b/tests/verifications/openai/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/openai/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/openai/fixtures/__init__.py b/tests/verifications/openai/fixtures/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/openai/fixtures/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/openai/fixtures/fixtures.py b/tests/verifications/openai/fixtures/fixtures.py new file mode 100644 index 000000000..b86de3662 --- /dev/null +++ b/tests/verifications/openai/fixtures/fixtures.py @@ -0,0 +1,97 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os + +import pytest +from openai import OpenAI + + +@pytest.fixture +def providers_model_mapping(): + """ + Mapping from model names used in test cases to provider's model names. 
+    """
+    return {
+        "fireworks": {
+            "Llama-3.3-70B-Instruct": "accounts/fireworks/models/llama-v3p1-70b-instruct",
+            "Llama-3.2-11B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+            "Llama-4-Scout-17B-16E-Instruct": "accounts/fireworks/models/llama4-scout-instruct-basic",
+            "Llama-4-Maverick-17B-128E-Instruct": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+        },
+        "together": {
+            "Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+            "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+            "Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            "Llama-4-Maverick-17B-128E-Instruct": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+        },
+        "groq": {
+            "Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile",
+            "Llama-3.2-11B-Vision-Instruct": "llama-3.2-11b-vision-preview",
+            "Llama-4-Scout-17B-16E-Instruct": "llama-4-scout-17b-16e-instruct",
+            "Llama-4-Maverick-17B-128E-Instruct": "llama-4-maverick-17b-128e-instruct",
+        },
+        "cerebras": {
+            "Llama-3.3-70B-Instruct": "llama-3.3-70b",
+        },
+        "openai": {
+            "gpt-4o": "gpt-4o",
+            "gpt-4o-mini": "gpt-4o-mini",
+        },
+    }
+
+
+@pytest.fixture
+def provider_metadata():
+    return {
+        "fireworks": ("https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"),
+        "together": ("https://api.together.xyz/v1", "TOGETHER_API_KEY"),
+        "groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY"),
+        "cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY"),
+        "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"),
+    }
+
+
+@pytest.fixture
+def provider(request, provider_metadata):
+    provider = request.config.getoption("--provider")
+    base_url = request.config.getoption("--base-url")
+
+    if provider and base_url and provider_metadata[provider][0] != base_url:
+        raise ValueError(f"Provider {provider} is not supported for base URL {base_url}")
+
+    if not provider:
+        if not base_url:
+            raise ValueError("Either --provider or --base-url must be provided")
+        # Infer the provider from a known base URL.
+        for candidate, metadata in provider_metadata.items():
+            if metadata[0] == base_url:
+                provider = candidate
+                break
+
+    return provider
+
+
+@pytest.fixture
+def base_url(request, provider, provider_metadata):
+    return request.config.getoption("--base-url") or provider_metadata[provider][0]
+
+
+@pytest.fixture
+def api_key(request, provider, provider_metadata):
+    return request.config.getoption("--api-key") or os.getenv(provider_metadata[provider][1])
+
+
+@pytest.fixture
+def model_mapping(provider, providers_model_mapping):
+    return providers_model_mapping[provider]
+
+
+@pytest.fixture
+def openai_client(base_url, api_key):
+    return OpenAI(
+        base_url=base_url,
+        api_key=api_key,
+    )
diff --git a/tests/verifications/openai/fixtures/load.py b/tests/verifications/openai/fixtures/load.py
new file mode 100644
index 000000000..98580b2a1
--- /dev/null
+++ b/tests/verifications/openai/fixtures/load.py
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+ +from pathlib import Path + +import yaml + + +def load_test_cases(name: str): + fixture_dir = Path(__file__).parent / "test_cases" + yaml_path = fixture_dir / f"{name}.yaml" + with open(yaml_path, "r") as f: + return yaml.safe_load(f) diff --git a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml new file mode 100644 index 000000000..2c302a704 --- /dev/null +++ b/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml @@ -0,0 +1,162 @@ +test_chat_basic: + test_name: test_chat_basic + test_params: + input_output: + - input: + messages: + - content: Which planet do humans live on? + role: user + output: Earth + - input: + messages: + - content: Which planet has rings around it with a name starting with letter + S? + role: user + output: Saturn + model: + - Llama-3.3-8B-Instruct + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_chat_image: + test_name: test_chat_image + test_params: + input_output: + - input: + messages: + - content: + - text: What is in this image? + type: text + - image_url: + url: https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg + type: image_url + role: user + output: llama + model: + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_chat_structured_output: + test_name: test_chat_structured_output + test_params: + input_output: + - input: + messages: + - content: Extract the event information. + role: system + - content: Alice and Bob are going to a science fair on Friday. + role: user + response_format: + json_schema: + name: calendar_event + schema: + properties: + date: + title: Date + type: string + name: + title: Name + type: string + participants: + items: + type: string + title: Participants + type: array + required: + - name + - date + - participants + title: CalendarEvent + type: object + type: json_schema + output: valid_calendar_event + - input: + messages: + - content: You are a helpful math tutor. Guide the user through the solution + step by step. + role: system + - content: how can I solve 8x + 7 = -23 + role: user + response_format: + json_schema: + name: math_reasoning + schema: + $defs: + Step: + properties: + explanation: + title: Explanation + type: string + output: + title: Output + type: string + required: + - explanation + - output + title: Step + type: object + properties: + final_answer: + title: Final Answer + type: string + steps: + items: + $ref: '#/$defs/Step' + title: Steps + type: array + required: + - steps + - final_answer + title: MathReasoning + type: object + type: json_schema + output: valid_math_reasoning + model: + - Llama-3.3-8B-Instruct + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_tool_calling: + test_name: test_tool_calling + test_params: + input_output: + - input: + messages: + - content: You are a helpful assistant that can use tools to get information. + role: system + - content: What's the weather like in San Francisco? + role: user + tools: + - function: + description: Get current temperature for a given location. 
+ name: get_weather + parameters: + additionalProperties: false + properties: + location: + description: "City and country e.g. Bogot\xE1, Colombia" + type: string + required: + - location + type: object + type: function + output: get_weather_tool_call + model: + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini diff --git a/tests/verifications/openai/test_chat_completion.py b/tests/verifications/openai/test_chat_completion.py new file mode 100644 index 000000000..c6a10de7b --- /dev/null +++ b/tests/verifications/openai/test_chat_completion.py @@ -0,0 +1,202 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +import pytest +from pydantic import BaseModel + +from tests.verifications.openai.fixtures.load import load_test_cases + +chat_completion_test_cases = load_test_cases("chat_completion") + + +@pytest.fixture +def correct_model_name(model, provider, providers_model_mapping): + """Return the provider-specific model name based on the generic model name.""" + mapping = providers_model_mapping[provider] + if model not in mapping: + pytest.skip(f"Provider {provider} does not support model {model}") + return mapping[model] + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], +) +def test_chat_non_streaming_basic(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert input_output["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], +) +def test_chat_streaming_basic(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert input_output["output"].lower() in content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], +) +def test_chat_non_streaming_image(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert input_output["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + 
chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], +) +def test_chat_streaming_image(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert input_output["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], +) +def test_chat_non_streaming_structured_output(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + response_format=input_output["input"]["response_format"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + maybe_json_content = response.choices[0].message.content + + validate_structured_output(maybe_json_content, input_output["output"]) + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], +) +def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + response_format=input_output["input"]["response_format"], + stream=True, + ) + maybe_json_content = "" + for chunk in response: + maybe_json_content += chunk.choices[0].delta.content or "" + validate_structured_output(maybe_json_content, input_output["output"]) + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_tool_calling"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_tool_calling"]["test_params"]["input_output"], +) +def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + tools=input_output["input"]["tools"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + assert len(response.choices[0].message.tool_calls) > 0 + assert input_output["output"] == "get_weather_tool_call" + assert response.choices[0].message.tool_calls[0].function.name == "get_weather" + # TODO: add detailed type validation + + +def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: + if schema_name == "valid_calendar_event": + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + try: + calendar_event = CalendarEvent.model_validate_json(maybe_json_content) + return calendar_event + except Exception: + return None + elif schema_name == "valid_math_reasoning": + + class Step(BaseModel): + explanation: str + output: str + + class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + + try: + math_reasoning = MathReasoning.model_validate_json(maybe_json_content) + return math_reasoning + except Exception: + return None + + return None + + +def 
validate_structured_output(maybe_json_content: str, schema_name: str) -> None: + structured_output = get_structured_output(maybe_json_content, schema_name) + assert structured_output is not None + if schema_name == "valid_calendar_event": + assert structured_output.name is not None + assert structured_output.date is not None + assert len(structured_output.participants) == 2 + elif schema_name == "valid_math_reasoning": + assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/test_results/fireworks_1744154308.json b/tests/verifications/test_results/fireworks_1744154308.json new file mode 100644 index 000000000..691f6e474 --- /dev/null +++ b/tests/verifications/test_results/fireworks_1744154308.json @@ -0,0 +1,2744 @@ +{ + "created": 1744154399.039055, + "duration": 87.73799800872803, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 52, + "passed": 28, + "failed": 3, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", 
+ "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.17320987500716, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.000177707988768816, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009193749981932342, + "outcome": "passed" + }, + "call": { + "duration": 1.1473859580000862, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00043337501119822264, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01645291701424867, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002898749662563205, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01562033302616328, + "outcome": "passed" + }, + "call": { + "duration": 0.8782661251025274, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002795408945530653, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008571124984882772, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003043749602511525, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00842841702979058, + "outcome": "passed" + }, + "call": { + "duration": 1.3863223339430988, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009970410028472543, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007089875056408346, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00017958390526473522, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005809499998576939, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016495899762958288, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0119722920935601, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00016962504014372826, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005716291954740882, + "outcome": "passed" + }, + "call": { + "duration": 0.6822018750244752, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005292498972266912, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025827708072029054, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 
'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.000295999925583601, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010980832972563803, + "outcome": "passed" + }, + "call": { + "duration": 0.7537062909686938, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008091670460999012, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006567832897417247, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001545000122860074, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005985083989799023, + "outcome": "passed" + }, + "call": { + "duration": 0.7263387079583481, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006324589485302567, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171962499152869, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.000780042028054595, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 
0.01365620899014175, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016758404672145844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0064070840599015355, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002031669719144702, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010951624950394034, + "outcome": "passed" + }, + "call": { + "duration": 0.5433399169705808, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0013178749941289425, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.022056750021874905, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0006570409750565886, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008314333041198552, + "outcome": "passed" + }, + "call": { + "duration": 0.7779882500180975, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006799160037189722, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03601404093205929, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.000610582996159792, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014321292052045465, + "outcome": "passed" + }, + "call": { + "duration": 1.0243758750148118, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010485410457476974, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021133000031113625, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0005400830414146185, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007212458993308246, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00026770797558128834, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012334750033915043, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00042683398351073265, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011477917083539069, + "outcome": "passed" + }, + "call": { + "duration": 1.670572166913189, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005759169580414891, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024620208074338734, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0005166250048205256, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008708957931958139, + "outcome": "passed" + }, + "call": { + "duration": 0.6654335829662159, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002927089808508754, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018128167022950947, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001929170684888959, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0063874589977785945, + "outcome": "passed" + }, + "call": { + "duration": 0.8047525839647278, + "outcome": "passed" + }, + "teardown": { + 
"duration": 0.00039245898369699717, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01366533397231251, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00028241705149412155, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010844790958799422, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.000258082989603281, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00936354196164757, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00020533299539238214, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008578249951824546, + "outcome": "passed" + }, + "call": { + "duration": 2.6288582499837503, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006052498938515782, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02061279199551791, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00029320805333554745, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00995812495239079, + "outcome": "passed" + }, + "call": { + "duration": 3.0904540000483394, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003214169992133975, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0261635419446975, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00032716698478907347, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.027220541960559785, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003192499279975891, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010883458075113595, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002687909873202443, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171177500160411, + "outcome": "passed" + }, + "call": { + "duration": 1.6752691670553759, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004877089522778988, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011608208995312452, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017137499526143074, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009284624946303666, + "outcome": "passed" + }, + "call": { + "duration": 3.537356249988079, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005068340105935931, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016660499968566, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00029341597110033035, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01374066702555865, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002625000197440386, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013120374991558492, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00021954195108264685, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015080374898388982, + "outcome": "passed" + }, + "call": { + "duration": 1.157175041968003, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000495875021442771, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013946042046882212, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002954580122604966, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011617792071774602, + "outcome": "passed" + }, + "call": { + "duration": 0.9537639999762177, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004819999448955059, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.027436082949861884, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model 
Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00030274991877377033, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016110333963297307, + "outcome": "passed" + }, + "call": { + "duration": 0.8493227910948917, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004883749643340707, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017850833013653755, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003287500003352761, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012523208046332002, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00023500004317611456, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007516667013987899, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00018912507221102715, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + 
"input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007337165996432304, + "outcome": "passed" + }, + "call": { + "duration": 3.124099582899362, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006703329272568226, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014259999967180192, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00030262500513345003, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010863124975003302, + "outcome": "passed" + }, + "call": { + "duration": 1.3330956250429153, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018679199274629354, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005797958001494408, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017529097385704517, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005647709011100233, + "outcome": "passed" + }, + "call": { + "duration": 3.2295467499643564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005654999986290932, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007151791942305863, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015316694043576717, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006435790914110839, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00015954102855175734, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006164791993796825, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00014074996579438448, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010064583038911223, + "outcome": "passed" + }, + "call": { + "duration": 1.1676458748988807, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002513329964131117, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011011417023837566, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00020608294289559126, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011654542060568929, + "outcome": "passed" + }, + "call": { + "duration": 0.7950789160095155, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002690000692382455, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0066834589233621955, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017270795069634914, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011390416999347508, + "outcome": "passed" + }, + "call": { + "duration": 0.7844940840732306, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000511458027176559, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005813500029034913, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015495799016207457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + 
"parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0075639160349965096, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00014358304906636477, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008526541059836745, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00015841599088162184, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007805416011251509, + "outcome": "passed" + }, + "call": { + "duration": 13.25898533302825, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 196, + "message": "assert None is not None" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 136, + "message": "" + }, + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 196, + "message": "AssertionError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'accounts/fireworks/models/llama-v3p1-70b-instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n maybe_json_content += chunk.choices[0].delta.content or \"\"\n> validate_structured_output(maybe_json_content, input_output[\"output\"])\n\ntests/verifications/openai/test_chat_completion.py:136: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nmaybe_json_content = '{ \"final_answer\": \"}To solve the equation 8x + 7 = -23, we need to isolate the variable x. We can do this by followin...tassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistant'\nschema_name = 'valid_math_reasoning'\n\n def validate_structured_output(maybe_json_content: str, schema_name: str) -> None:\n structured_output = get_structured_output(maybe_json_content, schema_name)\n> assert structured_output is not None\nE assert None is not None\n\ntests/verifications/openai/test_chat_completion.py:196: AssertionError" + }, + "teardown": { + "duration": 0.00022583396639674902, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006412541959434748, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0001449589617550373, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010353000019676983, + "outcome": "passed" + }, + "call": { + "duration": 4.559281209018081, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00021179206669330597, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + 
"test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011320417048409581, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001623749267309904, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005637791007757187, + "outcome": "passed" + }, + "call": { + "duration": 2.9282109580235556, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019149994477629662, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021475916961207986, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0002605828922241926, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012046082993037999, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016966694965958595, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00782629195600748, + "outcome": "passed" + }, + "call": { + "duration": 0.9290615000063553, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004110001027584076, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00842183397617191, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023745803628116846, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010762874968349934, + "outcome": "passed" + }, + "call": { + "duration": 23.62101216695737, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-scout-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" + }, + "teardown": { + "duration": 0.0004520840011537075, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00953104195650667, + "outcome": "skipped", + 
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017912499606609344, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010302042006514966, + "outcome": "passed" + }, + "call": { + "duration": 5.55651158397086, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" + }, + "teardown": { + "duration": 0.0003929579397663474, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01593891705852002, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003579579060897231, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + 
"input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01874550001230091, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00031995808240026236, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/openai_1744154522.json b/tests/verifications/test_results/openai_1744154522.json new file mode 100644 index 000000000..310f3500d --- /dev/null +++ b/tests/verifications/test_results/openai_1744154522.json @@ -0,0 +1,2672 @@ +{ + "created": 1744154576.251519, + "duration": 51.50739002227783, + "exitcode": 0, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 61, + "passed": 22, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", 
+ "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0531630830373615, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001657919492572546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006063499953597784, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00014004099648445845, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005356832989491522, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00016508297994732857, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006139832898043096, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00014450005255639553, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00542324990965426, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00014112505596131086, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.004965625004842877, + "outcome": 
"skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00013720791321247816, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005054084002040327, + "outcome": "passed" + }, + "call": { + "duration": 0.6271341659594327, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00043925002682954073, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0159178749890998, + "outcome": "passed" + }, + "call": { + "duration": 0.44088316697161645, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006467089988291264, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016705541987903416, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0005769169656559825, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012067249976098537, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00016683305148035288, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.009295083000324667, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00017204193864017725, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009534333017654717, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00020175008103251457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006628665956668556, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003687090938910842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0061322919791564345, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0003664169926196337, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00623433303553611, + "outcome": "passed" + }, + "call": { + "duration": 0.7898445830214769, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006602079374715686, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014758958015590906, + "outcome": "passed" + }, + "call": { + "duration": 1.1555478329537436, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0011781250359490514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03454475000035018, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.000967124942690134, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025206666090525687, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.000189624959602952, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014331333106383681, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023133307695388794, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009339665994048119, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00020329200197011232, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010387042071670294, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00018254201859235764, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012297999928705394, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00018662505317479372, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006984042003750801, + "outcome": "passed" + }, + "call": { + "duration": 0.32529433304443955, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0033042499562725425, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01832079200539738, + "outcome": "passed" + }, + "call": { + "duration": 0.48440287495031953, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00047233293298631907, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.02893691696226597, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001747499918565154, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006553041050210595, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00016829196829348803, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013746666954830289, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00019237503875046968, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007175332983024418, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0001873329747468233, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006127291941083968, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00019004102796316147, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006421791040338576, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0001611249754205346, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009806249989196658, + "outcome": "passed" + }, + "call": { + "duration": 0.9556747920578346, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004937920020893216, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03146500000730157, + "outcome": "passed" + }, + "call": { + "duration": 1.082494750036858, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006242080125957727, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021534667001105845, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003469999646767974, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025929750059731305, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support 
model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0008774169255048037, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012507125036790967, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00022008304949849844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008156375028192997, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0002079169498756528, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012587749981321394, + "outcome": "passed" + }, + "call": { + "duration": 2.7379885419504717, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00044579198583960533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017111250082962215, + "outcome": "passed" + }, + "call": { + "duration": 2.599374584038742, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009177909232676029, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02198700001463294, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00042749999556690454, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015032917028293014, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00041016703471541405, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013976250076666474, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00027600000612437725, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00799729092977941, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00020320899784564972, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010483540943823755, + "outcome": "passed" + }, + "call": { + "duration": 4.249965250026435, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008596250554546714, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", 
+ "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018141582957468927, + "outcome": "passed" + }, + "call": { + "duration": 2.297856790944934, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005075830267742276, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017144332989118993, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0006829580524936318, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009827250032685697, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00024204188957810402, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006737958989106119, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00022729102056473494, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006030917051248252, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model 
Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00022229203023016453, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009183833957649767, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00022629194427281618, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007097500027157366, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00826825003605336, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006604874972254038, + "outcome": "passed" + }, + "call": { + "duration": 1.4057738750707358, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000506040989421308, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015966624952852726, + "outcome": "passed" + }, + "call": { + "duration": 0.540478374925442, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009536249563097954, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + 
"tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020631707971915603, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0004928340204060078, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016745459055528045, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0003412909572944045, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012252667103894055, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00028650008607655764, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01128904102370143, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00027041707653552294, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009191332967020571, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not 
support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0002074999501928687, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007687666919082403, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0002027079463005066, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007542708073742688, + "outcome": "passed" + }, + "call": { + "duration": 4.244797708000988, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0012778330128639936, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.026919999974779785, + "outcome": "passed" + }, + "call": { + "duration": 9.006108874920756, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046324997674673796, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01554666692391038, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0004023330984637141, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.007354958914220333, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0002900830004364252, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017274250043556094, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002668329980224371, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006813667016103864, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00024500000290572643, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007385291974060237, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017024995759129524, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00857366609852761, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not 
support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00016850000247359276, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005570041947066784, + "outcome": "passed" + }, + "call": { + "duration": 0.8564215000951663, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004029169213026762, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00786762498319149, + "outcome": "passed" + }, + "call": { + "duration": 0.6419672920601442, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005102079594507813, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017147499951533973, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00032350001856684685, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01194737502373755, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0005004579434171319, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 
0.010250666993670166, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00022554199676960707, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007847042055800557, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.000283458037301898, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008078000042587519, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001794169656932354, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007204750087112188, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00017725001089274883, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006797667010687292, + "outcome": "passed" + }, + "call": { + "duration": 5.411579457926564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.001134666963480413, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025059624924324453, + "outcome": "passed" + }, + "call": { + "duration": 9.112342999898829, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009202499641105533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024287916952744126, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00015587499365210533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006531457998789847, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00014670798555016518, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006190375075675547, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0001603750279173255, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + 
"setup": { + "duration": 0.005670750048011541, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001479999627918005, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005662833107635379, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0001480829669162631, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00573637499473989, + "outcome": "passed" + }, + "call": { + "duration": 0.6269576249178499, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010142088867723942, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01623620803002268, + "outcome": "passed" + }, + "call": { + "duration": 0.7144521250156686, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0011040839599445462, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/together_1744154399.json b/tests/verifications/test_results/together_1744154399.json new file mode 100644 index 000000000..ae801e83b --- /dev/null +++ b/tests/verifications/test_results/together_1744154399.json @@ -0,0 +1,2830 @@ +{ + "created": 1744154470.9868789, + "duration": 59.6187219619751, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 52, + "passed": 21, + "failed": 10, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": 
"Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": 
"Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + 
"lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.39231995795853436, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002014160854741931, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0071710830088704824, + "outcome": "passed" + }, + "call": { + "duration": 0.7968309168936685, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004362498875707388, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012780916062183678, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00029158301185816526, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013563874992541969, + "outcome": "passed" + }, + "call": { + "duration": 0.5071627920260653, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005456249928101897, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020708917058072984, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00030325003899633884, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014170082984492183, + "outcome": "passed" + }, + "call": { + "duration": 1.2383921250002459, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009597090538591146, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013402250013314188, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00028245802968740463, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008693707990460098, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016249995678663254, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + 
"keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005904874997213483, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001960420049726963, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006532749976031482, + "outcome": "passed" + }, + "call": { + "duration": 0.5410778749501333, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019516597967594862, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009374375105835497, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00015524995978921652, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007205875008367002, + "outcome": "passed" + }, + "call": { + "duration": 0.42584729101508856, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009506250498816371, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.029625958995893598, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001860830234363675, + "outcome": 
"passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.023576707928441465, + "outcome": "passed" + }, + "call": { + "duration": 1.2249365829629824, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004278330598026514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014816291979514062, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00029558304231613874, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012769333901815116, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00024329195730388165, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009145625052042305, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00021195888984948397, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0133140409598127, + "outcome": "passed" + }, + "call": { + "duration": 0.7228892090497538, + "outcome": 
"passed" + }, + "teardown": { + "duration": 0.0004301250446587801, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013998750015161932, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002961249556392431, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012570249964483082, + "outcome": "passed" + }, + "call": { + "duration": 0.7193170419195667, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.00022504094522446394, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006660082959569991, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 
0.0001445829402655363, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021228999947197735, + "outcome": "passed" + }, + "call": { + "duration": 1.5670281670754775, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0004656669916585088, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009595917072147131, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00025625003036111593, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009242708911187947, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002484159776940942, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00905474997125566, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00023312494158744812, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007183165987953544, + "outcome": "passed" + }, + "call": { + "duration": 1.0667660840554163, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005163750611245632, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.05233616603072733, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003471659729257226, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015932541922666132, + "outcome": "passed" + }, + "call": { + "duration": 0.41540695796720684, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n 
\"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0002845840062946081, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007243875064887106, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00016258296091109514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009275624994188547, + "outcome": "passed" + }, + "call": { + "duration": 1.43309554096777, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0003690000157803297, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + 
"outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011570582981221378, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00024937500711530447, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010756584000773728, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00026183295994997025, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008863041992299259, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023283297196030617, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007975792046636343, + "outcome": "passed" + }, + "call": { + "duration": 2.1585817909799516, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005107080796733499, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.05228079203516245, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + 
"duration": 0.0017226670170202851, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009964749915525317, + "outcome": "passed" + }, + "call": { + "duration": 4.6593364590080455, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009852920193225145, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.023214041953906417, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003567079547792673, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01705008395947516, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003085409989580512, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014711958006955683, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003121249610558152, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01843333407305181, + "outcome": 
"passed" + }, + "call": { + "duration": 2.8683876669965684, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" + }, + "teardown": { + "duration": 0.00028662499971687794, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00653208396397531, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00021291698794811964, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006028458010405302, + "outcome": "passed" + }, + "call": { + "duration": 4.981105040991679, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", 
chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" + }, + "teardown": { + "duration": 0.0010110830189660192, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01591233303770423, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003783750580623746, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010691000032238662, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00027445796877145767, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01258529198821634, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002044580178335309, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010904791066423059, + "outcome": "passed" + }, + 
"call": { + "duration": 0.8311828339938074, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00048687495291233063, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.029216791968792677, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002269580727443099, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013182583032175899, + "outcome": "passed" + }, + "call": { + "duration": 1.7446029160637408, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008087089518085122, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02009516698308289, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.000320291961543262, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015216833096928895, + "outcome": "passed" + }, + "call": { + "duration": 0.8049291669158265, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005109170451760292, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + 
"input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171551660168916, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0005707499803975224, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01131124992389232, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003044159384444356, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0054290409898385406, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00014645792543888092, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011368000064976513, + "outcome": "passed" + }, + "call": { + "duration": 4.363120499998331, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003998749889433384, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.04945958300959319, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002401659730821848, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011090958025306463, + "outcome": "passed" + }, + "call": { + "duration": 4.699277375009842, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000689250067807734, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020744459005072713, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001836250303313136, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005926624988205731, + "outcome": "passed" + }, + "call": { + "duration": 2.7814464160474017, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009554170537739992, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03027112502604723, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003245410043746233, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009138708002865314, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0001919999485835433, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0064505410846322775, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00015720794908702374, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00582624995149672, + "outcome": "passed" + }, + "call": { + "duration": 0.8302567919017747, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020354206208139658, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014151416951790452, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00034970801789313555, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012150791939347982, + "outcome": "passed" + }, + "call": { + "duration": 0.7078855830477551, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output 
= {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0008542909054085612, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.022667833953164518, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0006820419803261757, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01285991701297462, + "outcome": "passed" + }, + "call": { + "duration": 0.6888671671040356, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n 
chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0007953330641612411, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015029000001959503, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015666603576391935, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00622316705994308, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0001533749746158719, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005598834017291665, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00013062497600913048, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005876541952602565, + 
"outcome": "passed" + }, + "call": { + "duration": 7.561108374968171, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004579999949783087, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018791542039252818, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0004900830099359155, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0065952910808846354, + "outcome": "passed" + }, + "call": { + "duration": 2.6826554159633815, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0009669580031186342, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.019489208003506064, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0007419160101562738, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012299792026169598, + "outcome": "passed" + }, + "call": { + "duration": 2.829678333015181, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0010418329620733857, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016189916990697384, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00027966592460870743, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010247125057503581, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00023291702382266521, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012632582918740809, + "outcome": "passed" + }, + "call": { + "duration": 0.40774812502786517, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0007319580763578415, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.019890791969373822, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0006391670322045684, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0178165000397712, + "outcome": "passed" + }, + "call": { + "duration": 0.38229950005188584, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010000420734286308, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024259291938506067, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003602079814299941, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012425708002410829, + "outcome": "passed" + }, + "call": { + "duration": 0.7610744580160826, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005935420049354434, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018717541941441596, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 
0.000659791985526681, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012784749967977405, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002145830076187849, + "outcome": "passed" + } + } + ] +} diff --git a/uv.lock b/uv.lock index 5d7ce4076..1f7adea82 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -139,6 +140,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] +[[package]] +name = "altair" +version = "5.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.14'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/b1/f2969c7bdb8ad8bbdda031687defdce2c19afba2aa2c8e1d2a17f78376d8/altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d", size = 705305 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -258,6 +275,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458 }, +] + [[package]] name = "blobfile" version = "3.0.0" @@ -282,6 +308,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/58/a255894436f3eca4a20611785a30a43b85bc75adf1b77f227e1e6d0cce0a/braintrust_core-0.0.58-py3-none-any.whl", hash = "sha256:fa272b70376d2c6692acf00ebd9fb9bae057b0c53b2b6a59a64850bf79757311", 
size = 4438 }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, +] + [[package]] name = "certifi" version = "2025.1.31" @@ -783,6 +818,30 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -1386,6 +1445,12 @@ test = [ { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] +ui = [ + { name = "llama-stack-client" }, + { name = "pandas" }, + { name = "streamlit" }, + { name = "streamlit-option-menu" }, +] unit = [ { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1416,6 +1481,7 @@ requires-dist = [ { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.2.1" }, + { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.1" }, { name = "mcp", marker = "extra == 'test'" }, { name = "myst-parser", marker = "extra == 'docs'" }, { name = "nbval", marker = "extra == 'dev'" }, @@ -1423,6 +1489,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'unit'" }, { name = "opentelemetry-exporter-otlp-proto-http", marker = "extra == 'test'" }, { name = "opentelemetry-sdk", marker = "extra == 'test'" }, + { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "pre-commit", 
marker = "extra == 'dev'" }, { name = "prompt-toolkit" }, @@ -1452,6 +1519,8 @@ requires-dist = [ { name = "sphinxcontrib-redoc", marker = "extra == 'docs'" }, { name = "sphinxcontrib-video", marker = "extra == 'docs'" }, { name = "sqlite-vec", marker = "extra == 'unit'" }, + { name = "streamlit", marker = "extra == 'ui'" }, + { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "tomli", marker = "extra == 'docs'" }, @@ -1461,6 +1530,7 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, { name = "uvicorn", marker = "extra == 'dev'" }, ] +provides-extras = ["dev", "unit", "test", "docs", "codegen", "ui"] [[package]] name = "llama-stack-client" @@ -1815,6 +1885,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, ] +[[package]] +name = "narwhals" +version = "1.34.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/1d/a21496389436e96394a6e3fb1a644d5bc382250baff76e867f0368a94068/narwhals-1.34.0.tar.gz", hash = "sha256:bdd3fa60bea1f1e8b698e483be18dd43af13290da12dba69ea16dc1f3edbb8f7", size = 265432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/6d/875d5a7f8e14fc044ede74b94e739d7312c3c8d1a3878f649601b15fdd68/narwhals-1.34.0-py3-none-any.whl", hash = "sha256:9502b9aa5dfe125c090a3a0bbca95becfa1fac2cd67f8b80d12b1dc2ed751865", size = 325346 }, +] + [[package]] name = "nbformat" version = "5.10.4" @@ -2571,6 +2650,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, ] +[[package]] +name = "pydeck" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -3220,6 +3312,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = 
"sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3502,6 +3603,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507 }, ] +[[package]] +name = "streamlit" +version = "1.44.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "altair" }, + { name = "blinker" }, + { name = "cachetools" }, + { name = "click" }, + { name = "gitpython" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "pyarrow" }, + { name = "pydeck" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "tornado" }, + { name = "typing-extensions" }, + { name = "watchdog", marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/c0/7286284567e5045f0c587c426d0c41aee5d10c0a2e360e627a83037e9f0c/streamlit-1.44.1.tar.gz", hash = "sha256:c6914ed6d5b76870b461510476806db370f36425ae0e6654d227c988288198d3", size = 9423685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/17/fc425e1d4d86e31b2aaf0812a2ef2163763a0670d671720c7c36e8679323/streamlit-1.44.1-py3-none-any.whl", hash = "sha256:9fe355f58b11f4eb71e74f115ce1f38c4c9eaff2733e6bcffb510ac1298a5990", size = 9812242 }, +] + +[[package]] +name = "streamlit-option-menu" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "streamlit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/27/72dc451cdaef1714fd0d75cc430e50a06c12c9046295fdf1f94af1b766eb/streamlit-option-menu-0.4.0.tar.gz", hash = "sha256:48ec69d59e547fa2fa4bfae001620df8af56a80de2f765ddbb9fcbfb84017129", size = 827290 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/52/2f525ad4262dc83d67297f69ec5afcee1438b9e9ae22aa318396725ddbed/streamlit_option_menu-0.4.0-py3-none-any.whl", hash = "sha256:a55fc7554047b6db371595af2182e435b8a2c715ee6124e8543685bd4670b07e", size = 829255 }, +] + [[package]] name = "sympy" version = "1.13.1" @@ -3514,6 +3656,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 }, ] +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248 }, +] + [[package]] name = "termcolor" version = "2.5.0" @@ -3559,6 +3710,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 }, ] 
+[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + [[package]] name = "tomli" version = "2.2.1" @@ -3836,6 +3996,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/fa/849483d56773ae29740ae70043ad88e068f98a6401aa819b5d6bee604683/virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a", size = 4301478 }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079 }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076 }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065 }, + { url = 
"https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070 }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067 }, +] + [[package]] name = "watchfiles" version = "1.0.4"