From 9eb81439d2bc572e94d3d604e83f01e650da6298 Mon Sep 17 00:00:00 2001 From: Akram Ben Aissi Date: Thu, 13 Nov 2025 14:50:06 +0100 Subject: [PATCH 01/12] docs: Add comprehensive Files API and Vector Store integration doc (#3279) docs: Add comprehensive Files API and Vector Store integration documentation - Add Files API documentation with OpenAI-compatible endpoints - Create comprehensive guide for OpenAI-compatible file operations - Reorganize documentation structure: move file operations to files/ directory - Add vector store provider documentation for Milvus, SQLite-vec, FAISS - Clean up redundant files and improve navigation - Update cross-references and eliminate documentation duplication - Support for release 0.2.14 FileResponse and Vector Store API features # What does this PR do? ## Test Plan --- docs/docs/api-deprecated/index.mdx | 62 +++ docs/docs/api-experimental/index.mdx | 128 ++++++ docs/docs/api-openai/index.mdx | 287 ++++++++++++ docs/docs/api/index.mdx | 144 ++++++ docs/docs/concepts/apis/index.mdx | 19 +- .../file_operations_vector_stores.mdx | 420 ++++++++++++++++++ docs/docs/providers/files/files.mdx | 290 ++++++++++++ .../openai_file_operations_quick_reference.md | 80 ++++ .../files/openai_file_operations_support.md | 291 ++++++++++++ docs/docs/providers/index.mdx | 15 +- docs/docs/providers/openai.mdx | 19 +- 11 files changed, 1747 insertions(+), 8 deletions(-) create mode 100644 docs/docs/api-deprecated/index.mdx create mode 100644 docs/docs/api-experimental/index.mdx create mode 100644 docs/docs/api-openai/index.mdx create mode 100644 docs/docs/api/index.mdx create mode 100644 docs/docs/concepts/file_operations_vector_stores.mdx create mode 100644 docs/docs/providers/files/files.mdx create mode 100644 docs/docs/providers/files/openai_file_operations_quick_reference.md create mode 100644 docs/docs/providers/files/openai_file_operations_support.md diff --git a/docs/docs/api-deprecated/index.mdx b/docs/docs/api-deprecated/index.mdx new file mode 100644 index 000000000..0da357e30 --- /dev/null +++ b/docs/docs/api-deprecated/index.mdx @@ -0,0 +1,62 @@ +--- +title: Deprecated APIs +description: Legacy APIs that are being phased out +sidebar_label: Deprecated +sidebar_position: 1 +--- + +# Deprecated APIs + +This section contains APIs that are being phased out in favor of newer, more standardized implementations. These APIs are maintained for backward compatibility but are not recommended for new projects. + +:::warning Deprecation Notice +These APIs are deprecated and will be removed in future versions. Please migrate to the recommended alternatives listed below. +::: + +## Migration Guide + +When using deprecated APIs, please refer to the migration guides provided for each API to understand how to transition to the supported alternatives. + +## Deprecated API List + +### Legacy Inference APIs +Some older inference endpoints that have been superseded by the standardized Inference API. + +**Migration Path:** Use the [Inference API](../api/) instead. + +### Legacy Vector Operations +Older vector database operations that have been replaced by the Vector IO API. + +**Migration Path:** Use the [Vector IO API](../api/) instead. + +### Legacy File Operations +Older file management endpoints that have been replaced by the Files API. + +**Migration Path:** Use the [Files API](../api/) instead. 
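+
+For example, a legacy file upload typically maps onto the OpenAI-compatible Files API like this (the legacy path below is illustrative; only the new endpoint is normative):
+
+```python
+import requests
+
+# Before: a legacy file endpoint (illustrative path only)
+# requests.post("http://localhost:8000/v1/legacy/files/upload", ...)
+
+# After: the OpenAI-compatible Files API
+with open("data.jsonl", "rb") as f:
+    response = requests.post(
+        "http://localhost:8000/v1/openai/v1/files",
+        files={"file": f},
+        data={"purpose": "batch"},
+    )
+```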
+ +## Support Timeline + +Deprecated APIs will be supported according to the following timeline: + +- **Current Version**: Full support with deprecation warnings +- **Next Major Version**: Limited support with migration notices +- **Following Major Version**: Removal of deprecated APIs + +## Getting Help + +If you need assistance migrating from deprecated APIs: + +1. Check the specific migration guides for each API +2. Review the [API Reference](../api/) for current alternatives +3. Consult the [Community Forums](https://github.com/llamastack/llama-stack/discussions) for migration support +4. Open an issue on GitHub for specific migration questions + +## Contributing + +If you find issues with deprecated APIs or have suggestions for improving the migration process, please contribute by: + +1. Opening an issue describing the problem +2. Submitting a pull request with improvements +3. Updating migration documentation + +For more information on contributing, see our [Contributing Guide](../contributing/). diff --git a/docs/docs/api-experimental/index.mdx b/docs/docs/api-experimental/index.mdx new file mode 100644 index 000000000..adbd64582 --- /dev/null +++ b/docs/docs/api-experimental/index.mdx @@ -0,0 +1,128 @@ +--- +title: Experimental APIs +description: APIs in development with limited support +sidebar_label: Experimental +sidebar_position: 1 +--- + +# Experimental APIs + +This section contains APIs that are currently in development and may have limited support or stability. These APIs are available for testing and feedback but should not be used in production environments. + +:::warning Experimental Notice +These APIs are experimental and may change without notice. Use with caution and provide feedback to help improve them. +::: + +## Current Experimental APIs + +### Batch Inference API +Run inference on a dataset of inputs in batch mode for improved efficiency. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale inference operations + +**Features:** +- Batch processing of multiple inputs +- Optimized resource utilization +- Progress tracking and monitoring + +### Batch Agents API +Run agentic workflows on a dataset of inputs in batch mode. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale agent operations + +**Features:** +- Batch agent execution +- Parallel processing capabilities +- Result aggregation and analysis + +### Synthetic Data Generation API +Generate synthetic data for model development and testing. + +**Status:** Early Development +**Provider Support:** Very Limited +**Use Case:** Training data augmentation + +**Features:** +- Automated data generation +- Quality control mechanisms +- Customizable generation parameters + +### Batches API (OpenAI-compatible) +OpenAI-compatible batch management for inference operations. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** OpenAI batch processing compatibility + +**Features:** +- OpenAI batch API compatibility +- Job scheduling and management +- Status tracking and monitoring + +## Getting Started with Experimental APIs + +### Prerequisites +- Llama Stack server running with experimental features enabled +- Appropriate provider configurations +- Understanding of API limitations + +### Configuration +Experimental APIs may require special configuration flags or provider settings. Check the specific API documentation for setup requirements. + +### Usage Guidelines +1. 
**Testing Only**: Use experimental APIs for testing and development only +2. **Monitor Changes**: Watch for updates and breaking changes +3. **Provide Feedback**: Report issues and suggest improvements +4. **Backup Data**: Always backup important data when using experimental features + +## Feedback and Contribution + +We encourage feedback on experimental APIs to help improve them: + +### Reporting Issues +- Use GitHub issues with the "experimental" label +- Include detailed error messages and reproduction steps +- Specify the API version and provider being used + +### Feature Requests +- Submit feature requests through GitHub discussions +- Provide use cases and expected behavior +- Consider contributing implementations + +### Testing +- Test experimental APIs in your environment +- Report performance issues and optimization opportunities +- Share success stories and use cases + +## Migration to Stable APIs + +As experimental APIs mature, they will be moved to the stable API section. When this happens: + +1. **Announcement**: We'll announce the promotion in release notes +2. **Migration Guide**: Detailed migration instructions will be provided +3. **Deprecation Timeline**: Experimental versions will be deprecated with notice +4. **Support**: Full support will be available for stable versions + +## Provider Support + +Experimental APIs may have limited provider support. Check the specific API documentation for: + +- Supported providers +- Configuration requirements +- Known limitations +- Performance characteristics + +## Roadmap + +Experimental APIs are part of our ongoing development roadmap: + +- **Q1 2024**: Batch Inference API stabilization +- **Q2 2024**: Batch Agents API improvements +- **Q3 2024**: Synthetic Data Generation API expansion +- **Q4 2024**: Batches API full OpenAI compatibility + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api-openai/index.mdx b/docs/docs/api-openai/index.mdx new file mode 100644 index 000000000..99f3edaa7 --- /dev/null +++ b/docs/docs/api-openai/index.mdx @@ -0,0 +1,287 @@ +--- +title: OpenAI API Compatibility +description: OpenAI-compatible APIs and features in Llama Stack +sidebar_label: OpenAI Compatibility +sidebar_position: 1 +--- + +# OpenAI API Compatibility + +Llama Stack provides comprehensive OpenAI API compatibility, allowing you to use existing OpenAI API clients and tools with Llama Stack providers. This compatibility layer ensures seamless migration and interoperability. + +## Overview + +OpenAI API compatibility in Llama Stack includes: + +- **OpenAI-compatible endpoints** for all major APIs +- **Request/response format compatibility** with OpenAI standards +- **Authentication and authorization** using OpenAI-style API keys +- **Error handling** with OpenAI-compatible error codes and messages +- **Rate limiting** and usage tracking compatible with OpenAI patterns + +## Supported OpenAI APIs + +### Chat Completions API +OpenAI-compatible chat completions for conversational AI applications. 
+ +**Endpoint:** `/v1/chat/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Message-based conversations +- System prompts and user messages +- Function calling support +- Streaming responses +- Temperature and other parameter controls + +### Completions API +OpenAI-compatible text completions for general text generation. + +**Endpoint:** `/v1/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Text completion generation +- Prompt engineering support +- Customizable parameters +- Batch processing capabilities + +### Embeddings API +OpenAI-compatible embeddings for vector operations. + +**Endpoint:** `/v1/embeddings` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All embedding providers + +**Features:** +- Text embedding generation +- Multiple embedding models +- Batch embedding processing +- Vector similarity operations + +### Files API +OpenAI-compatible file management for document processing. + +**Endpoint:** `/v1/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** Local Filesystem, S3 + +**Features:** +- File upload and management +- Document processing +- File metadata tracking +- Secure file access + +### Vector Store Files API +OpenAI-compatible vector store file operations for RAG applications. + +**Endpoint:** `/v1/vector_stores/{vector_store_id}/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** FAISS, SQLite-vec, Milvus, ChromaDB, Qdrant, Weaviate, Postgres (PGVector) + +**Features:** +- Automatic document processing +- Vector store integration +- File chunking and indexing +- Search and retrieval operations + +### Batches API +OpenAI-compatible batch processing for large-scale operations. 
+ +**Endpoint:** `/v1/batches` +**Compatibility:** OpenAI API compatibility (experimental) +**Providers:** Limited support + +**Features:** +- Batch job creation and management +- Progress tracking +- Result retrieval +- Error handling + +## Migration from OpenAI + +### Step 1: Update API Endpoint +Change your API endpoint from OpenAI to your Llama Stack server: + +```python +# Before (OpenAI) +import openai +client = openai.OpenAI(api_key="your-openai-key") + +# After (Llama Stack) +import openai +client = openai.OpenAI( + api_key="your-llama-stack-key", + base_url="http://localhost:8000/v1" # Your Llama Stack server +) +``` + +### Step 2: Configure Providers +Set up your preferred providers in the Llama Stack configuration: + +```yaml +# stack-config.yaml +inference: + providers: + - name: "meta-reference" + type: "inline" + model: "llama-3.1-8b" +``` + +### Step 3: Test Compatibility +Verify that your existing code works with Llama Stack: + +```python +# Test chat completions +response = client.chat.completions.create( + model="llama-3.1-8b", + messages=[ + {"role": "user", "content": "Hello, world!"} + ] +) +print(response.choices[0].message.content) +``` + +## Provider-Specific Features + +### Meta Reference Provider +- Full OpenAI API compatibility +- Local model execution +- Custom model support + +### Remote Providers +- OpenAI API compatibility +- Cloud-based execution +- Scalable infrastructure + +### Vector Store Providers +- OpenAI vector store API compatibility +- Automatic document processing +- Advanced search capabilities + +## Authentication + +Llama Stack supports OpenAI-style authentication: + +### API Key Authentication +```python +client = openai.OpenAI( + api_key="your-api-key", + base_url="http://localhost:8000/v1" +) +``` + +### Environment Variables +```bash +export OPENAI_API_KEY="your-api-key" +export OPENAI_BASE_URL="http://localhost:8000/v1" +``` + +## Error Handling + +Llama Stack provides OpenAI-compatible error responses: + +```python +try: + response = client.chat.completions.create(...) +except openai.APIError as e: + print(f"API Error: {e}") +except openai.RateLimitError as e: + print(f"Rate Limit Error: {e}") +except openai.APIConnectionError as e: + print(f"Connection Error: {e}") +``` + +## Rate Limiting + +OpenAI-compatible rate limiting is supported: + +- **Requests per minute** limits +- **Tokens per minute** limits +- **Concurrent request** limits +- **Usage tracking** and monitoring + +## Monitoring and Observability + +Track your API usage with OpenAI-compatible monitoring: + +- **Request/response logging** +- **Usage metrics** and analytics +- **Performance monitoring** +- **Error tracking** and alerting + +## Best Practices + +### 1. Provider Selection +Choose providers based on your requirements: +- **Local development**: Meta Reference, Ollama +- **Production**: Cloud providers (Fireworks, Together, NVIDIA) +- **Specialized use cases**: Custom providers + +### 2. Model Configuration +Configure models for optimal performance: +- **Model selection** based on task requirements +- **Parameter tuning** for specific use cases +- **Resource allocation** for performance + +### 3. Error Handling +Implement robust error handling: +- **Retry logic** for transient failures +- **Fallback providers** for high availability +- **Monitoring** and alerting for issues + +### 4. 
Security +Follow security best practices: +- **API key management** and rotation +- **Access control** and authorization +- **Data privacy** and compliance + +## Implementation Examples + +For detailed code examples and implementation guides, see our [OpenAI Implementation Guide](../providers/openai.mdx). + +## Known Limitations + +### Responses API Limitations +The Responses API is still in active development. For detailed information about current limitations and implementation status, see our [OpenAI Responses API Limitations](../providers/openai_responses_limitations.mdx). + +## Troubleshooting + +### Common Issues + +**Connection Errors** +- Verify server is running +- Check network connectivity +- Validate API endpoint URL + +**Authentication Errors** +- Verify API key is correct +- Check key permissions +- Ensure proper authentication headers + +**Model Errors** +- Verify model is available +- Check provider configuration +- Validate model parameters + +### Getting Help + +For OpenAI compatibility issues: + +1. **Check Documentation**: Review provider-specific documentation +2. **Community Support**: Ask questions in GitHub discussions +3. **Issue Reporting**: Open GitHub issues for bugs +4. **Professional Support**: Contact support for enterprise issues + +## Roadmap + +Upcoming OpenAI compatibility features: + +- **Enhanced batch processing** support +- **Advanced function calling** capabilities +- **Improved error handling** and diagnostics +- **Performance optimizations** for large-scale deployments + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api/index.mdx b/docs/docs/api/index.mdx new file mode 100644 index 000000000..7088c6c2b --- /dev/null +++ b/docs/docs/api/index.mdx @@ -0,0 +1,144 @@ +--- +title: API Reference +description: Complete reference for Llama Stack APIs +sidebar_label: Overview +sidebar_position: 1 +--- + +# API Reference + +Llama Stack provides a comprehensive set of APIs for building generative AI applications. All APIs follow OpenAI-compatible standards and can be used interchangeably across different providers. + +## Core APIs + +### Inference API +Run inference with Large Language Models (LLMs) and embedding models. + +**Supported Providers:** +- Meta Reference (Single Node) +- Ollama (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- NVIDIA NIM (Hosted and Single Node) +- vLLM (Hosted and Single Node) +- TGI (Hosted and Single Node) +- AWS Bedrock (Hosted) +- Cerebras (Hosted) +- Groq (Hosted) +- SambaNova (Hosted) +- PyTorch ExecuTorch (On-device iOS, Android) +- OpenAI (Hosted) +- Anthropic (Hosted) +- Gemini (Hosted) +- WatsonX (Hosted) + +### Agents API +Run multi-step agentic workflows with LLMs, including tool usage, memory (RAG), and complex reasoning. + +**Supported Providers:** +- Meta Reference (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- PyTorch ExecuTorch (On-device iOS) + +### Vector IO API +Perform operations on vector stores, including adding documents, searching, and deleting documents. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-Vec (Single Node) +- Chroma (Hosted and Single Node) +- Milvus (Hosted and Single Node) +- Postgres (PGVector) (Hosted and Single Node) +- Weaviate (Hosted) +- Qdrant (Hosted and Single Node) + +### Files API (OpenAI-compatible) +Manage file uploads, storage, and retrieval with OpenAI-compatible endpoints. 
+ +**Supported Providers:** +- Local Filesystem (Single Node) +- S3 (Hosted) + +### Vector Store Files API (OpenAI-compatible) +Integrate file operations with vector stores for automatic document processing and search. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-vec (Single Node) +- Milvus (Single Node) +- ChromaDB (Hosted and Single Node) +- Qdrant (Hosted and Single Node) +- Weaviate (Hosted) +- Postgres (PGVector) (Hosted and Single Node) + +### Safety API +Apply safety policies to outputs at a systems level, not just model level. + +**Supported Providers:** +- Llama Guard (Depends on Inference Provider) +- Prompt Guard (Single Node) +- Code Scanner (Single Node) +- AWS Bedrock (Hosted) + +### Post Training API +Fine-tune models for specific use cases and domains. + +**Supported Providers:** +- Meta Reference (Single Node) +- HuggingFace (Single Node) +- TorchTune (Single Node) +- NVIDIA NEMO (Hosted) + +### Eval API +Generate outputs and perform scoring to evaluate system performance. + +**Supported Providers:** +- Meta Reference (Single Node) +- NVIDIA NEMO (Hosted) + +### Telemetry API +Collect telemetry data from the system for monitoring and observability. + +**Supported Providers:** +- Meta Reference (Single Node) + +### Tool Runtime API +Interact with various tools and protocols to extend LLM capabilities. + +**Supported Providers:** +- Brave Search (Hosted) +- RAG Runtime (Single Node) + +## API Compatibility + +All Llama Stack APIs are designed to be OpenAI-compatible, allowing you to: +- Use existing OpenAI API clients and tools +- Migrate from OpenAI to other providers seamlessly +- Maintain consistent API contracts across different environments + +## Getting Started + +To get started with Llama Stack APIs: + +1. **Choose a Distribution**: Select a pre-configured distribution that matches your environment +2. **Configure Providers**: Set up the providers you want to use for each API +3. **Start the Server**: Launch the Llama Stack server with your configuration +4. **Use the APIs**: Make requests to the API endpoints using your preferred client + +For detailed setup instructions, see our [Getting Started Guide](../getting_started/quickstart). + +## Provider Details + +For complete provider compatibility and setup instructions, see our [Providers Documentation](../providers/). + +## API Stability + +Llama Stack APIs are organized by stability level: +- **[Stable APIs](./index.mdx)** - Production-ready APIs with full support +- **[Experimental APIs](../api-experimental/)** - APIs in development with limited support +- **[Deprecated APIs](../api-deprecated/)** - Legacy APIs being phased out + +## OpenAI Integration + +For specific OpenAI API compatibility features, see our [OpenAI Compatibility Guide](../api-openai/). diff --git a/docs/docs/concepts/apis/index.mdx b/docs/docs/concepts/apis/index.mdx index 7519f6eff..7d12478ed 100644 --- a/docs/docs/concepts/apis/index.mdx +++ b/docs/docs/concepts/apis/index.mdx @@ -7,7 +7,7 @@ sidebar_position: 1 # APIs -A Llama Stack API is described as a collection of REST endpoints. We currently support the following APIs: +A Llama Stack API is described as a collection of REST endpoints following OpenAI API standards. We currently support the following APIs: - **Inference**: run inference with a LLM - **Safety**: apply safety policies to the output at a Systems (not only model) level @@ -16,11 +16,26 @@ A Llama Stack API is described as a collection of REST endpoints. 
We currently s
 - **Scoring**: evaluate outputs of the system
 - **Eval**: generate outputs (via Inference or Agents) and perform scoring
 - **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
+- **Files**: manage file uploads, storage, and retrieval
+- **Telemetry**: collect telemetry data from the system
 - **Post Training**: fine-tune a model
 - **Tool Runtime**: interact with various tools and protocols
-- **Responses**: generate responses from an LLM using this OpenAI compatible API.
+- **Responses**: generate responses from an LLM
 
 We are working on adding a few more APIs to complete the application lifecycle. These will include:
 - **Batch Inference**: run inference on a dataset of inputs
 - **Batch Agents**: run agents on a dataset of inputs
 - **Batches**: OpenAI-compatible batch management for inference
+
+
+## OpenAI API Compatibility
+
+Llama Stack provides OpenAI API compatibility, allowing you to use Llama Stack with OpenAI API clients and tools.
+
+### File Operations and Vector Store Integration
+
+The Files API and Vector Store APIs work together through file operations, enabling automatic document processing and search. This integration implements the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files) and allows you to:
+
+- Upload documents through the Files API
+- Automatically process and chunk documents into searchable vectors
+- Store processed content in vector databases based on the availability of [our providers](../../providers/index.mdx)
+- Search through documents using natural language queries
+
+For detailed information about this integration, see [File Operations and Vector Store Integration](../file_operations_vector_stores.mdx).
diff --git a/docs/docs/concepts/file_operations_vector_stores.mdx b/docs/docs/concepts/file_operations_vector_stores.mdx
new file mode 100644
index 000000000..6168ecf9d
--- /dev/null
+++ b/docs/docs/concepts/file_operations_vector_stores.mdx
@@ -0,0 +1,420 @@
+# File Operations and Vector Store Integration
+
+## Overview
+
+Llama Stack provides seamless integration between the Files API and Vector Store APIs, enabling you to upload documents and automatically process them into searchable vector embeddings. This integration implements file operations following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files).
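+
+At a glance, the whole flow fits in a few calls. Here is a minimal sketch (assuming an async client pointed at a local server; each step is covered in detail below):
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient(base_url="http://localhost:8000")
+
+# 1. Upload a document through the Files API
+with open("report.pdf", "rb") as f:
+    file_info = await client.files.create(file=f, purpose="assistants")
+
+# 2. Attach it to a vector store; chunking and embedding happen automatically
+store = await client.vector_stores.create(name="reports")
+await client.vector_stores.files.create(
+    vector_store_id=store.id, file_id=file_info.id
+)
+
+# 3. Search the processed content with natural language
+results = await client.vector_stores.search(
+    vector_store_id=store.id, query="What are the key findings?", max_num_results=3
+)
+```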
+ +## Enhanced Capabilities Beyond OpenAI + +While Llama Stack maintains full compatibility with OpenAI's Vector Store API, it provides several additional capabilities that enhance functionality and flexibility: + +### **Embedding Model Specification** +Unlike OpenAI's vector stores which use a fixed embedding model, Llama Stack allows you to specify which embedding model to use when creating a vector store: + +```python +# Create vector store with specific embedding model +vector_store = client.vector_stores.create( + name="my_documents", + embedding_model="all-MiniLM-L6-v2", # Specify your preferred model + embedding_dimension=384, +) +``` + +### **Advanced Search Modes** +Llama Stack supports multiple search modes beyond basic vector similarity: + +- **Vector Search**: Pure semantic similarity search using embeddings +- **Keyword Search**: Traditional keyword-based search for exact matches +- **Hybrid Search**: Combines both vector and keyword search for optimal results + +```python +# Different search modes +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="machine learning algorithms", + search_mode="hybrid", # or "vector", "keyword" + max_num_results=5, +) +``` + +### **Flexible Ranking Options** +For hybrid search, Llama Stack offers configurable ranking strategies: + +- **RRF (Reciprocal Rank Fusion)**: Combines rankings with configurable impact factor +- **Weighted Ranker**: Linear combination of vector and keyword scores with adjustable weights + +```python +# Custom ranking configuration +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks", + search_mode="hybrid", + ranking_options={ + "ranker": {"type": "weighted", "alpha": 0.7} # 70% vector, 30% keyword + }, +) +``` + +### **Provider Selection** +Choose from multiple vector store providers based on your specific needs: + +- **Inline Providers**: FAISS (fast in-memory), SQLite-vec (disk-based), Milvus (high-performance) +- **Remote Providers**: ChromaDB, Qdrant, Weaviate, Postgres (PGVector), Milvus + +```python +# Specify provider when creating vector store +vector_store = client.vector_stores.create( + name="my_documents", provider_id="sqlite-vec" # Choose your preferred provider +) +``` + +## How It Works + +The file operations work through several key components: + +1. **File Upload**: Documents are uploaded through the Files API +2. **Automatic Processing**: Files are automatically chunked and converted to embeddings +3. **Vector Storage**: Chunks are stored in vector databases with metadata +4. **Search & Retrieval**: Users can search through processed documents using natural language + +## Supported Vector Store Providers + +The following vector store providers support file operations: + +### Inline Providers (Single Node) + +- **FAISS**: Fast in-memory vector similarity search +- **SQLite-vec**: Disk-based storage with hybrid search capabilities + +### Remote Providers (Hosted) + +- **ChromaDB**: Vector database with metadata filtering +- **Weaviate**: Vector database with GraphQL interface +- **Postgres (PGVector)**: Vector extensions for PostgreSQL + +### Both Inline & Remote Providers +- **Milvus**: High-performance vector database with advanced indexing +- **Qdrant**: Vector similarity search with payload filtering + +## File Processing Pipeline + +### 1. 
File Upload
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient(base_url="http://localhost:8000")
+
+# Upload a document
+with open("document.pdf", "rb") as f:
+    file_info = await client.files.create(file=f, purpose="assistants")
+```
+
+### 2. Attach to Vector Store
+
+```python
+# Create a vector store
+vector_store = await client.vector_stores.create(name="my_documents")
+
+# Attach the file to the vector store
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 3. Automatic Processing
+
+The system automatically:
+- Detects the file type and extracts text content
+- Splits content into chunks (default: 800 tokens with 400 token overlap)
+- Generates embeddings for each chunk
+- Stores chunks with metadata in the vector store
+- Updates file status to "completed"
+
+### 4. Search and Retrieval
+
+```python
+# Search through processed documents
+search_results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is the main topic discussed?",
+    max_num_results=5,
+)
+
+# Process results
+for result in search_results.data:
+    print(f"Score: {result.score}")
+    for content in result.content:
+        print(f"Content: {content.text}")
+```
+
+## Supported File Types
+
+The file processing pipeline supports various document formats:
+
+- **Text Files**: `.txt`, `.md`, `.rst`
+- **Documents**: `.pdf`, `.docx`, `.doc`
+- **Code**: `.py`, `.js`, `.java`, `.cpp`, etc.
+- **Data**: `.json`, `.csv`, `.xml`
+- **Web Content**: HTML files
+
+## Chunking Strategies
+
+### Default Strategy
+
+The default chunking strategy uses:
+- **Max Chunk Size**: 800 tokens
+- **Overlap**: 400 tokens
+- **Method**: Semantic boundary detection
+
+### Custom Chunking
+
+You can customize chunking when attaching files by passing a static chunking strategy:
+
+```python
+# Static chunking strategy; the object shape follows the OpenAI
+# Vector Store Files API
+chunking_strategy = {
+    "type": "static",
+    "static": {
+        "max_chunk_size_tokens": 600,
+        "chunk_overlap_tokens": 300,
+    },
+}
+
+# Attach file with custom chunking
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id,
+    file_id=file_info.id,
+    chunking_strategy=chunking_strategy,
+)
+```
+
+**Note**: While Llama Stack is OpenAI-compatible, it also supports options beyond the standard OpenAI API. When creating vector stores, you can specify the embedding model and embedding dimension that will be used when processing chunks from attached files.
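+
+To make that concrete, the following sketch creates a vector store with an explicitly chosen embedding model before attaching a file, so chunks from that file are embedded with it (the model name and dimension are example values, not defaults):
+
+```python
+# Create a store whose attached files are embedded with the chosen model
+store = await client.vector_stores.create(
+    name="custom_embedding_docs",
+    embedding_model="all-MiniLM-L6-v2",  # example embedding model
+    embedding_dimension=384,  # must match the model's output size
+)
+
+# Files attached from here on are chunked and embedded with that model
+await client.vector_stores.files.create(
+    vector_store_id=store.id, file_id=file_info.id
+)
+```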
+ + +## File Management + +### List Files in Vector Store + +```python +# List all files in a vector store +files = await client.vector_stores.files.list(vector_store_id=vector_store.id) + +for file in files: + print(f"File: {file.filename}, Status: {file.status}") +``` + +### File Status Tracking + +Files go through several statuses: +- **in_progress**: File is being processed +- **completed**: File successfully processed and searchable +- **failed**: Processing failed (check `last_error` for details) +- **cancelled**: Processing was cancelled + +### Retrieve File Content + +```python +# Get chunked content from vector store +content_response = await client.vector_stores.files.retrieve_content( + vector_store_id=vector_store.id, file_id=file_info.id +) + +for chunk in content_response.content: + print(f"Chunk {chunk.metadata.get('chunk_index', 0)}: {chunk.text}") +``` + +## Vector Store Management + +### List Vector Stores + +Retrieve a paginated list of all vector stores: + +```python +# List all vector stores with default pagination +vector_stores = await client.vector_stores.list() + +# Custom pagination and ordering +vector_stores = await client.vector_stores.list( + limit=10, + order="asc", # or "desc" + after="vs_12345678", # cursor-based pagination +) + +for store in vector_stores.data: + print(f"Store: {store.name}, Files: {store.file_counts.total}") + print(f"Created: {store.created_at}, Status: {store.status}") +``` + +### Retrieve Vector Store Details + +Get detailed information about a specific vector store: + +```python +# Get vector store details +store_details = await client.vector_stores.retrieve(vector_store_id="vs_12345678") + +print(f"Name: {store_details.name}") +print(f"Status: {store_details.status}") +print(f"File Counts: {store_details.file_counts}") +print(f"Usage: {store_details.usage_bytes} bytes") +print(f"Created: {store_details.created_at}") +print(f"Metadata: {store_details.metadata}") +``` + +### Update Vector Store + +Modify vector store properties such as name, metadata, or expiration settings: + +```python +# Update vector store name and metadata +updated_store = await client.vector_stores.update( + vector_store_id="vs_12345678", + name="Updated Document Collection", + metadata={ + "description": "Updated collection for research", + "category": "research", + "version": "2.0", + }, +) + +# Set expiration policy +expired_store = await client.vector_stores.update( + vector_store_id="vs_12345678", + expires_after={"anchor": "last_active_at", "days": 30}, +) + +print(f"Updated store: {updated_store.name}") +print(f"Last active: {updated_store.last_active_at}") +``` + +### Delete Vector Store + +Remove a vector store and all its associated data: + +```python +# Delete a vector store +delete_response = await client.vector_stores.delete(vector_store_id="vs_12345678") + +if delete_response.deleted: + print(f"Vector store {delete_response.id} successfully deleted") +else: + print("Failed to delete vector store") +``` + +**Important Notes:** +- Deleting a vector store removes all files, chunks, and embeddings +- This operation cannot be undone +- The underlying vector database is also cleaned up +- Consider backing up important data before deletion + +## Search Capabilities + +### Vector Search + +Pure similarity search using embeddings: + +```python +results = await client.vector_stores.search( + vector_store_id=vector_store.id, + query="machine learning algorithms", + max_num_results=10, +) +``` + +### Filtered Search + +Combine vector search with metadata 
filtering:
+
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="machine learning algorithms",
+    filters={"file_type": "pdf", "upload_date": "2024-01-01"},
+    max_num_results=10,
+)
+```
+
+### Hybrid Search
+
+[SQLite-vec](../providers/vector_io/inline_sqlite-vec.mdx), [pgvector](../providers/vector_io/remote_pgvector.mdx), and [Milvus](../providers/vector_io/inline_milvus.mdx) support combining vector and keyword search.
+
+## Performance Considerations
+
+> **Note**: For detailed performance optimization strategies, see [Performance Considerations](../providers/files/openai_file_operations_support.md#performance-considerations) in the provider documentation.
+
+**Key Points:**
+- **Chunk Size**: 400-600 tokens for precision, 800-1200 for context
+- **Storage**: Choose provider based on your performance needs
+- **Search**: Optimize for your specific use case
+
+## Error Handling
+
+> **Note**: For comprehensive troubleshooting and error handling, see [Troubleshooting](../providers/files/openai_file_operations_support.md#troubleshooting) in the provider documentation.
+
+**Common Issues:**
+- File processing failures (format, size limits)
+- Search performance optimization
+- Storage and memory issues
+
+## Best Practices
+
+> **Note**: For detailed best practices and recommendations, see [Best Practices](../providers/files/openai_file_operations_support.md#best-practices) in the provider documentation.
+
+**Key Recommendations:**
+- File organization and naming conventions
+- Chunking strategy optimization
+- Metadata and monitoring practices
+- Regular cleanup and maintenance
+
+## Integration Examples
+
+### RAG Application
+
+```python
+# Build a RAG system with file uploads
+async def build_rag_system():
+    # Create vector store
+    vector_store = await client.vector_stores.create(name="knowledge_base")
+
+    # Upload and process documents
+    documents = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
+    for doc in documents:
+        with open(doc, "rb") as f:
+            file_info = await client.files.create(file=f, purpose="assistants")
+        await client.vector_stores.files.create(
+            vector_store_id=vector_store.id, file_id=file_info.id
+        )
+
+    return vector_store
+
+
+# Query the RAG system
+async def query_rag(vector_store_id, question):
+    results = await client.vector_stores.search(
+        vector_store_id=vector_store_id, query=question, max_num_results=5
+    )
+    return results
+```
+
+### Document Analysis
+
+```python
+# Analyze document content through vector search
+async def analyze_document(vector_store_id, file_id):
+    # Get document content
+    content = await client.vector_stores.files.retrieve_content(
+        vector_store_id=vector_store_id, file_id=file_id
+    )
+
+    # Search for specific topics
+    topics = ["introduction", "methodology", "conclusion"]
+    analysis = {}
+
+    for topic in topics:
+        results = await client.vector_stores.search(
+            vector_store_id=vector_store_id, query=topic, max_num_results=3
+        )
+        analysis[topic] = results.data
+
+    return analysis
+```
+
+## Next Steps
+
+- Explore the [Files API documentation](../providers/files/files.mdx) for detailed API reference
+- Check [Vector Store Providers](../providers/vector_io/index.mdx) for specific implementation details
+- Review [Getting Started](../getting_started/quickstart.mdx) for quick setup instructions
diff --git a/docs/docs/providers/files/files.mdx b/docs/docs/providers/files/files.mdx
new file mode 100644
index 000000000..095642be3
--- /dev/null
+++ b/docs/docs/providers/files/files.mdx
@@ -0,0 +1,290 @@
+---
+sidebar_label: Files
+title: Files
+---
+
+## Overview
+
+The Files API provides file management capabilities for Llama Stack. It allows you to upload, store, retrieve, and manage files that can be used across various endpoints in your application.
+
+## Features
+
+- **File Upload**: Upload files with metadata and purpose classification
+- **File Management**: List, retrieve, and delete files
+- **Content Retrieval**: Access raw file content for processing
+- **API Compatibility**: Full compatibility with OpenAI Files API endpoints
+- **Flexible Storage**: Support for local filesystem and cloud storage backends
+
+## API Endpoints
+
+### Upload File
+
+**POST** `/v1/openai/v1/files`
+
+Upload a file that can be used across various endpoints.
+
+**Request Body:**
+- `file`: The file object to be uploaded (multipart form data)
+- `purpose`: The intended purpose of the uploaded file
+
+**Supported Purposes:**
+- `assistants`: Files used by vector stores and assistants
+- `batch`: Files for batch operations
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "batch"
+}
+```
+
+**Example:**
+```python
+import requests
+
+with open("data.jsonl", "rb") as f:
+    files = {"file": f}
+    data = {"purpose": "batch"}
+    response = requests.post(
+        "http://localhost:8000/v1/openai/v1/files", files=files, data=data
+    )
+    file_info = response.json()
+```
+
+### List Files
+
+**GET** `/v1/openai/v1/files`
+
+Returns a list of files that belong to the user's organization.
+
+**Query Parameters:**
+- `after` (optional): A cursor for pagination
+- `limit` (optional): Limit on number of objects (1-10,000, default: 10,000)
+- `order` (optional): Sort order by created_at timestamp (`asc` or `desc`, default: `desc`)
+- `purpose` (optional): Filter files by purpose
+
+**Response:**
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "file-abc123",
+      "object": "file",
+      "bytes": 140,
+      "created_at": 1613779121,
+      "filename": "mydata.jsonl",
+      "purpose": "batch"
+    }
+  ],
+  "has_more": false
+}
+```
+
+**Example:**
```python
+import requests
+
+# List all files
+response = requests.get("http://localhost:8000/v1/openai/v1/files")
+files = response.json()
+
+# List files with pagination
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files",
+    params={"limit": 10, "after": "file-abc123"},
+)
+files = response.json()
+
+# Filter by purpose
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files", params={"purpose": "batch"}
+)
+files = response.json()
+```
+
+### Retrieve File
+
+**GET** `/v1/openai/v1/files/{file_id}`
+
+Returns information about a specific file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "batch"
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+file_info = response.json()
+```
+
+### Delete File
+
+**DELETE** `/v1/openai/v1/files/{file_id}`
+
+Delete a file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to delete
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "deleted": true
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.delete(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+result = response.json()
+```
+
+### Retrieve File Content
+
+**GET** `/v1/openai/v1/files/{file_id}/content`
+
+Returns the raw file content as a binary response.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve content from
+
+**Response:**
+Binary file content with appropriate headers:
+- `Content-Type`: `application/octet-stream`
+- `Content-Disposition`: `attachment; filename="filename"`
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}/content")
+
+# Save content to file
+with open("downloaded_file.jsonl", "wb") as f:
+    f.write(response.content)
+
+# Or process content directly
+content = response.content
+```
+
+## Vector Store Integration
+
+The Files API integrates with Vector Stores to enable document processing and search. For detailed information about this integration, see [File Operations and Vector Store Integration](../../concepts/file_operations_vector_stores.mdx).
+
+### Vector Store File Operations
+
+**List Vector Store Files:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+**Retrieve Vector Store File Content:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content`
+
+**Attach File to Vector Store:**
+- **POST** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+## Error Handling
+
+The Files API returns standard HTTP status codes and error responses:
+
+- `400 Bad Request`: Invalid request parameters
+- `404 Not Found`: File not found
+- `429 Too Many Requests`: Rate limit exceeded
+- `500 Internal Server Error`: Server error
+
+**Error Response Format:**
+```json
+{
+  "error": {
+    "message": "Error description",
+    "type": "invalid_request_error",
+    "code": "file_not_found"
+  }
+}
+```
+
+## Rate Limits
+
+The Files API implements rate limiting to ensure fair usage:
+- File uploads: 100 files per minute
+- File retrievals: 1000 requests per minute
+- File deletions: 100 requests per minute
+
+## Best Practices
+
+1. **File Organization**: Use descriptive filenames and appropriate purpose classifications
+2. **Batch Operations**: For multiple files, consider using batch endpoints when available
+3. **Error Handling**: Always check response status codes and handle errors gracefully
+4. **Content Types**: Ensure files are uploaded with appropriate content types
+5. **Cleanup**: Regularly delete unused files to manage storage costs
+
+## Integration Examples
+
+### With Python Client
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient(base_url="http://localhost:8000")
+
+# Upload a file
+with open("data.jsonl", "rb") as f:
+    file_info = await client.files.create(file=f, purpose="batch")
+
+# List files
+files = await client.files.list(purpose="batch")
+
+# Retrieve file content
+content = await client.files.retrieve_content(file_info.id)
+```
+
+### With cURL
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@data.jsonl" \
+  -F "purpose=batch"
+
+# List files
+curl http://localhost:8000/v1/openai/v1/files
+
+# Download file content
+curl http://localhost:8000/v1/openai/v1/files/file-abc123/content \
+  -o downloaded_file.jsonl
+```
+
+## Provider Support
+
+The Files API supports multiple storage backends:
+
+- **Local Filesystem**: Store files on local disk (inline provider)
+- **S3**: Store files in AWS S3 or S3-compatible services (remote provider)
+- **Custom Backends**: Extensible architecture for custom storage providers
+
+See the [Files Providers](index.mdx) documentation for detailed configuration options.
diff --git a/docs/docs/providers/files/openai_file_operations_quick_reference.md b/docs/docs/providers/files/openai_file_operations_quick_reference.md
new file mode 100644
index 000000000..43e2318e2
--- /dev/null
+++ b/docs/docs/providers/files/openai_file_operations_quick_reference.md
@@ -0,0 +1,80 @@
+# File Operations Quick Reference
+
+## Overview
+
+As of release 0.2.14, Llama Stack provides comprehensive file operations and Vector Store API integration, following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files).
+
+> **Note**: For detailed overview and implementation details, see [Overview](./openai_file_operations_support.md#overview) in the full documentation.
+
+## Supported Providers
+
+> **Note**: For complete provider details and features, see [Supported Providers](./openai_file_operations_support.md#supported-providers) in the full documentation.
+
+**Inline Providers**: FAISS, SQLite-vec, Milvus
+**Remote Providers**: ChromaDB, Qdrant, Weaviate, PGVector
+
+## Quick Start
+
+### 1. Upload File
+```python
+file_info = await client.files.create(
+    file=open("document.pdf", "rb"), purpose="assistants"
+)
+```
+
+### 2. Create Vector Store
+```python
+vector_store = await client.vector_stores.create(name="my_docs")
+```
+
+### 3. Attach File
+```python
+await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 4. Search
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+## File Processing & Search
+
+**Processing**: 800 tokens default chunk size, 400 token overlap
+**Formats**: PDF, DOCX, TXT, Code files, etc.
+**Search**: Vector similarity, Hybrid (SQLite-vec), Filtered with metadata
+
+## Configuration
+
+> **Note**: For detailed configuration examples and options, see [Configuration Examples](./openai_file_operations_support.md#configuration-examples) in the full documentation.
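+
+A minimal sketch of the provider wiring the quick start assumes (provider ids are illustrative; see the full documentation above for the authoritative schema):
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+
+files:
+  - provider_id: local-files
+    provider_type: inline::localfs
+```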
+**Basic Setup**: Configure the `vector_io` and `files` providers in your run.yaml, as sketched above.
+
+## Common Use Cases
+
+- **RAG Systems**: Document Q&A with file uploads
+- **Knowledge Bases**: Searchable document collections
+- **Content Analysis**: Document similarity and clustering
+- **Research Tools**: Literature review and analysis
+
+## Performance Tips
+
+> **Note**: For detailed performance optimization strategies, see [Performance Considerations](./openai_file_operations_support.md#performance-considerations) in the full documentation.
+
+**Quick Tips**: Choose provider based on your needs (speed vs. storage vs. scalability)
+
+## Troubleshooting
+
+> **Note**: For comprehensive troubleshooting, see [Troubleshooting](./openai_file_operations_support.md#troubleshooting) in the full documentation.
+
+**Quick Fixes**: Check file format compatibility, optimize chunk sizes, monitor storage
+
+## Resources
+
+- [Full Documentation](openai_file_operations_support.md)
+- [Integration Guide](../../concepts/file_operations_vector_stores.mdx)
+- [Files API](files.mdx)
+- [Provider Details](../vector_io/index.mdx)
diff --git a/docs/docs/providers/files/openai_file_operations_support.md b/docs/docs/providers/files/openai_file_operations_support.md
new file mode 100644
index 000000000..058c994da
--- /dev/null
+++ b/docs/docs/providers/files/openai_file_operations_support.md
@@ -0,0 +1,291 @@
+# File Operations Support in Vector Store Providers
+
+## Overview
+
+This document provides a comprehensive overview of file operations and Vector Store API support across all available vector store providers in Llama Stack. As of release 0.2.14, the following providers support full file operations integration.
+
+## Supported Providers
+
+### ✅ Full File Operations Support
+
+The following providers support complete file operations integration, including file upload, automatic processing, and search:
+
+#### Inline Providers (Single Node)
+
+| Provider | File Operations | Key Features |
+|----------|----------------|--------------|
+| **FAISS** | ✅ Full Support | Fast in-memory search, GPU acceleration |
+| **SQLite-vec** | ✅ Full Support | Hybrid search, disk-based storage |
+| **Milvus** | ✅ Full Support | High-performance, scalable indexing |
+
+#### Remote Providers (Hosted)
+
+| Provider | File Operations | Key Features |
+|----------|----------------|--------------|
+| **ChromaDB** | ✅ Full Support | Metadata filtering, persistent storage |
+| **Qdrant** | ✅ Full Support | Payload filtering, advanced search |
+| **Weaviate** | ✅ Full Support | GraphQL interface, schema management |
+| **Postgres (PGVector)** | ✅ Full Support | SQL integration, ACID compliance |
+
+### 🔄 Partial Support
+
+Some providers may support basic vector operations but lack full file operations integration:
+
+| Provider | Status | Notes |
+|----------|--------|-------|
+| **Meta Reference** | 🔄 Basic | Core vector operations only |
+
+## File Operations Features
+
+All supported providers offer the following file operations capabilities:
+
+### Core Functionality
+
+- **File Upload & Processing**: Automatic document ingestion and chunking
+- **Vector Storage**: Embedding generation and storage
+- **Search & Retrieval**: Semantic search with metadata filtering
+- **File Management**: List, retrieve, and manage files in vector stores
+
+### Advanced Features
+
+- **Automatic Chunking**: Configurable chunk sizes and overlap
+- **Metadata Preservation**: File attributes and chunk metadata
+- **Status Tracking**: Monitor file processing progress
+- **Error Handling**: Comprehensive error reporting and recovery
+
+## Implementation Details
+
+### File Processing Pipeline
+
+1. **Upload**: File uploaded via Files API
+2. **Extraction**: Text content extracted from various formats
+3. **Chunking**: Content split into optimal chunks (default: 800 tokens)
+4. **Embedding**: Chunks converted to vector embeddings
+5. **Storage**: Vectors stored with metadata in vector database
+6. **Indexing**: Search index updated for fast retrieval
+
+### Supported File Formats
+
+- **Documents**: PDF, DOCX, DOC
+- **Text**: TXT, MD, RST
+- **Code**: Python, JavaScript, Java, C++, etc.
+- **Data**: JSON, CSV, XML
+- **Web**: HTML files
+
+### Chunking Strategies
+
+- **Default**: 800 tokens with 400 token overlap
+- **Custom**: Configurable chunk sizes and overlap
+- **Static**: Fixed-size chunks with overlap
+
+## Provider-Specific Features
+
+### FAISS
+
+- **Storage**: In-memory with optional persistence
+- **Performance**: Optimized for speed and GPU acceleration
+- **Use Case**: High-performance, memory-constrained environments
+
+### SQLite-vec
+
+- **Storage**: Disk-based with SQLite backend
+- **Search**: Hybrid vector + keyword search
+- **Use Case**: Large document collections, frequent updates
+
+### Milvus
+
+- **Storage**: Scalable distributed storage
+- **Indexing**: Multiple index types (IVF, HNSW)
+- **Use Case**: Production deployments, large-scale applications
+
+### ChromaDB
+
+- **Storage**: Persistent storage with metadata
+- **Filtering**: Advanced metadata filtering
+- **Use Case**: Applications requiring rich metadata
+
+### Qdrant
+
+- **Storage**: High-performance vector database
+- **Filtering**: Payload-based filtering
+- **Use Case**: Real-time applications, complex queries
+
+### Weaviate
+
+- **Storage**: GraphQL-native vector database
+- **Schema**: Flexible schema management
+- **Use Case**: Applications requiring complex data relationships
+
+### Postgres (PGVector)
+
+- **Storage**: SQL database with vector extensions
+- **Integration**: ACID compliance, existing SQL workflows
+- **Use Case**: Applications requiring transactional guarantees
+
+## Configuration Examples
+
+### Basic Configuration
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+```
+
+### With Files API Support
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+
+files:
+  - provider_id: local-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ~/.llama/files
+      metadata_store:
+        type: sqlite
+        db_path: ~/.llama/files_metadata.db
+```
+
+## Usage Examples
+
+### Python Client
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient(base_url="http://localhost:8000")
+
+# Create vector store
+vector_store = await client.vector_stores.create(name="documents")
+
+# Upload and process file
+with open("document.pdf", "rb") as f:
+    file_info = await client.files.create(file=f, purpose="assistants")
+
+# Attach to vector store
+await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+# Search
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+### cURL Commands
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@document.pdf" \
+  -F "purpose=assistants"
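+
+# Tip (illustrative): the same upload with the returned file id captured
+# for the steps below; assumes the jq CLI is installed
+FILE_ID=$(curl -s http://localhost:8000/v1/openai/v1/files \
+  -F "file=@document.pdf" \
+  -F "purpose=assistants" | jq -r '.id')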
+# Create vector store +curl -X POST http://localhost:8000/v1/openai/v1/vector_stores \ + -H "Content-Type: application/json" \ + -d '{"name": "documents"}' + +# Attach file to vector store +curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/files \ + -H "Content-Type: application/json" \ + -d '{"file_id": "file-abc123"}' + +# Search vector store +curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/search \ + -H "Content-Type: application/json" \ + -d '{"query": "What is the main topic?", "max_num_results": 5}' +``` + +## Performance Considerations + +### Chunk Size Optimization + +- **Small chunks (400-600 tokens)**: Better precision, more results +- **Large chunks (800-1200 tokens)**: Better context, fewer results +- **Overlap (50%)**: Maintains context between chunks + +### Storage Efficiency + +- **FAISS**: Fastest, but memory-limited +- **SQLite-vec**: Good balance of performance and storage +- **Milvus**: Scalable, production-ready +- **Remote providers**: Managed, but network-dependent + +### Search Performance + +- **Vector search**: Fastest for semantic queries +- **Hybrid search**: Best accuracy (SQLite-vec only) +- **Filtered search**: Fast with metadata constraints + +## Troubleshooting + +### Common Issues + +1. **File Processing Failures** + - Check file format compatibility + - Verify file size limits + - Review error messages in file status + +2. **Search Performance** + - Optimize chunk sizes for your use case + - Use filters to narrow search scope + - Monitor vector store metrics + +3. **Storage Issues** + - Check available disk space + - Verify database permissions + - Monitor memory usage (for in-memory providers) + +### Monitoring + +```python +# Check file processing status +file_status = await client.vector_stores.files.retrieve( + vector_store_id=vector_store.id, file_id=file_info.id +) + +if file_status.status == "failed": + print(f"Error: {file_status.last_error.message}") + +# Monitor vector store health +health = await client.vector_stores.health(vector_store_id=vector_store.id) +print(f"Status: {health.status}") +``` + +## Best Practices + +1. **File Organization**: Use descriptive names and organize by purpose +2. **Chunking Strategy**: Test different sizes for your specific use case +3. **Metadata**: Add relevant attributes for better filtering +4. **Monitoring**: Track processing status and search performance +5. 
**Cleanup**: Regularly remove unused files to manage storage
+
+## Future Enhancements
+
+Planned improvements for file operations support:
+
+- **Batch Processing**: Process multiple files simultaneously
+- **Advanced Chunking**: More sophisticated chunking algorithms
+- **Custom Embeddings**: Support for custom embedding models
+- **Real-time Updates**: Live file processing and indexing
+- **Multi-format Support**: Enhanced file format support
+
+## Support and Resources
+
+- **Documentation**: [File Operations and Vector Store Integration](../../concepts/file_operations_vector_stores.mdx)
+- **API Reference**: [Files API](files.mdx)
+- **Provider Docs**: [Vector Store Providers](../vector_io/index.mdx)
+- **Examples**: [Getting Started](../../getting_started/quickstart.mdx)
+- **Community**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions)
diff --git a/docs/docs/providers/index.mdx b/docs/docs/providers/index.mdx
index bfc16b29a..5c81a57ed 100644
--- a/docs/docs/providers/index.mdx
+++ b/docs/docs/providers/index.mdx
@@ -22,6 +22,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 ## Provider Categories
 
 - **[External Providers](external/index.mdx)** - Guide for building and using external providers
+- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility layer
 - **[Inference](inference/index.mdx)** - LLM and embedding model providers
 - **[Agents](agents/index.mdx)** - Agentic system providers
 - **[DatasetIO](datasetio/index.mdx)** - Dataset and data loader providers
@@ -30,6 +31,16 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 - **[Tool Runtime](tool_runtime/index.mdx)** - Tool and protocol providers
 - **[Files](files/index.mdx)** - File system and storage providers
 
-## Other information about Providers
-- **[OpenAI Compatibility](./openai.mdx)** - OpenAI API compatibility layer
+## API Documentation
+
+For comprehensive API documentation and reference:
+
+- **[API Reference](../api/index.mdx)** - Complete API documentation
+- **[Experimental APIs](../api-experimental/index.mdx)** - APIs in development
+- **[Deprecated APIs](../api-deprecated/index.mdx)** - Legacy APIs being phased out
+- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility guide
+
+## Additional Provider Information
+
+- **[OpenAI Implementation Guide](./openai.mdx)** - Code examples and implementation details for OpenAI APIs
 - **[OpenAI-Compatible Responses Limitations](./openai_responses_limitations.mdx)** - Known limitations of the Responses API in Llama Stack
diff --git a/docs/docs/providers/openai.mdx b/docs/docs/providers/openai.mdx
index 84436e769..c3bb46ecf 100644
--- a/docs/docs/providers/openai.mdx
+++ b/docs/docs/providers/openai.mdx
@@ -1,9 +1,14 @@
 ---
-title: OpenAI Compatibility
-description: OpenAI API Compatibility
-sidebar_label: OpenAI Compatibility
-sidebar_position: 1
+title: OpenAI Implementation Guide
+description: Code examples and implementation details for OpenAI API compatibility
+sidebar_label: OpenAI Implementation
+sidebar_position: 2
 ---
+
+# OpenAI Implementation Guide
+
+This guide provides detailed code examples and implementation details for using OpenAI-compatible APIs with Llama Stack. For a comprehensive overview of OpenAI compatibility features, see our [OpenAI API Compatibility Guide](../api-openai/index.mdx).
+
 ## OpenAI API Compatibility
 
 ### Server path
@@ -195,3 +200,9 @@ Lines of code unfurl
 Logic whispers in the dark
 Art in hidden form
 ```
+
+## Additional Resources
+
+- **[OpenAI API Compatibility Guide](../api-openai/index.mdx)** - Comprehensive overview of OpenAI compatibility features
+- **[OpenAI Responses API Limitations](./openai_responses_limitations.mdx)** - Detailed limitations and known issues
+- **[Provider Documentation](../index.mdx)** - Complete provider ecosystem overview

From 1e81056a22c0fe2607428da31a4e0c8acf7c27da Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 13 Nov 2025 07:23:23 -0800
Subject: [PATCH 02/12] feat(tests): enable MCP tests in server mode (#4146)

We would like to run all OpenAI compatibility tests using only the
openai-client library. This is the friendliest option for contributors,
since they can run tests without needing to update the client SDKs
(which is getting easier, but is still a long pole). This is the first
step in enabling that -- not using the "library client" for any of the
Responses tests.

This seems like a reasonable trade-off, since using an embeddable
library client for Responses (or any OpenAI-compatible) behavior does
not appear to be very common.

To do this, we needed to enable MCP tests (which only worked in library
client mode) for server mode.
---
 scripts/integration-tests.sh                  |  16 +
 tests/common/mcp.py                           |  10 +-
 tests/integration/responses/conftest.py       |  17 +
 ...9d940455cb083c0fd1330c666a12d74df6f89.json | 549 ++++++++++++
 ...bd9f35d82223c7d1cab613ab2e818d79d6f9b.json | 295 +++++++
 ...27118869d34d768ad87ba072e92e8a43a52f2.json | 833 ++++++++++++++++++
 ...ed6791b1054ce0f36e967eb3793b5608344f3.json | 759 ++++++++++++++++
 ...6f50e862aeddbbeaeb256ef1add34de7c1dc8.json | 549 ++++++++++++
 ...f53ec795fd77ef818827e16691689151bf17c.json | 413 +++++++++
 ...f77eb5d0989d312e929ed59dda07738487d09.json | 586 ++++++++++++
 ...2ff7145784d249c3216c34299c38c28118328.json | 524 +++++++++++
 ...803c4a397f772ad8b1cb90ec44527ce964a45.json | 614 +++++++++++++
 ...18a218bb7f4b8363998abc34ec9bb7ba3a03d.json | 574 ++++++++++++
 ...5fe3ff21e6c39189ab93778335439f288158f.json | 771 ++++++++++++++++
 ...520db560af78e9bc38159e526b68b8daa168e.json | 759 ++++++++++++++++
 ...981ca011dd1b6c29df530d12726b1cf7989e5.json | 833 ++++++++++++++++++
 ...ee40546a0658db3df58b9b4d948e4e95b0961.json | 524 +++++++++++
 ...a91ebca1cbaeb4f7aab22c5b9e246b476272f.json | 649 ++++++++++++++
 ...c53deb1ac47d064a1b5c70a78b7436438818f.json | 450 ++++++++++
 ...d054d5f5dd6bdd3c4333db6cef7361fb32feb.json | 759 ++++++++++++++++
 ...2a453cb8f2e11e80beb8e5506439345c428eb.json | 808 +++++++++++++++++
 ...5fae1f1eb09efe6e4f86c115a78a3db5a59bc.json | 668 ++++++++++++++
 ...34cf2f55727b67c1e1854a106b9d8c7c64b70.json | 700 +++++++++++++++
 ...a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json | 641 ++++++++++++++
 .../responses/test_basic_responses.py         |  30 +-
 .../responses/test_conversation_responses.py  |  12 +-
 .../integration/responses/test_file_search.py |  50 +-
 .../responses/test_tool_responses.py          | 117 +--
 tests/integration/tool_runtime/test_mcp.py    |   5 -
 29 files changed, 13388 insertions(+), 127 deletions(-)
 create mode 100644 tests/integration/responses/conftest.py
 create mode 100644 tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json
 create mode 100644 tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json
 create mode 100644
tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json create mode 100644 tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json create mode 100644 tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json create mode 100644 tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json create mode 100644 tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json create mode 100644 tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json create mode 100644 tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json create mode 100644 tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json create mode 100644 tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json create mode 100644 tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json create mode 100644 tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json create mode 100644 tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json create mode 100644 tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json create mode 100644 tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json create mode 100644 tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json create mode 100644 tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json create mode 100644 tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json create mode 100644 tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json create mode 100644 tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 0951feb14..8b0002125 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -162,6 +162,17 @@ if [[ "$COLLECT_ONLY" == false ]]; then export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="library_client" echo "Setting stack config type: library_client" fi + + # Set MCP host for in-process MCP server tests + # - For library client and server mode: localhost (both on same host) + # - For docker mode: host.docker.internal (container needs to reach host) + if [[ "$STACK_CONFIG" == docker:* ]]; then + export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal" + echo "Setting MCP host: host.docker.internal (docker mode)" + else + export LLAMA_STACK_TEST_MCP_HOST="localhost" + echo "Setting MCP host: localhost (library/server mode)" + fi fi SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. 
python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash) @@ -338,6 +349,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then DOCKER_ENV_VARS="" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_MCP_HOST=${LLAMA_STACK_TEST_MCP_HOST:-host.docker.internal}" # Disabled: https://github.com/llamastack/llama-stack/issues/4089 #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200" @@ -371,8 +383,11 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then # Use regular port mapping instead NETWORK_MODE="" PORT_MAPPINGS="" + ADD_HOST_FLAG="" if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then NETWORK_MODE="--network host" + # On Linux with host network, also add host.docker.internal mapping for consistency + ADD_HOST_FLAG="--add-host=host.docker.internal:host-gateway" else # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT" @@ -381,6 +396,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker run -d $NETWORK_MODE --name "$container_name" \ $PORT_MAPPINGS \ + $ADD_HOST_FLAG \ $DOCKER_ENV_VARS \ "$IMAGE_NAME" \ --port $LLAMA_STACK_PORT diff --git a/tests/common/mcp.py b/tests/common/mcp.py index 644becd2d..085575ec0 100644 --- a/tests/common/mcp.py +++ b/tests/common/mcp.py @@ -244,8 +244,14 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal timeout = 2 start_time = time.time() - server_url = f"http://localhost:{port}/sse" - logger.debug(f"Waiting for MCP server thread to start on port {port}") + # Determine the appropriate host for the server URL based on test environment + # - For library client and server mode: use localhost (both on same host) + # - For docker mode: use host.docker.internal (container needs to reach host) + import os + + mcp_host = os.environ.get("LLAMA_STACK_TEST_MCP_HOST", "localhost") + server_url = f"http://{mcp_host}:{port}/sse" + logger.debug(f"Waiting for MCP server thread to start on port {port} (accessible via {mcp_host})") while time.time() - start_time < timeout: if server_thread.is_alive(): diff --git a/tests/integration/responses/conftest.py b/tests/integration/responses/conftest.py new file mode 100644 index 000000000..c29575072 --- /dev/null +++ b/tests/integration/responses/conftest.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + + +@pytest.fixture +def responses_client(compat_client): + """Provide a client for responses tests, skipping library client mode.""" + if isinstance(compat_client, LlamaStackAsLibraryClient): + pytest.skip("Responses API tests are not supported in library client mode") + return compat_client diff --git a/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json b/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json new file mode 100644 index 000000000..9b432130b --- /dev/null +++ b/tests/integration/responses/recordings/0a4aca0cd075369aaf6133ee82d9d940455cb083c0fd1330c666a12d74df6f89.json @@ -0,0 +1,549 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"chemical_reaction\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "content": [ + { + "type": "text", + "text": "exp_003" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_yTMuQEKu7x115q8XvhqelRub", + "function": { + "arguments": "", + "name": "get_experiment_results" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9CSOZwfG5M7nid" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Wss" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": null, + "obfuscation": "5AmVsa0S6NBy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2Sf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "exp", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "leu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "omxpR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "003", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kW6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": 
null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Zm6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aXvC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0a4aca0cd075", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 457, + "total_tokens": 476, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "s13YHOCCaCDcJ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json b/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json new file mode 100644 index 000000000..5aebcd841 --- /dev/null +++ b/tests/integration/responses/recordings/2bd4c8dc08b3ee3ffce696864f0bd9f35d82223c7d1cab613ab2e818d79d6f9b.json @@ -0,0 +1,295 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"alice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "content": [ + { + "type": "text", + "text": "user_12345" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "type": "function", + "function": { + "name": "check_file_access", + "arguments": "{\"user_id\":\"user_12345\",\"filename\":\"document.txt\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "content": [ + { + "type": "text", + "text": "yes" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UxHf8fChwO3CUY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": "yes", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GOexNEhopELIg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "O41d8hC8zD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2bd4c8dc08b3", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 516, + "total_tokens": 518, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9VQklZAZMYAfa0" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json b/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json new file mode 100644 index 000000000..c39483a7c --- /dev/null +++ b/tests/integration/responses/recordings/2ed23a4289840f93202f94e7e7027118869d34d768ad87ba072e92e8a43a52f2.json @@ -0,0 +1,833 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + 
"headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "type": "function", + "function": { + "name": "get_user_permissions", + "arguments": "{\"user_id\":\"user_11111\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "content": [ + { + "type": "text", + "text": "admin" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "type": "function", + "function": { + "name": "check_file_access", + "arguments": "{\"user_id\":\"user_11111\",\"filename\":\"secret_file.txt\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "content": [ + { + "type": "text", + "text": "no" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. 
This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WLGSIGDbuImIc2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "tOPrT8GpCzqCn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " user", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ViOvVDT7owF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EkiYJGYtRb2KCr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "char", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ioC2G58DuWTx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "lie", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "A5rxByl55APwi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kmDNWRqOyy2r3ST" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " cannot", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JHGD4XKFC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " access", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6IPkFhs93" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LGHjKnVq2lF1DS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "secret", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1nGoXVjnK0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": 
[ + { + "delta": { + "content": "_file", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OeR7YlvZQLa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ".txt", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yLKHaSgjE64R" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": "'.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "waZY1Js7DPWtoN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "km3Gr5HspErW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " final", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Mvzf8AUstX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " result", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "660CrCPne" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lq7NyKvIo8UEO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ":", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qjIz07y1RQsKqTo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": " no", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xhcVwxM4RaQcN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dPxBJZ3WUesIy8T" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z9wFfcEaK2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-2ed23a428984", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 21, + "prompt_tokens": 542, + "total_tokens": 563, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "fSoZk1lrb3nJt" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json b/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json new file mode 100644 index 000000000..d86ca8cc9 --- /dev/null +++ b/tests/integration/responses/recordings/3177a984c900c2bdc2785b502bded6791b1054ce0f36e967eb3793b5608344f3.json @@ -0,0 +1,759 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-True-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_bL84OWNnE1s75GJEqGLAK35W", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ptE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UEV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hMko" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", 
+ "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aLLC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EZdr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0bj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5J" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7dZEY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AqP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "X8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oa7h2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1Is8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-3177a984c900", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "DfwHMdbjUVww7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json b/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json new file mode 100644 index 000000000..025246ebe --- /dev/null +++ b/tests/integration/responses/recordings/318c5361647df0245c074cd2c7d6f50e862aeddbbeaeb256ef1add34de7c1dc8.json @@ -0,0 +1,549 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"boiling_point\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_skNUKbERbtdoADH834U9OE91", + "function": { + "arguments": "", + "name": "get_experiment_results" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5aHvu2xes6Amy8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9HQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ckAh5OXg9JIe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "avh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, 
+ "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "exp", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "f75" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nini1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "004", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MXB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Vc4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "rnph" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-318c5361647d", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 450, + "total_tokens": 469, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "nUptVmnQlQZrH" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json b/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json new file mode 100644 index 000000000..b26cd985e --- /dev/null +++ b/tests/integration/responses/recordings/430a49246c97c29bd958f383627f53ec795fd77ef818827e16691689151bf17c.json @@ -0,0 +1,413 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. 
This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ma7aiZxSs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DXu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "username", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "rtfrl7gxu80vmN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": 
"fp_cbf1785567", + "usage": null, + "obfuscation": "r" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "alice", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vSu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sXfh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-430a49246c97", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 15, + "prompt_tokens": 454, + "total_tokens": 469, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "bEe7hWJ6U62YQ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json b/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json new file mode 100644 index 000000000..fef5f0a62 --- /dev/null +++ b/tests/integration/responses/recordings/52a2b96781961e252aa3a7b0a5ff77eb5d0989d312e929ed59dda07738487d09.json @@ -0,0 +1,586 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check 
if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "function": { + "arguments": "", + "name": "get_user_permissions" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "00p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Y0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i2I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QY61l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "111", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "YAZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "11", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nw7U" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ev7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [ 
+ { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CSaD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-52a2b9678196", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 478, + "total_tokens": 497, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "kMNEyeKFT75vK" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json b/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json new file mode 100644 index 000000000..6b7e5bc49 --- /dev/null +++ b/tests/integration/responses/recordings/541b5db7789e61d2400b70bd41c2ff7145784d249c3216c34299c38c28118328.json @@ -0,0 +1,524 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "W3B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L7n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + 
"obfuscation": "lXUc0FKJkRea" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "bo", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3dUQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "iling", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_point", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": 
{ + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "48i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "eQyU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-541b5db7789e", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 418, + "total_tokens": 437, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "5tVrc5IEigum8" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json b/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json new file mode 100644 index 000000000..adae894b3 --- /dev/null +++ b/tests/integration/responses/recordings/6a05cad89f138e215047fd44d21803c4a397f772ad8b1cb90ec44527ce964a45.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_8kf8fNIDcWOelbCmUEcretON", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_8kf8fNIDcWOelbCmUEcretON", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QvigjcdULEdran" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sIHyVud88f1Ri" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L46IcJeM" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "j0afpRCRBL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "tuzBzZB7jURPj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "iq6vUNVBRuRH5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Nkkz9uUPfhHdqZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oR3PEQpsXLwYOJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VBFf1ewix1rj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yEx3rYoaZjsTw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ 
+ { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "I6VR8wzPmnpa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xld69F07KIb2Yc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GKgtQZJiWLVKj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1by4tgiJqNgaI1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2RdP6HDQApUpN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "21ABialEpJBCcX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "uoaaRgmiGLD815k" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QKEKTjUUam" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6a05cad89f13", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 195, + "total_tokens": 212, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ceWQr6uzZRuj3" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json b/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json new file mode 100644 index 000000000..997e18bec --- /dev/null +++ b/tests/integration/responses/recordings/6d7f54b7be4845c31ae64498e8018a218bb7f4b8363998abc34ec9bb7ba3a03d.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-False-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_4ldOwO71od1E0lrdgYQCoe2e", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TdV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L5f" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qo3z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QdX5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sJYi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Yk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pnS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "y5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Tjs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cx0I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6d7f54b7be48", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 156, + "total_tokens": 178, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "bmRrd4XLuhmCv" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json b/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json new file mode 100644 index 000000000..53f1a8125 --- /dev/null +++ b/tests/integration/responses/recordings/73c9287059db75cd80dc56cff905fe3ff21e6c39189ab93778335439f288158f.json @@ -0,0 +1,771 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_file_access_check]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"alice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_EsVvmBUqtJb42kNkYnK19QkJ", + "content": [ + { + "type": "text", + "text": "user_12345" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_kCmSE8ORKfQoiEsW2UCYr5Sh", + "function": { + "arguments": "", + "name": "check_file_access" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sCU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "iHp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4hG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WRFf5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "123", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PvE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "45", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xak8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "v" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ 
+ { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "filename", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "l7Rfy5le49BJu0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "document", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EpFPZH128OUIsw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ".txt", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Zg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "jH3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": 
"default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UubI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-73c9287059db", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 24, + "prompt_tokens": 482, + "total_tokens": 506, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "GITY7sf69sAJd" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json b/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json new file mode 100644 index 000000000..5c9d6ee91 --- /dev/null +++ b/tests/integration/responses/recordings/9f10c42f1338ae4b535cb877851520db560af78e9bc38159e526b68b8daa168e.json @@ -0,0 +1,759 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AhH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { 
+ "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SMa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fBD0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + 
], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ySpU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fra1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Hb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "INi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "jF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2dDeK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DSb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9boiy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ZZRa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-9f10c42f1338", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "HoutUcx6gZI1g" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json b/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json new file mode 100644 index 000000000..3ba6af144 --- /dev/null +++ b/tests/integration/responses/recordings/a97d8a2f2fd75b4a5ca732e632b981ca011dd1b6c29df530d12726b1cf7989e5.json @@ -0,0 +1,833 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "type": "function", + "function": { + "name": "get_user_id", + "arguments": "{\"username\":\"charlie\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "content": [ + { + "type": "text", + "text": "user_11111" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "type": "function", + "function": { + "name": "get_user_permissions", + "arguments": "{\"user_id\":\"user_11111\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_moRBxqnBJ48EWTSEoQ1llgib", + "content": [ + { + "type": "text", + "text": "admin" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_ybUqAP9oQn3rwQqVdOLs5Wb4", + "function": { + "arguments": "", + "name": "check_file_access" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xpc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xXs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_id", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HbC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "f" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "user", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ds" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Osfy3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "111", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ioI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "11", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GQg6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "filename", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "b2qqKbGC68nHMB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "H" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "secret", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + 
"delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_file", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": ".txt", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Wz" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ImW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nRAE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-a97d8a2f2fd7", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 25, + "prompt_tokens": 507, + "total_tokens": 532, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "rgbYyZ54cN8La" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json b/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json new file mode 100644 index 000000000..80cce1358 --- /dev/null +++ b/tests/integration/responses/recordings/b30da63114770b8c975bf66e24aee40546a0658db3df58b9b4d948e4e95b0961.json @@ -0,0 +1,524 @@ +{ + "test_id": 
"tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. 
Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "function": { + "arguments": "", + "name": "get_experiment_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "c8d" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QoE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "experiment", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1krtmewG8p36" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "chemical", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FoS4ov7pi99K5h" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_re", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BhD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "action", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KWC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PFmv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b30da6311477", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 425, + "total_tokens": 444, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "NYdC3zepOXLsO" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json b/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json new file mode 100644 index 000000000..040998a3b --- /dev/null +++ b/tests/integration/responses/recordings/b6b7282ca0ad5a3c59321d2b045a91ebca1cbaeb4f7aab22c5b9e246b476272f.json @@ -0,0 +1,649 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + }, + { + "role": "assistant", + "content": "The boiling point of \"myawesomeliquid\" is -100 degrees Celsius." + }, + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7S5XpbMeFTTZba" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "G4KYajpQCgm5p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "krw8d3Np" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sOEsvVtCEV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5eAw89OUrx7VT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PFghmTocqCYea" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IRJRbKIoXwNh0e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wuoL6MoA21KfMP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DLRS3D5YVekk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PQZQlOncwl01F" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TVfNNxYtZgXQ" + } + }, + { + 
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LscPqJGnbMf6Qw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "X8NSrxHcpYYXL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5nfdb4DuFapoeT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "K2qXQYFAd591w" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "b0rvHdF1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kFoGt52c" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 
0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SJjhJwz2zgz693C" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MityMxFgBz" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6b7282ca0ad", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 18, + "prompt_tokens": 234, + "total_tokens": 252, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "qf0j6dzuNPifV" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json b/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json new file mode 100644 index 000000000..c79ed1010 --- /dev/null +++ b/tests/integration/responses/recordings/c27df465b2996c4d7c909e9ccfac53deb1ac47d064a1b5c70a78b7436438818f.json @@ -0,0 +1,450 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-user_permissions_workflow]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response." + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_fsxGbKmceUbLSXCe4sx9WLXO", + "function": { + "arguments": "", + "name": "get_user_id" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "sOa6fZEKZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HBO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "username", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7kcXlaglccmA8a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "char", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "lie", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "d2e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fhE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", 
+ "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SlsZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c27df465b299", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 16, + "prompt_tokens": 449, + "total_tokens": 465, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "fjMWRTbF1Ni06" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json b/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json new file mode 100644 index 000000000..a41104fd5 --- /dev/null +++ b/tests/integration/responses/recordings/d35c1244fbbe9898da3958113c1d054d5f5dd6bdd3c4333db6cef7361fb32feb.json @@ -0,0 +1,759 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_8kf8fNIDcWOelbCmUEcretON", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1xG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RQj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XncI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "86" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "L" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lnSu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ksr1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hrv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "K9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\",\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "a" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + 
"choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "c", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LKw52" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "elsius", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yGY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "true", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8fF8B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bbwp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d35c1244fbbe", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 27, + "prompt_tokens": 156, + "total_tokens": 183, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "k0bo4JwUfLNKW" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json b/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json new file mode 100644 index 000000000..610fe96b1 --- /dev/null +++ b/tests/integration/responses/recordings/d42e1020edee86d9f6da7df909c2a453cb8f2e11e80beb8e5506439345c428eb.json @@ -0,0 +1,808 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_analysis_streaming]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"chemical_reaction\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_Q9Gcxub7UbQsxJWVkiy4FETr", + "content": [ + { + "type": "text", + "text": "exp_003" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_yTMuQEKu7x115q8XvhqelRub", + "type": "function", + "function": { + "name": "get_experiment_results", + "arguments": "{\"experiment_id\":\"exp_003\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_yTMuQEKu7x115q8XvhqelRub", + "content": [ + { + "type": "text", + "text": "Yield: 85%, Status: Complete" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. 
Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7yA3503fehs27D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "T95BeWrgJQMHt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " yield", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VveNEnHuMQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KupSssWahehO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ogot8KLW0IXw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dYKJ6jPstuAso4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "chemical", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wcSKhZVd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "_re", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6ZlTlRGLyclHo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "action", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WpYqOmrhXr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "qUhq7HrrwdFEyuY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WWO2y" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pFVMO1BRN37n4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TtQlcHeU2mPl830" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "85", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zyw8OdA0pXZCp5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "%,", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VcHVTGGXrqvev1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FI9FAA2rX6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cc65gPYGA6Xfd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " above", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "T7BlLMIQGs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2oKThCybRdG8MzZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "80", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QHWdJWXK6hzQVS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": "%.", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lJnplmQYyl0SL3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "NPaAVrOB4J" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-d42e1020edee", + "choices": [], 
+ "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 21, + "prompt_tokens": 494, + "total_tokens": 515, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ngidabPDDHECm" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json b/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json new file mode 100644 index 000000000..ce771f24e --- /dev/null +++ b/tests/integration/responses/recordings/e2dc09dc546d9b8b99096804fe75fae1f1eb09efe6e4f86c115a78a3db5a59bc.json @@ -0,0 +1,668 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_mcp_tool_approval[openai_client-txt=openai/gpt-4o-True-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_bL84OWNnE1s75GJEqGLAK35W", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_bL84OWNnE1s75GJEqGLAK35W", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "STnb1nbwTsG4JZ" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "aEUUYMIYjnZpH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2QzI8Zau" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "gZw7vp0bnu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TYru3DcfZVc6B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h5P3cluszFa21" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ggSDGSgtWOR3d9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lm72CS5Lt7lW76" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fKXRsLB1CG0e" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JxZBNjkfyXquH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "egtKHFRBAqZn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "R7MdHaS5Rj2mMV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LydsYLrAIj6PU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4MmAUDk0" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ivlu4M0VfRH8b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OfTmU32oCtMsuo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IUbbHa5oyIPjr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "llluAF0LBNJIwi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "LnUC3LPx43OfUbC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ULfebGmmMn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e2dc09dc546d", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": { + "completion_tokens": 19, + "prompt_tokens": 195, + "total_tokens": 214, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "w11BVXjZVXRtg" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json b/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json new file mode 100644 index 000000000..f8472055f --- /dev/null +++ b/tests/integration/responses/recordings/e9f1cc3da4297f143b7b2a4b21b34cf2f55727b67c1e1854a106b9d8c7c64b70.json @@ -0,0 +1,700 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_multi_turn_tool_execution[openai_client-txt=openai/gpt-4o-experiment_results_lookup]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "type": "function", + "function": { + "name": "get_experiment_id", + "arguments": "{\"experiment_name\":\"boiling_point\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_dZwjBxH3aTRhnaS0bJVPqRcz", + "content": [ + { + "type": "text", + "text": "exp_004" + } + ] + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_skNUKbERbtdoADH834U9OE91", + "type": "function", + "function": { + "name": "get_experiment_results", + "arguments": "{\"experiment_id\":\"exp_004\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_skNUKbERbtdoADH834U9OE91", + "content": [ + { + "type": "text", + "text": "Boiling Point: 100\u00b0C, Status: Verified" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_user_id", + "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", + "parameters": { + "properties": { + "username": { + "title": "Username", + "type": "string" + } + }, + "required": [ + "username" + ], + "title": "get_user_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_user_permissions", + "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "user_id" + ], + "title": "get_user_permissionsArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "check_file_access", + "description": "\n Check if a user can access a specific file. 
Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", + "parameters": { + "properties": { + "user_id": { + "title": "User Id", + "type": "string" + }, + "filename": { + "title": "Filename", + "type": "string" + } + }, + "required": [ + "user_id", + "filename" + ], + "title": "check_file_accessArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_id", + "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", + "parameters": { + "properties": { + "experiment_name": { + "title": "Experiment Name", + "type": "string" + } + }, + "required": [ + "experiment_name" + ], + "title": "get_experiment_idArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_experiment_results", + "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", + "parameters": { + "properties": { + "experiment_id": { + "title": "Experiment Id", + "type": "string" + } + }, + "required": [ + "experiment_id" + ], + "title": "get_experiment_resultsArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OzNg5nfMI5VouN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EBvjjqFPfytPb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HhEiLgKg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hLc2aAgg1D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "q3AsmJJ6Rvyt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4QJrcjxcuFLd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " experiment", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BQQJ8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " '", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nj2SOixVU5KocZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "bo", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ookLm9qkLqQQ3M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "J4axWnSRvQU" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QG6jvQWF8t" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "veUGdbLd3d8r2yU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ZOCkbhGksYmsF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fbNuaYkAA8gREQ7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3rdZxDq7QoXcl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "upjHViB9dUBWAd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hBZNqRjyLGCIMjg" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PrtgvDwRZp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-e9f1cc3da429", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 490, + "total_tokens": 507, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "euYYBnLE4Mj0Z" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json b/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json new file mode 100644 index 000000000..d8d87a16e --- /dev/null +++ b/tests/integration/responses/recordings/ed89b57fec937fa8602b4911a21a9a1a9488fb2347bf73d6e3bc2203a9a47a61.json @@ -0,0 +1,641 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_mcp_tool[openai_client-txt=openai/gpt-4o-boiling_point_tool]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid in Celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\",\"celsius\":true}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_b5k2yeqIi5ucElnnrVPyYU4x", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "WGXCgkwfwMDUCG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "pkdvw6gGNrtXN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RO5YJeZc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": 
"chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "riZZHSDEz0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1zjk8zIdt2Y2b" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "XGHv0dlif7IrC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Ii2KeTyV3U0uiU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "3OyYvSytdOYhpT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "zCnXbjW4JE6l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "0bwcz2K91q7EO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ 
+ { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Um0jFlJegpXI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4OllZlS2JmoD3l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "x4jApO80AyXpX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wq0D3Wzc1l3h6S" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Dn78V58iZ9wKK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "fjHDBTqT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Cnp6KULL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "grbygHexDT4JwGx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "upSRpiQQKE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-ed89b57fec93", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 18, + "prompt_tokens": 195, + "total_tokens": 213, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "psE6Es6zZ2Kz4" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/test_basic_responses.py b/tests/integration/responses/test_basic_responses.py index a764084af..d72a43375 100644 --- a/tests/integration/responses/test_basic_responses.py +++ b/tests/integration/responses/test_basic_responses.py @@ -13,8 +13,8 @@ from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", basic_test_cases) -def test_response_non_streaming_basic(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_basic(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=False, @@ -31,10 +31,10 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case): "Total tokens should equal input + output tokens" ) - retrieved_response = compat_client.responses.retrieve(response_id=response.id) + retrieved_response = responses_client.responses.retrieve(response_id=response.id) assert retrieved_response.output_text == response.output_text - next_response = compat_client.responses.create( + next_response = responses_client.responses.create( model=text_model_id, input="Repeat your previous response in all caps.", previous_response_id=response.id, @@ -44,8 +44,8 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case): @pytest.mark.parametrize("case", basic_test_cases) -def test_response_streaming_basic(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_streaming_basic(responses_client, text_model_id, case): + 
response = responses_client.responses.create( model=text_model_id, input=case.input, stream=True, @@ -98,15 +98,15 @@ def test_response_streaming_basic(compat_client, text_model_id, case): validator.assert_response_consistency() # Verify stored response matches streamed response - retrieved_response = compat_client.responses.retrieve(response_id=response_id) + retrieved_response = responses_client.responses.retrieve(response_id=response_id) final_event = events[-1] assert retrieved_response.output_text == final_event.response.output_text @pytest.mark.parametrize("case", basic_test_cases) -def test_response_streaming_incremental_content(compat_client, text_model_id, case): +def test_response_streaming_incremental_content(responses_client, text_model_id, case): """Test that streaming actually delivers content incrementally, not just at the end.""" - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=True, @@ -170,10 +170,10 @@ def test_response_streaming_incremental_content(compat_client, text_model_id, ca @pytest.mark.parametrize("case", multi_turn_test_cases) -def test_response_non_streaming_multi_turn(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn(responses_client, text_model_id, case): previous_response_id = None for turn_input, turn_expected in case.turns: - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=turn_input, previous_response_id=previous_response_id, @@ -184,8 +184,8 @@ def test_response_non_streaming_multi_turn(compat_client, text_model_id, case): @pytest.mark.parametrize("case", image_test_cases) -def test_response_non_streaming_image(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_image(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, stream=False, @@ -195,10 +195,10 @@ def test_response_non_streaming_image(compat_client, text_model_id, case): @pytest.mark.parametrize("case", multi_turn_image_test_cases) -def test_response_non_streaming_multi_turn_image(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn_image(responses_client, text_model_id, case): previous_response_id = None for turn_input, turn_expected in case.turns: - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=turn_input, previous_response_id=previous_response_id, diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index babb77793..bbd861e0d 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ b/tests/integration/responses/test_conversation_responses.py @@ -131,18 +131,18 @@ class TestConversationResponses: assert len(response.output_text.strip()) > 0 # this is not ready yet - # def test_conversation_compat_client(self, compat_client, text_model_id): + # def test_conversation_compat_client(self, responses_client, text_model_id): # """Test conversation parameter works with compatibility client.""" - # if not hasattr(compat_client, "conversations"): - # pytest.skip("compat_client does not support conversations API") + # if not hasattr(responses_client, "conversations"): + # pytest.skip("responses_client does not support conversations API") # - # conversation = 
compat_client.conversations.create() - # response = compat_client.responses.create( + # conversation = responses_client.conversations.create() + # response = responses_client.responses.create( # model=text_model_id, input="Tell me a joke", conversation=conversation.id # ) # # assert response is not None # assert len(response.output_text.strip()) > 0 # - # conversation_items = compat_client.conversations.items.list(conversation.id) + # conversation_items = responses_client.conversations.items.list(conversation.id) # assert len(conversation_items.data) >= 2 diff --git a/tests/integration/responses/test_file_search.py b/tests/integration/responses/test_file_search.py index dde5fd7f6..b2a634fb0 100644 --- a/tests/integration/responses/test_file_search.py +++ b/tests/integration/responses/test_file_search.py @@ -9,8 +9,6 @@ import time import pytest -from llama_stack.core.library_client import LlamaStackAsLibraryClient - from .helpers import new_vector_store, upload_file @@ -28,12 +26,9 @@ from .helpers import new_vector_store, upload_file }, ], ) -def test_response_text_format(compat_client, text_model_id, text_format): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API text format is not yet supported in library client.") - +def test_response_text_format(responses_client, text_model_id, text_format): stream = False - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What is the capital of France?", stream=stream, @@ -47,13 +42,10 @@ def test_response_text_format(compat_client, text_model_id, text_format): @pytest.fixture -def vector_store_with_filtered_files(compat_client, embedding_model_id, embedding_dimension, tmp_path_factory): +def vector_store_with_filtered_files(responses_client, embedding_model_id, embedding_dimension, tmp_path_factory): # """Create a vector store with multiple files that have different attributes for filtering tests.""" - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("upload_file() is not yet supported in library client somehow?") - vector_store = new_vector_store( - compat_client, "test_vector_store_with_filters", embedding_model_id, embedding_dimension + responses_client, "test_vector_store_with_filters", embedding_model_id, embedding_dimension ) tmp_path = tmp_path_factory.mktemp("filter_test_files") @@ -104,11 +96,11 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin file_path.write_text(file_data["content"]) # Upload file - file_response = upload_file(compat_client, file_data["name"], str(file_path)) + file_response = upload_file(responses_client, file_data["name"], str(file_path)) file_ids.append(file_response.id) # Attach file to vector store with attributes - file_attach_response = compat_client.vector_stores.files.create( + file_attach_response = responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, attributes=file_data["attributes"], @@ -117,7 +109,7 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin # Wait for attachment while file_attach_response.status == "in_progress": time.sleep(0.1) - file_attach_response = compat_client.vector_stores.files.retrieve( + file_attach_response = responses_client.vector_stores.files.retrieve( vector_store_id=vector_store.id, file_id=file_response.id, ) @@ -127,17 +119,17 @@ def vector_store_with_filtered_files(compat_client, embedding_model_id, embeddin # Cleanup: delete vector 
store and files try: - compat_client.vector_stores.delete(vector_store_id=vector_store.id) + responses_client.vector_stores.delete(vector_store_id=vector_store.id) for file_id in file_ids: try: - compat_client.files.delete(file_id=file_id) + responses_client.files.delete(file_id=file_id) except Exception: pass # File might already be deleted except Exception: pass # Best effort cleanup -def test_response_file_search_filter_by_region(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_region(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with region equality filter.""" tools = [ { @@ -147,7 +139,7 @@ def test_response_file_search_filter_by_region(compat_client, text_model_id, vec } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What are the updates from the US region?", tools=tools, @@ -168,7 +160,7 @@ def test_response_file_search_filter_by_region(compat_client, text_model_id, vec assert "asia" not in result.text.lower() -def test_response_file_search_filter_by_category(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_category(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with category equality filter.""" tools = [ { @@ -178,7 +170,7 @@ def test_response_file_search_filter_by_category(compat_client, text_model_id, v } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="Show me all marketing reports", tools=tools, @@ -198,7 +190,7 @@ def test_response_file_search_filter_by_category(compat_client, text_model_id, v assert "revenue figures" not in result.text.lower() -def test_response_file_search_filter_by_date_range(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_by_date_range(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with date range filter using compound AND.""" tools = [ { @@ -222,7 +214,7 @@ def test_response_file_search_filter_by_date_range(compat_client, text_model_id, } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What happened in Q1 2023?", tools=tools, @@ -241,7 +233,7 @@ def test_response_file_search_filter_by_date_range(compat_client, text_model_id, assert "q3" not in result.text.lower() -def test_response_file_search_filter_compound_and(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_and(responses_client, text_model_id, vector_store_with_filtered_files): """Test file search with compound AND filter (region AND category).""" tools = [ { @@ -257,7 +249,7 @@ def test_response_file_search_filter_compound_and(compat_client, text_model_id, } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="What are the engineering updates from the US?", tools=tools, @@ -277,7 +269,7 @@ def test_response_file_search_filter_compound_and(compat_client, text_model_id, assert "promotional" not in result.text.lower() and "revenue" not in result.text.lower() -def test_response_file_search_filter_compound_or(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_or(responses_client, text_model_id, 
vector_store_with_filtered_files): """Test file search with compound OR filter (marketing OR sales).""" tools = [ { @@ -293,7 +285,7 @@ def test_response_file_search_filter_compound_or(compat_client, text_model_id, v } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="Show me marketing and sales documents", tools=tools, @@ -320,7 +312,7 @@ def test_response_file_search_filter_compound_or(compat_client, text_model_id, v assert categories_found.issubset({"marketing", "sales"}), f"Found unexpected categories: {categories_found}" -def test_response_file_search_streaming_events(compat_client, text_model_id, vector_store_with_filtered_files): +def test_response_file_search_streaming_events(responses_client, text_model_id, vector_store_with_filtered_files): """Test that file search emits proper streaming events (in_progress, searching, completed).""" tools = [ { @@ -329,7 +321,7 @@ def test_response_file_search_streaming_events(compat_client, text_model_id, vec } ] - stream = compat_client.responses.create( + stream = responses_client.responses.create( model=text_model_id, input="What are the marketing updates?", tools=tools, diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 9bf58c6ff..2c7c7ef34 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -9,6 +9,7 @@ import logging # allow-direct-logging import os import httpx +import llama_stack_client import openai import pytest @@ -29,8 +30,8 @@ from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", web_search_test_cases) -def test_response_non_streaming_web_search(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_web_search(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -48,12 +49,9 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case): @pytest.mark.parametrize("case", file_search_test_cases) def test_response_non_streaming_file_search( - compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case + responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case ): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) if case.file_content: file_name = "test_response_non_streaming_file_search.txt" @@ -65,16 +63,16 @@ def test_response_non_streaming_file_search( else: raise ValueError("No file content or path provided for case") - file_response = upload_file(compat_client, file_name, file_path) + file_response = upload_file(responses_client, file_name, file_path) # Attach our file to the vector store - compat_client.vector_stores.files.create( + responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, ) # Wait for the file to be attached - wait_for_file_attachment(compat_client, vector_store.id, file_response.id) + wait_for_file_attachment(responses_client, vector_store.id, file_response.id) # Update our tools 
with the right vector store id tools = case.tools @@ -83,7 +81,7 @@ def test_response_non_streaming_file_search( tool["vector_store_ids"] = [vector_store.id] # Create the response request, which should query our vector store - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -105,15 +103,12 @@ def test_response_non_streaming_file_search( def test_response_non_streaming_file_search_empty_vector_store( - compat_client, text_model_id, embedding_model_id, embedding_dimension + responses_client, text_model_id, embedding_model_id, embedding_dimension ): - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) # Create the response request, which should query our vector store - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="How many experts does the Llama 4 Maverick model have?", tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}], @@ -133,13 +128,10 @@ def test_response_non_streaming_file_search_empty_vector_store( def test_response_sequential_file_search( - compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path + responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path ): """Test file search with sequential responses using previous_response_id.""" - if isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("Responses API file search is not yet supported in library client.") - - vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension) + vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension) # Create a test file with content file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture." 
@@ -147,21 +139,21 @@ def test_response_sequential_file_search( file_path = tmp_path / file_name file_path.write_text(file_content) - file_response = upload_file(compat_client, file_name, file_path) + file_response = upload_file(responses_client, file_name, file_path) # Attach the file to the vector store - compat_client.vector_stores.files.create( + responses_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, ) # Wait for the file to be attached - wait_for_file_attachment(compat_client, vector_store.id, file_response.id) + wait_for_file_attachment(responses_client, vector_store.id, file_response.id) tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}] # First response request with file search - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input="How many experts does the Llama 4 Maverick model have?", tools=tools, @@ -178,7 +170,7 @@ def test_response_sequential_file_search( assert "128" in response.output_text or "experts" in response.output_text.lower() # Second response request using previous_response_id - response2 = compat_client.responses.create( + response2 = responses_client.responses.create( model=text_model_id, input="Can you tell me more about the architecture?", tools=tools, @@ -199,14 +191,11 @@ def test_response_sequential_file_search( @pytest.mark.parametrize("case", mcp_tool_test_cases) -def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, caplog): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -243,15 +232,15 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap exc_type = ( AuthenticationRequiredError - if isinstance(compat_client, LlamaStackAsLibraryClient) - else (httpx.HTTPStatusError, openai.AuthenticationError) + if isinstance(responses_client, LlamaStackAsLibraryClient) + else (httpx.HTTPStatusError, openai.AuthenticationError, llama_stack_client.AuthenticationError) ) # Suppress expected auth error logs only for the failing auth attempt with caplog.at_level( logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming" ): with pytest.raises(exc_type): - compat_client.responses.create( + responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -262,7 +251,7 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap if tool["type"] == "mcp": tool["headers"] = {"Authorization": "Bearer test-token"} - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -272,14 +261,11 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap @pytest.mark.parametrize("case", mcp_tool_test_cases) -def test_response_sequential_mcp_tool(compat_client, text_model_id, case): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_sequential_mcp_tool(responses_client, 
text_model_id, case): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -311,7 +297,7 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case): text_content = message.content[0].text assert "boiling point" in text_content.lower() - response2 = compat_client.responses.create( + response2 = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id ) @@ -323,16 +309,13 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case): @pytest.mark.parametrize("case", mcp_tool_test_cases) @pytest.mark.parametrize("approve", [True, False]) -def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve): - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - +def test_response_mcp_tool_approval(responses_client, text_model_id, case, approve): with make_mcp_server() as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) for tool in tools: tool["require_approval"] = "always" - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=tools, @@ -352,13 +335,13 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve) approval_request = response.output[1] assert approval_request.type == "mcp_approval_request" assert approval_request.name == "get_boiling_point" - assert json.loads(approval_request.arguments) == { - "liquid_name": "myawesomeliquid", - "celsius": True, - } + args = json.loads(approval_request.arguments) + assert args["liquid_name"] == "myawesomeliquid" + # celsius has a default value of True, so it may be omitted or explicitly set + assert args.get("celsius", True) is True # send approval response - response = compat_client.responses.create( + response = responses_client.responses.create( previous_response_id=response.id, model=text_model_id, input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}], @@ -398,8 +381,8 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve) @pytest.mark.parametrize("case", custom_tool_test_cases) -def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_non_streaming_custom_tool(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -412,8 +395,8 @@ def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): @pytest.mark.parametrize("case", custom_tool_test_cases) -def test_response_function_call_ordering_1(compat_client, text_model_id, case): - response = compat_client.responses.create( +def test_response_function_call_ordering_1(responses_client, text_model_id, case): + response = responses_client.responses.create( model=text_model_id, input=case.input, tools=case.tools, @@ -437,13 +420,13 @@ def test_response_function_call_ordering_1(compat_client, text_model_id, case): "call_id": response.output[0].call_id, } ) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=case.tools, 
stream=False, previous_response_id=response.id ) assert len(response.output) == 1 -def test_response_function_call_ordering_2(compat_client, text_model_id): +def test_response_function_call_ordering_2(responses_client, text_model_id): tools = [ { "type": "function", @@ -468,7 +451,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): "content": "Is the weather better in San Francisco or Los Angeles?", } ] - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=tools, @@ -489,7 +472,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): "call_id": output.call_id, } ) - response = compat_client.responses.create( + response = responses_client.responses.create( model=text_model_id, input=inputs, tools=tools, @@ -500,15 +483,12 @@ def test_response_function_call_ordering_2(compat_client, text_model_id): @pytest.mark.parametrize("case", multi_turn_tool_execution_test_cases) -def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case): +def test_response_non_streaming_multi_turn_tool_execution(responses_client, text_model_id, case): """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - response = compat_client.responses.create( + response = responses_client.responses.create( input=case.input, model=text_model_id, tools=tools, @@ -550,15 +530,12 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_mo @pytest.mark.parametrize("case", multi_turn_tool_execution_streaming_test_cases) -def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case): +def test_response_streaming_multi_turn_tool_execution(responses_client, text_model_id, case): """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - if not isinstance(compat_client, LlamaStackAsLibraryClient): - pytest.skip("in-process MCP server is only supported in library client") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = setup_mcp_tools(case.tools, mcp_server_info) - stream = compat_client.responses.create( + stream = responses_client.responses.create( input=case.input, model=text_model_id, tools=tools, diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py index 3a8fde37f..9ce0d1c98 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -10,8 +10,6 @@ import pytest from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.turn_events import StepCompleted, StepProgress, ToolCallIssuedDelta -from llama_stack.core.library_client import LlamaStackAsLibraryClient - AUTH_TOKEN = "test-token" from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server @@ -24,9 +22,6 @@ def mcp_server(): def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): - if not isinstance(llama_stack_client, LlamaStackAsLibraryClient): - pytest.skip("The local MCP server only reliably reachable from library client.") - test_toolgroup_id = MCP_TOOLGROUP_ID uri = mcp_server["server_url"] From aeaf4eb3dd1f465f3a17238ebfb47b76de9de4cd Mon Sep 17 
00:00:00 2001 From: Derek Higgins Date: Thu, 13 Nov 2025 15:24:05 +0000 Subject: [PATCH 03/12] fix: remove_disabled_providers filtering models with None fields (#4132) Fixed a bug where models with `None` provider_model_id were incorrectly filtered from the startup config display. The function was checking multiple fields when it should only filter items with an explicitly disabled provider_id. Changes: - Modified remove_disabled_providers to check only the provider_id field - Changed the condition from checking multiple fields for None to checking provider_id for "__disabled__", None, or an empty string - Added comprehensive unit tests Closes: #4131 Signed-off-by: Derek Higgins --- src/llama_stack/core/server/server.py | 4 +- tests/unit/server/test_server.py | 69 ++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 80505c3f9..5bf876c02 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -526,8 +526,8 @@ def extract_path_params(route: str) -> list[str]: def remove_disabled_providers(obj): if isinstance(obj, dict): - keys = ["provider_id", "shield_id", "provider_model_id", "model_id"] - if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys): + # Filter out items where provider_id is explicitly disabled or empty + if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None): return None return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None} elif isinstance(obj, list): diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index d6d4f4f23..53f193672 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -12,7 +12,7 @@ from pydantic import ValidationError from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import AuthenticationRequiredError -from llama_stack.core.server.server import translate_exception +from llama_stack.core.server.server import remove_disabled_providers, translate_exception class TestTranslateException: @@ -194,3 +194,70 @@ class TestTranslateException: assert isinstance(result3, HTTPException) assert result3.status_code == 403 assert result3.detail == "Permission denied: Access denied" + + +class TestRemoveDisabledProviders: + """Test cases for the remove_disabled_providers function.""" + + def test_remove_explicitly_disabled_provider(self): + """Test that providers with provider_id='__disabled__' are removed.""" + config = { + "providers": { + "inference": [ + {"provider_id": "openai", "provider_type": "remote::openai", "config": {}}, + {"provider_id": "__disabled__", "provider_type": "remote::vllm", "config": {}}, + ] + } + } + result = remove_disabled_providers(config) + assert len(result["providers"]["inference"]) == 1 + assert result["providers"]["inference"][0]["provider_id"] == "openai" + + def test_remove_empty_provider_id(self): + """Test that providers with empty provider_id are removed.""" + config = { + "providers": { + "inference": [ + {"provider_id": "openai", "provider_type": "remote::openai", "config": {}}, + {"provider_id": "", "provider_type": "remote::vllm", "config": {}}, + ] + } + } + result = remove_disabled_providers(config) + assert len(result["providers"]["inference"]) == 1 + assert result["providers"]["inference"][0]["provider_id"] == "openai" + + def test_keep_models_with_none_provider_model_id(self): + 
"""Test that models with None provider_model_id are NOT removed.""" + config = { + "registered_resources": { + "models": [ + { + "model_id": "llama-3-2-3b", + "provider_id": "vllm-inference", + "model_type": "llm", + "provider_model_id": None, + "metadata": {}, + }, + { + "model_id": "gpt-4o-mini", + "provider_id": "openai", + "model_type": "llm", + "provider_model_id": None, + "metadata": {}, + }, + { + "model_id": "granite-embedding-125m", + "provider_id": "sentence-transformers", + "model_type": "embedding", + "provider_model_id": "ibm-granite/granite-embedding-125m-english", + "metadata": {"embedding_dimension": 768}, + }, + ] + } + } + result = remove_disabled_providers(config) + assert len(result["registered_resources"]["models"]) == 3 + assert result["registered_resources"]["models"][0]["model_id"] == "llama-3-2-3b" + assert result["registered_resources"]["models"][1]["model_id"] == "gpt-4o-mini" + assert result["registered_resources"]["models"][2]["model_id"] == "granite-embedding-125m" From 4442b24de7238364aa2201e6b36e8b0bd9f415cf Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 13 Nov 2025 12:15:32 -0500 Subject: [PATCH 04/12] chore: Fix docs so can be deployed (#4149) # What does this PR do? Building/Deploying docs is failing here: https://github.com/llamastack/llamastack.github.io/actions/runs/19333785864/job/55303209627#step:8:49 Needs the playground file. Updated it to reflect current admin status. ## Test Plan Signed-off-by: Francisco Javier Arceo --- .../docs/building_applications/playground.mdx | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 docs/docs/building_applications/playground.mdx diff --git a/docs/docs/building_applications/playground.mdx b/docs/docs/building_applications/playground.mdx new file mode 100644 index 000000000..1afb250c4 --- /dev/null +++ b/docs/docs/building_applications/playground.mdx @@ -0,0 +1,87 @@ +--- +title: Admin UI & Chat Playground +description: Web-based admin interface and chat playground for Llama Stack +sidebar_label: Playground +sidebar_position: 10 +--- + +# Admin UI & Chat Playground + +The Llama Stack UI provides a comprehensive web-based admin interface for managing your Llama Stack server, with an integrated chat playground for interactive testing. This admin interface is the primary way to monitor, manage, and debug your Llama Stack applications. + +## Quick Start + +Launch the admin UI with: + +```bash +npx llama-stack-ui +``` + +Then visit `http://localhost:8322` to access the interface. 
+ +## Admin Interface Features + +The Llama Stack UI is organized into two main sections: + +### 🎯 Create +**Chat Playground** - Interactive testing environment +- Real-time chat interface for testing agents and models +- Multi-turn conversations with tool calling support +- Agent SDK integration (will be migrated to Responses API) +- Custom system prompts and model parameter adjustment + +### 📊 Manage +**Logs & Resource Management** - Monitor and manage your stack +- **Responses Logs**: View and analyze agent responses and interactions +- **Chat Completions Logs**: Monitor chat completion requests and responses +- **Vector Stores**: Create, manage, and monitor vector databases for RAG workflows +- **Prompts**: Full CRUD operations for prompt templates and management +- **Files**: Forthcoming file management capabilities + +## Key Capabilities for Application Development + +### Real-time Monitoring +- **Response Tracking**: Monitor all agent responses and tool calls +- **Completion Analysis**: View chat completion performance and patterns +- **Vector Store Activity**: Track RAG operations and document processing +- **Prompt Usage**: Analyze prompt template performance + +### Resource Management +- **Vector Store CRUD**: Create, update, and delete vector databases +- **Prompt Library**: Organize and version control your prompts +- **File Operations**: Manage documents and assets (forthcoming) + +### Interactive Testing +- **Chat Playground**: Test conversational flows before production deployment +- **Agent Prototyping**: Validate agent behaviors and tool integrations + +## Development Workflow Integration + +The admin UI supports your development lifecycle: + +1. **Development**: Use chat playground to prototype and test features +2. **Monitoring**: Track system performance through logs and metrics +3. **Management**: Organize prompts, vector stores, and other resources +4. **Debugging**: Analyze logs to identify and resolve issues + +## Architecture Notes + +- **Current**: Chat playground uses Agents SDK +- **Future**: Migration to Responses API for improved performance and consistency +- **Admin Focus**: Primary emphasis on monitoring, logging, and resource management + +## Getting Started + +1. **Launch the UI**: Run `npx llama-stack-ui` +2. **Explore Logs**: Start with Responses and Chat Completions logs to understand your system activity +3. **Test in Playground**: Use the chat interface to validate your agent configurations +4. **Manage Resources**: Create vector stores and organize prompts through the UI + +For detailed setup and configuration, see the [Llama Stack UI documentation](/docs/distributions/llama_stack_ui). + +## Next Steps + +- Set up your [first agent](/docs/building_applications/agent) +- Implement [RAG functionality](/docs/building_applications/rag) +- Add [evaluation metrics](/docs/building_applications/evals) +- Configure [safety measures](/docs/building_applications/safety) From ceb716b9a0ed0904e53fd362ce7dce932c15e35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 13 Nov 2025 19:52:38 +0100 Subject: [PATCH 05/12] chore: set minimum pre-commit version (#4148) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
- force a minimum pre-commit version - pin to >= 4.4.0 when installing --------- Signed-off-by: Sébastien Han Co-authored-by: Ashwin Bharambe --- .github/workflows/pre-commit.yml | 2 +- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 +- uv.lock | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 74f7da19a..ac125bba5 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -53,7 +53,7 @@ jobs: working-directory: src/llama_stack_ui - name: Install pre-commit - run: python -m pip install pre-commit + run: python -m pip install 'pre-commit>=4.4.0' - name: Cache pre-commit uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42cd2f5ce..19b83563c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ exclude: 'build/' - +minimum_pre_commit_version: 4.4.0 default_language_version: python: python3.12 node: "22" diff --git a/pyproject.toml b/pyproject.toml index e6808af8a..d12d28e8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dev = [ "black", "ruff", "mypy", - "pre-commit", + "pre-commit>=4.4.0", "ruamel.yaml", # needed for openapi generator ] # Type checking dependencies - includes type stubs and optional runtime dependencies diff --git a/uv.lock b/uv.lock index f1808f005..884d41b79 100644 --- a/uv.lock +++ b/uv.lock @@ -2125,7 +2125,7 @@ dev = [ { name = "black" }, { name = "mypy" }, { name = "nbval" }, - { name = "pre-commit" }, + { name = "pre-commit", specifier = ">=4.4.0" }, { name = "pytest", specifier = ">=8.4" }, { name = "pytest-asyncio", specifier = ">=1.0" }, { name = "pytest-cov" }, @@ -3403,7 +3403,7 @@ wheels = [ [[package]] name = "pre-commit" -version = "4.2.0" +version = "4.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cfgv" }, @@ -3412,9 +3412,9 @@ dependencies = [ { name = "pyyaml" }, { name = "virtualenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/49/7845c2d7bf6474efd8e27905b51b11e6ce411708c91e829b93f324de9929/pre_commit-4.4.0.tar.gz", hash = "sha256:f0233ebab440e9f17cabbb558706eb173d19ace965c68cdce2c081042b4fab15", size = 197501, upload-time = "2025-11-08T21:12:11.607Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, + { url = "https://files.pythonhosted.org/packages/27/11/574fe7d13acf30bfd0a8dd7fa1647040f2b8064f13f43e8c963b1e65093b/pre_commit-4.4.0-py2.py3-none-any.whl", hash = "sha256:b35ea52957cbf83dcc5d8ee636cbead8624e3a15fbfa61a370e42158ac8a5813", size = 226049, upload-time = "2025-11-08T21:12:10.228Z" }, ] [[package]] From 840ad75fe9bf62ab1cedaf5fbcd2690920ecfdaf Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Thu, 13 Nov 2025 14:51:17 -0500 Subject: [PATCH 06/12] feat: split API and provider specs into separate llama-stack-api pkg (#3895) # What does this PR do? 
Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server. see: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942 Motivation External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to: - Install only the type definitions they need without server dependencies - Avoid version conflicts with the installed llama-stack package - Be versioned and released independently This enables us to re-enable external provider module tests that were previously blocked by these import conflicts. Changes - Created llama-stack-api package with minimal dependencies (pydantic, jsonschema) - Moved APIs, providers datatypes, strong_typing, and schema_utils - Updated all imports from llama_stack.* to llama_stack_api.* - Configured local editable install for development workflow - Updated linting and type-checking configuration for both packages Next Steps - Publish llama-stack-api to PyPI - Update external provider dependencies - Re-enable external provider module tests Pre-cursor PRs to this one: - #4093 - #3954 - #4064 These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here. relates to #3237 ## Test Plan Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained. --------- Signed-off-by: Charlie Doern --- .github/workflows/python-build-test.yml | 13 +- .pre-commit-config.yaml | 2 +- docs/docs/concepts/apis/external.mdx | 9 +- docs/docs/distributions/building_distro.mdx | 2 +- .../external/external-providers-guide.mdx | 2 +- .../providers/vector_io/inline_sqlite-vec.mdx | 4 +- docs/openapi_generator/generate.py | 2 +- docs/openapi_generator/pyopenapi/generator.py | 26 +- .../openapi_generator/pyopenapi/operations.py | 12 +- .../pyopenapi/specification.py | 2 +- docs/openapi_generator/pyopenapi/utility.py | 7 +- pyproject.toml | 21 +- scripts/generate_prompt_format.py | 2 +- scripts/provider_codegen.py | 5 +- src/llama-stack-api/README.md | 103 +++ .../llama_stack_api/__init__.py | 871 ++++++++++++++++++ .../llama_stack_api}/agents.py | 6 +- .../llama_stack_api}/batches.py | 4 +- .../llama_stack_api}/benchmarks.py | 6 +- .../llama_stack_api/common}/__init__.py | 0 .../llama_stack_api}/common/content_types.py | 44 +- .../llama_stack_api}/common/errors.py | 0 .../llama_stack_api}/common/job_types.py | 2 +- .../llama_stack_api}/common/responses.py | 2 +- .../llama_stack_api}/common/tracing.py | 0 .../llama_stack_api}/common/training_types.py | 2 +- .../llama_stack_api}/common/type_system.py | 2 +- .../llama_stack_api}/conversations.py | 8 +- .../llama_stack_api}/datasetio.py | 8 +- .../llama_stack_api}/datasets.py | 6 +- .../llama_stack_api}/datatypes.py | 171 +++- .../llama_stack_api}/eval.py | 12 +- .../llama_stack_api}/files.py | 8 +- .../llama_stack_api}/inference.py | 12 +- .../llama_stack_api}/inspect.py | 6 +- .../llama_stack_api}/models.py | 8 +- .../llama_stack_api}/openai_responses.py | 4 +- .../llama_stack_api}/post_training.py | 10 +- .../llama_stack_api}/prompts.py | 6 +- .../llama_stack_api}/providers.py | 6 +- .../llama_stack_api}/py.typed | 0 .../llama_stack_api}/rag_tool.py | 2 +- .../llama_stack_api}/resource.py | 0 .../llama_stack_api}/safety.py | 10 +- .../llama_stack_api}/schema_utils.py | 0 
.../llama_stack_api}/scoring.py | 6 +- .../llama_stack_api}/scoring_functions.py | 8 +- .../llama_stack_api}/shields.py | 8 +- .../strong_typing/__init__.py | 0 .../strong_typing/auxiliary.py | 0 .../strong_typing/classdef.py | 0 .../llama_stack_api}/strong_typing/core.py | 0 .../strong_typing/deserializer.py | 0 .../strong_typing/docstring.py | 0 .../strong_typing/exception.py | 0 .../strong_typing/inspection.py | 0 .../llama_stack_api}/strong_typing/mapping.py | 0 .../llama_stack_api}/strong_typing/name.py | 0 .../llama_stack_api/strong_typing/py.typed | 0 .../llama_stack_api}/strong_typing/schema.py | 0 .../strong_typing/serialization.py | 0 .../strong_typing/serializer.py | 0 .../llama_stack_api}/strong_typing/slots.py | 0 .../strong_typing/topological.py | 0 .../llama_stack_api}/tools.py | 10 +- .../llama_stack_api}/vector_io.py | 12 +- .../llama_stack_api}/vector_stores.py | 2 +- .../llama_stack_api}/version.py | 0 src/llama-stack-api/pyproject.toml | 82 ++ src/llama_stack/apis/agents/__init__.py | 7 - src/llama_stack/apis/batches/__init__.py | 9 - src/llama_stack/apis/benchmarks/__init__.py | 7 - src/llama_stack/apis/common/__init__.py | 5 - .../apis/conversations/__init__.py | 27 - src/llama_stack/apis/datasetio/__init__.py | 7 - src/llama_stack/apis/datasets/__init__.py | 7 - src/llama_stack/apis/datatypes.py | 158 ---- src/llama_stack/apis/eval/__init__.py | 7 - src/llama_stack/apis/files/__init__.py | 7 - src/llama_stack/apis/inference/__init__.py | 7 - src/llama_stack/apis/inspect/__init__.py | 7 - src/llama_stack/apis/models/__init__.py | 7 - .../apis/post_training/__init__.py | 7 - src/llama_stack/apis/prompts/__init__.py | 9 - src/llama_stack/apis/providers/__init__.py | 7 - src/llama_stack/apis/safety/__init__.py | 7 - src/llama_stack/apis/scoring/__init__.py | 7 - .../apis/scoring_functions/__init__.py | 7 - src/llama_stack/apis/shields/__init__.py | 7 - src/llama_stack/apis/tools/__init__.py | 8 - src/llama_stack/apis/vector_io/__init__.py | 7 - .../apis/vector_stores/__init__.py | 7 - src/llama_stack/cli/stack/_list_deps.py | 2 +- src/llama_stack/cli/stack/utils.py | 2 +- src/llama_stack/core/build.py | 2 +- src/llama_stack/core/client.py | 3 +- src/llama_stack/core/configure.py | 3 +- .../core/conversations/conversations.py | 6 +- src/llama_stack/core/datatypes.py | 41 +- src/llama_stack/core/distribution.py | 12 +- src/llama_stack/core/external.py | 2 +- src/llama_stack/core/inspect.py | 8 +- src/llama_stack/core/library_client.py | 2 +- src/llama_stack/core/prompts/prompts.py | 2 +- src/llama_stack/core/providers.py | 3 +- src/llama_stack/core/resolver.py | 75 +- src/llama_stack/core/routers/__init__.py | 3 +- src/llama_stack/core/routers/datasets.py | 6 +- src/llama_stack/core/routers/eval_scoring.py | 10 +- src/llama_stack/core/routers/inference.py | 25 +- src/llama_stack/core/routers/safety.py | 7 +- src/llama_stack/core/routers/tool_runtime.py | 5 +- src/llama_stack/core/routers/vector_io.py | 12 +- .../core/routing_tables/benchmarks.py | 3 +- src/llama_stack/core/routing_tables/common.py | 6 +- .../core/routing_tables/datasets.py | 7 +- src/llama_stack/core/routing_tables/models.py | 12 +- .../core/routing_tables/scoring_functions.py | 7 +- .../core/routing_tables/shields.py | 4 +- .../core/routing_tables/toolgroups.py | 13 +- .../core/routing_tables/vector_stores.py | 11 +- src/llama_stack/core/server/auth_providers.py | 2 +- src/llama_stack/core/server/routes.py | 3 +- src/llama_stack/core/server/server.py | 4 +- src/llama_stack/core/stack.py | 45 +- 
src/llama_stack/core/telemetry/telemetry.py | 2 +- src/llama_stack/distributions/dell/dell.py | 3 +- .../meta-reference-gpu/meta_reference.py | 3 +- .../open-benchmark/open_benchmark.py | 4 +- .../distributions/starter/starter.py | 3 +- src/llama_stack/distributions/template.py | 3 +- .../inline/agents/meta_reference/agents.py | 21 +- .../responses/openai_responses.py | 31 +- .../meta_reference/responses/streaming.py | 23 +- .../meta_reference/responses/tool_executor.py | 30 +- .../agents/meta_reference/responses/types.py | 11 +- .../agents/meta_reference/responses/utils.py | 38 +- .../inline/agents/meta_reference/safety.py | 4 +- .../inline/batches/reference/__init__.py | 5 +- .../inline/batches/reference/batches.py | 20 +- .../inline/datasetio/localfs/datasetio.py | 6 +- .../inline/eval/meta_reference/eval.py | 25 +- .../providers/inline/files/localfs/files.py | 8 +- .../inline/inference/meta_reference/config.py | 2 +- .../inference/meta_reference/generators.py | 6 +- .../inference/meta_reference/inference.py | 21 +- .../sentence_transformers.py | 14 +- .../inline/post_training/common/validator.py | 7 +- .../huggingface/post_training.py | 7 +- .../recipes/finetune_single_device.py | 16 +- .../recipes/finetune_single_device_dpo.py | 14 +- .../inline/post_training/huggingface/utils.py | 3 +- .../post_training/torchtune/common/utils.py | 2 +- .../post_training/torchtune/post_training.py | 7 +- .../recipes/lora_finetuning_single_device.py | 22 +- .../safety/code_scanner/code_scanner.py | 10 +- .../inline/safety/llama_guard/llama_guard.py | 15 +- .../safety/prompt_guard/prompt_guard.py | 14 +- .../providers/inline/scoring/basic/scoring.py | 12 +- .../basic/scoring_fn/docvqa_scoring_fn.py | 4 +- .../basic/scoring_fn/equality_scoring_fn.py | 4 +- .../basic/scoring_fn/fn_defs/docvqa.py | 4 +- .../basic/scoring_fn/fn_defs/equality.py | 4 +- .../basic/scoring_fn/fn_defs/ifeval.py | 4 +- .../fn_defs/regex_parser_math_response.py | 4 +- .../regex_parser_multiple_choice_answer.py | 4 +- .../basic/scoring_fn/fn_defs/subset_of.py | 4 +- .../basic/scoring_fn/ifeval_scoring_fn.py | 4 +- .../regex_parser_math_response_scoring_fn.py | 4 +- .../scoring_fn/regex_parser_scoring_fn.py | 4 +- .../basic/scoring_fn/subset_of_scoring_fn.py | 4 +- .../inline/scoring/braintrust/braintrust.py | 15 +- .../scoring_fn/fn_defs/answer_correctness.py | 4 +- .../scoring_fn/fn_defs/answer_relevancy.py | 4 +- .../scoring_fn/fn_defs/answer_similarity.py | 4 +- .../fn_defs/context_entity_recall.py | 4 +- .../scoring_fn/fn_defs/context_precision.py | 4 +- .../scoring_fn/fn_defs/context_recall.py | 4 +- .../scoring_fn/fn_defs/context_relevancy.py | 4 +- .../scoring_fn/fn_defs/factuality.py | 4 +- .../scoring_fn/fn_defs/faithfulness.py | 4 +- .../inline/scoring/llm_as_judge/scoring.py | 14 +- .../fn_defs/llm_as_judge_405b_simpleqa.py | 4 +- .../scoring_fn/fn_defs/llm_as_judge_base.py | 3 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 5 +- .../inline/tool_runtime/rag/__init__.py | 2 +- .../tool_runtime/rag/context_retriever.py | 9 +- .../inline/tool_runtime/rag/memory.py | 21 +- .../inline/vector_io/chroma/__init__.py | 2 +- .../inline/vector_io/chroma/config.py | 2 +- .../inline/vector_io/faiss/__init__.py | 2 +- .../inline/vector_io/faiss/config.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 19 +- .../inline/vector_io/milvus/__init__.py | 2 +- .../inline/vector_io/milvus/config.py | 2 +- .../inline/vector_io/qdrant/__init__.py | 2 +- .../inline/vector_io/qdrant/config.py | 2 +- 
.../inline/vector_io/sqlite_vec/__init__.py | 2 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 16 +- src/llama_stack/providers/registry/agents.py | 3 +- src/llama_stack/providers/registry/batches.py | 2 +- .../providers/registry/datasetio.py | 2 +- src/llama_stack/providers/registry/eval.py | 2 +- src/llama_stack/providers/registry/files.py | 3 +- .../providers/registry/inference.py | 2 +- .../providers/registry/post_training.py | 2 +- src/llama_stack/providers/registry/safety.py | 2 +- src/llama_stack/providers/registry/scoring.py | 2 +- .../providers/registry/tool_runtime.py | 3 +- .../providers/registry/vector_io.py | 4 +- .../datasetio/huggingface/huggingface.py | 6 +- .../remote/datasetio/nvidia/datasetio.py | 6 +- .../providers/remote/eval/nvidia/eval.py | 24 +- .../providers/remote/files/openai/files.py | 8 +- .../providers/remote/files/s3/files.py | 7 +- .../remote/inference/anthropic/config.py | 2 +- .../remote/inference/azure/config.py | 2 +- .../remote/inference/bedrock/bedrock.py | 6 +- .../remote/inference/cerebras/cerebras.py | 3 +- .../remote/inference/cerebras/config.py | 2 +- .../remote/inference/databricks/config.py | 2 +- .../remote/inference/databricks/databricks.py | 2 +- .../remote/inference/fireworks/config.py | 2 +- .../remote/inference/gemini/config.py | 2 +- .../remote/inference/gemini/gemini.py | 3 +- .../providers/remote/inference/groq/config.py | 2 +- .../inference/llama_openai_compat/config.py | 2 +- .../inference/llama_openai_compat/llama.py | 3 +- .../remote/inference/nvidia/__init__.py | 2 +- .../remote/inference/nvidia/config.py | 2 +- .../remote/inference/nvidia/nvidia.py | 13 +- .../remote/inference/oci/__init__.py | 2 +- .../providers/remote/inference/oci/config.py | 2 +- .../providers/remote/inference/oci/oci.py | 10 +- .../remote/inference/ollama/ollama.py | 12 +- .../remote/inference/openai/config.py | 2 +- .../remote/inference/passthrough/config.py | 2 +- .../inference/passthrough/passthrough.py | 8 +- .../remote/inference/runpod/config.py | 2 +- .../remote/inference/runpod/runpod.py | 3 +- .../remote/inference/sambanova/config.py | 2 +- .../providers/remote/inference/tgi/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 6 +- .../remote/inference/together/config.py | 2 +- .../remote/inference/together/together.py | 12 +- .../remote/inference/vertexai/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 2 +- .../providers/remote/inference/vllm/vllm.py | 12 +- .../remote/inference/watsonx/config.py | 2 +- .../remote/inference/watsonx/watsonx.py | 11 +- .../remote/post_training/nvidia/README.md | 2 +- .../post_training/nvidia/post_training.py | 6 +- .../remote/post_training/nvidia/utils.py | 2 +- .../remote/safety/bedrock/bedrock.py | 9 +- .../providers/remote/safety/bedrock/config.py | 3 +- .../providers/remote/safety/nvidia/README.md | 4 +- .../providers/remote/safety/nvidia/config.py | 3 +- .../providers/remote/safety/nvidia/nvidia.py | 14 +- .../remote/safety/sambanova/config.py | 3 +- .../remote/safety/sambanova/sambanova.py | 10 +- .../tool_runtime/bing_search/bing_search.py | 8 +- .../tool_runtime/brave_search/brave_search.py | 8 +- .../model_context_protocol.py | 9 +- .../tavily_search/tavily_search.py | 8 +- .../wolfram_alpha/wolfram_alpha.py | 8 +- .../remote/vector_io/chroma/__init__.py | 2 +- .../remote/vector_io/chroma/chroma.py | 15 +- .../remote/vector_io/chroma/config.py | 2 +- .../remote/vector_io/milvus/__init__.py | 2 +- .../remote/vector_io/milvus/config.py | 2 +- .../remote/vector_io/milvus/milvus.py | 17 
+- .../remote/vector_io/pgvector/__init__.py | 2 +- .../remote/vector_io/pgvector/config.py | 2 +- .../remote/vector_io/pgvector/pgvector.py | 17 +- .../remote/vector_io/qdrant/__init__.py | 2 +- .../remote/vector_io/qdrant/config.py | 2 +- .../remote/vector_io/qdrant/qdrant.py | 25 +- .../remote/vector_io/weaviate/__init__.py | 2 +- .../remote/vector_io/weaviate/config.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 18 +- .../utils/common/data_schema_validator.py | 7 +- .../providers/utils/files/form_data.py | 3 +- .../utils/inference/embedding_mixin.py | 2 +- .../utils/inference/inference_store.py | 6 +- .../utils/inference/litellm_openai_mixin.py | 4 +- .../utils/inference/model_registry.py | 4 +- .../utils/inference/openai_compat.py | 26 +- .../providers/utils/inference/openai_mixin.py | 10 +- .../utils/inference/prompt_adapter.py | 12 +- .../providers/utils/kvstore/sqlite/config.py | 3 +- .../providers/utils/memory/file_utils.py | 2 +- .../utils/memory/openai_vector_store_mixin.py | 13 +- .../providers/utils/memory/vector_store.py | 20 +- src/llama_stack/providers/utils/pagination.py | 2 +- .../utils/responses/responses_store.py | 9 +- .../utils/scoring/aggregation_utils.py | 3 +- .../utils/scoring/base_scoring_fn.py | 4 +- .../providers/utils/sqlstore/api.py | 3 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- src/llama_stack/providers/utils/tools/mcp.py | 15 +- .../src/llama_stack_api_weather/weather.py | 4 +- tests/integration/batches/conftest.py | 3 +- tests/integration/files/test_files.py | 2 +- .../inference/test_provider_data_routing.py | 6 +- .../post_training/test_post_training.py | 4 +- ...c189daa31e88b25d0381a985f24203b7a5a38.json | 2 +- ...393e5712917253462292829b37b9320d6df82.json | 2 +- ...a0ee18d09bd413189a7c03b24bf3871e3d8d7.json | 2 +- tests/integration/safety/test_llama_guard.py | 2 +- tests/integration/safety/test_safety.py | 3 +- .../integration/safety/test_vision_safety.py | 3 +- .../tool_runtime/test_registration.py | 2 +- .../vector_io/test_openai_vector_stores.py | 19 +- tests/integration/vector_io/test_vector_io.py | 3 +- tests/unit/conversations/test_api_models.py | 6 +- .../unit/conversations/test_conversations.py | 5 +- tests/unit/core/routers/test_safety_router.py | 4 +- tests/unit/core/routers/test_vector_io.py | 2 +- tests/unit/core/test_stack_validation.py | 4 +- .../routers/test_routing_tables.py | 23 +- .../unit/distribution/test_api_recordings.py | 5 +- tests/unit/distribution/test_distribution.py | 22 +- tests/unit/files/test_files.py | 4 +- .../unit/providers/batches/test_reference.py | 4 +- .../batches/test_reference_idempotency.py | 3 +- tests/unit/providers/files/test_s3_files.py | 12 +- .../providers/files/test_s3_files_auth.py | 3 +- .../inference/test_bedrock_adapter.py | 2 +- .../providers/inference/test_remote_vllm.py | 8 +- .../responses/test_streaming.py | 2 +- tests/unit/providers/nvidia/test_datastore.py | 3 +- tests/unit/providers/nvidia/test_eval.py | 16 +- .../unit/providers/nvidia/test_parameters.py | 4 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 11 +- .../nvidia/test_supervised_fine_tuning.py | 4 +- tests/unit/providers/test_bedrock.py | 3 +- .../utils/inference/test_openai_mixin.py | 3 +- .../utils/inference/test_prompt_adapter.py | 6 +- .../utils/memory/test_vector_store.py | 3 +- .../providers/utils/test_model_registry.py | 2 +- tests/unit/providers/vector_io/conftest.py | 3 +- tests/unit/providers/vector_io/test_faiss.py | 5 +- 
.../providers/vector_io/test_sqlite_vec.py | 2 +- .../test_vector_io_openai_vector_stores.py | 12 +- .../providers/vector_io/test_vector_utils.py | 3 +- tests/unit/rag/test_rag_query.py | 7 +- tests/unit/rag/test_vector_store.py | 7 +- tests/unit/registry/test_registry.py | 6 +- tests/unit/registry/test_registry_acl.py | 3 +- tests/unit/server/test_access_control.py | 3 +- tests/unit/server/test_auth.py | 2 +- tests/unit/server/test_resolver.py | 3 +- tests/unit/server/test_sse.py | 2 +- tests/unit/tools/test_tools_json_schema.py | 2 +- .../utils/inference/test_inference_store.py | 4 +- .../utils/responses/test_responses_store.py | 9 +- uv.lock | 21 + 358 files changed, 2337 insertions(+), 1424 deletions(-) create mode 100644 src/llama-stack-api/README.md create mode 100644 src/llama-stack-api/llama_stack_api/__init__.py rename src/{llama_stack/apis/agents => llama-stack-api/llama_stack_api}/agents.py (96%) rename src/{llama_stack/apis/batches => llama-stack-api/llama_stack_api}/batches.py (96%) rename src/{llama_stack/apis/benchmarks => llama-stack-api/llama_stack_api}/benchmarks.py (94%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api/common}/__init__.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/content_types.py (65%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/errors.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/job_types.py (94%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/responses.py (97%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/tracing.py (100%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/training_types.py (96%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/common/type_system.py (97%) rename src/{llama_stack/apis/conversations => llama-stack-api/llama_stack_api}/conversations.py (97%) rename src/{llama_stack/apis/datasetio => llama-stack-api/llama_stack_api}/datasetio.py (89%) rename src/{llama_stack/apis/datasets => llama-stack-api/llama_stack_api}/datasets.py (97%) rename src/{llama_stack/providers => llama-stack-api/llama_stack_api}/datatypes.py (51%) rename src/{llama_stack/apis/eval => llama-stack-api/llama_stack_api}/eval.py (92%) rename src/{llama_stack/apis/files => llama-stack-api/llama_stack_api}/files.py (96%) rename src/{llama_stack/apis/inference => llama-stack-api/llama_stack_api}/inference.py (99%) rename src/{llama_stack/apis/inspect => llama-stack-api/llama_stack_api}/inspect.py (94%) rename src/{llama_stack/apis/models => llama-stack-api/llama_stack_api}/models.py (95%) rename src/{llama_stack/apis/agents => llama-stack-api/llama_stack_api}/openai_responses.py (99%) rename src/{llama_stack/apis/post_training => llama-stack-api/llama_stack_api}/post_training.py (97%) rename src/{llama_stack/apis/prompts => llama-stack-api/llama_stack_api}/prompts.py (97%) rename src/{llama_stack/apis/providers => llama-stack-api/llama_stack_api}/providers.py (91%) rename src/{llama_stack/strong_typing => llama-stack-api/llama_stack_api}/py.typed (100%) rename src/{llama_stack/apis/tools => llama-stack-api/llama_stack_api}/rag_tool.py (98%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/resource.py (100%) rename src/{llama_stack/apis/safety => llama-stack-api/llama_stack_api}/safety.py (93%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/schema_utils.py (100%) rename src/{llama_stack/apis/scoring => 
llama-stack-api/llama_stack_api}/scoring.py (93%) rename src/{llama_stack/apis/scoring_functions => llama-stack-api/llama_stack_api}/scoring_functions.py (96%) rename src/{llama_stack/apis/shields => llama-stack-api/llama_stack_api}/shields.py (91%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/__init__.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/auxiliary.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/classdef.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/core.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/deserializer.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/docstring.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/exception.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/inspection.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/mapping.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/name.py (100%) create mode 100644 src/llama-stack-api/llama_stack_api/strong_typing/py.typed rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/schema.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/serialization.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/serializer.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/slots.py (100%) rename src/{llama_stack => llama-stack-api/llama_stack_api}/strong_typing/topological.py (100%) rename src/{llama_stack/apis/tools => llama-stack-api/llama_stack_api}/tools.py (95%) rename src/{llama_stack/apis/vector_io => llama-stack-api/llama_stack_api}/vector_io.py (98%) rename src/{llama_stack/apis/vector_stores => llama-stack-api/llama_stack_api}/vector_stores.py (96%) rename src/{llama_stack/apis => llama-stack-api/llama_stack_api}/version.py (100%) create mode 100644 src/llama-stack-api/pyproject.toml delete mode 100644 src/llama_stack/apis/agents/__init__.py delete mode 100644 src/llama_stack/apis/batches/__init__.py delete mode 100644 src/llama_stack/apis/benchmarks/__init__.py delete mode 100644 src/llama_stack/apis/common/__init__.py delete mode 100644 src/llama_stack/apis/conversations/__init__.py delete mode 100644 src/llama_stack/apis/datasetio/__init__.py delete mode 100644 src/llama_stack/apis/datasets/__init__.py delete mode 100644 src/llama_stack/apis/datatypes.py delete mode 100644 src/llama_stack/apis/eval/__init__.py delete mode 100644 src/llama_stack/apis/files/__init__.py delete mode 100644 src/llama_stack/apis/inference/__init__.py delete mode 100644 src/llama_stack/apis/inspect/__init__.py delete mode 100644 src/llama_stack/apis/models/__init__.py delete mode 100644 src/llama_stack/apis/post_training/__init__.py delete mode 100644 src/llama_stack/apis/prompts/__init__.py delete mode 100644 src/llama_stack/apis/providers/__init__.py delete mode 100644 src/llama_stack/apis/safety/__init__.py delete mode 100644 src/llama_stack/apis/scoring/__init__.py delete mode 100644 src/llama_stack/apis/scoring_functions/__init__.py delete mode 100644 src/llama_stack/apis/shields/__init__.py delete mode 100644 src/llama_stack/apis/tools/__init__.py delete mode 100644 src/llama_stack/apis/vector_io/__init__.py delete mode 100644 src/llama_stack/apis/vector_stores/__init__.py diff 
--git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index c605a30c3..b0f2c6e69 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -30,13 +30,16 @@ jobs:
          activate-environment: true
          version: 0.7.6

-      - name: Build Llama Stack package
-        run: |
-          uv build
+      - name: Build Llama Stack API package
+        working-directory: src/llama-stack-api
+        run: uv build

-      - name: Install Llama Stack package
+      - name: Build Llama Stack package
+        run: uv build
+
+      - name: Install Llama Stack package (with api stubs from local build)
         run: |
-          uv pip install dist/*.whl
+          uv pip install --find-links src/llama-stack-api/dist dist/*.whl

       - name: Verify Llama Stack package
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 19b83563c..6f4dd6a0e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: ruff
         args: [ --fix ]
-        exclude: ^src/llama_stack/strong_typing/.*$
+        exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
       - id: ruff-format

 - repo: https://github.com/adamchainz/blacken-docs
diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx
index 42819a4ac..005b85647 100644
--- a/docs/docs/concepts/apis/external.mdx
+++ b/docs/docs/concepts/apis/external.mdx
@@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha

 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


 def available_providers() -> list[ProviderSpec]:
@@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol

-from llama_stack.schema_utils import webmethod
+from llama_stack_api import webmethod


 class WeatherAPI(Protocol):
@@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol

-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     ProviderSpec,
     RemoteProviderSpec,
+    webmethod,
 )
-from llama_stack.schema_utils import webmethod
-

 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx
index c4a01bf7d..532ffaaf0 100644
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api import ProviderSpec


 def get_provider_spec() -> ProviderSpec:
diff --git a/docs/docs/providers/external/external-providers-guide.mdx b/docs/docs/providers/external/external-providers-guide.mdx
index 748fd62c0..dc813c75b 100644
--- a/docs/docs/providers/external/external-providers-guide.mdx
+++ b/docs/docs/providers/external/external-providers-guide.mdx
@@ -80,7 +80,7 @@ container_image: custom-vector-store:latest # optional

 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class.
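As a concrete sketch of the new import surface, a complete `get_provider_spec` might look like the following. The provider name, module path, and config class here are hypothetical; the field set simply mirrors the `InlineProviderSpec` example in the `llama-stack-api` README further down in this patch:

```python
# provider.py of a hypothetical external provider; all names are illustrative
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


def get_provider_spec() -> ProviderSpec:
    # The CLI imports this module and calls get_provider_spec() to discover
    # the provider's pip dependencies and config class.
    return InlineProviderSpec(
        api=Api.vector_io,  # which Llama Stack API this provider implements
        provider_type="inline::custom-vector-store",  # hypothetical provider type
        pip_packages=["custom-vector-client"],  # hypothetical runtime dependency
        module="llama_stack_provider_custom.vector_io",  # hypothetical module path
        config_class="llama_stack_provider_custom.config.CustomVectorStoreConfig",  # hypothetical
    )
```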
The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:

 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     ProviderSpec,
     Api,
     RemoteProviderSpec,
diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
index bfa2f29de..45631dff3 100644
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@@ -153,7 +153,7 @@ description: |
   Example using RAGQueryConfig with different search modes:

   ```python
-  from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

   # Vector search
   config = RAGQueryConfig(mode="vector", max_chunks=5)
@@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:

 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 65720df4a..769db32a7 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml

-from llama_stack.apis.version import LLAMA_STACK_API_V1  # noqa: E402
+from llama_stack_api import LLAMA_STACK_API_V1  # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402

 from .pyopenapi.options import Options  # noqa: E402
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 30fc9038d..afbb5c710 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

 from fastapi import UploadFile

-from llama_stack.apis.datatypes import Error
-from llama_stack.strong_typing.core import JsonType
-from llama_stack.strong_typing.docstring import Docstring, parse_type
-from llama_stack.strong_typing.inspection import (
+from llama_stack_api import (
+    Docstring,
+    Error,
+    JsonSchemaGenerator,
+    JsonType,
+    Schema,
+    SchemaOptions,
+    get_schema_identifier,
     is_generic_list,
     is_type_optional,
     is_type_union,
     is_unwrapped_body_param,
+    json_dump_string,
+    object_to_json,
+    parse_type,
+    python_type_to_name,
+    register_schema,
     unwrap_generic_list,
     unwrap_optional_type,
     unwrap_union_types,
 )
-from llama_stack.strong_typing.name import python_type_to_name
-from llama_stack.strong_typing.schema import (
-    get_schema_identifier,
-    JsonSchemaGenerator,
-    register_schema,
-    Schema,
-    SchemaOptions,
-)
-from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel

 from .operations import (
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index a1c95c7a7..42a554f2c 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -11,19 +11,21 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
-
 from termcolor
import colored -from llama_stack.strong_typing.inspection import get_signature - from typing import get_origin, get_args from fastapi import UploadFile from fastapi.params import File, Form from typing import Annotated -from llama_stack.schema_utils import ExtraBodyField +from llama_stack_api import ( + ExtraBodyField, + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, + get_signature, +) def split_prefix( diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 90bf54316..bfa35f539 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -9,7 +9,7 @@ import enum from dataclasses import dataclass from typing import Any, ClassVar, Dict, List, Optional, Union -from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType +from llama_stack_api import JsonType, Schema, StrictJsonType URL = str diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index c1425b250..762249eb8 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -11,8 +11,7 @@ from pathlib import Path from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args from pydantic import BaseModel -from llama_stack.strong_typing.schema import object_to_json, StrictJsonType -from llama_stack.strong_typing.inspection import is_unwrapped_body_param +from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json from llama_stack.core.resolver import api_protocol_map from .generator import Generator @@ -165,12 +164,12 @@ def _validate_api_delete_method_returns_none(method) -> str | None: return "has no return type annotation" return_type = hints['return'] - + # Allow OpenAI endpoints to return response objects since they follow OpenAI specification method_name = getattr(method, '__name__', '') if method_name.__contains__('openai_'): return None - + if return_type is not None and return_type is not type(None): return "does not return None where None is mandatory" diff --git a/pyproject.toml b/pyproject.toml index d12d28e8c..d287b4be7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "httpx", "jinja2>=3.1.6", "jsonschema", + "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) "openai>=2.5.0", "prompt-toolkit", "python-dotenv", @@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*"] +include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"] [[tool.uv.index]] name = "pytorch-cpu" @@ -190,6 +191,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] +llama-stack-api = [{ path = "src/llama-stack-api", editable = true }] [tool.ruff] line-length = 120 @@ -256,8 +258,8 @@ unfixable = [ ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["src"] -packages = ["llama_stack"] +mypy_path = ["src", "src/llama-stack-api"] +packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] warn_return_any = true @@ -279,15 +281,18 @@ exclude = [ "^src/llama_stack/core/store/registry\\.py$", "^src/llama_stack/core/utils/exec\\.py$", 
"^src/llama_stack/core/utils/prompt_for_config\\.py$", + # Moved to llama-stack-api but still excluded "^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", - "^src/llama_stack/providers/inline/datasetio/localfs/", - "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", - "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/", + "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$", + "^src/llama_stack/providers/inline/agents/meta_reference/", + "^src/llama_stack/providers/inline/datasetio/localfs/", + "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", + "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", "^src/llama_stack/providers/inline/post_training/common/validator\\.py$", "^src/llama_stack/providers/inline/safety/code_scanner/", @@ -337,7 +342,9 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama_stack/strong_typing/auxiliary\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$", + "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 855033f95..8099a3f0d 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,8 +14,8 @@ import os from pathlib import Path import fire +from llama_stack_api import ModelNotFoundError -from llama_stack.apis.common.errors import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index de79b4d17..d62d626ad 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None: """Extract docstring from the API protocol class.""" try: # Import the API module dynamically - api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()]) + api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()]) # Get the main protocol class (usually capitalized API name) protocol_class_name = api_name.title() @@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: # this string replace is ridiculous field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") - field_type = field_type.replace("llama_stack.apis.inference.inference.", "") + field_type = field_type.replace("llama_stack_api.inference.", "") field_type = field_type.replace("llama_stack.providers.", "") + field_type = field_type.replace("llama_stack_api.datatypes.", "") default_value = field.default if 
field.default_factory is not None:
diff --git a/src/llama-stack-api/README.md b/src/llama-stack-api/README.md
new file mode 100644
index 000000000..aa6b05722
--- /dev/null
+++ b/src/llama-stack-api/README.md
@@ -0,0 +1,103 @@
+# llama-stack-api
+
+API and provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
+
+## Overview
+
+`llama-stack-api` is a minimal-dependency package that contains:
+
+- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
+- **Provider Specifications**: Provider spec definitions for building custom providers
+- **Data Types**: Shared data types and models used across the Llama Stack ecosystem
+- **Type Utilities**: Strong typing utilities and schema validation
+
+## What This Package Does NOT Include
+
+- Server implementation (see `llama-stack` package)
+- Provider implementations (see `llama-stack` package)
+- CLI tools (see `llama-stack` package)
+- Runtime orchestration (see `llama-stack` package)
+
+## Use Cases
+
+This package is designed for:
+
+1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
+2. **Client Library Authors**: Use type definitions without server dependencies
+3. **Documentation Generation**: Generate API docs from protocol definitions
+4. **Type Checking**: Validate implementations against the official specs
+
+## Installation
+
+```bash
+pip install llama-stack-api
+```
+
+Or with uv:
+
+```bash
+uv pip install llama-stack-api
+```
+
+## Dependencies
+
+Minimal dependencies:
+- `pydantic>=2.11.9` - for data validation and serialization
+- `jsonschema` - for JSON schema utilities
+
+## Versioning
+
+This package follows semantic versioning independently from the main `llama-stack` package:
+
+- **Patch versions** (0.x.y): Documentation and internal improvements
+- **Minor versions** (0.x.0): New APIs and backward-compatible changes
+- **Major versions** (x.0.0): Breaking changes to existing APIs
+
+Current version: **0.4.0**
+
+## Usage Example
+
+All symbols are imported from the package root; sub-module imports are not supported:
+
+```python
+from llama_stack_api import (
+    Api,
+    Inference,
+    InlineProviderSpec,
+    OpenAIChatCompletionRequestWithExtraBody,
+)
+
+
+# Use protocol definitions for type checking
+class MyInferenceProvider(Inference):
+    async def openai_chat_completion(
+        self, params: OpenAIChatCompletionRequestWithExtraBody
+    ):
+        # Your implementation (see the Inference protocol for the full signature)
+        pass
+
+
+# Define provider specifications
+my_provider_spec = InlineProviderSpec(
+    api=Api.inference,
+    provider_type="inline::my-provider",
+    pip_packages=["my-dependencies"],
+    module="my_package.providers.inference",
+    config_class="my_package.providers.inference.MyConfig",
+)
+```
+
+## Relationship to llama-stack
+
+The main `llama-stack` package depends on `llama-stack-api` and provides:
+- Full server implementation
+- Built-in provider implementations
+- CLI tools for running and managing stacks
+- Runtime provider resolution and orchestration
+
+## Contributing
+
+See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
+
+## License
+
+MIT License - see LICENSE file for details.
+ +## Links + +- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack) +- [Documentation](https://llamastack.ai/) +- [Client Library](https://pypi.org/project/llama-stack-client/) diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama-stack-api/llama_stack_api/__init__.py new file mode 100644 index 000000000..8bbe9f8bd --- /dev/null +++ b/src/llama-stack-api/llama_stack_api/__init__.py @@ -0,0 +1,871 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Llama Stack API Specifications + +This package contains the API definitions, data types, and protocol specifications +for Llama Stack. It is designed to be a lightweight dependency for external providers +and clients that need to interact with Llama Stack APIs without requiring the full +server implementation. + +All imports from this package MUST use the form: + from llama_stack_api import + +Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported +and considered a code smell. All exported symbols are explicitly listed in __all__. +""" + +__version__ = "0.4.0" + +# Import submodules for those who need them +from . import common, strong_typing # noqa: F401 + +# Import all public API symbols +from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec +from .batches import Batches, BatchObject, ListBatchesResponse +from .benchmarks import ( + Benchmark, + BenchmarkInput, + Benchmarks, + CommonBenchmarkFields, + ListBenchmarksResponse, +) + +# Import commonly used types from common submodule +from .common.content_types import ( + URL, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + TextContentItem, + _URLOrData, +) +from .common.errors import ( + ConflictError, + DatasetNotFoundError, + InvalidConversationIdError, + ModelNotFoundError, + ModelTypeError, + ResourceNotFoundError, + TokenValidationError, + ToolGroupNotFoundError, + UnsupportedModelError, + VectorStoreNotFoundError, +) +from .common.job_types import Job, JobStatus +from .common.responses import Order, PaginatedResponse +from .common.training_types import Checkpoint, PostTrainingMetric +from .common.type_system import ( + ChatCompletionInputType, + CompletionInputType, + NumberType, + ParamType, + StringType, +) +from .conversations import ( + Conversation, + ConversationDeletedResource, + ConversationItem, + ConversationItemCreateRequest, + ConversationItemDeletedResource, + ConversationItemInclude, + ConversationItemList, + ConversationMessage, + Conversations, + Metadata, +) +from .datasetio import DatasetIO, DatasetStore +from .datasets import ( + CommonDatasetFields, + Dataset, + DatasetInput, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + RowsDataSource, + URIDataSource, +) +from .datatypes import ( + Api, + BenchmarksProtocolPrivate, + DatasetsProtocolPrivate, + DynamicApiMeta, + Error, + ExternalApiSpec, + HealthResponse, + HealthStatus, + InlineProviderSpec, + ModelsProtocolPrivate, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + RoutingTable, + ScoringFunctionsProtocolPrivate, + ShieldsProtocolPrivate, + ToolGroupsProtocolPrivate, + VectorStoresProtocolPrivate, +) +from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate +from .files import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + 
OpenAIFileObject, + OpenAIFilePurpose, +) +from .inference import ( + Bf16QuantizationConfig, + ChatCompletionResponseEventType, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Fp8QuantizationConfig, + GrammarResponseFormat, + GreedySamplingStrategy, + Inference, + InferenceProvider, + Int4QuantizationConfig, + JsonSchemaResponseFormat, + ListOpenAIChatCompletionResponse, + LogProbConfig, + ModelStore, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionMessageContent, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionTextOnlyMessageContent, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChatCompletionUsage, + OpenAIChatCompletionUsageCompletionTokensDetails, + OpenAIChatCompletionUsagePromptTokensDetails, + OpenAIChoice, + OpenAIChoiceDelta, + OpenAIChoiceLogprobs, + OpenAIChunkChoice, + OpenAICompletion, + OpenAICompletionChoice, + OpenAICompletionLogprobs, + OpenAICompletionRequestWithExtraBody, + OpenAICompletionWithInputMessages, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIFile, + OpenAIFileFile, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAITokenLogProb, + OpenAIToolMessageParam, + OpenAITopLogProb, + OpenAIUserMessageParam, + QuantizationConfig, + QuantizationType, + RerankData, + RerankResponse, + ResponseFormat, + ResponseFormatType, + SamplingParams, + SamplingStrategy, + SystemMessage, + SystemMessageBehavior, + TextTruncation, + TokenLogProbs, + ToolChoice, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, +) +from .inspect import ( + ApiFilter, + HealthInfo, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) +from .models import ( + CommonModelFields, + ListModelsResponse, + Model, + ModelInput, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) +from .openai_responses import ( + AllowedToolsFilter, + ApprovalFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + MCPListToolsTool, + OpenAIDeleteResponseObject, + OpenAIResponseAnnotationCitation, + OpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseAnnotationFileCitation, + OpenAIResponseAnnotationFilePath, + OpenAIResponseAnnotations, + OpenAIResponseContentPart, + OpenAIResponseContentPartOutputText, + OpenAIResponseContentPartReasoningSummary, + OpenAIResponseContentPartReasoningText, + OpenAIResponseContentPartRefusal, + OpenAIResponseError, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMCPApprovalRequest, + OpenAIResponseMCPApprovalResponse, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + 
OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseFailed, + OpenAIResponseObjectStreamResponseFileSearchCallCompleted, + OpenAIResponseObjectStreamResponseFileSearchCallInProgress, + OpenAIResponseObjectStreamResponseFileSearchCallSearching, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseIncomplete, + OpenAIResponseObjectStreamResponseInProgress, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsFailed, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseObjectStreamResponseOutputTextDone, + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded, + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone, + OpenAIResponseObjectStreamResponseReasoningTextDelta, + OpenAIResponseObjectStreamResponseReasoningTextDone, + OpenAIResponseObjectStreamResponseRefusalDelta, + OpenAIResponseObjectStreamResponseRefusalDone, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseObjectWithInput, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAIResponseTool, + OpenAIResponseToolMCP, + OpenAIResponseUsage, + OpenAIResponseUsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails, + WebSearchToolTypes, +) +from .post_training import ( + AlgorithmConfig, + DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, + EfficiencyConfig, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + PostTraining, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobLogStream, + PostTrainingJobStatusResponse, + PostTrainingRLHFRequest, + QATFinetuningConfig, + RLHFAlgorithm, + TrainingConfig, +) +from .prompts import ListPromptsResponse, Prompt, Prompts +from .providers import ListProvidersResponse, ProviderInfo, Providers +from .rag_tool import ( + DefaultRAGQueryGeneratorConfig, + LLMRAGQueryGeneratorConfig, + RAGDocument, + RAGQueryConfig, + RAGQueryGenerator, + RAGQueryGeneratorConfig, + RAGQueryResult, + RAGSearchMode, + Ranker, + RRFRanker, + WeightedRanker, +) +from .resource import Resource, ResourceType +from .safety import ( + ModerationObject, + ModerationObjectResults, + RunShieldResponse, 
+ Safety, + SafetyViolation, + ShieldStore, + ViolationLevel, +) +from .schema_utils import ( + CallableT, + ExtraBodyField, + WebMethod, + json_schema_type, + register_schema, + webmethod, +) +from .scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFunctionStore, + ScoringResult, + ScoringResultRow, +) +from .scoring_functions import ( + AggregationFunctionType, + BasicScoringFnParams, + CommonScoringFnFields, + ListScoringFunctionsResponse, + LLMAsJudgeScoringFnParams, + RegexParserScoringFnParams, + ScoringFn, + ScoringFnInput, + ScoringFnParams, + ScoringFnParamsType, + ScoringFunctions, +) +from .shields import ( + CommonShieldFields, + ListShieldsResponse, + Shield, + ShieldInput, + Shields, +) + +# Import from strong_typing +from .strong_typing.core import JsonType +from .strong_typing.docstring import Docstring, parse_type +from .strong_typing.inspection import ( + get_signature, + is_generic_list, + is_type_optional, + is_type_union, + is_unwrapped_body_param, + unwrap_generic_list, + unwrap_optional_type, + unwrap_union_types, +) +from .strong_typing.name import python_type_to_name +from .strong_typing.schema import ( + JsonSchemaGenerator, + Schema, + SchemaOptions, + StrictJsonType, + get_schema_identifier, +) +from .strong_typing.serialization import json_dump_string, object_to_json +from .tools import ( + ListToolDefsResponse, + ListToolGroupsResponse, + SpecialToolGroup, + ToolDef, + ToolGroup, + ToolGroupInput, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + ToolStore, +) +from .vector_io import ( + Chunk, + ChunkMetadata, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, + QueryChunksResponse, + SearchRankingOptions, + VectorIO, + VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyAuto, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, + VectorStoreContent, + VectorStoreCreateRequest, + VectorStoreDeleteResponse, + VectorStoreFileBatchObject, + VectorStoreFileContentResponse, + VectorStoreFileCounts, + VectorStoreFileDeleteResponse, + VectorStoreFileLastError, + VectorStoreFileObject, + VectorStoreFilesListInBatchResponse, + VectorStoreFileStatus, + VectorStoreListFilesResponse, + VectorStoreListResponse, + VectorStoreModifyRequest, + VectorStoreObject, + VectorStoreSearchRequest, + VectorStoreSearchResponse, + VectorStoreSearchResponsePage, + VectorStoreTable, +) +from .vector_stores import VectorStore, VectorStoreInput +from .version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + +__all__ = [ + # Submodules + "common", + "strong_typing", + # Version constants + "LLAMA_STACK_API_V1", + "LLAMA_STACK_API_V1ALPHA", + "LLAMA_STACK_API_V1BETA", + # API Symbols + "Agents", + "AggregationFunctionType", + "AlgorithmConfig", + "AllowedToolsFilter", + "Api", + "ApiFilter", + "ApprovalFilter", + "BasicScoringFnParams", + "Batches", + "BatchObject", + "Benchmark", + "BenchmarkConfig", + "BenchmarkInput", + "Benchmarks", + "BenchmarksProtocolPrivate", + "Bf16QuantizationConfig", + "CallableT", + "ChatCompletionInputType", + "ChatCompletionResponseEventType", + "Checkpoint", + "Chunk", + "ChunkMetadata", + "CommonBenchmarkFields", + "ConflictError", + "CommonDatasetFields", + "CommonModelFields", + "CommonScoringFnFields", + "CommonShieldFields", + "CompletionInputType", + "CompletionRequest", + "Conversation", + "ConversationDeletedResource", + "ConversationItem", + "ConversationItemCreateRequest", + 
"ConversationItemDeletedResource", + "ConversationItemInclude", + "ConversationItemList", + "ConversationMessage", + "Conversations", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DataSource", + "Dataset", + "DatasetFormat", + "DatasetIO", + "DatasetInput", + "DatasetPurpose", + "DatasetNotFoundError", + "DatasetStore", + "DatasetType", + "Datasets", + "DatasetsProtocolPrivate", + "DefaultRAGQueryGeneratorConfig", + "Docstring", + "DynamicApiMeta", + "EfficiencyConfig", + "EmbeddingTaskType", + "EmbeddingsResponse", + "Error", + "Eval", + "EvalCandidate", + "EvaluateResponse", + "ExpiresAfter", + "ExternalApiSpec", + "ExtraBodyField", + "Files", + "Fp8QuantizationConfig", + "get_schema_identifier", + "get_signature", + "GrammarResponseFormat", + "GreedySamplingStrategy", + "HealthInfo", + "HealthResponse", + "HealthStatus", + "ImageContentItem", + "Inference", + "InferenceProvider", + "InlineProviderSpec", + "Inspect", + "Int4QuantizationConfig", + "InterleavedContent", + "InterleavedContentItem", + "InvalidConversationIdError", + "is_generic_list", + "is_type_optional", + "is_type_union", + "is_unwrapped_body_param", + "Job", + "JobStatus", + "json_dump_string", + "json_schema_type", + "JsonSchemaGenerator", + "JsonSchemaResponseFormat", + "JsonType", + "LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", + "ListBatchesResponse", + "ListBenchmarksResponse", + "ListDatasetsResponse", + "ListModelsResponse", + "ListOpenAIChatCompletionResponse", + "ListOpenAIFileResponse", + "ListOpenAIResponseInputItem", + "ListOpenAIResponseObject", + "ListPostTrainingJobsResponse", + "ListPromptsResponse", + "ListProvidersResponse", + "ListRoutesResponse", + "ListScoringFunctionsResponse", + "ListShieldsResponse", + "ListToolDefsResponse", + "ListToolGroupsResponse", + "LogProbConfig", + "LoraFinetuningConfig", + "MCPListToolsTool", + "Metadata", + "Model", + "ModelCandidate", + "ModelInput", + "ModelNotFoundError", + "ModelStore", + "ModelType", + "ModelTypeError", + "Models", + "ModelsProtocolPrivate", + "ModerationObject", + "ModerationObjectResults", + "NumberType", + "object_to_json", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionMessageContent", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletionTextOnlyMessageContent", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChatCompletionUsageCompletionTokensDetails", + "OpenAIChatCompletionUsagePromptTokensDetails", + "OpenAIChoice", + "OpenAIChoiceDelta", + "OpenAIChoiceLogprobs", + "OpenAIChunkChoice", + "OpenAICompletion", + "OpenAICompletionChoice", + "OpenAICompletionLogprobs", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletionWithInputMessages", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenAIDeleteResponseObject", + "OpenAIDeveloperMessageParam", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingsResponse", + "OpenAIFile", + "OpenAIFileDeleteResponse", + "OpenAIFileFile", + "OpenAIFileObject", + "OpenAIFilePurpose", + "OpenAIImageURL", + "OpenAIJSONSchema", + "OpenAIListModelsResponse", + "OpenAIMessageParam", + "OpenAIModel", + "Order", + "OpenAIResponseAnnotationCitation", + 
"OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPart", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseError", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIResponseInput", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolMCP", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseObject", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFileSearchToolCallResults", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + 
"OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTextFormat", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "OpenAIResponseUsageInputTokensDetails", + "OpenAIResponseUsageOutputTokensDetails", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OptimizerConfig", + "OptimizerType", + "PaginatedResponse", + "ParamType", + "parse_type", + "PostTraining", + "PostTrainingMetric", + "PostTrainingJob", + "PostTrainingJobArtifactsResponse", + "PostTrainingJobLogStream", + "PostTrainingJobStatusResponse", + "PostTrainingRLHFRequest", + "Prompt", + "Prompts", + "ProviderInfo", + "ProviderSpec", + "Providers", + "python_type_to_name", + "QATFinetuningConfig", + "QuantizationConfig", + "QuantizationType", + "QueryChunksResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGenerator", + "RAGQueryGeneratorConfig", + "RAGQueryResult", + "RAGSearchMode", + "register_schema", + "RLHFAlgorithm", + "RRFRanker", + "Ranker", + "RegexParserScoringFnParams", + "RemoteProviderConfig", + "RemoteProviderSpec", + "RerankData", + "RerankResponse", + "Resource", + "ResourceNotFoundError", + "ResourceType", + "ResponseFormat", + "ResponseFormatType", + "ResponseGuardrail", + "ResponseGuardrailSpec", + "RouteInfo", + "RoutingTable", + "RowsDataSource", + "RunShieldResponse", + "Safety", + "SafetyViolation", + "SamplingParams", + "SamplingStrategy", + "ScoreBatchResponse", + "ScoreResponse", + "Scoring", + "ScoringFn", + "ScoringFnInput", + "ScoringFnParams", + "ScoringFnParamsType", + "ScoringFunctionStore", + "ScoringFunctions", + "ScoringFunctionsProtocolPrivate", + "ScoringResult", + "ScoringResultRow", + "Schema", + "SchemaOptions", + "SearchRankingOptions", + "Shield", + "ShieldInput", + "ShieldStore", + "Shields", + "ShieldsProtocolPrivate", + "SpecialToolGroup", + "StrictJsonType", + "StringType", + "SystemMessage", + "SystemMessageBehavior", + "TextContentItem", + "TextTruncation", + "TokenLogProbs", + "TokenValidationError", + "ToolChoice", + "ToolGroupNotFoundError", + "ToolDef", + "ToolGroup", + "ToolGroupInput", + "ToolGroups", + "ToolGroupsProtocolPrivate", + "ToolInvocationResult", + "ToolResponseMessage", + "ToolRuntime", + "ToolStore", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "TrainingConfig", + "UnsupportedModelError", + "unwrap_generic_list", + "unwrap_optional_type", + "unwrap_union_types", + "URIDataSource", + "URL", + "_URLOrData", + "UserMessage", + "VectorIO", + "VectorStore", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "VectorStoreContent", + "VectorStoreCreateRequest", + "VectorStoreDeleteResponse", + "VectorStoreFileBatchObject", + "VectorStoreFileContentResponse", + "VectorStoreFileCounts", + "VectorStoreFileDeleteResponse", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFileStatus", + "VectorStoreFilesListInBatchResponse", + "VectorStoreInput", + "VectorStoreListFilesResponse", + "VectorStoreListResponse", + "VectorStoreModifyRequest", + "VectorStoreObject", + "VectorStoreSearchRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VectorStoreTable", + "VectorStoreNotFoundError", + "VectorStoresProtocolPrivate", + "VersionInfo", + "ViolationLevel", + "webmethod", + "WebMethod", + 
"WebSearchToolTypes", + "WeightedRanker", +] diff --git a/src/llama_stack/apis/agents/agents.py b/src/llama-stack-api/llama_stack_api/agents.py similarity index 96% rename from src/llama_stack/apis/agents/agents.py rename to src/llama-stack-api/llama_stack_api/agents.py index 09687ef33..ca0611746 100644 --- a/src/llama_stack/apis/agents/agents.py +++ b/src/llama-stack-api/llama_stack_api/agents.py @@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.responses import Order -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.common.responses import Order +from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 from .openai_responses import ( ListOpenAIResponseInputItem, diff --git a/src/llama_stack/apis/batches/batches.py b/src/llama-stack-api/llama_stack_api/batches.py similarity index 96% rename from src/llama_stack/apis/batches/batches.py rename to src/llama-stack-api/llama_stack_api/batches.py index 1ee9fdb15..00c47d39f 100644 --- a/src/llama_stack/apis/batches/batches.py +++ b/src/llama-stack-api/llama_stack_api/batches.py @@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 try: from openai.types import Batch as BatchObject diff --git a/src/llama_stack/apis/benchmarks/benchmarks.py b/src/llama-stack-api/llama_stack_api/benchmarks.py similarity index 94% rename from src/llama_stack/apis/benchmarks/benchmarks.py rename to src/llama-stack-api/llama_stack_api/benchmarks.py index 9a67269c3..e9ac3a8b8 100644 --- a/src/llama_stack/apis/benchmarks/benchmarks.py +++ b/src/llama-stack-api/llama_stack_api/benchmarks.py @@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA class CommonBenchmarkFields(BaseModel): diff --git a/src/llama_stack/apis/__init__.py b/src/llama-stack-api/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama_stack/apis/__init__.py rename to src/llama-stack-api/llama_stack_api/common/__init__.py diff --git a/src/llama_stack/apis/common/content_types.py b/src/llama-stack-api/llama_stack_api/common/content_types.py similarity index 65% rename from src/llama_stack/apis/common/content_types.py rename to src/llama-stack-api/llama_stack_api/common/content_types.py index 950dd17ff..1bfe109c1 100644 --- a/src/llama_stack/apis/common/content_types.py +++ b/src/llama-stack-api/llama_stack_api/common/content_types.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from enum import Enum from typing import Annotated, Literal from pydantic import BaseModel, Field, model_validator -from llama_stack.models.llama.datatypes import ToolCall -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type @@ -101,43 +99,3 @@ class ImageDelta(BaseModel): type: Literal["image"] = "image" image: bytes - - -class ToolCallParseStatus(Enum): - """Status of tool call parsing during streaming. - :cvar started: Tool call parsing has begun - :cvar in_progress: Tool call parsing is ongoing - :cvar failed: Tool call parsing failed - :cvar succeeded: Tool call parsing completed successfully - """ - - started = "started" - in_progress = "in_progress" - failed = "failed" - succeeded = "succeeded" - - -@json_schema_type -class ToolCallDelta(BaseModel): - """A tool call content delta for streaming responses. - - :param type: Discriminator type of the delta. Always "tool_call" - :param tool_call: Either an in-progress tool call string or the final parsed tool call - :param parse_status: Current parsing status of the tool call - """ - - type: Literal["tool_call"] = "tool_call" - - # you either send an in-progress tool call so the client can stream a long - # code generation or you send the final parsed tool call at the end of the - # stream - tool_call: str | ToolCall - parse_status: ToolCallParseStatus - - -# streaming completions send a stream of ContentDeltas -ContentDelta = Annotated[ - TextDelta | ImageDelta | ToolCallDelta, - Field(discriminator="type"), -] -register_schema(ContentDelta, name="ContentDelta") diff --git a/src/llama_stack/apis/common/errors.py b/src/llama-stack-api/llama_stack_api/common/errors.py similarity index 100% rename from src/llama_stack/apis/common/errors.py rename to src/llama-stack-api/llama_stack_api/common/errors.py diff --git a/src/llama_stack/apis/common/job_types.py b/src/llama-stack-api/llama_stack_api/common/job_types.py similarity index 94% rename from src/llama_stack/apis/common/job_types.py rename to src/llama-stack-api/llama_stack_api/common/job_types.py index 5da42bfd3..b6ef35d7f 100644 --- a/src/llama_stack/apis/common/job_types.py +++ b/src/llama-stack-api/llama_stack_api/common/job_types.py @@ -7,7 +7,7 @@ from enum import Enum from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class JobStatus(Enum): diff --git a/src/llama_stack/apis/common/responses.py b/src/llama-stack-api/llama_stack_api/common/responses.py similarity index 97% rename from src/llama_stack/apis/common/responses.py rename to src/llama-stack-api/llama_stack_api/common/responses.py index 53a290eea..c843ce1d9 100644 --- a/src/llama_stack/apis/common/responses.py +++ b/src/llama-stack-api/llama_stack_api/common/responses.py @@ -9,7 +9,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class Order(Enum): diff --git a/src/llama_stack/apis/common/tracing.py b/src/llama-stack-api/llama_stack_api/common/tracing.py similarity index 100% rename from src/llama_stack/apis/common/tracing.py rename to src/llama-stack-api/llama_stack_api/common/tracing.py diff --git a/src/llama_stack/apis/common/training_types.py b/src/llama-stack-api/llama_stack_api/common/training_types.py similarity index 96% rename from src/llama_stack/apis/common/training_types.py rename to 
src/llama-stack-api/llama_stack_api/common/training_types.py index 5c236a25d..aa3481770 100644 --- a/src/llama_stack/apis/common/training_types.py +++ b/src/llama-stack-api/llama_stack_api/common/training_types.py @@ -8,7 +8,7 @@ from datetime import datetime from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/apis/common/type_system.py b/src/llama-stack-api/llama_stack_api/common/type_system.py similarity index 97% rename from src/llama_stack/apis/common/type_system.py rename to src/llama-stack-api/llama_stack_api/common/type_system.py index c71501548..8297713cf 100644 --- a/src/llama_stack/apis/common/type_system.py +++ b/src/llama-stack-api/llama_stack_api/common/type_system.py @@ -8,7 +8,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type diff --git a/src/llama_stack/apis/conversations/conversations.py b/src/llama-stack-api/llama_stack_api/conversations.py similarity index 97% rename from src/llama_stack/apis/conversations/conversations.py rename to src/llama-stack-api/llama_stack_api/conversations.py index 3fdd3b47e..4854181d1 100644 --- a/src/llama_stack/apis/conversations/conversations.py +++ b/src/llama-stack-api/llama_stack_api/conversations.py @@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse, @@ -20,9 +21,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageWebSearchToolCall, ) -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 Metadata = dict[str, str] diff --git a/src/llama_stack/apis/datasetio/datasetio.py b/src/llama-stack-api/llama_stack_api/datasetio.py similarity index 89% rename from src/llama_stack/apis/datasetio/datasetio.py rename to src/llama-stack-api/llama_stack_api/datasetio.py index a0c4a1afc..309a8ff41 100644 --- a/src/llama_stack/apis/datasetio/datasetio.py +++ b/src/llama-stack-api/llama_stack_api/datasetio.py @@ -6,10 +6,10 @@ from typing import Any, Protocol, runtime_checkable -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.version import LLAMA_STACK_API_V1BETA -from llama_stack.schema_utils import webmethod +from llama_stack_api.common.responses import PaginatedResponse +from llama_stack_api.datasets import Dataset +from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetStore(Protocol): diff --git a/src/llama_stack/apis/datasets/datasets.py b/src/llama-stack-api/llama_stack_api/datasets.py similarity index 97% rename from src/llama_stack/apis/datasets/datasets.py 
rename to src/llama-stack-api/llama_stack_api/datasets.py
index 9bedc6209..76d787078 100644
--- a/src/llama_stack/apis/datasets/datasets.py
+++ b/src/llama-stack-api/llama_stack_api/datasets.py
@@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol

 from pydantic import BaseModel, Field

-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA


 class DatasetPurpose(StrEnum):
diff --git a/src/llama_stack/providers/datatypes.py b/src/llama-stack-api/llama_stack_api/datatypes.py
similarity index 51%
rename from src/llama_stack/providers/datatypes.py
rename to src/llama-stack-api/llama_stack_api/datatypes.py
index 9be3edb8e..f024068f3 100644
--- a/src/llama_stack/providers/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/datatypes.py
@@ -4,21 +4,172 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import StrEnum
+from enum import Enum, EnumMeta, StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse

 from pydantic import BaseModel, Field

-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model
-from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.tools import ToolGroup
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api.scoring_functions import ScoringFn
+from llama_stack_api.shields import Shield
+from llama_stack_api.tools import ToolGroup
+from llama_stack_api.vector_stores import VectorStore
+
+
+class DynamicApiMeta(EnumMeta):
+    def __new__(cls, name, bases, namespace):
+        # Store the original enum values
+        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
+
+        # Create the enum class
+        cls = super().__new__(cls, name, bases, namespace)
+
+        # Store the original values for reference
+        cls._original_values = original_values
+        # Initialize _dynamic_values
+        cls._dynamic_values = {}
+
+        return cls
+
+    def __call__(cls, value):
+        try:
+            return super().__call__(value)
+        except ValueError as e:
+            # If this value was already dynamically added, return it
+            if value in cls._dynamic_values:
+                return cls._dynamic_values[value]
+
+            # Normalize the unknown value into a candidate member name
+            member_name = value.lower().replace("-", "_")
+
+            # If this member name already exists in the enum, return the existing member
+            if member_name in cls._member_map_:
+                return cls._member_map_[member_name]
+
+            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
+            # register new APIs explicitly
+            raise ValueError(f"API '{value}' does not exist.
Use Api.add() to register new APIs.") from e
+
+    def __iter__(cls):
+        # Allow iteration over both static and dynamic members
+        yield from super().__iter__()
+        if hasattr(cls, "_dynamic_values"):
+            yield from cls._dynamic_values.values()
+
+    def add(cls, value):
+        """
+        Add a new API to the enum.
+        Used to register external APIs.
+        """
+        member_name = value.lower().replace("-", "_")
+
+        # If this member name already exists in the enum, return it
+        if member_name in cls._member_map_:
+            return cls._member_map_[member_name]
+
+        # Create a new enum member
+        member = object.__new__(cls)
+        member._name_ = member_name
+        member._value_ = value
+
+        # Add it to the enum class
+        cls._member_map_[member_name] = member
+        cls._member_names_.append(member_name)
+        cls._member_type_ = str
+
+        # Store it in our dynamic values
+        cls._dynamic_values[value] = member
+
+        return member
+
+
+@json_schema_type
+class Api(Enum, metaclass=DynamicApiMeta):
+    """Enumeration of all available APIs in the Llama Stack system.
+    :cvar providers: Provider management and configuration
+    :cvar inference: Text generation, chat completions, and embeddings
+    :cvar safety: Content moderation and safety shields
+    :cvar agents: Agent orchestration and execution
+    :cvar batches: Batch processing for asynchronous API requests
+    :cvar vector_io: Vector database operations and queries
+    :cvar datasetio: Dataset input/output operations
+    :cvar scoring: Model output evaluation and scoring
+    :cvar eval: Model evaluation and benchmarking framework
+    :cvar post_training: Fine-tuning and model training
+    :cvar tool_runtime: Tool execution and management
+    :cvar models: Model metadata and management
+    :cvar shields: Safety shield implementations
+    :cvar vector_stores: Vector store routing (used only by the routing table)
+    :cvar datasets: Dataset creation and management
+    :cvar scoring_functions: Scoring function definitions
+    :cvar benchmarks: Benchmark suite management
+    :cvar tool_groups: Tool group organization
+    :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
+    :cvar conversations: Conversation creation and management
+    :cvar inspect: Built-in system inspection and introspection
+    """
+
+    providers = "providers"
+    inference = "inference"
+    safety = "safety"
+    agents = "agents"
+    batches = "batches"
+    vector_io = "vector_io"
+    datasetio = "datasetio"
+    scoring = "scoring"
+    eval = "eval"
+    post_training = "post_training"
+    tool_runtime = "tool_runtime"
+
+    models = "models"
+    shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
+    datasets = "datasets"
+    scoring_functions = "scoring_functions"
+    benchmarks = "benchmarks"
+    tool_groups = "tool_groups"
+    files = "files"
+    prompts = "prompts"
+    conversations = "conversations"
+
+    # built-in API
+    inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+ + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: str | None = None + + +class ExternalApiSpec(BaseModel): + """Specification for an external API implementation.""" + + module: str = Field(..., description="Python module containing the API implementation") + name: str = Field(..., description="Name of the API") + pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") + protocol: str = Field(..., description="Name of the protocol class for the API") + + +# Provider-related types (merged from providers/datatypes.py) +# NOTE: These imports are forward references to avoid circular dependencies +# They will be resolved at runtime when the classes are used class ModelsProtocolPrivate(Protocol): diff --git a/src/llama_stack/apis/eval/eval.py b/src/llama-stack-api/llama_stack_api/eval.py similarity index 92% rename from src/llama_stack/apis/eval/eval.py rename to src/llama-stack-api/llama_stack_api/eval.py index accb04ce1..7a11c221e 100644 --- a/src/llama_stack/apis/eval/eval.py +++ b/src/llama-stack-api/llama_stack_api/eval.py @@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.common.job_types import Job -from llama_stack.apis.inference import SamplingParams, SystemMessage -from llama_stack.apis.scoring import ScoringResult -from llama_stack.apis.scoring_functions import ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.job_types import Job +from llama_stack_api.inference import SamplingParams, SystemMessage +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring import ScoringResult +from llama_stack_api.scoring_functions import ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/files/files.py b/src/llama-stack-api/llama_stack_api/files.py similarity index 96% rename from src/llama_stack/apis/files/files.py rename to src/llama-stack-api/llama_stack_api/files.py index f0ea2f892..8a75a1c39 100644 --- a/src/llama_stack/apis/files/files.py +++ b/src/llama-stack-api/llama_stack_api/files.py @@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field -from llama_stack.apis.common.responses import Order -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.responses import Order +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # OpenAI Files API Models diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama-stack-api/llama_stack_api/inference.py similarity index 99% rename from src/llama_stack/apis/inference/inference.py rename to 
src/llama-stack-api/llama_stack_api/inference.py
index 9f04917c9..b42de95be 100644
--- a/src/llama_stack/apis/inference/inference.py
+++ b/src/llama-stack-api/llama_stack_api/inference.py
@@ -18,14 +18,14 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict

-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.responses import (
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.common.responses import (
     Order,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.models import Model
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA


 @json_schema_type
diff --git a/src/llama_stack/apis/inspect/inspect.py b/src/llama-stack-api/llama_stack_api/inspect.py
similarity index 94%
rename from src/llama_stack/apis/inspect/inspect.py
rename to src/llama-stack-api/llama_stack_api/inspect.py
index 235abb124..8326e9e6b 100644
--- a/src/llama_stack/apis/inspect/inspect.py
+++ b/src/llama-stack-api/llama_stack_api/inspect.py
@@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.version import (
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import (
     LLAMA_STACK_API_V1,
 )
-from llama_stack.providers.datatypes import HealthStatus
-from llama_stack.schema_utils import json_schema_type, webmethod


 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
diff --git a/src/llama_stack/apis/models/models.py b/src/llama-stack-api/llama_stack_api/models.py
similarity index 95%
rename from src/llama_stack/apis/models/models.py
rename to src/llama-stack-api/llama_stack_api/models.py
index bbb359b51..833864ec2 100644
--- a/src/llama_stack/apis/models/models.py
+++ b/src/llama-stack-api/llama_stack_api/models.py
@@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, ConfigDict, Field, field_validator

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 class CommonModelFields(BaseModel):
diff --git a/src/llama_stack/apis/agents/openai_responses.py b/src/llama-stack-api/llama_stack_api/openai_responses.py
similarity index 99%
rename from src/llama_stack/apis/agents/openai_responses.py
rename to src/llama-stack-api/llama_stack_api/openai_responses.py
index 16657ab32..70139a98a 100644
--- a/src/llama_stack/apis/agents/openai_responses.py
+++ b/src/llama-stack-api/llama_stack_api/openai_responses.py
@@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal

 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict

-from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions

 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
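A usage note on the DynamicApiMeta machinery introduced in llama_stack_api/datatypes.py above: Api(value) now resolves only built-in members and values explicitly registered through Api.add(), instead of silently minting new members. A minimal sketch of the registration flow, assuming a hypothetical external API identifier "my-external-api" (the identifier, module, and protocol names below are illustrative and not part of this patch):

    from llama_stack_api import Api, ExternalApiSpec

    member = Api.add("my-external-api")       # registers and returns the new member
    assert Api("my-external-api") is member   # value lookup now resolves to it
    assert member.name == "my_external_api"   # hyphens are normalized to underscores
    # Api("never-registered") raises ValueError, pointing the caller at Api.add()

    # A hypothetical ExternalApiSpec describing where the implementation lives:
    spec = ExternalApiSpec(
        module="my_external_api.api",
        name="my-external-api",
        protocol="MyExternalApi",
    )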
diff --git a/src/llama_stack/apis/post_training/post_training.py b/src/llama-stack-api/llama_stack_api/post_training.py similarity index 97% rename from src/llama_stack/apis/post_training/post_training.py rename to src/llama-stack-api/llama_stack_api/post_training.py index 2b7a6222f..0cc9277d9 100644 --- a/src/llama_stack/apis/post_training/post_training.py +++ b/src/llama-stack-api/llama_stack_api/post_training.py @@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.job_types import JobStatus -from llama_stack.apis.common.training_types import Checkpoint -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.content_types import URL +from llama_stack_api.common.job_types import JobStatus +from llama_stack_api.common.training_types import Checkpoint +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/prompts/prompts.py b/src/llama-stack-api/llama_stack_api/prompts.py similarity index 97% rename from src/llama_stack/apis/prompts/prompts.py rename to src/llama-stack-api/llama_stack_api/prompts.py index 406ae529c..651d03e61 100644 --- a/src/llama_stack/apis/prompts/prompts.py +++ b/src/llama-stack-api/llama_stack_api/prompts.py @@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/providers/providers.py b/src/llama-stack-api/llama_stack_api/providers.py similarity index 91% rename from src/llama_stack/apis/providers/providers.py rename to src/llama-stack-api/llama_stack_api/providers.py index e1872571d..5b555b82f 100644 --- a/src/llama_stack/apis/providers/providers.py +++ b/src/llama-stack-api/llama_stack_api/providers.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import HealthResponse -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.datatypes import HealthResponse +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/py.typed similarity index 100% rename from src/llama_stack/strong_typing/py.typed rename to src/llama-stack-api/llama_stack_api/py.typed diff --git a/src/llama_stack/apis/tools/rag_tool.py b/src/llama-stack-api/llama_stack_api/rag_tool.py similarity index 98% rename from src/llama_stack/apis/tools/rag_tool.py rename to src/llama-stack-api/llama_stack_api/rag_tool.py index 8bcc89bf0..b5edd51af 100644 --- a/src/llama_stack/apis/tools/rag_tool.py +++ b/src/llama-stack-api/llama_stack_api/rag_tool.py @@ -9,7 +9,7 
@@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator -from llama_stack.apis.common.content_types import URL, InterleavedContent +from llama_stack_api.common.content_types import URL, InterleavedContent class RRFRanker(BaseModel): diff --git a/src/llama_stack/apis/resource.py b/src/llama-stack-api/llama_stack_api/resource.py similarity index 100% rename from src/llama_stack/apis/resource.py rename to src/llama-stack-api/llama_stack_api/resource.py diff --git a/src/llama_stack/apis/safety/safety.py b/src/llama-stack-api/llama_stack_api/safety.py similarity index 93% rename from src/llama_stack/apis/safety/safety.py rename to src/llama-stack-api/llama_stack_api/safety.py index 8872cc518..ef84be2ea 100644 --- a/src/llama_stack/apis/safety/safety.py +++ b/src/llama-stack-api/llama_stack_api/safety.py @@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.shields import Shield -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.shields import Shield +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/schema_utils.py b/src/llama-stack-api/llama_stack_api/schema_utils.py similarity index 100% rename from src/llama_stack/schema_utils.py rename to src/llama-stack-api/llama_stack_api/schema_utils.py diff --git a/src/llama_stack/apis/scoring/scoring.py b/src/llama-stack-api/llama_stack_api/scoring.py similarity index 93% rename from src/llama_stack/apis/scoring/scoring.py rename to src/llama-stack-api/llama_stack_api/scoring.py index 03d943e94..47d144d21 100644 --- a/src/llama_stack/apis/scoring/scoring.py +++ b/src/llama-stack-api/llama_stack_api/scoring.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1 # mapping of metric to value ScoringResultRow = dict[str, Any] diff --git a/src/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama-stack-api/llama_stack_api/scoring_functions.py similarity index 96% rename from src/llama_stack/apis/scoring_functions/scoring_functions.py rename to src/llama-stack-api/llama_stack_api/scoring_functions.py index 78f4a7541..f75336e54 100644 --- a/src/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/src/llama-stack-api/llama_stack_api/scoring_functions.py @@ -16,10 +16,10 @@ from typing import ( from pydantic import BaseModel, Field -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.type_system 
import ParamType +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # Perhaps more structure can be imposed on these functions. Maybe they could be associated diff --git a/src/llama_stack/apis/shields/shields.py b/src/llama-stack-api/llama_stack_api/shields.py similarity index 91% rename from src/llama_stack/apis/shields/shields.py rename to src/llama-stack-api/llama_stack_api/shields.py index 659ba8b75..2aeb83333 100644 --- a/src/llama_stack/apis/shields/shields.py +++ b/src/llama-stack-api/llama_stack_api/shields.py @@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonShieldFields(BaseModel): diff --git a/src/llama_stack/strong_typing/__init__.py b/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py similarity index 100% rename from src/llama_stack/strong_typing/__init__.py rename to src/llama-stack-api/llama_stack_api/strong_typing/__init__.py diff --git a/src/llama_stack/strong_typing/auxiliary.py b/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py similarity index 100% rename from src/llama_stack/strong_typing/auxiliary.py rename to src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py diff --git a/src/llama_stack/strong_typing/classdef.py b/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py similarity index 100% rename from src/llama_stack/strong_typing/classdef.py rename to src/llama-stack-api/llama_stack_api/strong_typing/classdef.py diff --git a/src/llama_stack/strong_typing/core.py b/src/llama-stack-api/llama_stack_api/strong_typing/core.py similarity index 100% rename from src/llama_stack/strong_typing/core.py rename to src/llama-stack-api/llama_stack_api/strong_typing/core.py diff --git a/src/llama_stack/strong_typing/deserializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py similarity index 100% rename from src/llama_stack/strong_typing/deserializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py diff --git a/src/llama_stack/strong_typing/docstring.py b/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py similarity index 100% rename from src/llama_stack/strong_typing/docstring.py rename to src/llama-stack-api/llama_stack_api/strong_typing/docstring.py diff --git a/src/llama_stack/strong_typing/exception.py b/src/llama-stack-api/llama_stack_api/strong_typing/exception.py similarity index 100% rename from src/llama_stack/strong_typing/exception.py rename to src/llama-stack-api/llama_stack_api/strong_typing/exception.py diff --git a/src/llama_stack/strong_typing/inspection.py b/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py similarity index 100% rename from src/llama_stack/strong_typing/inspection.py rename to src/llama-stack-api/llama_stack_api/strong_typing/inspection.py diff --git a/src/llama_stack/strong_typing/mapping.py 
b/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py similarity index 100% rename from src/llama_stack/strong_typing/mapping.py rename to src/llama-stack-api/llama_stack_api/strong_typing/mapping.py diff --git a/src/llama_stack/strong_typing/name.py b/src/llama-stack-api/llama_stack_api/strong_typing/name.py similarity index 100% rename from src/llama_stack/strong_typing/name.py rename to src/llama-stack-api/llama_stack_api/strong_typing/name.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/strong_typing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/llama_stack/strong_typing/schema.py b/src/llama-stack-api/llama_stack_api/strong_typing/schema.py similarity index 100% rename from src/llama_stack/strong_typing/schema.py rename to src/llama-stack-api/llama_stack_api/strong_typing/schema.py diff --git a/src/llama_stack/strong_typing/serialization.py b/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py similarity index 100% rename from src/llama_stack/strong_typing/serialization.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serialization.py diff --git a/src/llama_stack/strong_typing/serializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py similarity index 100% rename from src/llama_stack/strong_typing/serializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serializer.py diff --git a/src/llama_stack/strong_typing/slots.py b/src/llama-stack-api/llama_stack_api/strong_typing/slots.py similarity index 100% rename from src/llama_stack/strong_typing/slots.py rename to src/llama-stack-api/llama_stack_api/strong_typing/slots.py diff --git a/src/llama_stack/strong_typing/topological.py b/src/llama-stack-api/llama_stack_api/strong_typing/topological.py similarity index 100% rename from src/llama_stack/strong_typing/topological.py rename to src/llama-stack-api/llama_stack_api/strong_typing/topological.py diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama-stack-api/llama_stack_api/tools.py similarity index 95% rename from src/llama_stack/apis/tools/tools.py rename to src/llama-stack-api/llama_stack_api/tools.py index 4e7cf2544..6571c2047 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama-stack-api/llama_stack_api/tools.py @@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel from typing_extensions import runtime_checkable -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.content_types import URL, InterleavedContent +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama-stack-api/llama_stack_api/vector_io.py similarity index 98% rename from src/llama_stack/apis/vector_io/vector_io.py rename to src/llama-stack-api/llama_stack_api/vector_io.py index 699241128..053e569f4 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama-stack-api/llama_stack_api/vector_io.py @@ -13,12 +13,12 @@ 
from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body, Query from pydantic import BaseModel, Field -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod -from llama_stack.strong_typing.schema import register_schema +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import InterleavedContent +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.strong_typing.schema import register_schema +from llama_stack_api.vector_stores import VectorStore +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/vector_stores/vector_stores.py b/src/llama-stack-api/llama_stack_api/vector_stores.py similarity index 96% rename from src/llama_stack/apis/vector_stores/vector_stores.py rename to src/llama-stack-api/llama_stack_api/vector_stores.py index 524624028..0a1e6c53c 100644 --- a/src/llama_stack/apis/vector_stores/vector_stores.py +++ b/src/llama-stack-api/llama_stack_api/vector_stores.py @@ -8,7 +8,7 @@ from typing import Literal from pydantic import BaseModel -from llama_stack.apis.resource import Resource, ResourceType +from llama_stack_api.resource import Resource, ResourceType # Internal resource type for storing the vector store routing and other information diff --git a/src/llama_stack/apis/version.py b/src/llama-stack-api/llama_stack_api/version.py similarity index 100% rename from src/llama_stack/apis/version.py rename to src/llama-stack-api/llama_stack_api/version.py diff --git a/src/llama-stack-api/pyproject.toml b/src/llama-stack-api/pyproject.toml new file mode 100644 index 000000000..a00472d36 --- /dev/null +++ b/src/llama-stack-api/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.uv] +required-version = ">=0.7.0" + +[project] +name = "llama-stack-api" +version = "0.1.0" +authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] +description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs" +readme = "README.md" +requires-python = ">=3.12" +license = { "text" = "MIT" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", +] +dependencies = [ + "pydantic>=2.11.9", + "jsonschema", + "opentelemetry-sdk>=1.30.0", + "opentelemetry-exporter-otlp-proto-http>=1.30.0", +] + +[project.urls] +Homepage = "https://github.com/llamastack/llama-stack" + +[tool.setuptools.packages.find] +where = ["."] +include = ["llama_stack_api", "llama_stack_api.*"] + +[tool.setuptools.package-data] +llama_stack_api = ["py.typed"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "UP", # pyupgrade + "B", # flake8-bugbear + "B9", # flake8-bugbear subset + "C", # comprehensions + "E", # pycodestyle + "F", # Pyflakes + "N", # Naming + "W", # Warnings + "DTZ", # datetime rules + "I", # isort 
(imports order) + "RUF001", # Checks for ambiguous Unicode characters in strings + "RUF002", # Checks for ambiguous Unicode characters in docstrings + "RUF003", # Checks for ambiguous Unicode characters in comments + "PLC2401", # Checks for the use of non-ASCII characters in variable names +] +ignore = [ + # The following ignores are desired by the project maintainers. + "E402", # Module level import not at top of file + "E501", # Line too long + "F405", # Maybe undefined or defined from star import + "C408", # Ignored because we like the dict keyword argument syntax + "N812", # Ignored because import torch.nn.functional as F is PyTorch convention + + # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. + "C901", # Complexity of the function is too high +] +unfixable = [ + "PLE2515", +] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually + +[tool.ruff.lint.per-file-ignores] +"llama_stack_api/apis/**/__init__.py" = ["F403"] + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = ["classmethod", "pydantic.field_validator"] diff --git a/src/llama_stack/apis/agents/__init__.py b/src/llama_stack/apis/agents/__init__.py deleted file mode 100644 index 6416b283b..000000000 --- a/src/llama_stack/apis/agents/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .agents import * diff --git a/src/llama_stack/apis/batches/__init__.py b/src/llama_stack/apis/batches/__init__.py deleted file mode 100644 index 9ce7d3d75..000000000 --- a/src/llama_stack/apis/batches/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .batches import Batches, BatchObject, ListBatchesResponse - -__all__ = ["Batches", "BatchObject", "ListBatchesResponse"] diff --git a/src/llama_stack/apis/benchmarks/__init__.py b/src/llama_stack/apis/benchmarks/__init__.py deleted file mode 100644 index 62d1b367c..000000000 --- a/src/llama_stack/apis/benchmarks/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .benchmarks import * diff --git a/src/llama_stack/apis/common/__init__.py b/src/llama_stack/apis/common/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/apis/common/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/apis/conversations/__init__.py b/src/llama_stack/apis/conversations/__init__.py deleted file mode 100644 index b6ddc5999..000000000 --- a/src/llama_stack/apis/conversations/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .conversations import ( - Conversation, - ConversationDeletedResource, - ConversationItem, - ConversationItemCreateRequest, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - Metadata, -) - -__all__ = [ - "Conversation", - "ConversationDeletedResource", - "ConversationItem", - "ConversationItemCreateRequest", - "ConversationItemDeletedResource", - "ConversationItemList", - "Conversations", - "Metadata", -] diff --git a/src/llama_stack/apis/datasetio/__init__.py b/src/llama_stack/apis/datasetio/__init__.py deleted file mode 100644 index 8c087bfa4..000000000 --- a/src/llama_stack/apis/datasetio/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasetio import * diff --git a/src/llama_stack/apis/datasets/__init__.py b/src/llama_stack/apis/datasets/__init__.py deleted file mode 100644 index 9c9a128d2..000000000 --- a/src/llama_stack/apis/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasets import * diff --git a/src/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py deleted file mode 100644 index ae01c5dfc..000000000 --- a/src/llama_stack/apis/datatypes.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, EnumMeta - -from pydantic import BaseModel, Field - -from llama_stack.schema_utils import json_schema_type - - -class DynamicApiMeta(EnumMeta): - def __new__(cls, name, bases, namespace): - # Store the original enum values - original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} - - # Create the enum class - cls = super().__new__(cls, name, bases, namespace) - - # Store the original values for reference - cls._original_values = original_values - # Initialize _dynamic_values - cls._dynamic_values = {} - - return cls - - def __call__(cls, value): - try: - return super().__call__(value) - except ValueError as e: - # If this value was already dynamically added, return it - if value in cls._dynamic_values: - return cls._dynamic_values[value] - - # If the value doesn't exist, create a new enum member - # Create a new member name from the value - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return the existing member - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Instead of creating a new member, raise ValueError to force users to use Api.add() to - # register new APIs explicitly - raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e - - def __iter__(cls): - # Allow iteration over both static and dynamic members - yield from super().__iter__() - if hasattr(cls, "_dynamic_values"): - yield from cls._dynamic_values.values() - - def add(cls, value): - """ - Add a new API to the enum. - Used to register external APIs. 
- """ - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return it - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Create a new enum member - member = object.__new__(cls) - member._name_ = member_name - member._value_ = value - - # Add it to the enum class - cls._member_map_[member_name] = member - cls._member_names_.append(member_name) - cls._member_type_ = str - - # Store it in our dynamic values - cls._dynamic_values[value] = member - - return member - - -@json_schema_type -class Api(Enum, metaclass=DynamicApiMeta): - """Enumeration of all available APIs in the Llama Stack system. - :cvar providers: Provider management and configuration - :cvar inference: Text generation, chat completions, and embeddings - :cvar safety: Content moderation and safety shields - :cvar agents: Agent orchestration and execution - :cvar batches: Batch processing for asynchronous API requests - :cvar vector_io: Vector database operations and queries - :cvar datasetio: Dataset input/output operations - :cvar scoring: Model output evaluation and scoring - :cvar eval: Model evaluation and benchmarking framework - :cvar post_training: Fine-tuning and model training - :cvar tool_runtime: Tool execution and management - :cvar telemetry: Observability and system monitoring - :cvar models: Model metadata and management - :cvar shields: Safety shield implementations - :cvar datasets: Dataset creation and management - :cvar scoring_functions: Scoring function definitions - :cvar benchmarks: Benchmark suite management - :cvar tool_groups: Tool group organization - :cvar files: File storage and management - :cvar prompts: Prompt versions and management - :cvar inspect: Built-in system inspection and introspection - """ - - providers = "providers" - inference = "inference" - safety = "safety" - agents = "agents" - batches = "batches" - vector_io = "vector_io" - datasetio = "datasetio" - scoring = "scoring" - eval = "eval" - post_training = "post_training" - tool_runtime = "tool_runtime" - - models = "models" - shields = "shields" - vector_stores = "vector_stores" # only used for routing table - datasets = "datasets" - scoring_functions = "scoring_functions" - benchmarks = "benchmarks" - tool_groups = "tool_groups" - files = "files" - prompts = "prompts" - conversations = "conversations" - - # built-in API - inspect = "inspect" - - -@json_schema_type -class Error(BaseModel): - """ - Error response from the API. Roughly follows RFC 7807. 
- - :param status: HTTP status code - :param title: Error title, a short summary of the error which is invariant for an error type - :param detail: Error detail, a longer human-readable description of the error - :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error - """ - - status: int - title: str - detail: str - instance: str | None = None - - -class ExternalApiSpec(BaseModel): - """Specification for an external API implementation.""" - - module: str = Field(..., description="Python module containing the API implementation") - name: str = Field(..., description="Name of the API") - pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") - protocol: str = Field(..., description="Name of the protocol class for the API") diff --git a/src/llama_stack/apis/eval/__init__.py b/src/llama_stack/apis/eval/__init__.py deleted file mode 100644 index 28a1d6049..000000000 --- a/src/llama_stack/apis/eval/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval import * diff --git a/src/llama_stack/apis/files/__init__.py b/src/llama_stack/apis/files/__init__.py deleted file mode 100644 index 189e4de19..000000000 --- a/src/llama_stack/apis/files/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .files import * diff --git a/src/llama_stack/apis/inference/__init__.py b/src/llama_stack/apis/inference/__init__.py deleted file mode 100644 index f0c8783c1..000000000 --- a/src/llama_stack/apis/inference/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inference import * diff --git a/src/llama_stack/apis/inspect/__init__.py b/src/llama_stack/apis/inspect/__init__.py deleted file mode 100644 index 016937e3d..000000000 --- a/src/llama_stack/apis/inspect/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inspect import * diff --git a/src/llama_stack/apis/models/__init__.py b/src/llama_stack/apis/models/__init__.py deleted file mode 100644 index ee90106b6..000000000 --- a/src/llama_stack/apis/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import * diff --git a/src/llama_stack/apis/post_training/__init__.py b/src/llama_stack/apis/post_training/__init__.py deleted file mode 100644 index 695575a30..000000000 --- a/src/llama_stack/apis/post_training/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import * diff --git a/src/llama_stack/apis/prompts/__init__.py b/src/llama_stack/apis/prompts/__init__.py deleted file mode 100644 index 6070f3450..000000000 --- a/src/llama_stack/apis/prompts/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .prompts import ListPromptsResponse, Prompt, Prompts - -__all__ = ["Prompt", "Prompts", "ListPromptsResponse"] diff --git a/src/llama_stack/apis/providers/__init__.py b/src/llama_stack/apis/providers/__init__.py deleted file mode 100644 index e35e2fe47..000000000 --- a/src/llama_stack/apis/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .providers import * diff --git a/src/llama_stack/apis/safety/__init__.py b/src/llama_stack/apis/safety/__init__.py deleted file mode 100644 index d93bc1355..000000000 --- a/src/llama_stack/apis/safety/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .safety import * diff --git a/src/llama_stack/apis/scoring/__init__.py b/src/llama_stack/apis/scoring/__init__.py deleted file mode 100644 index 624b9e704..000000000 --- a/src/llama_stack/apis/scoring/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring import * diff --git a/src/llama_stack/apis/scoring_functions/__init__.py b/src/llama_stack/apis/scoring_functions/__init__.py deleted file mode 100644 index fc1de0311..000000000 --- a/src/llama_stack/apis/scoring_functions/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring_functions import * diff --git a/src/llama_stack/apis/shields/__init__.py b/src/llama_stack/apis/shields/__init__.py deleted file mode 100644 index 783a4d124..000000000 --- a/src/llama_stack/apis/shields/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import * diff --git a/src/llama_stack/apis/tools/__init__.py b/src/llama_stack/apis/tools/__init__.py deleted file mode 100644 index b25310ecf..000000000 --- a/src/llama_stack/apis/tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .rag_tool import * -from .tools import * diff --git a/src/llama_stack/apis/vector_io/__init__.py b/src/llama_stack/apis/vector_io/__init__.py deleted file mode 100644 index 3f4c60805..000000000 --- a/src/llama_stack/apis/vector_io/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_io import * diff --git a/src/llama_stack/apis/vector_stores/__init__.py b/src/llama_stack/apis/vector_stores/__init__.py deleted file mode 100644 index 8fc34058a..000000000 --- a/src/llama_stack/apis/vector_stores/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_stores import * diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 18141be5f..50fe394fc 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,6 +9,7 @@ import sys from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType @@ -21,7 +22,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index cc1ca051b..0a4e22b09 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,6 +11,7 @@ from functools import lru_cache from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( @@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index fb3a22109..27ded7ede 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,6 +6,7 @@ import sys +from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint @@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api log = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 49e01794e..41acacdb5 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,11 +12,10 @@ from enum import Enum from typing import Any, Union, get_args, get_origin import httpx +from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from 
termcolor import cprint -from llama_stack.providers.datatypes import RemoteProviderConfig - _CLIENT_CLASSES = {} diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 5d4a54184..bdb3b9734 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,6 +6,8 @@ import textwrap from typing import Any +from llama_stack_api import Api, ProviderSpec + from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, @@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, ProviderSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index f83834522..b94cd4fdd 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,9 +8,7 @@ import secrets import time from typing import Any, Literal -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.conversations.conversations import ( +from llama_stack_api import ( Conversation, ConversationDeletedResource, ConversationItem, @@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) +from pydantic import BaseModel, TypeAdapter + from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 2182ea4e5..4231363b6 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,22 +9,34 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse +from llama_stack_api import ( + Api, + Benchmark, + BenchmarkInput, + Dataset, + DatasetInput, + DatasetIO, + Eval, + Inference, + Model, + ModelInput, + ProviderSpec, + Resource, + Safety, + Scoring, + ScoringFn, + ScoringFnInput, + Shield, + ShieldInput, + ToolGroup, + ToolGroupInput, + ToolRuntime, + VectorIO, + VectorStore, + VectorStoreInput, +) from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset, DatasetInput -from llama_stack.apis.eval import Eval -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Model, ModelInput -from llama_stack.apis.resource import Resource -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput -from llama_stack.apis.shields import Shield, ShieldInput -from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.log 
import LoggingConfig -from llama_stack.providers.datatypes import Api, ProviderSpec LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 9be5ffb49..162f9f2b0 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,17 +10,17 @@ import os from typing import Any import yaml -from pydantic import BaseModel - -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.external import load_external_apis -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) +from pydantic import BaseModel + +from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.external import load_external_apis +from llama_stack.log import get_logger logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index 12e9824ad..ce0c7eb72 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,8 +6,8 @@ import yaml +from llama_stack_api import Api, ExternalApiSpec -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 07b51128f..53ddd3475 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,19 +6,19 @@ from importlib.metadata import version -from pydantic import BaseModel - -from llama_stack.apis.inspect import ( +from llama_stack_api import ( HealthInfo, + HealthStatus, Inspect, ListRoutesResponse, RouteInfo, VersionInfo, ) +from pydantic import BaseModel + from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.external import load_external_apis from llama_stack.core.server.routes import get_all_api_routes -from llama_stack.providers.datatypes import HealthStatus class DistributionInspectConfig(BaseModel): diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index db990368b..959284720 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse +from llama_stack_api import is_unwrapped_body_param try: from llama_stack_client import ( @@ -57,7 +58,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook from llama_stack.log import get_logger, setup_logging -from llama_stack.strong_typing.inspection import is_unwrapped_body_param logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index 1a6f38cb5..d9532b978 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,9 +7,9 @@ import json from typing import Any +from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel -from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import 
StackRunConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 7095ffd18..7337d9e35 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,11 +7,10 @@ import asyncio from typing import Any +from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel -from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus from .datatypes import StackRunConfig from .utils.config import redact_sensitive_fields diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 8bf371fed..ca154fbc6 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,29 +8,46 @@ import importlib.metadata import inspect from typing import Any -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.datatypes import ExternalApiSpec -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InferenceProvider -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers as ProvidersAPI -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api import ( + LLAMA_STACK_API_V1ALPHA, + Agents, + Api, + Batches, + Benchmarks, + BenchmarksProtocolPrivate, + Conversations, + DatasetIO, + Datasets, + DatasetsProtocolPrivate, + Eval, + ExternalApiSpec, + Files, + Inference, + InferenceProvider, + Inspect, + Models, + ModelsProtocolPrivate, + PostTraining, + Prompts, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + Safety, + Scoring, + ScoringFunctions, + ScoringFunctionsProtocolPrivate, + Shields, + ShieldsProtocolPrivate, + ToolGroups, + ToolGroupsProtocolPrivate, + ToolRuntime, + VectorIO, + VectorStore, +) +from llama_stack_api import ( + Providers as ProvidersAPI, +) + from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -44,18 +61,6 @@ from llama_stack.core.external import load_external_apis from llama_stack.core.store import DistributionRegistry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - Api, - BenchmarksProtocolPrivate, - DatasetsProtocolPrivate, - ModelsProtocolPrivate, - ProviderSpec, - RemoteProviderConfig, - RemoteProviderSpec, - ScoringFunctionsProtocolPrivate, - ShieldsProtocolPrivate, - ToolGroupsProtocolPrivate, -) logger = 
get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 729d1c9ea..c2d051422 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -6,13 +6,14 @@ from typing import Any +from llama_stack_api import Api, RoutingTable + from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, ) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry -from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py index 2f1d5f78e..dcf247874 100644 --- a/src/llama_stack/core/routers/datasets.py +++ b/src/llama_stack/core/routers/datasets.py @@ -6,11 +6,9 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import DatasetPurpose, DataSource +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py index ffca81bf0..cbbbf5cc5 100644 --- a/src/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,15 +6,19 @@ from typing import Any -from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack.apis.scoring import ( +from llama_stack_api import ( + BenchmarkConfig, + Eval, + EvaluateResponse, + Job, + RoutingTable, ScoreBatchResponse, ScoreResponse, Scoring, ScoringFnParams, ) + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index d6270d428..a538ab02e 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -11,17 +11,19 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Body -from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam -from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, Inference, ListOpenAIChatCompletionResponse, + ModelNotFoundError, + ModelType, + ModelTypeError, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, @@ -35,18 +37,17 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, RerankResponse, + RoutingTable, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, -) -from llama_stack.apis.models import 
ModelType +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import TypeAdapter + from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span from llama_stack.log import get_logger from llama_stack.models.llama.llama3.chat_format import ChatFormat from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index e5ff2ada9..f85bbb767 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,13 +6,10 @@ from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield +from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield + from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index fb13d94a4..984a8e2a7 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -6,13 +6,12 @@ from typing import Any -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( URL, -) -from llama_stack.apis.tools import ( ListToolDefsResponse, ToolRuntime, ) + from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index ed5fb8253..bfd090e32 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -9,14 +9,16 @@ import uuid from typing import Annotated, Any from fastapi import Body - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.models import ModelType -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + HealthResponse, + HealthStatus, + InterleavedContent, + ModelType, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, + RoutingTable, SearchRankingOptions, VectorIO, VectorStoreChunkingStrategy, @@ -33,9 +35,9 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py index 8c87d395d..66830bc41 100644 --- a/src/llama_stack/core/routing_tables/benchmarks.py +++ b/src/llama_stack/core/routing_tables/benchmarks.py 
@@ -6,7 +6,8 @@ from typing import Any -from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse + from llama_stack.core.datatypes import ( BenchmarkWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py index d6faf93c5..cfbafc9a8 100644 --- a/src/llama_stack/core/routing_tables/common.py +++ b/src/llama_stack/core/routing_tables/common.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import Model -from llama_stack.apis.resource import ResourceType +from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable + from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.access_control.datatypes import Action from llama_stack.core.datatypes import ( @@ -21,7 +20,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, RoutingTable logger = get_logger(name=__name__, category="core::routing_tables") diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py index b129c9ec5..c49c9769b 100644 --- a/src/llama_stack/core/routing_tables/datasets.py +++ b/src/llama_stack/core/routing_tables/datasets.py @@ -7,18 +7,19 @@ import uuid from typing import Any -from llama_stack.apis.common.errors import DatasetNotFoundError -from llama_stack.apis.datasets import ( +from llama_stack_api import ( Dataset, + DatasetNotFoundError, DatasetPurpose, Datasets, DatasetType, DataSource, ListDatasetsResponse, + ResourceType, RowsDataSource, URIDataSource, ) -from llama_stack.apis.resource import ResourceType + from llama_stack.core.datatypes import ( DatasetWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 1fb1186cd..e1210a139 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -7,8 +7,16 @@ import time from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel +from llama_stack_api import ( + ListModelsResponse, + Model, + ModelNotFoundError, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) + from llama_stack.core.datatypes import ( ModelWithOwner, RegistryEntrySource, diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py index 520f07014..66165ac2f 100644 --- a/src/llama_stack/core/routing_tables/scoring_functions.py +++ b/src/llama_stack/core/routing_tables/scoring_functions.py @@ -4,14 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( ListScoringFunctionsResponse, + ParamType, + ResourceType, ScoringFn, ScoringFnParams, ScoringFunctions, ) + from llama_stack.core.datatypes import ( ScoringFnWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py index b1918d20a..0f981c49d 100644 --- a/src/llama_stack/core/routing_tables/shields.py +++ b/src/llama_stack/core/routing_tables/shields.py @@ -6,8 +6,8 @@ from typing import Any -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields +from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields + from llama_stack.core.datatypes import ( ShieldWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py index 2d47bbb17..a552cb96e 100644 --- a/src/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -6,9 +6,16 @@ from typing import Any -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ToolGroupNotFoundError -from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups +from llama_stack_api import ( + URL, + ListToolDefsResponse, + ListToolGroupsResponse, + ToolDef, + ToolGroup, + ToolGroupNotFoundError, + ToolGroups, +) + from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner from llama_stack.log import get_logger diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index e77739abe..f95463b3c 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -6,12 +6,12 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.models import ModelType -from llama_stack.apis.resource import ResourceType - # Removed VectorStores import to avoid exposing public API -from llama_stack.apis.vector_io.vector_io import ( +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + ResourceType, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -22,6 +22,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import ( VectorStoreWithOwner, ) diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py index da398bf99..a7f5d7916 100644 --- a/src/llama_stack/core/server/auth_providers.py +++ b/src/llama_stack/core/server/auth_providers.py @@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse import httpx import jwt +from llama_stack_api import TokenValidationError from pydantic import BaseModel, Field -from llama_stack.apis.common.errors import TokenValidationError from llama_stack.core.datatypes import ( AuthenticationConfig, CustomAuthConfig, diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 4f7ff2295..e7a84937d 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -10,11 +10,10 @@ from collections.abc import Callable 
from typing import Any from aiohttp import hdrs +from llama_stack_api import Api, ExternalApiSpec, WebMethod from starlette.routing import Route -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.resolver import api_protocol_map -from llama_stack.schema_utils import WebMethod EndpointFunc = Callable[..., Any] PathParams = dict[str, str] diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 5bf876c02..8116348ec 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -28,11 +28,10 @@ from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse +from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError from openai import BadRequestError from pydantic import BaseModel, ValidationError -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import ( AuthenticationRequiredError, @@ -58,7 +57,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import LoggingConfig, get_logger, setup_logging -from llama_stack.providers.datatypes import Api from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 2ed0eccd2..674c35f31 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -12,27 +12,31 @@ import tempfile from typing import Any import yaml +from llama_stack_api import ( + Agents, + Api, + Batches, + Benchmarks, + Conversations, + DatasetIO, + Datasets, + Eval, + Files, + Inference, + Inspect, + Models, + PostTraining, + Prompts, + Providers, + Safety, + Scoring, + ScoringFunctions, + Shields, + ToolGroups, + ToolRuntime, + VectorIO, +) -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry @@ -54,7 +58,6 @@ from 
llama_stack.core.storage.datatypes import ( from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py index 459c1aa1a..1a56277ea 100644 --- a/src/llama_stack/core/telemetry/telemetry.py +++ b/src/llama_stack/core/telemetry/telemetry.py @@ -16,6 +16,7 @@ from typing import ( cast, ) +from llama_stack_api import json_schema_type, register_schema from opentelemetry import metrics, trace from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -28,7 +29,6 @@ from pydantic import BaseModel, Field from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Primitive -from llama_stack.schema_utils import json_schema_type, register_schema ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py index 88e72688f..fd76e3ccb 100644 --- a/src/llama_stack/distributions/dell/dell.py +++ b/src/llama_stack/distributions/dell/dell.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..67af0e92a 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -6,7 +6,8 @@ from pathlib import Path -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..59deca6d0 100644 --- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -5,8 +5,8 @@ # the root directory of this source tree. 
-from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models import ModelType +from llama_stack_api import DatasetPurpose, ModelType, URIDataSource + from llama_stack.core.datatypes import ( BenchmarkInput, BuildProvider, diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 7b7773289..1a8126290 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -7,6 +7,8 @@ from typing import Any +from llama_stack_api import RemoteProviderSpec + from llama_stack.core.datatypes import ( BuildProvider, Provider, @@ -19,7 +21,6 @@ from llama_stack.core.datatypes import ( ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings -from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index e6813806a..faf5fb085 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -10,10 +10,9 @@ from typing import Any, Literal import jinja2 import rich import yaml +from llama_stack_api import DatasetPurpose, ModelType from pydantic import BaseModel, Field -from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 880e0b680..025fcc676 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -5,25 +5,26 @@ # the root directory of this source tree. 
-from llama_stack.apis.agents import ( +from llama_stack_api import ( Agents, + Conversations, + Inference, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseObject, + OpenAIResponsePrompt, + OpenAIResponseText, Order, + ResponseGuardrail, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.agents.agents import ResponseGuardrail -from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.inference import ( - Inference, -) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.datatypes import AccessRule from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index ed7f959c0..347eeef78 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -8,14 +8,15 @@ import time import uuid from collections.abc import AsyncIterator -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ConversationItem, + Conversations, + Inference, + InvalidConversationIdError, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, @@ -25,20 +26,16 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponsePrompt, OpenAIResponseText, OpenAIResponseTextFormat, -) -from llama_stack.apis.common.errors import ( - InvalidConversationIdError, -) -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.conversations.conversations import ConversationItem -from llama_stack.apis.inference import ( - Inference, - OpenAIMessageParam, OpenAISystemMessageParam, + Order, + ResponseGuardrailSpec, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO +from pydantic import BaseModel, TypeAdapter + from llama_stack.log import get_logger from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index c16bc8df3..6a791e92d 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -8,10 +8,18 @@ import uuid from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( AllowedToolsFilter, ApprovalFilter, + Inference, MCPListToolsTool, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + 
OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionToolCall, + OpenAIChoice, + OpenAIMessageParam, OpenAIResponseContentPartOutputText, OpenAIResponseContentPartReasoningText, OpenAIResponseContentPartRefusal, @@ -56,16 +64,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseUsageOutputTokensDetails, WebSearchToolTypes, ) -from llama_stack.apis.inference import ( - Inference, - OpenAIAssistantMessageParam, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAIChatCompletionRequestWithExtraBody, - OpenAIChatCompletionToolCall, - OpenAIChoice, - OpenAIMessageParam, -) + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -1023,9 +1022,9 @@ class StreamingResponseOrchestrator: self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput] ) -> AsyncIterator[OpenAIResponseObjectStream]: """Process all tools and emit appropriate streaming events.""" + from llama_stack_api import ToolDef from openai.types.chat import ChatCompletionToolParam - from llama_stack.apis.tools import ToolDef from llama_stack.models.llama.datatypes import ToolDefinition from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 09a161d50..38fb2a94f 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -9,7 +9,12 @@ import json from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ImageContentItem, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIImageURL, OpenAIResponseInputToolFileSearch, OpenAIResponseInputToolMCP, OpenAIResponseObjectStreamResponseFileSearchCallCompleted, @@ -23,22 +28,15 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectStreamResponseWebSearchCallSearching, OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFileSearchToolCallResults, - OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, -) -from llama_stack.apis.common.content_types import ( - ImageContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIImageURL, OpenAIToolMessageParam, + TextContentItem, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger @@ -398,6 +396,10 @@ class ToolExecutor: # Build output message message: Any if mcp_tool_to_server and function.name in mcp_tool_to_server: + from llama_stack_api import ( + OpenAIResponseOutputMessageMCPCall, + ) + message = OpenAIResponseOutputMessageMCPCall( id=item_id, arguments=function.arguments, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py 
b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py index 3b9a14b01..35ad03378 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py @@ -7,10 +7,10 @@ from dataclasses import dataclass from typing import cast -from openai.types.chat import ChatCompletionToolParam -from pydantic import BaseModel - -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIChatCompletionToolCall, + OpenAIMessageParam, + OpenAIResponseFormatParam, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseInputToolFileSearch, @@ -26,7 +26,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTool, OpenAIResponseToolMCP, ) -from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam +from openai.types.chat import ChatCompletionToolParam +from pydantic import BaseModel class ToolExecutionResult(BaseModel): diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 26af1d595..943bbae41 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -9,9 +9,23 @@ import re import uuid from collections.abc import Sequence -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, OpenAIResponseAnnotationFileCitation, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, OpenAIResponseInput, OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputMessageContent, @@ -27,28 +41,12 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPListTools, OpenAIResponseText, -) -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIChatCompletionToolCallFunction, - OpenAIChoice, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAIJSONSchema, - OpenAIMessageParam, - OpenAIResponseFormatJSONObject, - OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatParam, - OpenAIResponseFormatText, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResponseGuardrailSpec, + Safety, ) -from llama_stack.apis.safety import Safety async def convert_chat_choice_to_response_message( diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py index f0ae51423..dd90ac298 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -6,8 +6,8 @@ import asyncio -from llama_stack.apis.inference import OpenAIMessageParam -from 
llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel +from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py index a8ae92eb2..27d0f4213 100644 --- a/src/llama_stack/providers/inline/batches/reference/__init__.py +++ b/src/llama_stack/providers/inline/batches/reference/__init__.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models +from llama_stack_api import Files, Inference, Models + from llama_stack.core.datatypes import AccessRule, Api from llama_stack.providers.utils.kvstore import kvstore_impl diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py index 7c4358b84..f0f8da96c 100644 --- a/src/llama_stack/providers/inline/batches/reference/batches.py +++ b/src/llama_stack/providers/inline/batches/reference/batches.py @@ -13,25 +13,29 @@ import uuid from io import BytesIO from typing import Any, Literal -from openai.types.batch import BatchError, Errors -from pydantic import BaseModel - -from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Batches, + BatchObject, + ConflictError, + Files, Inference, + ListBatchesResponse, + Models, OpenAIAssistantMessageParam, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAIDeveloperMessageParam, OpenAIEmbeddingsRequestWithExtraBody, + OpenAIFilePurpose, OpenAIMessageParam, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResourceNotFoundError, ) -from llama_stack.apis.models import Models +from openai.types.batch import BatchError, Errors +from pydantic import BaseModel + from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py index e8ebeb30d..1fcfbbef4 100644 --- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -5,10 +5,8 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset -from llama_stack.providers.datatypes import DatasetsProtocolPrivate +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse + from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py index 5ddbd56c5..e6020e8a3 100644 --- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -6,26 +6,29 @@ import json from typing import Any -from tqdm import tqdm - -from llama_stack.apis.agents import Agents -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, Inference, + Job, + JobStatus, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAISystemMessageParam, OpenAIUserMessageParam, + Scoring, ) -from llama_stack.apis.scoring import Scoring -from llama_stack.providers.datatypes import BenchmarksProtocolPrivate +from tqdm import tqdm + from llama_stack.providers.utils.common.data_schema_validator import ColumnName from llama_stack.providers.utils.kvstore import kvstore_impl -from .....apis.common.job_types import Job, JobStatus -from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse from .config import MetaReferenceEvalConfig EVAL_TASKS_PREFIX = "benchmarks:" diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py index a76b982ce..5e8c887f1 100644 --- a/src/llama_stack/providers/inline/files/localfs/files.py +++ b/src/llama_stack/providers/inline/files/localfs/files.py @@ -10,17 +10,17 @@ from pathlib import Path from typing import Annotated from fastapi import Depends, File, Form, Response, UploadFile - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import ( +from llama_stack_api import ( ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, OpenAIFileObject, OpenAIFilePurpose, + Order, + ResourceNotFoundError, ) + from llama_stack.core.datatypes import AccessRule from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py index 961548f9c..802e79f15 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py @@ -6,9 +6,9 @@ from typing import Any +from llama_stack_api import QuantizationConfig from pydantic import BaseModel, field_validator -from llama_stack.apis.inference import QuantizationConfig from llama_stack.providers.utils.inference import supported_inference_models diff --git 
a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py index 51a2ddfad..2155a1ae8 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -8,9 +8,7 @@ import math from typing import Optional import torch -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData - -from llama_stack.apis.inference import ( +from llama_stack_api import ( GreedySamplingStrategy, JsonSchemaResponseFormat, OpenAIChatCompletionRequestWithExtraBody, @@ -20,6 +18,8 @@ from llama_stack.apis.inference import ( SamplingParams, TopPSamplingStrategy, ) +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData + from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py index ef21132a0..753185fe7 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -9,22 +9,23 @@ import time import uuid from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, + Model, + ModelsProtocolPrivate, + ModelType, OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionUsage, OpenAIChoice, + OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIUserMessageParam, ToolChoice, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.apis.models import Model, ModelType + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat @@ -40,7 +41,6 @@ from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal -from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) @@ -376,7 +376,7 @@ class MetaReferenceInferenceImpl( # Convert tool calls to OpenAI format openai_tool_calls = None if decoded_message.tool_calls: - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, ) @@ -441,13 +441,14 @@ class MetaReferenceInferenceImpl( params: OpenAIChatCompletionRequestWithExtraBody, ) -> AsyncIterator[OpenAIChatCompletionChunk]: """Stream chat completion chunks as they're generated.""" - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionChunk, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, OpenAIChoiceDelta, OpenAIChunkChoice, ) + from llama_stack.models.llama.datatypes 
import StopReason from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index e6dcf3ae7..14c9a41a4 100644 --- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -6,19 +6,19 @@ from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, -) -from llama_stack.apis.inference.inference import ( + Model, + ModelsProtocolPrivate, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, OpenAICompletion, + OpenAICompletionRequestWithExtraBody, ) -from llama_stack.apis.models import ModelType + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py index 950b75f86..7a85d0e03 100644 --- a/src/llama_stack/providers/inline/post_training/common/validator.py +++ b/src/llama_stack/providers/inline/post_training/common/validator.py @@ -12,11 +12,8 @@ from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - DialogType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, DialogType, StringType + from llama_stack.providers.utils.common.data_schema_validator import ( ColumnName, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py index 22ace1ae0..f3f3d8d56 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py @@ -6,11 +6,11 @@ from enum import Enum from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -19,6 +19,7 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) + from llama_stack.providers.inline.post_training.huggingface.config import ( HuggingFacePostTrainingConfig, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py index 39b83a3fd..58a30618c 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -12,20 +12,20 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + TrainingConfig, +) from peft import LoraConfig from 
transformers import ( AutoTokenizer, ) from trl import SFTConfig, SFTTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py index 11d707df9..f7dc3ebf2 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -11,18 +11,18 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DatasetIO, + Datasets, + DPOAlignmentConfig, + TrainingConfig, +) from transformers import ( AutoTokenizer, ) from trl import DPOConfig, DPOTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DPOAlignmentConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py index a930602d0..86c3c3f52 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Protocol import psutil import torch from datasets import Dataset +from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig from transformers import AutoConfig, AutoModelForCausalLM if TYPE_CHECKING: @@ -34,8 +35,6 @@ class HFAutoModel(Protocol): def save_pretrained(self, save_directory: str | Path) -> None: ... 
-from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.post_training import Checkpoint, TrainingConfig from llama_stack.log import get_logger from .config import HuggingFacePostTrainingConfig diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py index f0fa052a2..1483b8385 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py @@ -13,6 +13,7 @@ from collections.abc import Callable import torch +from llama_stack_api import DatasetFormat from pydantic import BaseModel from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages from torchtune.models.llama3 import llama3_tokenizer @@ -21,7 +22,6 @@ from torchtune.models.llama3_1 import lora_llama3_1_8b from torchtune.models.llama3_2 import lora_llama3_2_3b from torchtune.modules.transforms import Transform -from llama_stack.apis.post_training import DatasetFormat from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import Model diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py index 765f6789d..3370d42fa 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -6,11 +6,11 @@ from enum import Enum from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -20,6 +20,7 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) + from llama_stack.providers.inline.post_training.torchtune.config import ( TorchtunePostTrainingConfig, ) diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index c648cdc46..2bf1d0fe7 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -12,6 +12,17 @@ from pathlib import Path from typing import Any import torch +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + OptimizerConfig, + PostTrainingMetric, + QATFinetuningConfig, + TrainingConfig, +) from torch import nn from torch.optim import Optimizer from torch.utils.data import DataLoader, DistributedSampler @@ -32,17 +43,6 @@ from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup from torchtune.training.metric_logging import DiskLogger from tqdm import tqdm -from llama_stack.apis.common.training_types import PostTrainingMetric -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - OptimizerConfig, - QATFinetuningConfig, - TrainingConfig, -) from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from 
llama_stack.core.utils.model_utils import model_local_dir from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py index 7da9ea0d7..80e907c10 100644 --- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py +++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py @@ -10,15 +10,17 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from codeshield.cs import CodeShieldScanResult -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + ModerationObject, + ModerationObjectResults, + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield + from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 6f6346e82..36e4280b9 100644 --- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -9,26 +9,27 @@ import uuid from string import Template from typing import Any -from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem -from llama_stack.apis.inference import ( +from llama_stack_api import ( + ImageContentItem, Inference, + ModerationObject, + ModerationObjectResults, OpenAIChatCompletionRequestWithExtraBody, OpenAIMessageParam, OpenAIUserMessageParam, -) -from llama_stack.apis.safety import ( RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, + TextContentItem, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield + from llama_stack.core.datatypes import Api from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index 2015e1150..b4f495f19 100644 --- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -7,21 +7,21 @@ from typing import Any import torch -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ShieldStore, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield +from transformers import AutoModelForSequenceClassification, AutoTokenizer + from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.log import get_logger -from llama_stack.providers.datatypes 
import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str

from .config import PromptGuardConfig, PromptGuardType

diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index b19b68039..326fd9211 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,17 +5,19 @@
# the root directory of this source tree.
from typing import Any

-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
    ScoringResult,
)
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+
from llama_stack.core.datatypes import Api
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
    get_valid_schemas,
    validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index b87974d08..93c2627dd 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,8 +8,8 @@
import json
import re
from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 60804330f..382c64d88 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,8 +6,8 @@

from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
index aad3dfe26..a7305d13a 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
index 9b24ff791..f7d2f32ae 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
index adca0791d..a2ed1d695 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
index 8b1bf5352..4e2b49a1f 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
@@ -4,9 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
+    NumberType,
    RegexParserScoringFnParams,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index ea04331c9..df0cf52d9 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -4,9 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
+    NumberType,
    RegexParserScoringFnParams,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
index 9cae66fa6..1f143c4a6 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 77f6176e6..4ec85bb09 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,8 +6,8 @@

from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.ifeval import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index d765959a8..4e9d49e96 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,8 +5,8 @@
# the root directory of this source tree.
from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index cb336e303..7f213b38c 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,8 +6,8 @@
import re
from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.regex_parser_multiple_choice_answer import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index d6e10e6c9..b291924d5 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,8 +6,8 @@

from typing import Any

-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 14810f706..cbab93c74 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,21 +17,22 @@ from autoevals.ragas import (
    ContextRelevancy,
    Faithfulness,
)
-from pydantic import BaseModel
-
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
    ScoringResult,
    ScoringResultRow,
)
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from pydantic import BaseModel
+
from llama_stack.core.datatypes import Api
from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
    get_valid_schemas,
    validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index 4fe07f822..b058305b4 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
index a1995cc4e..d619d38a8 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
index e8fe15259..34354a1fc 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index d9b129a8b..4092ccc4a 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
index c1d7e855b..2b32b9eec 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
index 01ddd0dd0..4d6547002 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
index 55d89344a..739dfd7bd 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
index c621ecf7f..59ed5949b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
index 2e85c0c7c..96c36d226 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index 9b7628524..aa636d2b3 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,18 +5,20 @@
# the root directory of this source tree.
from typing import Any

-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
+    Inference,
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
    ScoringResult,
)
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+
from llama_stack.core.datatypes import Api
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
    get_valid_schemas,
    validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 074f1ff46..ed26169a5 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
    AggregationFunctionType,
    LLMAsJudgeScoringFnParams,
+    NumberType,
    ScoringFn,
)
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index 205e0bbf3..bffffd878 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -4,8 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
+from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn

llm_as_judge_base = ScoringFn(
    identifier="llm-as-judge::base",
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index fbecb6e20..169a4d8b7 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
import re
from typing import Any

-from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
+
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index f9a7e7b89..60117dc3d 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import RagToolRuntimeConfig
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index 14cbec49d..f499989cb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,15 +6,16 @@

from jinja2 import Template
-
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack.apis.tools.rag_tool import (
+from llama_stack_api import (
    DefaultRAGQueryGeneratorConfig,
+    InterleavedContent,
    LLMRAGQueryGeneratorConfig,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
    RAGQueryGenerator,
    RAGQueryGeneratorConfig,
)
+
from llama_stack.providers.utils.inference.prompt_adapter import (
    interleaved_content_as_str,
)
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index 6a59be0ca..aacb7bb38 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,34 +12,31 @@ from typing import Any

import httpx
from fastapi import UploadFile
-from pydantic import TypeAdapter
-
-from llama_stack.apis.common.content_types import (
+from llama_stack_api import (
    URL,
+    Files,
+    Inference,
    InterleavedContent,
    InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.files import Files, OpenAIFilePurpose
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.tools import (
    ListToolDefsResponse,
+    OpenAIFilePurpose,
+    QueryChunksResponse,
    RAGDocument,
    RAGQueryConfig,
    RAGQueryResult,
+    TextContentItem,
    ToolDef,
    ToolGroup,
+    ToolGroupsProtocolPrivate,
    ToolInvocationResult,
    ToolRuntime,
-)
-from llama_stack.apis.vector_io import (
-    QueryChunksResponse,
    VectorIO,
    VectorStoreChunkingStrategyStatic,
    VectorStoreChunkingStrategyStaticConfig,
)
+from pydantic import TypeAdapter
+
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.memory.vector_store import parse_data_url
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
index 575e5ad88..155b8a0cb 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import ChromaVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index 1798f10de..d955b1d06 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index 24d1f292a..b834589e3 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd7a7aeca..dd433f818 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel

from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 96760b834..abef42499 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,15 +12,22 @@ from typing import Any

import faiss  # type: ignore[import-untyped]
import numpy as np
+from llama_stack_api import (
+    Chunk,
+    Files,
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
from numpy.typing import NDArray

-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
index 7dc9c6a33..2f84769f3 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import MilvusVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index b333b04ea..08d05c991 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
index bef6d50e6..145d19455 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import QdrantVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index e7ecde7b7..437d643f0 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel

from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index df96e927c..e84c299dc 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,7 +6,7 @@

from typing import Any

-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api

from .config import SQLiteVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 399800d3e..e979ff323 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,15 +12,19 @@ from typing import Any

import numpy as np
import sqlite_vec  # type: ignore[import-untyped]
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
from numpy.typing import NDArray

-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index 1845d6f46..bd204cecd 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,11 +5,12 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
)
+
from llama_stack.providers.utils.kvstore import kvstore_dependencies
diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
index a07942486..e11bb8332 100644
--- a/src/llama_stack/providers/registry/batches.py
+++ b/src/llama_stack/providers/registry/batches.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
index a9feb0bac..bfd7ede3c 100644
--- a/src/llama_stack/providers/registry/datasetio.py
+++ b/src/llama_stack/providers/registry/datasetio.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
index 4ef0bb41f..9c8b1eebd 100644
--- a/src/llama_stack/providers/registry/eval.py
+++ b/src/llama_stack/providers/registry/eval.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec


def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index 3f5949ba2..dfc527816 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,7 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+
from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 3cbfd408b..819e5aff5 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
index 2092e3b2d..a5529b714 100644
--- a/src/llama_stack/providers/registry/post_training.py
+++ b/src/llama_stack/providers/registry/post_training.py
@@ -7,7 +7,7 @@

from typing import cast

-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec

# We provide two versions of these providers so that distributions can package the appropriate version of torch.
# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
index b30074398..c9dbbce24 100644
--- a/src/llama_stack/providers/registry/safety.py
+++ b/src/llama_stack/providers/registry/safety.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
index a4ec54ed2..45c5dbed7 100644
--- a/src/llama_stack/providers/registry/scoring.py
+++ b/src/llama_stack/providers/registry/scoring.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 39dc7fccd..3f0a83a30 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,12 +5,13 @@
# the root directory of this source tree.


-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
    RemoteProviderSpec,
)
+
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
index 55b302751..a00941586 100644
--- a/src/llama_stack/providers/registry/vector_io.py
+++ b/src/llama_stack/providers/registry/vector_io.py
@@ -5,7 +5,7 @@
# the root directory of this source tree.
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
@@ -244,7 +244,7 @@ Two ranker types are supported:

Example using RAGQueryConfig with different search modes:

```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

# Vector search
config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index a34e354bf..1260ce644 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,8 @@
from typing import Any
from urllib.parse import parse_qs, urlparse

-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset
-from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
+
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index f723c92cc..cb674b0d7 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,11 +7,7 @@
from typing import Any

import aiohttp
-
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType

from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index 8fc7ffdd3..fbdec0d4d 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,18 +6,24 @@ from typing import Any

import requests
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
+    Inference,
+    Job,
+    JobStatus,
+    Scoring,
+    ScoringResult,
+)

-from llama_stack.apis.agents import Agents
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import Scoring, ScoringResult
-from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

-from .....apis.common.job_types import Job, JobStatus
-from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
from .config import NVIDIAEvalConfig

DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index c5d4194df..bbd630977 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,17 +8,17 @@ from datetime import UTC, datetime
from typing import Annotated, Any

from fastapi import Depends, File, Form, Response, UploadFile
-
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
    ExpiresAfter,
    Files,
    ListOpenAIFileResponse,
    OpenAIFileDeleteResponse,
    OpenAIFileObject,
    OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
)
+
from llama_stack.core.datatypes import AccessRule
from llama_stack.providers.utils.files.form_data import parse_expires_after
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 76261bdf4..14f1e3852 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,16 +17,17 @@ from fastapi import Depends, File, Form, Response, UploadFile
if TYPE_CHECKING:
    from mypy_boto3_s3.client import S3Client

-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
    ExpiresAfter,
    Files,
    ListOpenAIFileResponse,
    OpenAIFileDeleteResponse,
    OpenAIFileObject,
    OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
)
+
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.id_generation import generate_object_id
from llama_stack.providers.utils.files.form_data import parse_expires_after
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 31e6aa12b..7ee4c54e2 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 7c31df7a6..596f6c234 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@
import os
from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, HttpUrl, SecretStr

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class AzureProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1bf44b51a..1a9fe533b 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,9 +6,7 @@

from collections.abc import AsyncIterator, Iterable

-from openai import AuthenticationError
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
)
+from openai import AuthenticationError
+
from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index d5def9da1..c7f3111f9 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,10 +6,11 @@

from urllib.parse import urljoin

-from llama_stack.apis.inference import (
+from llama_stack_api import (
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
)
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index 9ba773724..a1fd41e2d 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@
import os
from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type

DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 84357f764..4974593d2 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, SecretStr

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 636241383..8b802379f 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,8 +7,8 @@

from collections.abc import Iterable

from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody

-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index 20ba99606..d786655eb 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
index df5da29a2..6c25c005c 100644
--- a/src/llama_stack/providers/remote/inference/gemini/config.py
+++ b/src/llama_stack/providers/remote/inference/gemini/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class GeminiProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index ee960d13b..79d694f06 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,12 +6,13 @@

from typing import Any

-from llama_stack.apis.inference import (
+from llama_stack_api import (
    OpenAIEmbeddingData,
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
    OpenAIEmbeddingUsage,
)
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import GeminiConfig
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index c1aedca3e..cec327716 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class GroqProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index 4b5750ed4..c16311830 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class LlamaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 05d6e8cc8..1dea3e3cb 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,12 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
    OpenAICompletion,
    OpenAICompletionRequestWithExtraBody,
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
)
+
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
index b4926f33e..b89b2a750 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/__init__.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference

from .config import NVIDIAConfig
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index 618bbe078..6ff98d290 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,10 +7,10 @@
import os
from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class NVIDIAProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index bc5aa7953..9e4c6f559 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,16 +8,15 @@ from collections.abc import Iterable

import aiohttp
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
    RerankData,
    RerankResponse,
)
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
+
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py
index 280a8c1d2..b7d6125f3 100644
--- a/src/llama_stack/providers/remote/inference/oci/__init__.py
+++ b/src/llama_stack/providers/remote/inference/oci/__init__.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider

from .config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
index 9747b08ea..24b4ad926 100644
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -7,10 +7,10 @@
import os
from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class OCIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 253dcf2b6..36e56cf6c 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,15 +10,15 @@ from typing import Any

import httpx
import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
from oci.generative_ai.generative_ai_client import GenerativeAiClient
from oci.generative_ai.models import ModelCollection
from openai._base_client import DefaultAsyncHttpxClient

-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
from llama_stack.providers.remote.inference.oci.config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 50f36d045..6a471429e 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,15 +7,15 @@

import asyncio

-from ollama import AsyncClient as AsyncOllamaClient
-
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    HealthResponse,
    HealthStatus,
+    Model,
+    UnsupportedModelError,
)
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index 36c66bd28..cbb01b2d0 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class OpenAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index eca28a86a..7045dbf2e 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 3c56acfbd..19cf0c5d7 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,10 +6,9 @@

from collections.abc import AsyncIterator

-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
    Inference,
+    Model,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
)
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
+
from llama_stack.core.request_headers import NeedsRequestProviderData

from .config import PassthroughImplConfig
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index a2a1add97..aaa4230a8 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, SecretStr

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class RunpodProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index a76e941cb..4596b2df5 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,11 +6,12 @@

from collections.abc import AsyncIterator

-from llama_stack.apis.inference import (
+from llama_stack_api import (
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionRequestWithExtraBody,
)
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import RunpodImplConfig
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index f63210434..6d72e7205 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class SambaNovaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 47952abba..051a2afa3 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -5,10 +5,10 @@
# the root directory of this source tree.

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, SecretStr

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index 6ae7b2544..831a26e39 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,12 +8,12 @@ from collections.abc import Iterable

from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
)
+from pydantic import SecretStr
+
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
index 47392c8e7..96c0538e3 100644
--- a/src/llama_stack/providers/remote/inference/together/config.py
+++ b/src/llama_stack/providers/remote/inference/together/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import Field

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


@json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index 963b384a0..f1355a760 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -8,15 +8,15 @@ from collections.abc import Iterable

from typing import Any, cast

+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
from together import AsyncTogether  # type: ignore[import-untyped]
from together.constants import BASE_URL  # type: ignore[import-untyped]

-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
index 5f2efa894..53e2b3e65 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/config.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/config.py
@@ -6,10 +6,10 @@

from typing import Any

+from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, SecretStr

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type


class VertexAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
index e362aece6..23f713961 100644
a/src/llama_stack/providers/remote/inference/vllm/config.py +++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,10 +6,10 @@ from pathlib import Path +from llama_stack_api import json_schema_type from pydantic import Field, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index fa350ec48..f7938c22c 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,19 +7,17 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx -from pydantic import ConfigDict - -from llama_stack.apis.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) +from pydantic import ConfigDict + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, -) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import VLLMInferenceAdapterConfig diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index 8d8df13b4..1bba040ef 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,10 +7,10 @@ import os from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type class WatsonXProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index e71ffe5e1..de23c25d7 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -9,8 +9,9 @@ from typing import Any import litellm import requests - -from llama_stack.apis.inference.inference import ( +from llama_stack_api import ( + Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.apis.models import Model -from llama_stack.apis.models.models import ModelType + from llama_stack.core.telemetry.tracing import get_current_span from llama_stack.log import get_logger from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig @@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack.apis.inference import OpenAIEmbeddingUsage + from llama_stack_api import OpenAIEmbeddingUsage + from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md index 
83f20a44e..f998f44ba 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md @@ -128,7 +128,7 @@ client.post_training.job.cancel(job_uuid="your-job-id") #### 1. Register the model ```python -from llama_stack.apis.models import Model, ModelType +from llama_stack_api.models import Model, ModelType client.models.register( model_id="test-example-model@v1", diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index d839ffd6f..02c35241b 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,9 +8,7 @@ from datetime import datetime from typing import Any, Literal import aiohttp -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, @@ -19,6 +17,8 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) +from pydantic import BaseModel, ConfigDict + from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index 162951ff3..78762155d 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,9 +7,9 @@ import warnings from typing import Any +from llama_stack_api import TrainingConfig from pydantic import BaseModel -from llama_stack.apis.post_training import TrainingConfig from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 75f96816a..86b93c32e 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,16 +7,17 @@ import json from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.bedrock.client import create_bedrock_client from .config import BedrockSafetyConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index 1ca8d95cb..ca28924d4 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,8 +5,9 @@ # the root directory of this source tree. 
+from llama_stack_api import json_schema_type + from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md index af11b2539..f3ec0f1e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/README.md +++ b/src/llama_stack/providers/remote/safety/nvidia/README.md @@ -42,8 +42,8 @@ client.initialize() #### Create a safety shield ```python -from llama_stack.apis.safety import Shield -from llama_stack.apis.inference import Message +from llama_stack_api.safety import Shield +from llama_stack_api.inference import Message # Create a safety shield shield = Shield( diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index 1c618f4f4..fc686ae73 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,10 +6,9 @@ import os from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class NVIDIASafetyConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index 236f16207..b3b5090e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,12 +7,18 @@ from typing import Any import requests +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack.apis.shields import Shield from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import NVIDIASafetyConfig diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index 2cde97098..a8e745851 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,10 +6,9 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr -from llama_stack.schema_utils import json_schema_type - class SambaNovaProviderDataValidator(BaseModel): sambanova_api_key: str | None = Field( diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 72359badd..119ebb6ed 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,18 +8,18 @@ from typing import Any import litellm import requests - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.core.request_headers import NeedsRequestProviderData from 
llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import SambaNovaSafetyConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 9a98964b7..84e47dd4f 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 02e5b5c69..b7eee776a 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,18 +7,18 @@ from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BraveSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 578bb6d34..efb1eb2df 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,17 +7,18 @@ from typing import Any from urllib.parse import urlparse -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.datatypes import Api -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, + Api, ListToolDefsResponse, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools from .config import MCPProviderConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index ca629fced..d65d66e67 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, 
ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 410e34195..9cc865092 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index e4b77c68d..d774ea643 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 97e2244b8..eca5d349b 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,14 +9,19 @@ from typing import Any from urllib.parse import urlparse import chromadb +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index 209ba90bb..b1e4f9a4a 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git 
a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 526075bb2..1b703d486 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 8ff9e1328..2e2c788c7 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 73339b5be..b856bf918 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,16 +8,21 @@ import asyncio import os from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 8086b7650..36018fd95 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index d81e524e4..aeb1c83bb 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index cf10a0e01..8aa0303b6 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,18 +8,23 @@ import heapq from typing import Any import psycopg2 +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index e9527f101..b5b02fe59 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 01fbcc5cb..8cc4cbb2b 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 7d17c5591..53d6be2b6 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,23 +9,24 @@ import hashlib import uuid from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreChunkingStrategy, + VectorStoreFileObject, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategy, - VectorStoreFileObject, -) -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 12e11d013..47546d459 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 66dbf1fed..19f9679fb 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d200662da..c72666f63 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,19 +8,23 @@ from typing import Any import weaviate import weaviate.classes as wvc +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index b0305104f..7ef245779 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,11 +7,8 @@ from enum import Enum from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - CompletionInputType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType + from llama_stack.core.datatypes import Api diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 3d8fb6d85..21afbec2b 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,10 +7,9 @@ import json from fastapi import Request +from llama_stack_api import ExpiresAfter from pydantic import BaseModel, ValidationError -from llama_stack.apis.files import ExpiresAfter - async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/embedding_mixin.py 
b/src/llama_stack/providers/utils/inference/embedding_mixin.py index bab495eef..f7e5c711b 100644 --- a/src/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger if TYPE_CHECKING: from sentence_transformers import SentenceTransformer -from llama_stack.apis.inference import ( +from llama_stack_api import ( ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index a3a28aec0..3c707dd01 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,15 +6,15 @@ import asyncio from typing import Any -from sqlalchemy.exc import IntegrityError - -from llama_stack.apis.inference import ( +from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, OpenAICompletionWithInputMessages, OpenAIMessageParam, Order, ) +from sqlalchemy.exc import IntegrityError + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index a793c499e..4f468725b 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,8 +9,7 @@ import struct from collections.abc import AsyncIterator import litellm - -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -22,6 +21,7 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index 8a120b698..e7ca5ab74 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,12 +6,10 @@ from typing import Any +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr -from llama_stack.apis.common.errors import UnsupportedModelError -from llama_stack.apis.models import ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c2e6829e0..c97e42274 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,25 +20,23 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) +from llama_stack_api import ( + URL, + 
GreedySamplingStrategy, + ImageContentItem, + JsonSchemaResponseFormat, + OpenAIResponseFormatParam, + SamplingParams, + TextContentItem, + TopKSamplingStrategy, + TopPSamplingStrategy, + _URLOrData, +) from openai.types.chat import ( ChatCompletionMessageToolCall, ) from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - ImageContentItem, - TextContentItem, - _URLOrData, -) -from llama_stack.apis.inference import ( - GreedySamplingStrategy, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TopKSamplingStrategy, - TopPSamplingStrategy, -) from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( BuiltinTool, diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 09059da09..c05873df5 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,11 +10,9 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from openai import AsyncOpenAI -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.inference import ( +from llama_stack_api import ( Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -26,7 +24,9 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from llama_stack.apis.models import ModelType +from openai import AsyncOpenAI +from pydantic import BaseModel, ConfigDict + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index 35a7b3484..ea01a34e9 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,16 +12,11 @@ import re from typing import Any import httpx -from PIL import Image as PIL_Image - -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( + CompletionRequest, ImageContentItem, InterleavedContent, InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - CompletionRequest, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, @@ -32,8 +27,11 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, ResponseFormat, ResponseFormatType, + TextContentItem, ToolChoice, ) +from PIL import Image as PIL_Image + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( RawContent, diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py index 6a8b0a7cf..895268a4f 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class SqliteControlPlaneConfig(BaseModel): diff --git a/src/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py index 4c40056f3..6786293c6 100644 --- a/src/llama_stack/providers/utils/memory/file_utils.py +++ b/src/llama_stack/providers/utils/memory/file_utils.py @@ -8,7 +8,7 @@ import base64 import mimetypes import os -from llama_stack.apis.common.content_types import URL +from llama_stack_api import URL def data_url_from_file(file_path: str) -> URL: diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 853245598..68d1c11e5 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,16 +13,15 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + Files, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, + OpenAIFileObject, QueryChunksResponse, SearchRankingOptions, + VectorStore, VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, @@ -39,11 +38,13 @@ from llama_stack.apis.vector_io import ( VectorStoreFileStatus, VectorStoreListFilesResponse, VectorStoreListResponse, + VectorStoreNotFoundError, VectorStoreObject, VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from llama_stack.apis.vector_stores import VectorStore +from pydantic import TypeAdapter + from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 99f875227..37ac79039 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,20 +14,22 @@ from urllib.parse import unquote import httpx import numpy as np +from llama_stack_api import ( + URL, + Api, + Chunk, + ChunkMetadata, + InterleavedContent, + OpenAIEmbeddingsRequestWithExtraBody, + QueryChunksResponse, + RAGDocument, + VectorStore, +) from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, -) -from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) diff --git a/src/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py index 033022491..d1d9e36c5 100644 --- a/src/llama_stack/providers/utils/pagination.py +++ 
b/src/llama_stack/providers/utils/pagination.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack_api import PaginatedResponse def paginate_records( diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index fdca8ddee..c7dfed15a 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,18 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.agents import ( - Order, -) -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIResponseObjectWithInput, + Order, ) -from llama_stack.apis.inference import OpenAIMessageParam + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py index cff9a112f..aa6fe7248 100644 --- a/src/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -6,8 +6,7 @@ import statistics from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import AggregationFunctionType +from llama_stack_api import AggregationFunctionType, ScoringResultRow def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index 2fae177b7..d16c75263 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,8 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFn +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow + from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index bcd224234..033a00edc 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,10 +8,9 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol +from llama_stack_api import PaginatedResponse from pydantic import BaseModel -from llama_stack.apis.common.responses import PaginatedResponse - class ColumnType(Enum): INTEGER = "INTEGER" diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index cfc3131f4..263f5e69f 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,6 +6,7 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast +from llama_stack_api import 
PaginatedResponse from sqlalchemy import ( JSON, Boolean, @@ -26,7 +27,6 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index a271cb959..82c85f46c 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,17 +10,20 @@ from enum import Enum from typing import Any, cast import httpx +from llama_stack_api import ( + ImageContentItem, + InterleavedContentItem, + ListToolDefsResponse, + TextContentItem, + ToolDef, + ToolInvocationResult, + _URLOrData, +) from mcp import ClientSession, McpError from mcp import types as mcp_types from mcp.client.sse import sse_client from mcp.client.streamable_http import streamablehttp_client -from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem, _URLOrData -from llama_stack.apis.tools import ( - ListToolDefsResponse, - ToolDef, - ToolInvocationResult, -) from llama_stack.core.datatypes import AuthenticationRequiredError from llama_stack.log import get_logger from llama_stack.providers.utils.tools.ttl_dict import TTLDict diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py index e97a9d8fb..9c399b7bf 100644 --- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py +++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py @@ -6,9 +6,7 @@ from typing import Protocol -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec -from llama_stack.schema_utils import webmethod +from llama_stack_api import LLAMA_STACK_API_V1, Api, ProviderSpec, RemoteProviderSpec, webmethod def available_providers() -> list[ProviderSpec]: diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index 3ab8df3d9..b9c0ac916 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,8 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest - -from llama_stack.apis.files import OpenAIFilePurpose +from llama_stack_api import OpenAIFilePurpose class BatchHelper: diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index d9e8dd501..61878ac4c 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,8 +9,8 @@ from unittest.mock import patch import pytest import requests +from llama_stack_api import OpenAIFilePurpose -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.datatypes import User purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index 99aa75395..d007b57d6 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,14 +15,14 @@ that enables routing based on provider_data alone. 
from unittest.mock import AsyncMock, patch
import pytest
-
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Api,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionUsage,
    OpenAIChoice,
)
+
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.core.telemetry.telemetry import MetricEvent
diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py
index b5be71c7c..ff6925b58 100644
--- a/tests/integration/post_training/test_post_training.py
+++ b/tests/integration/post_training/test_post_training.py
@@ -9,8 +9,7 @@
import time
import uuid
import pytest
-
-from llama_stack.apis.post_training import (
+from llama_stack_api import (
    DataConfig,
    DatasetFormat,
    DPOAlignmentConfig,
@@ -18,6 +17,7 @@ from llama_stack.apis.post_training import (
    LoraFinetuningConfig,
    TrainingConfig,
)
+
from llama_stack.log import get_logger
# Configure logging
diff --git a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json
index 7ec2ac931..4e80e1cdd 100644
--- a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json
+++ b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json
@@ -10,7 +10,7 @@
  },
  "response": {
    "body": {
-      "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+      "__type__": "llama_stack_api.tools.ToolInvocationResult",
      "__data__": {
        "content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024.
* For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. ## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. 
We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json index a6c31dc72..a8e1e8611 100644 --- a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json +++ b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. 
All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json index b92c67940..dd7884012 100644 --- a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json +++ b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. 
Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 5a73bb044..99b4982f0 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,8 +12,8 @@ import warnings from collections.abc import Generator import pytest +from llama_stack_api import ViolationLevel -from llama_stack.apis.safety import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId # Llama Guard models available for text and vision shields diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6337abc9c..6a926f1d5 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,8 +7,7 @@ import base64 import mimetypes import pytest - -from llama_stack.apis.safety import ViolationLevel +from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index 7b3779e9e..b85a23263 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,8 +9,7 @@ import mimetypes import os import pytest - -from llama_stack.apis.safety import ViolationLevel +from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 4d532ed87..1b1b6ef28 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,8 +7,8 @@ import re import pytest +from llama_stack_api import ToolGroupNotFoundError -from llama_stack.apis.common.errors import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 1043d4903..c65dfecac 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,11 +8,10 @@ import time from io import BytesIO import pytest +from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError -from llama_stack.apis.files import ExpiresAfter -from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger @@ -646,7 +645,7 @@ def test_openai_vector_store_attach_file( ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter compat_client = compat_client_with_empty_stores @@ -710,7 +709,7 @@ def 
test_openai_vector_store_attach_files_on_creation( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create some files and attach them to the vector store valid_file_ids = [] @@ -775,7 +774,7 @@ def test_openai_vector_store_list_files( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -867,7 +866,7 @@ def test_openai_vector_store_retrieve_file_contents( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -928,7 +927,7 @@ def test_openai_vector_store_delete_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -994,7 +993,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1046,7 +1045,7 @@ def test_openai_vector_store_update_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1103,7 +1102,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( This test confirms that client.vector_stores.create() creates a unique ID """ skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter compat_client = compat_client_with_empty_stores diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index 1b2099069..acaa44bcb 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,8 +5,7 @@ # the root directory of this source tree. import pytest - -from llama_stack.apis.vector_io import Chunk +from llama_stack_api import Chunk from ..conftest import vector_provider_wrapper diff --git a/tests/unit/conversations/test_api_models.py b/tests/unit/conversations/test_api_models.py index 8416cba0b..f8576f076 100644 --- a/tests/unit/conversations/test_api_models.py +++ b/tests/unit/conversations/test_api_models.py @@ -5,11 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.conversations.conversations import ( - Conversation, - ConversationItem, - ConversationItemList, -) +from llama_stack_api import Conversation, ConversationItem, ConversationItemList def test_conversation_model_defaults(): diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 3f0175831..2f942eb9c 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -8,14 +8,11 @@ import tempfile from pathlib import Path import pytest +from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage from openai.types.conversations.conversation import Conversation as OpenAIConversation from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem from pydantic import TypeAdapter -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseInputMessageContentText, - OpenAIResponseMessage, -) from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py index bf195ff33..7e465513e 100644 --- a/tests/unit/core/routers/test_safety_router.py +++ b/tests/unit/core/routers/test_safety_router.py @@ -6,8 +6,8 @@ from unittest.mock import AsyncMock -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import ListShieldsResponse, Shield +from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield + from llama_stack.core.datatypes import SafetyConfig from llama_stack.core.routers.safety import SafetyRouter diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index f9bd84a37..071fbe6e7 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -7,8 +7,8 @@ from unittest.mock import AsyncMock, Mock import pytest +from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody -from llama_stack.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody from llama_stack.core.routers.vector_io import VectorIORouter diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index d28803006..acb31e1c9 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -9,12 +9,10 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield -from llama_stack.apis.models import ListModelsResponse, Model, ModelType -from llama_stack.apis.shields import ListShieldsResponse, Shield from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config -from llama_stack.providers.datatypes import Api class TestVectorStoresValidation: diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 8c1838ba3..2405d536e 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -9,15 +9,22 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import ( + URL, + Api, + Dataset, + DatasetPurpose, + 
ListToolDefsResponse, + Model, + ModelNotFoundError, + ModelType, + NumberType, + Shield, + ToolDef, + ToolGroup, + URIDataSource, +) -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import Model, ModelType -from llama_stack.apis.shields.shields import Shield -from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup from llama_stack.core.datatypes import RegistryEntrySource from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py index 2b7ce5c4e..f66b57df8 100644 --- a/tests/unit/distribution/test_api_recordings.py +++ b/tests/unit/distribution/test_api_recordings.py @@ -9,10 +9,9 @@ from pathlib import Path from unittest.mock import patch import pytest -from openai import AsyncOpenAI # Import the real Pydantic response types instead of using Mocks -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChoice, @@ -20,6 +19,8 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) +from openai import AsyncOpenAI + from llama_stack.testing.api_recorder import ( APIRecordingMode, ResponseStorage, diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 11f55cfdb..a27455e24 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -9,6 +9,7 @@ from unittest.mock import patch import pytest import yaml +from llama_stack_api import ProviderSpec from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig @@ -22,7 +23,6 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.datatypes import ProviderSpec class SampleConfig(BaseModel): @@ -312,7 +312,7 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.providers.datatypes import Api, ProviderSpec + from llama_stack_api import Api, ProviderSpec # Simulate a provider module with get_provider_spec fake_spec = ProviderSpec( @@ -395,8 +395,9 @@ pip_packages: def test_external_provider_from_module_building(self, mock_providers): """Test loading an external provider from a module during build (building=True, partial spec).""" + from llama_stack_api import Api + from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec - from llama_stack.providers.datatypes import Api # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec build_config = BuildConfig( @@ -456,8 +457,9 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec fake_spec = ProviderSpec( api=Api.inference, @@ 
-593,8 +595,9 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -641,8 +644,9 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec spec1 = ProviderSpec( api=Api.inference, @@ -689,8 +693,9 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec # Module returns both inline and remote variants spec1 = ProviderSpec( @@ -828,8 +833,9 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace + from llama_stack_api import ProviderSpec + from llama_stack.core.distribution import get_external_providers_from_module - from llama_stack.providers.datatypes import ProviderSpec inference_spec = ProviderSpec( api=Api.inference, diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index 426e2cf64..080d1ddbe 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -6,10 +6,8 @@ import pytest +from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.inline.files.localfs import ( diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py index 89cb1af9d..3c93a578d 100644 --- a/tests/unit/providers/batches/test_reference.py +++ b/tests/unit/providers/batches/test_reference.py @@ -58,9 +58,7 @@ import json from unittest.mock import AsyncMock, MagicMock import pytest - -from llama_stack.apis.batches import BatchObject -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError +from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError class TestReferenceBatchesImpl: diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py index e6cb29b9b..4cd5d962d 100644 --- a/tests/unit/providers/batches/test_reference_idempotency.py +++ b/tests/unit/providers/batches/test_reference_idempotency.py @@ -43,8 +43,7 @@ Key Behaviors Tested: import asyncio import pytest - -from llama_stack.apis.common.errors import ConflictError +from llama_stack_api import ConflictError class TestReferenceBatchesIdempotency: diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py index 92a45a9f2..ae63c1a78 100644 --- a/tests/unit/providers/files/test_s3_files.py +++ 
b/tests/unit/providers/files/test_s3_files.py @@ -8,9 +8,7 @@ from unittest.mock import patch import pytest from botocore.exceptions import ClientError - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.files import OpenAIFilePurpose +from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError class TestS3FilesImpl: @@ -228,7 +226,7 @@ class TestS3FilesImpl: mock_now.return_value = 0 - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_expired_file" uploaded = await s3_provider.openai_upload_file( @@ -260,7 +258,7 @@ class TestS3FilesImpl: async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file): """Unsupported anchor value should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_unsupported_expires_after_anchor" @@ -273,7 +271,7 @@ class TestS3FilesImpl: async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file): """Non-integer seconds in expires_after should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter sample_text_file.filename = "test_nonint_expires_after_seconds" @@ -286,7 +284,7 @@ class TestS3FilesImpl: async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file): """Seconds outside allowed range should raise ValueError.""" - from llama_stack.apis.files import ExpiresAfter + from llama_stack_api import ExpiresAfter with pytest.raises(ValueError, match="greater than or equal to 3600"): await s3_provider.openai_upload_file( diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py index 6097f2808..873db4e27 100644 --- a/tests/unit/providers/files/test_s3_files_auth.py +++ b/tests/unit/providers/files/test_s3_files_auth.py @@ -7,9 +7,8 @@ from unittest.mock import patch import pytest +from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.datatypes import User from llama_stack.providers.remote.files.s3.files import S3FilesImpl diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index fdd07c032..b3eecc558 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -8,9 +8,9 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody from openai import AuthenticationError -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index ffd45798e..e2a5455b7 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -9,8 +9,9 @@ import time from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( + 
HealthStatus, + Model, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionRequestWithExtraBody, @@ -20,10 +21,9 @@ from llama_stack.apis.inference import ( OpenAICompletionRequestWithExtraBody, ToolChoice, ) -from llama_stack.apis.models import Model + from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable -from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py index fff29928c..36d2b86a9 100644 --- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -7,8 +7,8 @@ from unittest.mock import AsyncMock import pytest +from llama_stack_api import ToolDef -from llama_stack.apis.tools import ToolDef from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( convert_tooldef_to_chat_tool, ) diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py index b59636f7b..0d9f1cc35 100644 --- a/tests/unit/providers/nvidia/test_datastore.py +++ b/tests/unit/providers/nvidia/test_datastore.py @@ -8,9 +8,8 @@ import os from unittest.mock import patch import pytest +from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource -from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource -from llama_stack.apis.resource import ResourceType from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py index 86e005b76..c41379801 100644 --- a/tests/unit/providers/nvidia/test_eval.py +++ b/tests/unit/providers/nvidia/test_eval.py @@ -8,12 +8,18 @@ import os from unittest.mock import MagicMock, patch import pytest +from llama_stack_api import ( + Benchmark, + BenchmarkConfig, + EvaluateResponse, + Job, + JobStatus, + ModelCandidate, + ResourceType, + SamplingParams, + TopPSamplingStrategy, +) -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.common.job_types import Job, JobStatus -from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams -from llama_stack.apis.inference.inference import TopPSamplingStrategy -from llama_stack.apis.resource import ResourceType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index ad381da26..ba68a7abe 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -9,8 +9,7 @@ import warnings from unittest.mock import patch import pytest - -from llama_stack.apis.post_training.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, EfficiencyConfig, @@ -19,6 +18,7 @@ from 
llama_stack.apis.post_training.post_training import ( OptimizerType, TrainingConfig, ) + from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( NvidiaPostTrainingAdapter, diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 2793b5f44..8b313abcd 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest +from llama_stack_api import ModelType -from llama_stack.apis.models import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 622302630..ea6254841 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -9,14 +9,15 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIUserMessageParam, + ResourceType, + RunShieldResponse, + Shield, + ViolationLevel, ) -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.safety import RunShieldResponse, ViolationLevel -from llama_stack.apis.shields import Shield + from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 91148605d..4d0ce695b 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -9,8 +9,7 @@ import warnings from unittest.mock import patch import pytest - -from llama_stack.apis.post_training.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, LoraFinetuningConfig, @@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import ( QATFinetuningConfig, TrainingConfig, ) + from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( ListNvidiaPostTrainingJobs, diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py index 684fcf262..df7453712 100644 --- a/tests/unit/providers/test_bedrock.py +++ b/tests/unit/providers/test_bedrock.py @@ -7,7 +7,8 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, PropertyMock, patch -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody +from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 0b5ea078b..b9b59bb79 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ 
b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -10,10 +10,9 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch import pytest +from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam from pydantic import BaseModel, Field -from llama_stack.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.models import ModelType from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py index 62c8db74d..a7c9289d7 100644 --- a/tests/unit/providers/utils/inference/test_prompt_adapter.py +++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py @@ -4,10 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIUserMessageParam, -) +from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam + from llama_stack.models.llama.datatypes import RawTextItem from llama_stack.providers.utils.inference.prompt_adapter import ( convert_openai_message_to_raw_message, diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 590bdd1d2..00db5795a 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -7,9 +7,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from llama_stack_api import URL, RAGDocument, TextContentItem -from llama_stack.apis.common.content_types import URL, TextContentItem -from llama_stack.apis.tools import RAGDocument from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 04e75aa82..4a85cf8b8 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -34,8 +34,8 @@ # import pytest +from llama_stack_api import Model -from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 5e56ea417..216e9b8ea 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,9 +9,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter diff --git a/tests/unit/providers/vector_io/test_faiss.py 
b/tests/unit/providers/vector_io/test_faiss.py index 44bcd0cfd..0d5c1399f 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,11 +9,8 @@ from unittest.mock import MagicMock, patch import numpy as np import pytest +from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore -from llama_stack.apis.files import Files -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( FaissIndex, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 5ee62cd63..17a99ce1c 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,8 +8,8 @@ import asyncio import numpy as np import pytest +from llama_stack_api import Chunk, QueryChunksResponse -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 121623e1b..7ba40eefb 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -10,17 +10,17 @@ from unittest.mock import AsyncMock, patch import numpy as np import pytest - -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, + VectorStore, VectorStoreChunkingStrategyAuto, VectorStoreFileObject, + VectorStoreNotFoundError, ) -from llama_stack.apis.vector_stores import VectorStore + from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX # This test is a unit test for the inline VectorIO providers. 
This should only contain @@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): async def test_insert_chunks_with_missing_document_id(vector_io_adapter): """Ensure no KeyError when document_id is missing or in different places.""" - from llama_stack.apis.vector_io import Chunk, ChunkMetadata + from llama_stack_api import Chunk, ChunkMetadata fake_index = AsyncMock() vector_io_adapter.cache["db1"] = fake_index @@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter): async def test_document_id_with_invalid_type_raises_error(): """Ensure TypeError is raised when document_id is not a string.""" - from llama_stack.apis.vector_io import Chunk + from llama_stack_api import Chunk # Integer document_id should raise TypeError from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py index 1ca753a44..678b76fbd 100644 --- a/tests/unit/providers/vector_io/test_vector_utils.py +++ b/tests/unit/providers/vector_io/test_vector_utils.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack_api import Chunk, ChunkMetadata + from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id # This test is a unit test for the chunk_utils.py helpers. This should only contain diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index 8563d0d53..e3f5e46d7 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -7,13 +7,8 @@ from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig -from llama_stack.apis.tools.rag_tool import RAGQueryConfig -from llama_stack.apis.vector_io import ( - Chunk, - ChunkMetadata, - QueryChunksResponse, -) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 1f73fdb8e..23c12dcab 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -12,13 +12,8 @@ from unittest.mock import AsyncMock, MagicMock import numpy as np import pytest +from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument -from llama_stack.apis.inference.inference import ( - OpenAIEmbeddingData, - OpenAIEmbeddingsRequestWithExtraBody, -) -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk from llama_stack.providers.utils.memory.vector_store import ( URL, VectorStoreWithIndex, diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index d4c9786d1..01f486ab2 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -6,9 +6,8 @@ import pytest +from llama_stack_api import Model, VectorStore -from llama_stack.apis.inference import Model -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( @@ -304,7 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry): async 
def test_double_registration_with_cache(cached_disk_dist_registry): """Test double registration behavior with caching enabled.""" - from llama_stack.apis.models import ModelType + from llama_stack_api import ModelType + from llama_stack.core.datatypes import ModelWithOwner model1 = ModelWithOwner( diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 09b9a3cfb..2827f60b9 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,7 +5,8 @@ # the root directory of this source tree. -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ModelWithOwner, User from llama_stack.core.store.registry import CachedDiskDistributionRegistry diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index ea4f9b8b2..1df933d4d 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -8,10 +8,9 @@ from unittest.mock import MagicMock, Mock, patch import pytest import yaml +from llama_stack_api import Api, ModelType from pydantic import TypeAdapter, ValidationError -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import ModelType from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User from llama_stack.core.routing_tables.models import ModelsRoutingTable diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index cc9397f07..57a552514 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -144,7 +144,7 @@ def middleware_with_mocks(mock_auth_endpoint): middleware = AuthenticationMiddleware(mock_app, auth_config, {}) # Mock the route_impls to simulate finding routes with required scopes - from llama_stack.schema_utils import WebMethod + from llama_stack_api import WebMethod routes = { ("POST", "/test/scoped"): WebMethod(route="/test/scoped", method="POST", required_scope="test.read"), diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index b44f12f7e..071178f96 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,9 +9,9 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock +from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec from pydantic import BaseModel, Field -from llama_stack.apis.inference import Inference from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter @@ -25,7 +25,6 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec from llama_stack.providers.utils.kvstore import register_kvstore_backends from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index 0303a6ded..fdaf9022b 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -9,8 +9,8 @@ import logging # allow-direct-logging from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack_api import PaginatedResponse -from llama_stack.apis.common.responses import PaginatedResponse from 
llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py index 8fe3103bc..79e0b6e28 100644 --- a/tests/unit/tools/test_tools_json_schema.py +++ b/tests/unit/tools/test_tools_json_schema.py @@ -9,9 +9,9 @@ Unit tests for JSON Schema-based tool definitions. Tests the new input_schema and output_schema fields. """ +from llama_stack_api import ToolDef from pydantic import ValidationError -from llama_stack.apis.tools import ToolDef from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index d2de1c759..4da20b125 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -7,14 +7,14 @@ import time import pytest - -from llama_stack.apis.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChoice, OpenAIUserMessageParam, Order, ) + from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 34cff3d3f..1119a93d8 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -9,13 +9,8 @@ from tempfile import TemporaryDirectory from uuid import uuid4 import pytest +from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseInput, - OpenAIResponseObject, -) -from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @@ -46,7 +41,7 @@ def create_test_response_object( def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput: """Helper to create a test response input.""" - from llama_stack.apis.agents.openai_responses import OpenAIResponseMessage + from llama_stack_api import OpenAIResponseMessage return OpenAIResponseMessage( id=input_id, diff --git a/uv.lock b/uv.lock index 884d41b79..ddf8c1cd4 100644 --- a/uv.lock +++ b/uv.lock @@ -1945,6 +1945,7 @@ dependencies = [ { name = "httpx" }, { name = "jinja2" }, { name = "jsonschema" }, + { name = "llama-stack-api" }, { name = "openai" }, { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, @@ -2094,6 +2095,7 @@ requires-dist = [ { name = "httpx" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, + { name = "llama-stack-api", editable = "src/llama-stack-api" }, { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, @@ -2226,6 +2228,25 @@ unit = [ { name = "together" }, ] +[[package]] +name = "llama-stack-api" 
+version = "0.1.0" +source = { editable = "src/llama-stack-api" } +dependencies = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, + { name = "pydantic", specifier = ">=2.11.9" }, +] + [[package]] name = "llama-stack-client" version = "0.3.0" From 2441ca9389f3febabea2504daf8a68c31a00eb75 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 13 Nov 2025 13:16:02 -0800 Subject: [PATCH 07/12] fix(api): ensure openapi spec has deprecated routes (#4156) Deprecated doesn't mean it's "gone", it just means it is "going away" in the next major version of the package. --- client-sdks/stainless/openapi.yml | 371 ++++++++++++++++++ docs/openapi_generator/pyopenapi/generator.py | 4 +- docs/static/stainless-llama-stack-spec.yaml | 371 ++++++++++++++++++ 3 files changed, 744 insertions(+), 2 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 1be4af6c9..65a255c17 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -998,6 +998,39 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: true /v1/models/{model_id}: get: responses: @@ -1032,6 +1065,36 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: true /v1/moderations: post: responses: @@ -1662,6 +1725,32 @@ paths: description: List all scoring functions. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. 
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1693,6 +1782,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: true /v1/scoring/score: post: responses: @@ -1781,6 +1897,36 @@ paths: description: List all shields. parameters: [] deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: true /v1/shields/{identifier}: get: responses: @@ -1812,6 +1958,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: true /v1/tool-runtime/invoke: post: responses: @@ -1907,6 +2080,32 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true /v1/toolgroups/{toolgroup_id}: get: responses: @@ -1938,6 +2137,32 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. 
+ parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: true /v1/tools: get: responses: @@ -11420,6 +11645,152 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. 
+ additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index afbb5c710..9b5f76e2a 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -979,8 +979,8 @@ class Generator: if deprecated: filtered_operations.append(op) elif self.options.stability_filter == "stainless": - # Include both stable (v1 non-deprecated) and experimental (v1alpha, v1beta) endpoints - if (stability_level == "v1" and not deprecated) or stability_level in ["v1alpha", "v1beta"]: + # Include stable (v1), deprecated (v1 deprecated), and experimental (v1alpha, v1beta) endpoints + if stability_level == "v1" or stability_level in ["v1alpha", "v1beta"]: filtered_operations.append(op) operations = filtered_operations diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 1be4af6c9..65a255c17 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -998,6 +998,39 @@ paths: description: List models using the OpenAI API. parameters: [] deprecated: false + post: + responses: + '200': + description: A Model. + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Register model. + description: >- + Register model. + + Register a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterModelRequest' + required: true + deprecated: true /v1/models/{model_id}: get: responses: @@ -1032,6 +1065,36 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. + parameters: + - name: model_id + in: path + description: >- + The identifier of the model to unregister. + required: true + schema: + type: string + deprecated: true /v1/moderations: post: responses: @@ -1662,6 +1725,32 @@ paths: description: List all scoring functions. 
parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Register a scoring function. + description: Register a scoring function. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1693,6 +1782,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ScoringFunctions + summary: Unregister a scoring function. + description: Unregister a scoring function. + parameters: + - name: scoring_fn_id + in: path + description: >- + The ID of the scoring function to unregister. + required: true + schema: + type: string + deprecated: true /v1/scoring/score: post: responses: @@ -1781,6 +1897,36 @@ paths: description: List all shields. parameters: [] deprecated: false + post: + responses: + '200': + description: A Shield. + content: + application/json: + schema: + $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Register a shield. + description: Register a shield. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + deprecated: true /v1/shields/{identifier}: get: responses: @@ -1812,6 +1958,33 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: Unregister a shield. + description: Unregister a shield. + parameters: + - name: identifier + in: path + description: >- + The identifier of the shield to unregister. + required: true + schema: + type: string + deprecated: true /v1/tool-runtime/invoke: post: responses: @@ -1907,6 +2080,32 @@ paths: description: List tool groups with optional provider. parameters: [] deprecated: false + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Register a tool group. + description: Register a tool group. 
+ parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true /v1/toolgroups/{toolgroup_id}: get: responses: @@ -1938,6 +2137,32 @@ paths: schema: type: string deprecated: false + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - ToolGroups + summary: Unregister a tool group. + description: Unregister a tool group. + parameters: + - name: toolgroup_id + in: path + description: The ID of the tool group to unregister. + required: true + schema: + type: string + deprecated: true /v1/tools: get: responses: @@ -11420,6 +11645,152 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. 
+ additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest DataSource: oneOf: - $ref: '#/components/schemas/URIDataSource' From a82b79ce57fce407d4a980149a575e41f73d43b5 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 13 Nov 2025 16:43:31 -0500 Subject: [PATCH 08/12] fix: Error out when creating vector store with unknown embedding model (#4154) # What does this PR do? Error out when creating vector store with unknown embedding model Closes https://github.com/llamastack/llama-stack/issues/4047 ## Test Plan Added tests Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/routers/vector_io.py | 10 ++++++ tests/unit/core/routers/test_vector_io.py | 37 ++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index bfd090e32..47412c07f 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -14,7 +14,9 @@ from llama_stack_api import ( HealthResponse, HealthStatus, InterleavedContent, + ModelNotFoundError, ModelType, + ModelTypeError, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, @@ -124,6 +126,14 @@ class VectorIORouter(VectorIO): if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) + # Validate that embedding model exists and is of the correct type + if embedding_model is not None: + model = await self.routing_table.get_object_by_identifier("model", embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + # Auto-select provider if not specified if provider_id is None: num_providers = len(self.routing_table.impls_by_provider_id) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 071fbe6e7..03bc1ff5f 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ 
b/tests/unit/core/routers/test_vector_io.py @@ -7,7 +7,12 @@ from unittest.mock import AsyncMock, Mock import pytest -from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + OpenAICreateVectorStoreRequestWithExtraBody, +) from llama_stack.core.routers.vector_io import VectorIORouter @@ -21,6 +26,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) @@ -48,6 +54,7 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) router = VectorIORouter(mock_routing_table) request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( {"name": "test_store", "embedding_model": "all-MiniLM-L6-v2"} @@ -117,3 +124,31 @@ async def test_update_vector_store_same_provider_id_succeeds(): provider.openai_update_vector_store.assert_called_once_with( vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"} ) + + +async def test_create_vector_store_with_unknown_embedding_model_raises_error(): + """Test that creating a vector store with an unknown embedding model raises ModelNotFoundError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "unknown-model", "embedding_dimension": 384} + ) + + with pytest.raises(ModelNotFoundError, match="Model 'unknown-model' not found"): + await router.openai_create_vector_store(request) + + +async def test_create_vector_store_with_wrong_model_type_raises_error(): + """Test that creating a vector store with a non-embedding model raises ModelTypeError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.llm)) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "text-model", "embedding_dimension": 384} + ) + + with pytest.raises(ModelTypeError, match="Model 'text-model' is of type"): + await router.openai_create_vector_store(request) From ba744d791ad9cb6e7eccf2fd7128138c02cf7f58 Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Thu, 13 Nov 2025 14:21:03 -0800 Subject: [PATCH 09/12] fix: failure in responses during construct metrics (#4157) # What does this PR do? 
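This updates a stale call site to match the renamed keyword argument of `InferenceRouter._construct_metrics()`. A minimal sketch of the failure mode (the reduced signature below is an assumption for illustration; the real call site is in the diff that follows):

```python
# Reduced illustration (assumed shapes, not the actual implementation):
# the helper's keyword was renamed from model_id to
# fully_qualified_model_id, but one call site kept the old name.
def _construct_metrics(*, prompt_tokens: int, completion_tokens: int,
                       total_tokens: int, fully_qualified_model_id: str,
                       provider_id: str) -> dict:
    return {"model": fully_qualified_model_id, "provider": provider_id,
            "total_tokens": total_tokens}

try:
    _construct_metrics(prompt_tokens=1, completion_tokens=2, total_tokens=3,
                       model_id="llama-3.1-8b", provider_id="ollama")  # stale keyword
except TypeError as exc:
    print(exc)  # _construct_metrics() got an unexpected keyword argument 'model_id'
```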
Without this fix, we get the following in the server logs:

```
RuntimeError: OpenAI response failed: InferenceRouter._construct_metrics() got an unexpected keyword argument 'model_id'
```

It seems the method signature was updated, but this call site was not.

## Test Plan
CI and testing with Sabre (Agent framework integration)
---
 src/llama_stack/core/routers/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index a538ab02e..292a7c4bb 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -417,7 +417,7 @@ class InferenceRouter(Inference):
                 prompt_tokens=chunk.usage.prompt_tokens,
                 completion_tokens=chunk.usage.completion_tokens,
                 total_tokens=chunk.usage.total_tokens,
-                model_id=fully_qualified_model_id,
+                fully_qualified_model_id=fully_qualified_model_id,
                 provider_id=provider_id,
             )
             for metric in metrics:

From a078f089d9070d5618d185fb9dfdbc53f5e3c34f Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Thu, 13 Nov 2025 18:04:36 -0500
Subject: [PATCH 10/12] fix: rename llama_stack_api dir (#4155)

# What does this PR do?

The directory structure was src/llama-stack-api/llama_stack_api; it should instead be src/llama_stack_api to match the other packages. This PR updates the directory structure and the pyproject/linting config accordingly.

---------

Signed-off-by: Charlie Doern
Co-authored-by: Ashwin Bharambe
---
 .github/workflows/python-build-test.yml       |   4 +-
 .pre-commit-config.yaml                       |   2 +-
 pyproject.toml                                |  12 +-
 scripts/generate_prompt_format.py             |   2 +-
 src/llama_stack/cli/stack/_list_deps.py       |   2 +-
 src/llama_stack/cli/stack/utils.py            |   2 +-
 src/llama_stack/core/build.py                 |   2 +-
 src/llama_stack/core/client.py                |   3 +-
 src/llama_stack/core/configure.py             |   3 +-
 .../core/conversations/conversations.py       |  14 +-
 src/llama_stack/core/datatypes.py             |  18 +-
 src/llama_stack/core/distribution.py          |  10 +-
 src/llama_stack/core/external.py              |   2 +-
 src/llama_stack/core/inspect.py               |  10 +-
 src/llama_stack/core/library_client.py        |   1 +
 src/llama_stack/core/prompts/prompts.py       |   2 +-
 src/llama_stack/core/providers.py             |   2 +-
 src/llama_stack/core/resolver.py              |  27 +-
 src/llama_stack/core/routers/__init__.py      |   3 +-
 src/llama_stack/core/routers/datasets.py      |   3 +-
 src/llama_stack/core/routers/eval_scoring.py  |   3 +-
 src/llama_stack/core/routers/inference.py     |  20 +-
 src/llama_stack/core/routers/safety.py        |   3 +-
 src/llama_stack/core/routers/tool_runtime.py  |   3 +-
 src/llama_stack/core/routers/vector_io.py     |   6 +-
 .../core/routing_tables/benchmarks.py         |   3 +-
 src/llama_stack/core/routing_tables/common.py |   3 +-
 .../core/routing_tables/datasets.py           |   9 +-
 src/llama_stack/core/routing_tables/models.py |  15 +-
 .../core/routing_tables/scoring_functions.py  |   9 +-
 .../core/routing_tables/shields.py            |   3 +-
 .../core/routing_tables/toolgroups.py         |   5 +-
 .../core/routing_tables/vector_stores.py      |  10 +-
 src/llama_stack/core/server/auth_providers.py |   2 +-
 src/llama_stack/core/server/routes.py         |   2 +-
 src/llama_stack/core/server/server.py         |   2 +-
 src/llama_stack/core/stack.py                 |  44 +-
 src/llama_stack/core/telemetry/telemetry.py   |   2 +-
 src/llama_stack/distributions/dell/dell.py    |   3 +-
 .../meta-reference-gpu/meta_reference.py      |   3 +-
 .../open-benchmark/open_benchmark.py          |   3 +-
 .../distributions/starter/starter.py          |   3 +-
 src/llama_stack/distributions/template.py     |   2 +-
 .../inline/agents/meta_reference/agents.py    |   9 +-
 .../responses/openai_responses.py             |  14 +-
 .../meta_reference/responses/streaming.py     |   9 +-
.../meta_reference/responses/tool_executor.py | 5 +- .../agents/meta_reference/responses/types.py | 5 +- .../inline/agents/meta_reference/safety.py | 3 +- .../inline/batches/reference/__init__.py | 3 +- .../inline/batches/reference/batches.py | 10 +- .../inline/datasetio/localfs/datasetio.py | 3 +- .../inline/eval/meta_reference/eval.py | 8 +- .../providers/inline/files/localfs/files.py | 16 +- .../inline/inference/meta_reference/config.py | 2 +- .../inference/meta_reference/generators.py | 16 +- .../inference/meta_reference/inference.py | 38 +- .../sentence_transformers.py | 9 +- .../inline/post_training/common/validator.py | 3 +- .../huggingface/post_training.py | 11 +- .../recipes/finetune_single_device.py | 16 +- .../recipes/finetune_single_device_dpo.py | 14 +- .../inline/post_training/huggingface/utils.py | 3 +- .../post_training/torchtune/common/utils.py | 2 +- .../post_training/torchtune/post_training.py | 11 +- .../recipes/lora_finetuning_single_device.py | 22 +- .../safety/code_scanner/code_scanner.py | 9 +- .../inline/safety/llama_guard/llama_guard.py | 15 +- .../safety/prompt_guard/prompt_guard.py | 10 +- .../providers/inline/scoring/basic/scoring.py | 11 +- .../basic/scoring_fn/docvqa_scoring_fn.py | 3 +- .../basic/scoring_fn/equality_scoring_fn.py | 3 +- .../basic/scoring_fn/ifeval_scoring_fn.py | 3 +- .../regex_parser_math_response_scoring_fn.py | 3 +- .../scoring_fn/regex_parser_scoring_fn.py | 3 +- .../basic/scoring_fn/subset_of_scoring_fn.py | 3 +- .../inline/scoring/braintrust/braintrust.py | 20 +- .../inline/scoring/llm_as_judge/scoring.py | 11 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 3 +- .../tool_runtime/rag/context_retriever.py | 8 +- .../inline/tool_runtime/rag/memory.py | 10 +- .../inline/vector_io/chroma/config.py | 2 +- .../inline/vector_io/faiss/config.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 14 +- .../inline/vector_io/milvus/config.py | 2 +- .../inline/vector_io/qdrant/config.py | 2 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 20 +- src/llama_stack/providers/registry/agents.py | 3 +- src/llama_stack/providers/registry/files.py | 3 +- .../providers/registry/tool_runtime.py | 3 +- .../datasetio/huggingface/huggingface.py | 3 +- .../remote/datasetio/nvidia/datasetio.py | 1 + .../providers/remote/eval/nvidia/eval.py | 4 +- .../providers/remote/files/openai/files.py | 12 +- .../providers/remote/files/s3/files.py | 13 +- .../remote/inference/anthropic/config.py | 2 +- .../remote/inference/azure/config.py | 2 +- .../remote/inference/bedrock/bedrock.py | 10 +- .../remote/inference/cerebras/cerebras.py | 3 +- .../remote/inference/cerebras/config.py | 2 +- .../remote/inference/databricks/config.py | 2 +- .../remote/inference/databricks/databricks.py | 2 +- .../remote/inference/fireworks/config.py | 2 +- .../remote/inference/gemini/config.py | 2 +- .../remote/inference/gemini/gemini.py | 3 +- .../providers/remote/inference/groq/config.py | 2 +- .../inference/llama_openai_compat/config.py | 2 +- .../inference/llama_openai_compat/llama.py | 7 +- .../remote/inference/nvidia/config.py | 2 +- .../remote/inference/nvidia/nvidia.py | 6 +- .../providers/remote/inference/oci/config.py | 2 +- .../providers/remote/inference/oci/oci.py | 10 +- .../remote/inference/ollama/ollama.py | 10 +- .../remote/inference/openai/config.py | 2 +- .../remote/inference/passthrough/config.py | 2 +- .../inference/passthrough/passthrough.py | 6 +- .../remote/inference/runpod/config.py | 2 +- .../remote/inference/runpod/runpod.py | 3 +- 
.../remote/inference/sambanova/config.py | 2 +- .../providers/remote/inference/tgi/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 8 +- .../remote/inference/together/config.py | 2 +- .../remote/inference/together/together.py | 12 +- .../remote/inference/vertexai/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 2 +- .../providers/remote/inference/vllm/vllm.py | 8 +- .../remote/inference/watsonx/config.py | 2 +- .../remote/inference/watsonx/watsonx.py | 15 +- .../post_training/nvidia/post_training.py | 10 +- .../remote/post_training/nvidia/utils.py | 2 +- .../remote/safety/bedrock/bedrock.py | 5 +- .../providers/remote/safety/bedrock/config.py | 3 +- .../providers/remote/safety/nvidia/config.py | 3 +- .../providers/remote/safety/nvidia/nvidia.py | 4 +- .../remote/safety/sambanova/config.py | 3 +- .../remote/safety/sambanova/sambanova.py | 6 +- .../tool_runtime/bing_search/bing_search.py | 4 +- .../tool_runtime/brave_search/brave_search.py | 6 +- .../model_context_protocol.py | 7 +- .../tavily_search/tavily_search.py | 4 +- .../wolfram_alpha/wolfram_alpha.py | 4 +- .../remote/vector_io/chroma/chroma.py | 16 +- .../remote/vector_io/chroma/config.py | 2 +- .../remote/vector_io/milvus/config.py | 2 +- .../remote/vector_io/milvus/milvus.py | 22 +- .../remote/vector_io/pgvector/config.py | 2 +- .../remote/vector_io/pgvector/pgvector.py | 22 +- .../remote/vector_io/qdrant/config.py | 2 +- .../remote/vector_io/qdrant/qdrant.py | 18 +- .../remote/vector_io/weaviate/config.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 22 +- .../utils/common/data_schema_validator.py | 3 +- .../providers/utils/files/form_data.py | 3 +- .../utils/inference/inference_store.py | 10 +- .../utils/inference/litellm_openai_mixin.py | 14 +- .../utils/inference/model_registry.py | 2 +- .../utils/inference/openai_compat.py | 24 +- .../providers/utils/inference/openai_mixin.py | 16 +- .../utils/inference/prompt_adapter.py | 36 +- .../providers/utils/kvstore/sqlite/config.py | 3 +- .../utils/memory/openai_vector_store_mixin.py | 20 +- .../providers/utils/memory/vector_store.py | 18 +- .../utils/responses/responses_store.py | 7 +- .../utils/scoring/base_scoring_fn.py | 3 +- .../providers/utils/sqlstore/api.py | 3 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- src/llama_stack/providers/utils/tools/mcp.py | 16 +- .../README.md | 2 +- .../llama_stack_api/__init__.py | 2 +- .../llama_stack_api/agents.py | 0 .../llama_stack_api/batches.py | 0 .../llama_stack_api/benchmarks.py | 0 .../llama_stack_api/common/__init__.py | 0 .../llama_stack_api/common/content_types.py | 0 .../llama_stack_api/common/errors.py | 0 .../llama_stack_api/common/job_types.py | 0 .../llama_stack_api/common/responses.py | 0 .../llama_stack_api/common/tracing.py | 0 .../llama_stack_api/common/training_types.py | 0 .../llama_stack_api/common/type_system.py | 0 .../llama_stack_api/conversations.py | 0 .../llama_stack_api/datasetio.py | 0 .../llama_stack_api/datasets.py | 0 .../llama_stack_api/datatypes.py | 0 .../llama_stack_api/eval.py | 0 .../llama_stack_api/files.py | 0 .../llama_stack_api/inference.py | 0 .../llama_stack_api/inspect.py | 0 .../llama_stack_api/models.py | 0 .../llama_stack_api/openai_responses.py | 0 .../llama_stack_api/post_training.py | 0 .../llama_stack_api/prompts.py | 0 .../llama_stack_api/providers.py | 0 .../llama_stack_api/py.typed | 0 .../pyproject.toml | 2 +- .../llama_stack_api/rag_tool.py | 0 .../llama_stack_api/resource.py | 0 .../llama_stack_api/safety.py | 0 .../llama_stack_api/schema_utils.py 
| 0 .../llama_stack_api/scoring.py | 0 .../llama_stack_api/scoring_functions.py | 0 .../llama_stack_api/shields.py | 0 .../llama_stack_api/strong_typing/__init__.py | 0 .../strong_typing/auxiliary.py | 0 .../llama_stack_api/strong_typing/classdef.py | 0 .../llama_stack_api/strong_typing/core.py | 0 .../strong_typing/deserializer.py | 0 .../strong_typing/docstring.py | 0 .../strong_typing/exception.py | 0 .../strong_typing/inspection.py | 0 .../llama_stack_api/strong_typing/mapping.py | 0 .../llama_stack_api/strong_typing/name.py | 0 .../llama_stack_api/strong_typing/py.typed | 0 .../llama_stack_api/strong_typing/schema.py | 0 .../strong_typing/serialization.py | 0 .../strong_typing/serializer.py | 0 .../llama_stack_api/strong_typing/slots.py | 0 .../strong_typing/topological.py | 0 .../llama_stack_api/tools.py | 0 src/llama_stack_api/uv.lock | 498 ++++++++++++++++++ .../llama_stack_api/vector_io.py | 0 .../llama_stack_api/vector_stores.py | 0 .../llama_stack_api/version.py | 0 tests/integration/batches/conftest.py | 1 + tests/integration/files/test_files.py | 2 +- .../inference/test_provider_data_routing.py | 6 +- .../post_training/test_post_training.py | 4 +- tests/integration/safety/test_llama_guard.py | 2 +- tests/integration/safety/test_safety.py | 1 + .../integration/safety/test_vision_safety.py | 1 + .../tool_runtime/test_registration.py | 2 +- .../vector_io/test_openai_vector_stores.py | 2 +- tests/integration/vector_io/test_vector_io.py | 1 + .../unit/conversations/test_conversations.py | 2 +- tests/unit/core/routers/test_safety_router.py | 3 +- tests/unit/core/routers/test_vector_io.py | 7 +- tests/unit/core/test_stack_validation.py | 2 +- .../routers/test_routing_tables.py | 16 +- .../unit/distribution/test_api_recordings.py | 16 +- tests/unit/distribution/test_distribution.py | 20 +- tests/unit/files/test_files.py | 2 +- .../unit/providers/batches/test_reference.py | 1 + .../batches/test_reference_idempotency.py | 1 + tests/unit/providers/files/test_s3_files.py | 1 + .../providers/files/test_s3_files_auth.py | 2 +- .../inference/test_bedrock_adapter.py | 2 +- .../providers/inference/test_remote_vllm.py | 10 +- .../responses/test_streaming.py | 2 +- tests/unit/providers/nvidia/test_datastore.py | 2 +- tests/unit/providers/nvidia/test_eval.py | 8 +- .../unit/providers/nvidia/test_parameters.py | 12 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 6 +- .../nvidia/test_supervised_fine_tuning.py | 18 +- tests/unit/providers/test_bedrock.py | 3 +- .../utils/inference/test_openai_mixin.py | 2 +- .../utils/inference/test_prompt_adapter.py | 3 +- .../utils/memory/test_vector_store.py | 2 +- .../providers/utils/test_model_registry.py | 2 +- tests/unit/providers/vector_io/conftest.py | 2 +- tests/unit/providers/vector_io/test_faiss.py | 2 +- .../providers/vector_io/test_sqlite_vec.py | 2 +- .../test_vector_io_openai_vector_stores.py | 7 +- .../providers/vector_io/test_vector_utils.py | 3 +- tests/unit/rag/test_rag_query.py | 2 +- tests/unit/rag/test_vector_store.py | 2 +- tests/unit/registry/test_registry.py | 5 +- tests/unit/registry/test_registry_acl.py | 3 +- tests/unit/server/test_access_control.py | 2 +- tests/unit/server/test_resolver.py | 2 +- tests/unit/server/test_sse.py | 2 +- tests/unit/tools/test_tools_json_schema.py | 2 +- .../utils/inference/test_inference_store.py | 8 +- .../utils/responses/test_responses_store.py | 2 +- uv.lock | 8 +- 275 files changed, 1187 insertions(+), 745 deletions(-) rename src/{llama-stack-api => 
llama_stack_api}/README.md (98%) rename src/{llama-stack-api => }/llama_stack_api/__init__.py (99%) rename src/{llama-stack-api => }/llama_stack_api/agents.py (100%) rename src/{llama-stack-api => }/llama_stack_api/batches.py (100%) rename src/{llama-stack-api => }/llama_stack_api/benchmarks.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/__init__.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/content_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/errors.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/job_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/responses.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/tracing.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/training_types.py (100%) rename src/{llama-stack-api => }/llama_stack_api/common/type_system.py (100%) rename src/{llama-stack-api => }/llama_stack_api/conversations.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datasetio.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datasets.py (100%) rename src/{llama-stack-api => }/llama_stack_api/datatypes.py (100%) rename src/{llama-stack-api => }/llama_stack_api/eval.py (100%) rename src/{llama-stack-api => }/llama_stack_api/files.py (100%) rename src/{llama-stack-api => }/llama_stack_api/inference.py (100%) rename src/{llama-stack-api => }/llama_stack_api/inspect.py (100%) rename src/{llama-stack-api => }/llama_stack_api/models.py (100%) rename src/{llama-stack-api => }/llama_stack_api/openai_responses.py (100%) rename src/{llama-stack-api => }/llama_stack_api/post_training.py (100%) rename src/{llama-stack-api => }/llama_stack_api/prompts.py (100%) rename src/{llama-stack-api => }/llama_stack_api/providers.py (100%) rename src/{llama-stack-api => }/llama_stack_api/py.typed (100%) rename src/{llama-stack-api => llama_stack_api}/pyproject.toml (99%) rename src/{llama-stack-api => }/llama_stack_api/rag_tool.py (100%) rename src/{llama-stack-api => }/llama_stack_api/resource.py (100%) rename src/{llama-stack-api => }/llama_stack_api/safety.py (100%) rename src/{llama-stack-api => }/llama_stack_api/schema_utils.py (100%) rename src/{llama-stack-api => }/llama_stack_api/scoring.py (100%) rename src/{llama-stack-api => }/llama_stack_api/scoring_functions.py (100%) rename src/{llama-stack-api => }/llama_stack_api/shields.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/__init__.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/auxiliary.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/classdef.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/core.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/deserializer.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/docstring.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/exception.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/inspection.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/mapping.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/name.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/py.typed (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/schema.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/serialization.py (100%) rename src/{llama-stack-api => 
}/llama_stack_api/strong_typing/serializer.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/slots.py (100%) rename src/{llama-stack-api => }/llama_stack_api/strong_typing/topological.py (100%) rename src/{llama-stack-api => }/llama_stack_api/tools.py (100%) create mode 100644 src/llama_stack_api/uv.lock rename src/{llama-stack-api => }/llama_stack_api/vector_io.py (100%) rename src/{llama-stack-api => }/llama_stack_api/vector_stores.py (100%) rename src/{llama-stack-api => }/llama_stack_api/version.py (100%) diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index b0f2c6e69..b58f4eb69 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -31,7 +31,7 @@ jobs: version: 0.7.6 - name: Build Llama Stack API package - working-directory: src/llama-stack-api + working-directory: src/llama_stack_api run: uv build - name: Build Llama Stack package @@ -39,7 +39,7 @@ jobs: - name: Install Llama Stack package (with api stubs from local build) run: | - uv pip install --find-links src/llama-stack-api/dist dist/*.whl + uv pip install --find-links src/llama_stack_api/dist dist/*.whl - name: Verify Llama Stack package run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6f4dd6a0e..c60440173 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,7 @@ repos: hooks: - id: ruff args: [ --fix ] - exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$ + exclude: ^(src/llama_stack_api/strong_typing/.*)$ - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs diff --git a/pyproject.toml b/pyproject.toml index d287b4be7..34728d6ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"] +include = ["llama_stack", "llama_stack.*", "llama_stack_api", "llama_stack_api.*"] [[tool.uv.index]] name = "pytorch-cpu" @@ -191,7 +191,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] -llama-stack-api = [{ path = "src/llama-stack-api", editable = true }] +llama-stack-api = [{ path = "src/llama_stack_api", editable = true }] [tool.ruff] line-length = 120 @@ -258,7 +258,7 @@ unfixable = [ ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["src", "src/llama-stack-api"] +mypy_path = ["src"] packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] @@ -281,14 +281,12 @@ exclude = [ "^src/llama_stack/core/store/registry\\.py$", "^src/llama_stack/core/utils/exec\\.py$", "^src/llama_stack/core/utils/prompt_for_config\\.py$", - # Moved to llama-stack-api but still excluded "^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/", - "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$", "^src/llama_stack/providers/inline/agents/meta_reference/", "^src/llama_stack/providers/inline/datasetio/localfs/", 
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", @@ -342,9 +340,7 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$", - "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$", - "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$", + "^src/llama_stack_api/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 8099a3f0d..381bbc6a7 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,11 +14,11 @@ import os from pathlib import Path import fire -from llama_stack_api import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model +from llama_stack_api import ModelNotFoundError THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 50fe394fc..82bef1a4f 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,7 +9,6 @@ import sys from pathlib import Path import yaml -from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType @@ -22,6 +21,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger +from llama_stack_api import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index 0a4e22b09..d49b142e0 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,7 +11,6 @@ from functools import lru_cache from pathlib import Path import yaml -from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( @@ -33,6 +32,7 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType +from llama_stack_api import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 27ded7ede..630b2a47f 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,7 +6,6 @@ import sys -from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint @@ -14,6 +13,7 @@ from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger +from llama_stack_api import Api log = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 41acacdb5..ba935a35e 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,10 +12,11 @@ from enum import Enum 
from typing import Any, Union, get_args, get_origin import httpx -from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from termcolor import cprint +from llama_stack_api import RemoteProviderConfig + _CLIENT_CLASSES = {} diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index bdb3b9734..d738b8a61 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,8 +6,6 @@ import textwrap from typing import Any -from llama_stack_api import Api, ProviderSpec - from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, @@ -22,6 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger +from llama_stack_api import Api, ProviderSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index b94cd4fdd..4cf5a82ee 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,6 +8,13 @@ import secrets import time from typing import Any, Literal +from pydantic import BaseModel, TypeAdapter + +from llama_stack.core.datatypes import AccessRule, StackRunConfig +from llama_stack.log import get_logger +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType +from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from llama_stack_api import ( Conversation, ConversationDeletedResource, @@ -18,13 +25,6 @@ from llama_stack_api import ( Conversations, Metadata, ) -from pydantic import BaseModel, TypeAdapter - -from llama_stack.core.datatypes import AccessRule, StackRunConfig -from llama_stack.log import get_logger -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_conversations") diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 4231363b6..1e29690ff 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,6 +9,15 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse +from pydantic import BaseModel, Field, field_validator, model_validator + +from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ( + KVStoreReference, + StorageBackendType, + StorageConfig, +) +from llama_stack.log import LoggingConfig from llama_stack_api import ( Api, Benchmark, @@ -35,15 +44,6 @@ from llama_stack_api import ( VectorStore, VectorStoreInput, ) -from pydantic import BaseModel, Field, field_validator, model_validator - -from llama_stack.core.access_control.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ( - KVStoreReference, - StorageBackendType, - StorageConfig, -) -from llama_stack.log import LoggingConfig LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 diff --git a/src/llama_stack/core/distribution.py 
b/src/llama_stack/core/distribution.py index 162f9f2b0..658c75ef2 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,17 +10,17 @@ import os from typing import Any import yaml +from pydantic import BaseModel + +from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.external import load_external_apis +from llama_stack.log import get_logger from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) -from pydantic import BaseModel - -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.external import load_external_apis -from llama_stack.log import get_logger logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index ce0c7eb72..d1a2d6e42 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,10 +6,10 @@ import yaml -from llama_stack_api import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger +from llama_stack_api import Api, ExternalApiSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 53ddd3475..272c9d1bc 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,6 +6,11 @@ from importlib.metadata import version +from pydantic import BaseModel + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.external import load_external_apis +from llama_stack.core.server.routes import get_all_api_routes from llama_stack_api import ( HealthInfo, HealthStatus, @@ -14,11 +19,6 @@ from llama_stack_api import ( RouteInfo, VersionInfo, ) -from pydantic import BaseModel - -from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.external import load_external_apis -from llama_stack.core.server.routes import get_all_api_routes class DistributionInspectConfig(BaseModel): diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 959284720..2a224d915 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse + from llama_stack_api import is_unwrapped_body_param try: diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index d9532b978..9f532c1cd 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,11 +7,11 @@ import json from typing import Any -from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl +from llama_stack_api import ListPromptsResponse, Prompt, Prompts class PromptServiceConfig(BaseModel): diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 7337d9e35..e3fe3c7b3 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,10 +7,10 @@ import asyncio from typing import Any -from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel from llama_stack.log import get_logger +from 
llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from .datatypes import StackRunConfig from .utils.config import redact_sensitive_fields diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index ca154fbc6..6bc32c2d0 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,6 +8,19 @@ import importlib.metadata import inspect from typing import Any +from llama_stack.core.client import get_client_impl +from llama_stack.core.datatypes import ( + AccessRule, + AutoRoutedProviderSpec, + Provider, + RoutingTableProviderSpec, + StackRunConfig, +) +from llama_stack.core.distribution import builtin_automatically_routed_apis +from llama_stack.core.external import load_external_apis +from llama_stack.core.store import DistributionRegistry +from llama_stack.core.utils.dynamic import instantiate_class_type +from llama_stack.log import get_logger from llama_stack_api import ( LLAMA_STACK_API_V1ALPHA, Agents, @@ -48,20 +61,6 @@ from llama_stack_api import ( Providers as ProvidersAPI, ) -from llama_stack.core.client import get_client_impl -from llama_stack.core.datatypes import ( - AccessRule, - AutoRoutedProviderSpec, - Provider, - RoutingTableProviderSpec, - StackRunConfig, -) -from llama_stack.core.distribution import builtin_automatically_routed_apis -from llama_stack.core.external import load_external_apis -from llama_stack.core.store import DistributionRegistry -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.log import get_logger - logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index c2d051422..289755bcb 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -6,8 +6,6 @@ from typing import Any -from llama_stack_api import Api, RoutingTable - from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, @@ -15,6 +13,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry from llama_stack.providers.utils.inference.inference_store import InferenceStore +from llama_stack_api import Api, RoutingTable async def get_routing_table_impl( diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py index dcf247874..b6a5f3b96 100644 --- a/src/llama_stack/core/routers/datasets.py +++ b/src/llama_stack/core/routers/datasets.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable - from llama_stack.log import get_logger +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py index cbbbf5cc5..4d7269180 100644 --- a/src/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,6 +6,7 @@ from typing import Any +from llama_stack.log import get_logger from llama_stack_api import ( BenchmarkConfig, Eval, @@ -18,8 +19,6 @@ from llama_stack_api import ( ScoringFnParams, ) -from llama_stack.log import get_logger - logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/inference.py 
b/src/llama_stack/core/routers/inference.py index 292a7c4bb..719624e86 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -11,6 +11,16 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Body +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import TypeAdapter + +from llama_stack.core.telemetry.telemetry import MetricEvent +from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span +from llama_stack.log import get_logger +from llama_stack.models.llama.llama3.chat_format import ChatFormat +from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack_api import ( HealthResponse, HealthStatus, @@ -39,16 +49,6 @@ from llama_stack_api import ( RerankResponse, RoutingTable, ) -from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam -from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam -from pydantic import TypeAdapter - -from llama_stack.core.telemetry.telemetry import MetricEvent -from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span -from llama_stack.log import get_logger -from llama_stack.models.llama.llama3.chat_format import ChatFormat -from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.utils.inference.inference_store import InferenceStore logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index f85bbb767..2bc99f14f 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,10 +6,9 @@ from typing import Any -from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield - from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger +from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index 984a8e2a7..eccc05732 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -6,14 +6,13 @@ from typing import Any +from llama_stack.log import get_logger from llama_stack_api import ( URL, ListToolDefsResponse, ToolRuntime, ) -from llama_stack.log import get_logger - from ..routing_tables.toolgroups import ToolGroupsRoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 47412c07f..5256dda44 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -9,6 +9,9 @@ import uuid from typing import Annotated, Any from fastapi import Body + +from llama_stack.core.datatypes import VectorStoresConfig +from llama_stack.log import get_logger from llama_stack_api import ( Chunk, HealthResponse, @@ -38,9 +41,6 @@ from llama_stack_api import ( VectorStoreSearchResponsePage, ) -from llama_stack.core.datatypes import 
VectorStoresConfig
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index 66830bc41..9037ffe8b 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
-
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index cfbafc9a8..a9e3ff95f 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
-
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
 from llama_stack.core.datatypes import (
@@ -20,6 +18,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
+from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index c49c9769b..62fd07b13 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,6 +7,10 @@
 import uuid
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    DatasetWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Dataset,
     DatasetNotFoundError,
@@ -20,11 +24,6 @@ from llama_stack_api import (
     URIDataSource,
 )
 
-from llama_stack.core.datatypes import (
-    DatasetWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index e1210a139..1facbb27b 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,6 +7,13 @@
 import time
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    ModelWithOwner,
+    RegistryEntrySource,
+)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListModelsResponse,
     Model,
@@ -17,14 +24,6 @@ from llama_stack_api import (
     OpenAIModel,
 )
 
-from llama_stack.core.datatypes import (
-    ModelWithOwner,
-    RegistryEntrySource,
-)
-from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index 66165ac2f..65ed26b85 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,6 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import (
+    ScoringFnWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListScoringFunctionsResponse,
     ParamType,
@@ -13,11 +17,6 @@ from llama_stack_api import (
     ScoringFunctions,
 )
 
-from llama_stack.core.datatypes import (
-    ScoringFnWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index 0f981c49d..97b2efb96 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
-
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index a552cb96e..7e2068608 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,6 +6,8 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -16,9 +18,6 @@ from llama_stack_api import (
     ToolGroups,
 )
 
-from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index f95463b3c..93c119542 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,6 +6,11 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
+
 # Removed VectorStores import to avoid exposing public API
 from llama_stack_api import (
     ModelNotFoundError,
@@ -23,11 +28,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import (
-    VectorStoreWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index a7f5d7916..66942dd39 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,6 @@ from urllib.parse import parse_qs, urljoin, urlparse
 
 import httpx
 import jwt
-from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -23,6 +22,7 @@ from llama_stack.core.datatypes import (
     User,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import TokenValidationError
 
 logger = get_logger(name=__name__, category="core::auth")
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index e7a84937d..af5002565 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,10 +10,10 @@ from collections.abc import Callable
 from typing import Any
 
 from aiohttp import hdrs
-from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route
 
 from llama_stack.core.resolver import api_protocol_map
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 8116348ec..0d3513980 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,7 +28,6 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
@@ -57,6 +56,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 674c35f31..00d990cb1 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,6 +12,28 @@ import tempfile
 from typing import Any
 
 import yaml
+
+from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
+from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageBackendConfig,
+    StorageConfig,
+)
+from llama_stack.core.store.registry import create_dist_registry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Agents,
     Api,
@@ -37,28 +59,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
-from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
-from llama_stack.core.distribution import get_provider_registry
-from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
-from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    ServerStoresConfig,
-    SqliteKVStoreConfig,
-    SqliteSqlStoreConfig,
-    SqlStoreReference,
-    StorageBackendConfig,
-    StorageConfig,
-)
-from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
index 1a56277ea..5268fa641 100644
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ b/src/llama_stack/core/telemetry/telemetry.py
@@ -16,7 +16,6 @@ from typing import (
     cast,
 )
 
-from llama_stack_api import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@@ -29,6 +28,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
+from llama_stack_api import json_schema_type, register_schema
 
 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index fd76e3ccb..52a07b7f1 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -18,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index 67af0e92a..a515794d5 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,8 +6,6 @@
 
 from pathlib import Path
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -23,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index 59deca6d0..1f4dbf2c2 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,6 @@
 # the root directory of this source tree.
 
 
-from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
-
 from llama_stack.core.datatypes import (
     BenchmarkInput,
     BuildProvider,
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
 
 
 def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 1a8126290..4c21a8c99 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,8 +7,6 @@
 
 from typing import Any
 
-from llama_stack_api import RemoteProviderSpec
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
@@ -39,6 +37,7 @@ from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOC
 from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
+from llama_stack_api import RemoteProviderSpec
 
 
 def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index faf5fb085..5755a26de 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,7 +10,6 @@ from typing import Any, Literal
 import jinja2
 import rich
 import yaml
-from llama_stack_api import DatasetPurpose, ModelType
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -43,6 +42,7 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 025fcc676..347f6fdb1 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,6 +5,10 @@
 # the root directory of this source tree.
 
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
     Conversations,
@@ -25,11 +29,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
-from llama_stack.providers.utils.responses.responses_store import ResponsesStore
-
 from .config import MetaReferenceAgentsImplConfig
 from .responses.openai_responses import OpenAIResponsesImpl
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 347eeef78..3f88b1562 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,6 +8,13 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
 from llama_stack_api import (
     ConversationItem,
     Conversations,
@@ -34,13 +41,6 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
-)
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 6a791e92d..ea4486b62 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,6 +8,9 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
@@ -65,10 +68,6 @@ from llama_stack_api import (
     WebSearchToolTypes,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-
 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
     convert_chat_choice_to_response_message,
@@ -1022,11 +1021,11 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api import ToolDef
         from openai.types.chat import ChatCompletionToolParam
 
         from llama_stack.models.llama.datatypes import ToolDefinition
         from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef
 
         def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
             tool_def = ToolDefinition(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 38fb2a94f..616ec2477 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,6 +9,8 @@ import json
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ImageContentItem,
     OpenAIChatCompletionContentPartImageParam,
@@ -37,9 +39,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-
 from .types import ChatCompletionContext, ToolExecutionResult
 
 logger = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 35ad03378..f6efcee22 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,6 +7,9 @@
 from dataclasses import dataclass
 from typing import cast
 
+from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel
+
 from llama_stack_api import (
     OpenAIChatCompletionToolCall,
     OpenAIMessageParam,
@@ -26,8 +29,6 @@ from llama_stack_api import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from openai.types.chat import ChatCompletionToolParam
-from pydantic import BaseModel
 
 
 class ToolExecutionResult(BaseModel):
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index dd90ac298..bfb557a99 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,10 +6,9 @@
 
 import asyncio
 
-from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
-
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 
 log = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 27d0f4213..11c4b06a9 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import Files, Inference, Models
-
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
 
 from .batches import ReferenceBatchesImpl
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index f0f8da96c..73727799d 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,6 +13,11 @@ import uuid
 from io import BytesIO
 from typing import Any, Literal
 
+from openai.types.batch import BatchError, Errors
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack_api import (
     Batches,
     BatchObject,
@@ -33,11 +38,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     ResourceNotFoundError,
 )
-from openai.types.batch import BatchError, Errors
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
 
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 1fcfbbef4..6ab1a540f 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,11 +5,10 @@
 # the root directory of this source tree.
 
 from typing import Any
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import LocalFSDatasetIOConfig
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index e6020e8a3..d43e569e2 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,6 +6,10 @@
 import json
 from typing import Any
 
+from tqdm import tqdm
+
+from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -24,10 +28,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     Scoring,
 )
-from tqdm import tqdm
-
-from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-from llama_stack.providers.utils.kvstore import kvstore_impl
 
 from .config import MetaReferenceEvalConfig
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 5e8c887f1..5fb35a378 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,6 +10,14 @@ from pathlib import Path
 from typing import Annotated
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -21,14 +29,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import LocalfsFilesImplConfig
 
 logger = get_logger(name=__name__, category="files")
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index 802e79f15..ec6e8bfe8 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import QuantizationConfig
 from pydantic import BaseModel, field_validator
 
 from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig
 
 
 class MetaReferenceInferenceConfig(BaseModel):
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index 2155a1ae8..6781d0af9 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,6 +8,14 @@ import math
 from typing import Optional
 
 import torch
+from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
+
+from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
+from llama_stack.models.llama.llama3.generation import Llama3
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
+from llama_stack.models.llama.llama4.generation import Llama4
+from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
+from llama_stack.models.llama.sku_types import Model, ModelFamily
 from llama_stack_api import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
@@ -18,14 +26,6 @@ from llama_stack_api import (
     SamplingParams,
     TopPSamplingStrategy,
 )
-from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
-
-from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
-from llama_stack.models.llama.llama3.generation import Llama3
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.generation import Llama4
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
-from llama_stack.models.llama.sku_types import Model, ModelFamily
 
 from .common import model_checkpoint_dir
 from .config import MetaReferenceInferenceConfig
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 753185fe7..42d1299ab 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,23 +9,6 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
-from llama_stack_api import (
-    InferenceProvider,
-    Model,
-    ModelsProtocolPrivate,
-    ModelType,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionUsage,
-    OpenAIChoice,
-    OpenAICompletion,
-    OpenAICompletionRequestWithExtraBody,
-    OpenAIUserMessageParam,
-    ToolChoice,
-)
-
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
@@ -48,6 +31,22 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
     build_hf_repo_model_entry,
 )
+from llama_stack_api import (
+    InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionUsage,
+    OpenAIChoice,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+    ToolChoice,
+)
 
 from .config import MetaReferenceInferenceConfig
 from .generators import LlamaGenerator
@@ -441,6 +440,8 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
         from llama_stack_api import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
@@ -449,9 +450,6 @@ class MetaReferenceInferenceImpl(
             OpenAIChunkChoice,
         )
 
-        from llama_stack.models.llama.datatypes import StopReason
-        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
-
         response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
         created = int(time.time())
         generated_text = ""
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 14c9a41a4..b5cadeec2 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,6 +6,10 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.embedding_mixin import (
+    SentenceTransformerEmbeddingMixin,
+)
 from llama_stack_api import (
     InferenceProvider,
     Model,
@@ -18,11 +22,6 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.embedding_mixin import (
-    SentenceTransformerEmbeddingMixin,
-)
-
 from .config import SentenceTransformersInferenceConfig
 
 log = get_logger(name=__name__, category="inference")
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 7a85d0e03..cc018c865 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,11 +12,10 @@
 
 from typing import Any
 
-from llama_stack_api import ChatCompletionInputType, DialogType, StringType
-
 from llama_stack.providers.utils.common.data_schema_validator import (
     ColumnName,
 )
+from llama_stack_api import ChatCompletionInputType, DialogType, StringType
 
 EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {
     "instruct": [
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index f3f3d8d56..fa939d439 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.huggingface.config import (
+    HuggingFacePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -20,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.huggingface.config import (
-    HuggingFacePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 58a30618c..c7c737fbd 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,14 +12,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    TrainingConfig,
-)
 from peft import LoraConfig
 from transformers import (
     AutoTokenizer,
@@ -28,6 +20,14 @@ from trl import SFTConfig, SFTTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index f7dc3ebf2..da2626555 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,13 +11,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DatasetIO,
-    Datasets,
-    DPOAlignmentConfig,
-    TrainingConfig,
-)
 from transformers import (
     AutoTokenizer,
 )
@@ -25,6 +18,13 @@ from trl import DPOConfig, DPOTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DatasetIO,
+    Datasets,
+    DPOAlignmentConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 86c3c3f52..2037f70e7 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,9 +14,10 @@ from typing import TYPE_CHECKING, Any, Protocol
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 
+from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index 1483b8385..f929ea4dd 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,6 @@ from collections.abc import Callable
 
 import torch
-from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
@@ -24,6 +23,7 @@ from torchtune.modules.transforms import Transform
 
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import Model
+from llama_stack_api import DatasetFormat
 
 BuildLoraModelCallable = Callable[..., torch.nn.Module]
 BuildTokenizerCallable = Callable[..., Llama3Tokenizer]
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 3370d42fa..515ff7b66 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.torchtune.config import (
+    TorchtunePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -21,12 +26,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.torchtune.config import (
-    TorchtunePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 2bf1d0fe7..f5e5db415 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,17 +12,6 @@ from pathlib import Path
 from typing import Any
 
 import torch
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    PostTrainingMetric,
-    QATFinetuningConfig,
-    TrainingConfig,
-)
 from torch import nn
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader, DistributedSampler
@@ -56,6 +45,17 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
     TorchtunePostTrainingConfig,
 )
 from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingMetric,
+    QATFinetuningConfig,
+    TrainingConfig,
+)
 
 log = get_logger(name=__name__, category="post_training")
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 80e907c10..071fbe2dc 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,6 +10,10 @@ from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ModerationObject,
     ModerationObjectResults,
@@ -21,11 +25,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import CodeScannerConfig
 
 log = get_logger(name=__name__, category="safety")
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 36e4280b9..ff1536bea 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,6 +9,13 @@ import uuid
 from string import Template
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import Role
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ImageContentItem,
     Inference,
@@ -26,14 +33,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import Role
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import LlamaGuardConfig
 
 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index b4f495f19..51383da1b 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,6 +7,11 @@
 from typing import Any
 
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,11 +23,6 @@ from llama_stack_api import (
     ShieldStore,
     ViolationLevel,
 )
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-from llama_stack.core.utils.model_utils import model_local_dir
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 
 from .config import PromptGuardConfig, PromptGuardType
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index 326fd9211..cf5cb79ba 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -17,12 +22,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import BasicScoringConfig
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index 93c2627dd..e48bab8fa 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,9 +8,8 @@ import json
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 382c64d88..2e79240be 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 4ec85bb09..33b1c5a31 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.ifeval import (
     ifeval,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 4e9d49e96..1f4f2f979 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
 from .fn_defs.regex_parser_math_response import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 7f213b38c..1cc74f874 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from .fn_defs.regex_parser_multiple_choice_answer import (
     regex_parser_multiple_choice_answer,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index b291924d5..fe15a4972 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index cbab93c74..cfa35547b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,6 +17,16 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+    validate_row_schema,
+)
+from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -29,16 +39,6 @@ from llama_stack_api import (
     ScoringResult,
     ScoringResultRow,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import Api
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-    validate_row_schema,
-)
-from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 
 from .config import BraintrustScoringConfig
 from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index aa636d2b3..23e6ad705 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -18,12 +23,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import LlmAsJudgeScoringConfig
 from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index 169a4d8b7..73ce82cda 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
 
 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
 from .fn_defs.llm_as_judge_base import llm_as_judge_base
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index f499989cb..240df199b 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,6 +6,10 @@
 
 from jinja2 import Template
+
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
     InterleavedContent,
@@ -16,10 +20,6 @@ from llama_stack_api import (
     RAGQueryGeneratorConfig,
 )
 
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 
 async def generate_rag_query(
     config: RAGQueryGeneratorConfig,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index aacb7bb38..895d219bb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,6 +12,11 @@ from typing import Any
 
 import httpx
 from fastapi import UploadFile
+from pydantic import TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 from llama_stack_api import (
     URL,
     Files,
@@ -34,11 +39,6 @@ from llama_stack_api import (
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index d955b1d06..3897991f5 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd433f818..d516d9fe9 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index abef42499..d52a54e6a 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,6 +12,13 @@ from typing import Any
 
 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from numpy.typing import NDArray
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 from llama_stack_api import (
     Chunk,
     Files,
@@ -25,13 +32,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
     VectorStoresProtocolPrivate,
 )
-from numpy.typing import NDArray
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index 08d05c991..14ddd2362 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index 437d643f0..4251f2f39 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index e979ff323..74bc349a5 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,16 +12,6 @@ from typing import Any
 
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
-from llama_stack_api import (
-    Chunk,
-    Files,
-    Inference,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStore,
-    VectorStoreNotFoundError,
-    VectorStoresProtocolPrivate,
-)
 from numpy.typing import NDArray
 
 from llama_stack.log import get_logger
@@ -35,6 +25,16 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 
 logger = get_logger(name=__name__, category="vector_io")
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index bd204cecd..455be1ae7 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,14 +5,13 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.utils.kvstore import kvstore_dependencies
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
 
-from llama_stack.providers.utils.kvstore import kvstore_dependencies
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index dfc527816..024254b57 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
-
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 3f0a83a30..d34312353 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
+from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
@@ -12,8 +13,6 @@ from llama_stack_api import (
     RemoteProviderSpec,
 )
 
-from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 1260ce644..72069f716 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import HuggingfaceDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index cb674b0d7..2f5548fa9 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,6 +7,7 @@
 from typing import Any
 
 import aiohttp
+
 from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
 
 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index fbdec0d4d..5802cb098 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,6 +6,8 @@
 from typing import Any
 
 import requests
+
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -22,8 +24,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-
 from .config import NVIDIAEvalConfig
 
 DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index bbd630977..d2f5a08eb 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,6 +8,12 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -18,12 +24,6 @@ from llama_stack_api import (
     Order,
     ResourceNotFoundError,
 )
-
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from openai import OpenAI
 
 from .config import OpenAIFilesImplConfig
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 14f1e3852..68822eb77 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,6 +17,12 @@ from fastapi import Depends, File, Form, Response, UploadFile
 
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -28,13 +34,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import S3FilesImplConfig
 
 # TODO: provider data for S3 credentials
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 7ee4c54e2..b706b90e1 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 596f6c234..b801b91b2 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AzureProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1a9fe533b..70ee95916 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,6 +6,11 @@
 
 from collections.abc import AsyncIterator, Iterable
 
+from openai import AuthenticationError
+
+from llama_stack.core.telemetry.tracing import get_current_span
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -15,11 +20,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from openai import AuthenticationError
-
-from llama_stack.core.telemetry.tracing import get_current_span
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import BedrockConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index c7f3111f9..680431e22 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,13 +6,12 @@
 
 from urllib.parse import urljoin
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index a1fd41e2d..db357fd1c 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 4974593d2..bd409fa13 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 8b802379f..c07d97b67 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,10 +7,10 @@
 
 from collections.abc import Iterable
 
 from databricks.sdk import WorkspaceClient
-from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from .config import DatabricksImplConfig
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index d786655eb..e36c76054 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from
llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py index 6c25c005c..46cec7d0d 100644 --- a/src/llama_stack/providers/remote/inference/gemini/config.py +++ b/src/llama_stack/providers/remote/inference/gemini/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class GeminiProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py index 79d694f06..f6f48cc2b 100644 --- a/src/llama_stack/providers/remote/inference/gemini/gemini.py +++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py @@ -6,6 +6,7 @@ from typing import Any +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, @@ -13,8 +14,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from .config import GeminiConfig diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py index cec327716..cca53a4e8 100644 --- a/src/llama_stack/providers/remote/inference/groq/config.py +++ b/src/llama_stack/providers/remote/inference/groq/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class GroqProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py index c16311830..ded210d89 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class LlamaProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 1dea3e3cb..a5f67ecd1 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -4,6 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, @@ -11,10 +14,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, ) -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - logger = get_logger(name=__name__, category="inference::llama_openai_compat") diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py index 6ff98d290..e5b0c6b73 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/config.py +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class NVIDIAProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py index 9e4c6f559..17f8775bf 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -8,6 +8,9 @@ from collections.abc import Iterable import aiohttp + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( Model, ModelType, @@ -17,9 +20,6 @@ from llama_stack_api import ( RerankResponse, ) -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from . 
import NVIDIAConfig from .utils import _is_nvidia_hosted diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py index 24b4ad926..93cc36d76 100644 --- a/src/llama_stack/providers/remote/inference/oci/config.py +++ b/src/llama_stack/providers/remote/inference/oci/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class OCIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py index 36e56cf6c..239443963 100644 --- a/src/llama_stack/providers/remote/inference/oci/oci.py +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -10,11 +10,6 @@ from typing import Any import httpx import oci -from llama_stack_api import ( - ModelType, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) from oci.generative_ai.generative_ai_client import GenerativeAiClient from oci.generative_ai.models import ModelCollection from openai._base_client import DefaultAsyncHttpxClient @@ -23,6 +18,11 @@ from llama_stack.log import get_logger from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth from llama_stack.providers.remote.inference.oci.config import OCIConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + ModelType, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) logger = get_logger(name=__name__, category="inference::oci") diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py index 6a471429e..d1bf85361 100644 --- a/src/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py @@ -7,17 +7,17 @@ import asyncio +from ollama import AsyncClient as AsyncOllamaClient + +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, HealthStatus, Model, UnsupportedModelError, ) -from ollama import AsyncClient as AsyncOllamaClient - -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin logger = get_logger(name=__name__, category="inference::ollama") diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py index cbb01b2d0..ab28e571f 100644 --- a/src/llama_stack/providers/remote/inference/openai/config.py +++ b/src/llama_stack/providers/remote/inference/openai/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class OpenAIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index 
7045dbf2e..54508b6fb 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 19cf0c5d7..75eedf026 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -6,6 +6,9 @@ from collections.abc import AsyncIterator +from openai import AsyncOpenAI + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( Inference, Model, @@ -17,9 +20,6 @@ from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from openai import AsyncOpenAI - -from llama_stack.core.request_headers import NeedsRequestProviderData from .config import PassthroughImplConfig diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py index aaa4230a8..2ee56ca94 100644 --- a/src/llama_stack/providers/remote/inference/runpod/config.py +++ b/src/llama_stack/providers/remote/inference/runpod/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class RunpodProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py index 4596b2df5..9c770cc24 100644 --- a/src/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py @@ -6,14 +6,13 @@ from collections.abc import AsyncIterator +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - from .config import RunpodImplConfig diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py index 6d72e7205..93679ba99 100644 --- a/src/llama_stack/providers/remote/inference/sambanova/config.py +++ b/src/llama_stack/providers/remote/inference/sambanova/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class SambaNovaProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py index 051a2afa3..74edc8523 100644 --- a/src/llama_stack/providers/remote/inference/tgi/config.py +++ b/src/llama_stack/providers/remote/inference/tgi/config.py @@ -5,10 +5,10 @@ # the root directory of this source tree. 
-from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 831a26e39..dd47ccc62 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -8,14 +8,14 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi -from llama_stack_api import ( - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) from pydantic import SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py index 96c0538e3..c1b3c4a55 100644 --- a/src/llama_stack/providers/remote/inference/together/config.py +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py index f1355a760..cd34aec5e 100644 --- a/src/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -8,18 +8,18 @@ from collections.abc import Iterable from typing import Any, cast -from llama_stack_api import ( - Model, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, -) from together import AsyncTogether # type: ignore[import-untyped] from together.constants import BASE_URL # type: ignore[import-untyped] from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + Model, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, +) from .config import TogetherImplConfig diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py index 53e2b3e65..5891f7cd0 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/config.py +++ b/src/llama_stack/providers/remote/inference/vertexai/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class VertexAIProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py index 23f713961..c43533ee4 100644 --- a/src/llama_stack/providers/remote/inference/vllm/config.py 
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,10 +6,10 @@ from pathlib import Path -from llama_stack_api import json_schema_type from pydantic import Field, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index f7938c22c..1510e9384 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,6 +7,10 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx +from pydantic import ConfigDict + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, HealthStatus, @@ -15,10 +19,6 @@ from llama_stack_api import ( OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) -from pydantic import ConfigDict - -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import VLLMInferenceAdapterConfig diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index 1bba040ef..914f80820 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,10 +7,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type class WatsonXProviderDataValidator(BaseModel): diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index de23c25d7..aab9e2dca 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -9,6 +9,12 @@ from typing import Any import litellm import requests + +from llama_stack.core.telemetry.tracing import get_current_span +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params from llama_stack_api import ( Model, ModelType, @@ -22,12 +28,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, ) -from llama_stack.core.telemetry.tracing import get_current_span -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig -from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params - logger = get_logger(name=__name__, category="providers::remote::watsonx") @@ -238,9 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack_api import OpenAIEmbeddingUsage - from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response + from llama_stack_api import OpenAIEmbeddingUsage 
data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index 02c35241b..830a9f747 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,6 +8,11 @@ from datetime import datetime from typing import Any, Literal import aiohttp +from pydantic import BaseModel, ConfigDict + +from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig +from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, @@ -17,11 +22,6 @@ from llama_stack_api import ( PostTrainingJobStatusResponse, TrainingConfig, ) -from pydantic import BaseModel, ConfigDict - -from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig -from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from .models import _MODEL_ENTRIES diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index 78762155d..bd40dacb4 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,11 +7,11 @@ import warnings from typing import Any -from llama_stack_api import TrainingConfig from pydantic import BaseModel from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig +from llama_stack_api import TrainingConfig from .config import NvidiaPostTrainingConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 86b93c32e..c321f759b 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,6 +7,8 @@ import json from typing import Any +from llama_stack.log import get_logger +from llama_stack.providers.utils.bedrock.client import create_bedrock_client from llama_stack_api import ( OpenAIMessageParam, RunShieldResponse, @@ -17,9 +19,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.log import get_logger -from llama_stack.providers.utils.bedrock.client import create_bedrock_client - from .config import BedrockSafetyConfig logger = get_logger(name=__name__, category="safety::bedrock") diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index ca28924d4..0b1f2581a 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,9 +5,8 @@ # the root directory of this source tree. 
-from llama_stack_api import json_schema_type - from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index fc686ae73..f11de5feb 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,9 +6,10 @@ import os from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field +from llama_stack_api import json_schema_type + @json_schema_type class NVIDIASafetyConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index b3b5090e0..43ff45cc9 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,6 +7,8 @@ from typing import Any import requests + +from llama_stack.log import get_logger from llama_stack_api import ( ModerationObject, OpenAIMessageParam, @@ -18,8 +20,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.log import get_logger - from .config import NVIDIASafetyConfig logger = get_logger(name=__name__, category="safety::nvidia") diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index a8e745851..bfb42d88a 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,9 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr +from llama_stack_api import json_schema_type + class SambaNovaProviderDataValidator(BaseModel): sambanova_api_key: str | None = Field( diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 119ebb6ed..c11cb544d 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,6 +8,9 @@ from typing import Any import litellm import requests + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger from llama_stack_api import ( OpenAIMessageParam, RunShieldResponse, @@ -18,9 +21,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger - from .config import SambaNovaSafetyConfig logger = get_logger(name=__name__, category="safety::sambanova") diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 84e47dd4f..a5a53a9eb 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import BingSearchToolConfig diff --git 
a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index b7eee776a..4888730e4 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,6 +7,9 @@ from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.models.llama.datatypes import BuiltinTool from llama_stack_api import ( URL, ListToolDefsResponse, @@ -17,9 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.models.llama.datatypes import BuiltinTool - from .config import BraveSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index efb1eb2df..544597a51 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,6 +7,9 @@ from typing import Any from urllib.parse import urlparse +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools from llama_stack_api import ( URL, Api, @@ -17,10 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools - from .config import MCPProviderConfig logger = get_logger(__name__, category="tools") diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d65d66e67..d86cf5d8e 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import TavilySearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 9cc865092..f8d806a5c 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,6 +8,8 @@ import json from typing import Any import httpx + +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack_api import ( URL, ListToolDefsResponse, @@ -18,8 +20,6 @@ from llama_stack_api import ( ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData - from .config import WolframAlphaToolConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 
eca5d349b..645b40661 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,6 +9,14 @@ from typing import Any from urllib.parse import urlparse import chromadb +from numpy.typing import NDArray + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -19,14 +27,6 @@ from llama_stack_api import ( VectorStore, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index b1e4f9a4a..648d641ad 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 2e2c788c7..4b9d6a566 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index b856bf918..aefa20317 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,17 +8,6 @@ import asyncio import os from typing import Any -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker @@ -34,6 +23,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( 
+ Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index aeb1c83bb..87d40a883 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 8aa0303b6..2901bad97 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,17 +8,6 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values @@ -31,6 +20,17 @@ from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 8cc4cbb2b..e0a3fe207 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 53d6be2b6..20ab653d0 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,6 +9,15 @@ import hashlib import uuid from typing import Any +from numpy.typing import NDArray +from qdrant_client import AsyncQdrantClient, models +from qdrant_client.models import PointStruct + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import 
ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -22,15 +31,6 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray -from qdrant_client import AsyncQdrantClient, models -from qdrant_client.models import PointStruct - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 19f9679fb..75d1b7c51 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index c72666f63..ba3e6b7ea 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,17 +8,6 @@ from typing import Any import weaviate import weaviate.classes as wvc -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion @@ -35,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index 7ef245779..c9a3b0920 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,9 +7,8 @@ from enum import Enum from typing import Any -from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType - from llama_stack.core.datatypes import Api +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType class ColumnName(Enum): diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 21afbec2b..3fac14f38 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,9 +7,10 @@ import json from fastapi import Request -from llama_stack_api import ExpiresAfter from pydantic 
import BaseModel, ValidationError +from llama_stack_api import ExpiresAfter + async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 3c707dd01..49e3af7a1 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,6 +6,11 @@ import asyncio from typing import Any +from sqlalchemy.exc import IntegrityError + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType +from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, @@ -13,11 +18,6 @@ from llama_stack_api import ( OpenAIMessageParam, Order, ) -from sqlalchemy.exc import IntegrityError - -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType -from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 4f468725b..c462d1aad 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,6 +9,13 @@ import struct from collections.abc import AsyncIterator import litellm + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack.providers.utils.inference.openai_compat import ( + prepare_openai_completion_params, +) from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, @@ -22,13 +29,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry -from llama_stack.providers.utils.inference.openai_compat import ( - prepare_openai_completion_params, -) - logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index e7ca5ab74..42b54497f 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,13 +6,13 @@ from typing import Any -from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c97e42274..32d41ffde 100644 --- 
a/src/llama_stack/providers/utils/inference/openai_compat.py
+++ b/src/llama_stack/providers/utils/inference/openai_compat.py
@@ -20,18 +20,6 @@ except ImportError:
     from openai.types.chat.chat_completion_message_tool_call import (
         ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
     )
-from llama_stack_api import (
-    URL,
-    GreedySamplingStrategy,
-    ImageContentItem,
-    JsonSchemaResponseFormat,
-    OpenAIResponseFormatParam,
-    SamplingParams,
-    TextContentItem,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    _URLOrData,
-)
 from openai.types.chat import (
     ChatCompletionMessageToolCall,
 )
@@ -44,6 +32,18 @@ from llama_stack.models.llama.datatypes import (
     ToolCall,
     ToolDefinition,
 )
+from llama_stack_api import (
+    URL,
+    GreedySamplingStrategy,
+    ImageContentItem,
+    JsonSchemaResponseFormat,
+    OpenAIResponseFormatParam,
+    SamplingParams,
+    TextContentItem,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    _URLOrData,
+)
 
 
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
index c05873df5..559ac90ce 100644
--- a/src/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/openai_mixin.py
@@ -10,6 +10,14 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any
 
+from openai import AsyncOpenAI
+from pydantic import BaseModel, ConfigDict
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
 from llama_stack_api import (
     Model,
     ModelType,
@@ -24,14 +32,6 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
 )
-from openai import AsyncOpenAI
-from pydantic import BaseModel, ConfigDict
-
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
 
 
 logger = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py
index ea01a34e9..6272c9eed 100644
--- a/src/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -12,24 +12,6 @@ import re
 from typing import Any
 
 import httpx
-from llama_stack_api import (
-    CompletionRequest,
-    ImageContentItem,
-    InterleavedContent,
-    InterleavedContentItem,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIFile,
-    OpenAIMessageParam,
-    OpenAISystemMessageParam,
-    OpenAIToolMessageParam,
-    OpenAIUserMessageParam,
-    ResponseFormat,
-    ResponseFormatType,
-    TextContentItem,
-    ToolChoice,
-)
 from PIL import Image as PIL_Image
 
 from llama_stack.log import get_logger
@@ -48,6 +30,24 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
+from llama_stack_api import (
+    CompletionRequest,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIFile,
+    OpenAIMessageParam,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseFormat,
+    ResponseFormatType,
+    TextContentItem,
+    ToolChoice,
+)
 
 
 log = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
index 895268a4f..0f8fa0a95 100644
--- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py
+++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack_api import json_schema_type
+
 
 @json_schema_type
 class SqliteControlPlaneConfig(BaseModel):
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 68d1c11e5..540ff5940 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -13,6 +13,16 @@ from abc import ABC, abstractmethod
 from typing import Annotated, Any
 
 from fastapi import Body
+from pydantic import TypeAdapter
+
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.vector_store import (
+    ChunkForDeletion,
+    content_from_data_and_mime_type,
+    make_overlapped_chunks,
+)
 from llama_stack_api import (
     Chunk,
     Files,
@@ -43,16 +53,6 @@ from llama_stack_api import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    content_from_data_and_mime_type,
-    make_overlapped_chunks,
-)
 
 
 EMBEDDING_DIMENSION = 768
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index 37ac79039..b6a671ddb 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -14,6 +14,15 @@ from urllib.parse import unquote
 
 import httpx
 import numpy as np
+from numpy.typing import NDArray
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack_api import (
     URL,
     Api,
@@ -25,15 +34,6 @@ from llama_stack_api import (
     RAGDocument,
     VectorStore,
 )
-from numpy.typing import NDArray
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 
 
 log = get_logger(name=__name__, category="providers::utils")
diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py
index c7dfed15a..f6e7c435d 100644
--- a/src/llama_stack/providers/utils/responses/responses_store.py
+++ b/src/llama_stack/providers/utils/responses/responses_store.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
@@ -15,10 +18,6 @@ from llama_stack_api import (
     Order,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
 from ..sqlstore.api import ColumnDefinition, ColumnType
 from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
 from ..sqlstore.sqlstore import sqlstore_impl
diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
index d16c75263..f372db8b5 100644
--- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py
index 033a00edc..708fc7095 100644
--- a/src/llama_stack/providers/utils/sqlstore/api.py
+++ b/src/llama_stack/providers/utils/sqlstore/api.py
@@ -8,9 +8,10 @@ from collections.abc import Mapping, Sequence
 from enum import Enum
 from typing import Any, Literal, Protocol
 
-from llama_stack_api import PaginatedResponse
 from pydantic import BaseModel
 
+from llama_stack_api import PaginatedResponse
+
 
 class ColumnType(Enum):
     INTEGER = "INTEGER"
diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
index 263f5e69f..10009d396 100644
--- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
+++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
@@ -6,7 +6,6 @@
 from collections.abc import Mapping, Sequence
 from typing import Any, Literal, cast
 
-from llama_stack_api import PaginatedResponse
 from sqlalchemy import (
     JSON,
     Boolean,
@@ -29,6 +28,7 @@ from sqlalchemy.sql.elements import ColumnElement
 
 from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig
 from llama_stack.log import get_logger
+from llama_stack_api import PaginatedResponse
 
 from .api import ColumnDefinition, ColumnType, SqlStore
diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
index 82c85f46c..fad1bf0f0 100644
--- a/src/llama_stack/providers/utils/tools/mcp.py
+++ b/src/llama_stack/providers/utils/tools/mcp.py
@@ -10,6 +10,14 @@ from enum import Enum
 from typing import Any, cast
 
 import httpx
+from mcp import ClientSession, McpError
+from mcp import types as mcp_types
+from mcp.client.sse import sse_client
+from mcp.client.streamable_http import streamablehttp_client
+
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 from llama_stack_api import (
     ImageContentItem,
     InterleavedContentItem,
@@ -19,14 +27,6 @@ from llama_stack_api import (
     ToolInvocationResult,
     _URLOrData,
 )
-from mcp import ClientSession, McpError
-from mcp import types as mcp_types
-from mcp.client.sse import sse_client
-from mcp.client.streamable_http import streamablehttp_client
-
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 
 logger = get_logger(__name__, category="tools")
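All of the hunks above make the same mechanical change: `llama_stack_api` is reclassified from a third-party import into the first-party group, so it now sorts after the `llama_stack` imports, consistent with the relocation of the package into `src/llama_stack_api/` in the renames that follow. A minimal sketch of the resulting grouping (illustrative only, not part of the patch):

```python
# Illustrative sketch of the import order these hunks converge on:
# standard library first, third-party second, first-party last,
# with llama_stack_api sorting alphabetically after llama_stack.
from typing import Any  # standard library

import httpx  # third-party

from llama_stack.log import get_logger  # first-party
from llama_stack_api import Model  # first-party (previously grouped as third-party)
```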
diff --git a/src/llama-stack-api/README.md b/src/llama_stack_api/README.md
similarity index 98%
rename from src/llama-stack-api/README.md
rename to src/llama_stack_api/README.md
index aa6b05722..9bf1d2726 100644
--- a/src/llama-stack-api/README.md
+++ b/src/llama_stack_api/README.md
@@ -53,7 +53,7 @@ This package follows semantic versioning independently from the main `llama-stack` package:
 - **Minor versions** (0.x.0): New APIs, backward-compatible changes
 - **Major versions** (x.0.0): Breaking changes to existing APIs
 
-Current version: **0.1.0**
+Current version: **0.4.0.dev0**
 
 ## Usage Example
 
diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py
similarity index 99%
rename from src/llama-stack-api/llama_stack_api/__init__.py
rename to src/llama_stack_api/__init__.py
index 8bbe9f8bd..19b29301b 100644
--- a/src/llama-stack-api/llama_stack_api/__init__.py
+++ b/src/llama_stack_api/__init__.py
@@ -19,7 +19,7 @@ Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported
 and considered a code smell. All exported symbols are explicitly listed in __all__.
 """
 
-__version__ = "0.4.0"
+__version__ = "0.4.0.dev0"
 
 # Import submodules for those who need them
 from . import common, strong_typing  # noqa: F401
diff --git a/src/llama-stack-api/llama_stack_api/agents.py b/src/llama_stack_api/agents.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/agents.py
rename to src/llama_stack_api/agents.py
diff --git a/src/llama-stack-api/llama_stack_api/batches.py b/src/llama_stack_api/batches.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/batches.py
rename to src/llama_stack_api/batches.py
diff --git a/src/llama-stack-api/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/benchmarks.py
rename to src/llama_stack_api/benchmarks.py
diff --git a/src/llama-stack-api/llama_stack_api/common/__init__.py b/src/llama_stack_api/common/__init__.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/__init__.py
rename to src/llama_stack_api/common/__init__.py
diff --git a/src/llama-stack-api/llama_stack_api/common/content_types.py b/src/llama_stack_api/common/content_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/content_types.py
rename to src/llama_stack_api/common/content_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/errors.py b/src/llama_stack_api/common/errors.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/errors.py
rename to src/llama_stack_api/common/errors.py
diff --git a/src/llama-stack-api/llama_stack_api/common/job_types.py b/src/llama_stack_api/common/job_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/job_types.py
rename to src/llama_stack_api/common/job_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/responses.py b/src/llama_stack_api/common/responses.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/responses.py
rename to src/llama_stack_api/common/responses.py
diff --git a/src/llama-stack-api/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/tracing.py
rename to src/llama_stack_api/common/tracing.py
diff --git a/src/llama-stack-api/llama_stack_api/common/training_types.py b/src/llama_stack_api/common/training_types.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/training_types.py
rename to src/llama_stack_api/common/training_types.py
diff --git a/src/llama-stack-api/llama_stack_api/common/type_system.py b/src/llama_stack_api/common/type_system.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/common/type_system.py
rename to src/llama_stack_api/common/type_system.py
diff --git a/src/llama-stack-api/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/conversations.py
rename to src/llama_stack_api/conversations.py
diff --git a/src/llama-stack-api/llama_stack_api/datasetio.py b/src/llama_stack_api/datasetio.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datasetio.py
rename to src/llama_stack_api/datasetio.py
diff --git a/src/llama-stack-api/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datasets.py
rename to src/llama_stack_api/datasets.py
diff --git a/src/llama-stack-api/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/datatypes.py
rename to src/llama_stack_api/datatypes.py
diff --git a/src/llama-stack-api/llama_stack_api/eval.py b/src/llama_stack_api/eval.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/eval.py
rename to src/llama_stack_api/eval.py
diff --git a/src/llama-stack-api/llama_stack_api/files.py b/src/llama_stack_api/files.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/files.py
rename to src/llama_stack_api/files.py
diff --git a/src/llama-stack-api/llama_stack_api/inference.py b/src/llama_stack_api/inference.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/inference.py
rename to src/llama_stack_api/inference.py
diff --git a/src/llama-stack-api/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/inspect.py
rename to src/llama_stack_api/inspect.py
diff --git a/src/llama-stack-api/llama_stack_api/models.py b/src/llama_stack_api/models.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/models.py
rename to src/llama_stack_api/models.py
diff --git a/src/llama-stack-api/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/openai_responses.py
rename to src/llama_stack_api/openai_responses.py
diff --git a/src/llama-stack-api/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/post_training.py
rename to src/llama_stack_api/post_training.py
diff --git a/src/llama-stack-api/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/prompts.py
rename to src/llama_stack_api/prompts.py
diff --git a/src/llama-stack-api/llama_stack_api/providers.py b/src/llama_stack_api/providers.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/providers.py
rename to src/llama_stack_api/providers.py
diff --git a/src/llama-stack-api/llama_stack_api/py.typed b/src/llama_stack_api/py.typed
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/py.typed
rename to src/llama_stack_api/py.typed
diff --git a/src/llama-stack-api/pyproject.toml b/src/llama_stack_api/pyproject.toml
similarity index 99%
rename from src/llama-stack-api/pyproject.toml
rename to src/llama_stack_api/pyproject.toml
index a00472d36..0ceb2bb4e 100644
--- a/src/llama-stack-api/pyproject.toml
+++ b/src/llama_stack_api/pyproject.toml
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
 
 [project]
 name = "llama-stack-api"
-version = "0.1.0"
+version = "0.4.0.dev0"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
 readme = "README.md"
diff --git a/src/llama-stack-api/llama_stack_api/rag_tool.py b/src/llama_stack_api/rag_tool.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/rag_tool.py
rename to src/llama_stack_api/rag_tool.py
diff --git a/src/llama-stack-api/llama_stack_api/resource.py b/src/llama_stack_api/resource.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/resource.py
rename to src/llama_stack_api/resource.py
diff --git a/src/llama-stack-api/llama_stack_api/safety.py b/src/llama_stack_api/safety.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/safety.py
rename to src/llama_stack_api/safety.py
diff --git a/src/llama-stack-api/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/schema_utils.py
rename to src/llama_stack_api/schema_utils.py
diff --git a/src/llama-stack-api/llama_stack_api/scoring.py b/src/llama_stack_api/scoring.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/scoring.py
rename to src/llama_stack_api/scoring.py
diff --git a/src/llama-stack-api/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/scoring_functions.py
rename to src/llama_stack_api/scoring_functions.py
diff --git a/src/llama-stack-api/llama_stack_api/shields.py b/src/llama_stack_api/shields.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/shields.py
rename to src/llama_stack_api/shields.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py b/src/llama_stack_api/strong_typing/__init__.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/__init__.py
rename to src/llama_stack_api/strong_typing/__init__.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py b/src/llama_stack_api/strong_typing/auxiliary.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
rename to src/llama_stack_api/strong_typing/auxiliary.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py b/src/llama_stack_api/strong_typing/classdef.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
rename to src/llama_stack_api/strong_typing/classdef.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/core.py b/src/llama_stack_api/strong_typing/core.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/core.py
rename to src/llama_stack_api/strong_typing/core.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py b/src/llama_stack_api/strong_typing/deserializer.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
rename to src/llama_stack_api/strong_typing/deserializer.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py b/src/llama_stack_api/strong_typing/docstring.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
rename to src/llama_stack_api/strong_typing/docstring.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py b/src/llama_stack_api/strong_typing/exception.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/exception.py
rename to src/llama_stack_api/strong_typing/exception.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py b/src/llama_stack_api/strong_typing/inspection.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
rename to src/llama_stack_api/strong_typing/inspection.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py b/src/llama_stack_api/strong_typing/mapping.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
rename to src/llama_stack_api/strong_typing/mapping.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/name.py b/src/llama_stack_api/strong_typing/name.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/name.py
rename to src/llama_stack_api/strong_typing/name.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/py.typed
rename to src/llama_stack_api/strong_typing/py.typed
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py b/src/llama_stack_api/strong_typing/schema.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/schema.py
rename to src/llama_stack_api/strong_typing/schema.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py b/src/llama_stack_api/strong_typing/serialization.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
rename to src/llama_stack_api/strong_typing/serialization.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py b/src/llama_stack_api/strong_typing/serializer.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
rename to src/llama_stack_api/strong_typing/serializer.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py b/src/llama_stack_api/strong_typing/slots.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/slots.py
rename to src/llama_stack_api/strong_typing/slots.py
diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py b/src/llama_stack_api/strong_typing/topological.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/strong_typing/topological.py
rename to src/llama_stack_api/strong_typing/topological.py
diff --git a/src/llama-stack-api/llama_stack_api/tools.py b/src/llama_stack_api/tools.py
similarity index 100%
rename from src/llama-stack-api/llama_stack_api/tools.py
rename to src/llama_stack_api/tools.py
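The renames above flatten the package layout from `src/llama-stack-api/llama_stack_api/` to `src/llama_stack_api/`, while the hunks in README.md, `__init__.py`, and pyproject.toml align all three version strings at `0.4.0.dev0`. A quick sanity check one might run after applying the patch (illustrative, not part of the patch):

```python
# Illustrative check: the import path is unchanged by the rename
# (only the on-disk layout moved), and the package version now
# matches the 0.4.0.dev0 declared in pyproject.toml.
import llama_stack_api

assert llama_stack_api.__version__ == "0.4.0.dev0"
```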
"https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "certifi" +version = "2025.11.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = 
"2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = 
"2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = 
"2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.72.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" 
+dependencies = [
+    { name = "referencing" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
+]
+
+[[package]]
+name = "llama-stack-api"
+version = "0.4.0.dev0"
+source = { editable = "." }
+dependencies = [
+    { name = "jsonschema" },
+    { name = "opentelemetry-exporter-otlp-proto-http" },
+    { name = "opentelemetry-sdk" },
+    { name = "pydantic" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "jsonschema" },
+    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
+    { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
+    { name = "pydantic", specifier = ">=2.11.9" },
+]
+
+[[package]]
+name = "opentelemetry-api"
+version = "1.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "importlib-metadata" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-common"
+version = "1.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "opentelemetry-proto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
+]
+
+[[package]]
+name = "opentelemetry-exporter-otlp-proto-http"
+version = "1.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "googleapis-common-protos" },
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-exporter-otlp-proto-common" },
+    { name = "opentelemetry-proto" },
+    { name = "opentelemetry-sdk" },
+    { name = "requests" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
+]
+
+[[package]]
+name = "opentelemetry-proto"
+version = "1.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
+]
+
+[[package]]
+name = "opentelemetry-sdk"
+version = "1.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-semantic-conventions" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
+]
+
+[[package]]
+name = "opentelemetry-semantic-conventions"
+version = "0.59b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "opentelemetry-api" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
+]
+
+[[package]]
+name = "protobuf"
+version = "6.33.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0a/03/a1440979a3f74f16cab3b75b0da1a1a7f922d56a8ddea96092391998edc0/protobuf-6.33.1.tar.gz", hash = "sha256:97f65757e8d09870de6fd973aeddb92f85435607235d20b2dfed93405d00c85b", size = 443432, upload-time = "2025-11-13T16:44:18.895Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/06/f1/446a9bbd2c60772ca36556bac8bfde40eceb28d9cc7838755bc41e001d8f/protobuf-6.33.1-cp310-abi3-win32.whl", hash = "sha256:f8d3fdbc966aaab1d05046d0240dd94d40f2a8c62856d41eaa141ff64a79de6b", size = 425593, upload-time = "2025-11-13T16:44:06.275Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/79/8780a378c650e3df849b73de8b13cf5412f521ca2ff9b78a45c247029440/protobuf-6.33.1-cp310-abi3-win_amd64.whl", hash = "sha256:923aa6d27a92bf44394f6abf7ea0500f38769d4b07f4be41cb52bd8b1123b9ed", size = 436883, upload-time = "2025-11-13T16:44:09.222Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/93/26213ff72b103ae55bb0d73e7fb91ea570ef407c3ab4fd2f1f27cac16044/protobuf-6.33.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:fe34575f2bdde76ac429ec7b570235bf0c788883e70aee90068e9981806f2490", size = 427522, upload-time = "2025-11-13T16:44:10.475Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/32/df4a35247923393aa6b887c3b3244a8c941c32a25681775f96e2b418f90e/protobuf-6.33.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:f8adba2e44cde2d7618996b3fc02341f03f5bc3f2748be72dc7b063319276178", size = 324445, upload-time = "2025-11-13T16:44:11.869Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/d0/d796e419e2ec93d2f3fa44888861c3f88f722cde02b7c3488fcc6a166820/protobuf-6.33.1-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:0f4cf01222c0d959c2b399142deb526de420be8236f22c71356e2a544e153c53", size = 339161, upload-time = "2025-11-13T16:44:12.778Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/2a/3c5f05a4af06649547027d288747f68525755de692a26a7720dced3652c0/protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:8fd7d5e0eb08cd5b87fd3df49bc193f5cfd778701f47e11d127d0afc6c39f1d1", size = 323171, upload-time = "2025-11-13T16:44:14.035Z" },
+    { url = "https://files.pythonhosted.org/packages/08/b4/46310463b4f6ceef310f8348786f3cff181cea671578e3d9743ba61a459e/protobuf-6.33.1-py3-none-any.whl", hash = "sha256:d595a9fd694fdeb061a62fbe10eb039cc1e444df81ec9bb70c7fc59ebcb1eafa", size = 170477, upload-time = "2025-11-13T16:44:17.633Z" },
+]
+
+[[package]]
+name = "pydantic"
+version = "2.12.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "annotated-types" },
+    { name = "pydantic-core" },
+    { name = "typing-extensions" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" },
+]
+
+[[package]]
+name = "pydantic-core"
+version = "2.41.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
+wheels = [
"sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, 
upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, 
upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = 
"2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = 
"sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] 
+name = "rpds-py" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" }, + { url = "https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" }, + { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" }, + { url = "https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" }, + { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" }, + { url = "https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" }, + { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" }, + { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" }, + { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" }, + { url = "https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" }, + { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" }, + { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" }, + { url = "https://files.pythonhosted.org/packages/d3/03/ce566d92611dfac0085c2f4b048cd53ed7c274a5c05974b882a908d540a2/rpds_py-0.28.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e9e184408a0297086f880556b6168fa927d677716f83d3472ea333b42171ee3b", size = 366235, upload-time = "2025-10-22T22:22:28.397Z" }, + { url = "https://files.pythonhosted.org/packages/00/34/1c61da1b25592b86fd285bd7bd8422f4c9d748a7373b46126f9ae792a004/rpds_py-0.28.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:edd267266a9b0448f33dc465a97cfc5d467594b600fe28e7fa2f36450e03053a", size = 348241, upload-time = "2025-10-22T22:22:30.171Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/ed1e28616848c61c493a067779633ebf4b569eccaacf9ccbdc0e7cba2b9d/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85beb8b3f45e4e32f6802fb6cd6b17f615ef6c6a52f265371fb916fae02814aa", size = 378079, upload-time = "2025-10-22T22:22:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/11/b2/ccb30333a16a470091b6e50289adb4d3ec656fd9951ba8c5e3aaa0746a67/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d2412be8d00a1b895f8ad827cc2116455196e20ed994bb704bf138fe91a42724", size = 393151, upload-time = "2025-10-22T22:22:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d0/73e2217c3ee486d555cb84920597480627d8c0240ff3062005c6cc47773e/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf128350d384b777da0e68796afdcebc2e9f63f0e9f242217754e647f6d32491", size = 517520, upload-time = "2025-10-22T22:22:34.949Z" }, + { url = "https://files.pythonhosted.org/packages/c4/91/23efe81c700427d0841a4ae7ea23e305654381831e6029499fe80be8a071/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a2036d09b363aa36695d1cc1a97b36865597f4478470b0697b5ee9403f4fe399", size = 408699, upload-time = "2025-10-22T22:22:36.584Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ee/a324d3198da151820a326c1f988caaa4f37fc27955148a76fff7a2d787a9/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8e1e9be4fa6305a16be628959188e4fd5cd6f1b0e724d63c6d8b2a8adf74ea6", size = 385720, upload-time = "2025-10-22T22:22:38.014Z" }, + { url = "https://files.pythonhosted.org/packages/19/ad/e68120dc05af8b7cab4a789fccd8cdcf0fe7e6581461038cc5c164cd97d2/rpds_py-0.28.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0a403460c9dd91a7f23fc3188de6d8977f1d9603a351d5db6cf20aaea95b538d", size = 401096, upload-time = "2025-10-22T22:22:39.869Z" }, + { url = "https://files.pythonhosted.org/packages/99/90/c1e070620042459d60df6356b666bb1f62198a89d68881816a7ed121595a/rpds_py-0.28.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7366b6553cdc805abcc512b849a519167db8f5e5c3472010cd1228b224265cb", size = 411465, upload-time = "2025-10-22T22:22:41.395Z" }, + { url = "https://files.pythonhosted.org/packages/68/61/7c195b30d57f1b8d5970f600efee72a4fad79ec829057972e13a0370fd24/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b43c6a3726efd50f18d8120ec0551241c38785b68952d240c45ea553912ac41", size = 558832, upload-time = "2025-10-22T22:22:42.871Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3d/06f3a718864773f69941d4deccdf18e5e47dd298b4628062f004c10f3b34/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0cb7203c7bc69d7c1585ebb33a2e6074492d2fc21ad28a7b9d40457ac2a51ab7", size = 583230, upload-time = "2025-10-22T22:22:44.877Z" }, + { url = "https://files.pythonhosted.org/packages/66/df/62fc783781a121e77fee9a21ead0a926f1b652280a33f5956a5e7833ed30/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a52a5169c664dfb495882adc75c304ae1d50df552fbd68e100fdc719dee4ff9", size = 553268, upload-time = "2025-10-22T22:22:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/84/85/d34366e335140a4837902d3dea89b51f087bd6a63c993ebdff59e93ee61d/rpds_py-0.28.0-cp313-cp313-win32.whl", hash = "sha256:2e42456917b6687215b3e606ab46aa6bca040c77af7df9a08a6dcfe8a4d10ca5", size = 217100, upload-time = "2025-10-22T22:22:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/3c/1c/f25a3f3752ad7601476e3eff395fe075e0f7813fbb9862bd67c82440e880/rpds_py-0.28.0-cp313-cp313-win_amd64.whl", hash = "sha256:e0a0311caedc8069d68fc2bf4c9019b58a2d5ce3cd7cb656c845f1615b577e1e", size = 227759, upload-time = "2025-10-22T22:22:50.219Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d6/5f39b42b99615b5bc2f36ab90423ea404830bdfee1c706820943e9a645eb/rpds_py-0.28.0-cp313-cp313-win_arm64.whl", hash = "sha256:04c1b207ab8b581108801528d59ad80aa83bb170b35b0ddffb29c20e411acdc1", size = 217326, upload-time = "2025-10-22T22:22:51.647Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8b/0c69b72d1cee20a63db534be0df271effe715ef6c744fdf1ff23bb2b0b1c/rpds_py-0.28.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f296ea3054e11fc58ad42e850e8b75c62d9a93a9f981ad04b2e5ae7d2186ff9c", size = 355736, upload-time = "2025-10-22T22:22:53.211Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6d/0c2ee773cfb55c31a8514d2cece856dd299170a49babd50dcffb15ddc749/rpds_py-0.28.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5a7306c19b19005ad98468fcefeb7100b19c79fc23a5f24a12e06d91181193fa", size = 342677, upload-time = 
"2025-10-22T22:22:54.723Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1c/22513ab25a27ea205144414724743e305e8153e6abe81833b5e678650f5a/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d9b86aa501fed9862a443c5c3116f6ead8bc9296185f369277c42542bd646b", size = 371847, upload-time = "2025-10-22T22:22:56.295Z" }, + { url = "https://files.pythonhosted.org/packages/60/07/68e6ccdb4b05115ffe61d31afc94adef1833d3a72f76c9632d4d90d67954/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e5bbc701eff140ba0e872691d573b3d5d30059ea26e5785acba9132d10c8c31d", size = 381800, upload-time = "2025-10-22T22:22:57.808Z" }, + { url = "https://files.pythonhosted.org/packages/73/bf/6d6d15df80781d7f9f368e7c1a00caf764436518c4877fb28b029c4624af/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5690671cd672a45aa8616d7374fdf334a1b9c04a0cac3c854b1136e92374fe", size = 518827, upload-time = "2025-10-22T22:22:59.826Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d3/2decbb2976cc452cbf12a2b0aaac5f1b9dc5dd9d1f7e2509a3ee00421249/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f1d92ecea4fa12f978a367c32a5375a1982834649cdb96539dcdc12e609ab1a", size = 399471, upload-time = "2025-10-22T22:23:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/b1/2c/f30892f9e54bd02e5faca3f6a26d6933c51055e67d54818af90abed9748e/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d252db6b1a78d0a3928b6190156042d54c93660ce4d98290d7b16b5296fb7cc", size = 377578, upload-time = "2025-10-22T22:23:03.52Z" }, + { url = "https://files.pythonhosted.org/packages/f0/5d/3bce97e5534157318f29ac06bf2d279dae2674ec12f7cb9c12739cee64d8/rpds_py-0.28.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d61b355c3275acb825f8777d6c4505f42b5007e357af500939d4a35b19177259", size = 390482, upload-time = "2025-10-22T22:23:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f0/886bd515ed457b5bd93b166175edb80a0b21a210c10e993392127f1e3931/rpds_py-0.28.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acbe5e8b1026c0c580d0321c8aae4b0a1e1676861d48d6e8c6586625055b606a", size = 402447, upload-time = "2025-10-22T22:23:06.93Z" }, + { url = "https://files.pythonhosted.org/packages/42/b5/71e8777ac55e6af1f4f1c05b47542a1eaa6c33c1cf0d300dca6a1c6e159a/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8aa23b6f0fc59b85b4c7d89ba2965af274346f738e8d9fc2455763602e62fd5f", size = 552385, upload-time = "2025-10-22T22:23:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cb/6ca2d70cbda5a8e36605e7788c4aa3bea7c17d71d213465a5a675079b98d/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7b14b0c680286958817c22d76fcbca4800ddacef6f678f3a7c79a1fe7067fe37", size = 575642, upload-time = "2025-10-22T22:23:10.348Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/407ad9960ca7856d7b25c96dcbe019270b5ffdd83a561787bc682c797086/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bcf1d210dfee61a6c86551d67ee1031899c0fdbae88b2d44a569995d43797712", size = 544507, upload-time = "2025-10-22T22:23:12.434Z" }, + { url = "https://files.pythonhosted.org/packages/51/31/2f46fe0efcac23fbf5797c6b6b7e1c76f7d60773e525cb65fcbc582ee0f2/rpds_py-0.28.0-cp313-cp313t-win32.whl", hash = "sha256:3aa4dc0fdab4a7029ac63959a3ccf4ed605fee048ba67ce89ca3168da34a1342", size = 205376, 
upload-time = "2025-10-22T22:23:13.979Z" }, + { url = "https://files.pythonhosted.org/packages/92/e4/15947bda33cbedfc134490a41841ab8870a72a867a03d4969d886f6594a2/rpds_py-0.28.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7b7d9d83c942855e4fdcfa75d4f96f6b9e272d42fffcb72cd4bb2577db2e2907", size = 215907, upload-time = "2025-10-22T22:23:15.5Z" }, + { url = "https://files.pythonhosted.org/packages/08/47/ffe8cd7a6a02833b10623bf765fbb57ce977e9a4318ca0e8cf97e9c3d2b3/rpds_py-0.28.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:dcdcb890b3ada98a03f9f2bb108489cdc7580176cb73b4f2d789e9a1dac1d472", size = 353830, upload-time = "2025-10-22T22:23:17.03Z" }, + { url = "https://files.pythonhosted.org/packages/f9/9f/890f36cbd83a58491d0d91ae0db1702639edb33fb48eeb356f80ecc6b000/rpds_py-0.28.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f274f56a926ba2dc02976ca5b11c32855cbd5925534e57cfe1fda64e04d1add2", size = 341819, upload-time = "2025-10-22T22:23:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/09/e3/921eb109f682aa24fb76207698fbbcf9418738f35a40c21652c29053f23d/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fe0438ac4a29a520ea94c8c7f1754cdd8feb1bc490dfda1bfd990072363d527", size = 373127, upload-time = "2025-10-22T22:23:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/23/13/bce4384d9f8f4989f1a9599c71b7a2d877462e5fd7175e1f69b398f729f4/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a358a32dd3ae50e933347889b6af9a1bdf207ba5d1a3f34e1a38cd3540e6733", size = 382767, upload-time = "2025-10-22T22:23:21.787Z" }, + { url = "https://files.pythonhosted.org/packages/23/e1/579512b2d89a77c64ccef5a0bc46a6ef7f72ae0cf03d4b26dcd52e57ee0a/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e80848a71c78aa328fefaba9c244d588a342c8e03bda518447b624ea64d1ff56", size = 517585, upload-time = "2025-10-22T22:23:23.699Z" }, + { url = "https://files.pythonhosted.org/packages/62/3c/ca704b8d324a2591b0b0adcfcaadf9c862375b11f2f667ac03c61b4fd0a6/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f586db2e209d54fe177e58e0bc4946bea5fb0102f150b1b2f13de03e1f0976f8", size = 399828, upload-time = "2025-10-22T22:23:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/da/37/e84283b9e897e3adc46b4c88bb3f6ec92a43bd4d2f7ef5b13459963b2e9c/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae8ee156d6b586e4292491e885d41483136ab994e719a13458055bec14cf370", size = 375509, upload-time = "2025-10-22T22:23:27.32Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c2/a980beab869d86258bf76ec42dec778ba98151f253a952b02fe36d72b29c/rpds_py-0.28.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:a805e9b3973f7e27f7cab63a6b4f61d90f2e5557cff73b6e97cd5b8540276d3d", size = 392014, upload-time = "2025-10-22T22:23:29.332Z" }, + { url = "https://files.pythonhosted.org/packages/da/b5/b1d3c5f9d3fa5aeef74265f9c64de3c34a0d6d5cd3c81c8b17d5c8f10ed4/rpds_py-0.28.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d3fd16b6dc89c73a4da0b4ac8b12a7ecc75b2864b95c9e5afed8003cb50a728", size = 402410, upload-time = "2025-10-22T22:23:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/74/ae/cab05ff08dfcc052afc73dcb38cbc765ffc86f94e966f3924cd17492293c/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6796079e5d24fdaba6d49bda28e2c47347e89834678f2bc2c1b4fc1489c0fb01", size = 553593, 
upload-time = "2025-10-22T22:23:32.834Z" }, + { url = "https://files.pythonhosted.org/packages/70/80/50d5706ea2a9bfc9e9c5f401d91879e7c790c619969369800cde202da214/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:76500820c2af232435cbe215e3324c75b950a027134e044423f59f5b9a1ba515", size = 576925, upload-time = "2025-10-22T22:23:34.47Z" }, + { url = "https://files.pythonhosted.org/packages/ab/12/85a57d7a5855a3b188d024b099fd09c90db55d32a03626d0ed16352413ff/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bbdc5640900a7dbf9dd707fe6388972f5bbd883633eb68b76591044cfe346f7e", size = 542444, upload-time = "2025-10-22T22:23:36.093Z" }, + { url = "https://files.pythonhosted.org/packages/6c/65/10643fb50179509150eb94d558e8837c57ca8b9adc04bd07b98e57b48f8c/rpds_py-0.28.0-cp314-cp314-win32.whl", hash = "sha256:adc8aa88486857d2b35d75f0640b949759f79dc105f50aa2c27816b2e0dd749f", size = 207968, upload-time = "2025-10-22T22:23:37.638Z" }, + { url = "https://files.pythonhosted.org/packages/b4/84/0c11fe4d9aaea784ff4652499e365963222481ac647bcd0251c88af646eb/rpds_py-0.28.0-cp314-cp314-win_amd64.whl", hash = "sha256:66e6fa8e075b58946e76a78e69e1a124a21d9a48a5b4766d15ba5b06869d1fa1", size = 218876, upload-time = "2025-10-22T22:23:39.179Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/3ab3b86ded7bb18478392dc3e835f7b754cd446f62f3fc96f4fe2aca78f6/rpds_py-0.28.0-cp314-cp314-win_arm64.whl", hash = "sha256:a6fe887c2c5c59413353b7c0caff25d0e566623501ccfff88957fa438a69377d", size = 212506, upload-time = "2025-10-22T22:23:40.755Z" }, + { url = "https://files.pythonhosted.org/packages/51/ec/d5681bb425226c3501eab50fc30e9d275de20c131869322c8a1729c7b61c/rpds_py-0.28.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7a69df082db13c7070f7b8b1f155fa9e687f1d6aefb7b0e3f7231653b79a067b", size = 355433, upload-time = "2025-10-22T22:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/568c5e689e1cfb1ea8b875cffea3649260955f677fdd7ddc6176902d04cd/rpds_py-0.28.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b1cde22f2c30ebb049a9e74c5374994157b9b70a16147d332f89c99c5960737a", size = 342601, upload-time = "2025-10-22T22:23:44.372Z" }, + { url = "https://files.pythonhosted.org/packages/32/fe/51ada84d1d2a1d9d8f2c902cfddd0133b4a5eb543196ab5161d1c07ed2ad/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5338742f6ba7a51012ea470bd4dc600a8c713c0c72adaa0977a1b1f4327d6592", size = 372039, upload-time = "2025-10-22T22:23:46.025Z" }, + { url = "https://files.pythonhosted.org/packages/07/c1/60144a2f2620abade1a78e0d91b298ac2d9b91bc08864493fa00451ef06e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1460ebde1bcf6d496d80b191d854adedcc619f84ff17dc1c6d550f58c9efbba", size = 382407, upload-time = "2025-10-22T22:23:48.098Z" }, + { url = "https://files.pythonhosted.org/packages/45/ed/091a7bbdcf4038a60a461df50bc4c82a7ed6d5d5e27649aab61771c17585/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e3eb248f2feba84c692579257a043a7699e28a77d86c77b032c1d9fbb3f0219c", size = 518172, upload-time = "2025-10-22T22:23:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/54/dd/02cc90c2fd9c2ef8016fd7813bfacd1c3a1325633ec8f244c47b449fc868/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3bbba5def70b16cd1c1d7255666aad3b290fbf8d0fe7f9f91abafb73611a91", size = 399020, upload-time = "2025-10-22T22:23:51.81Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/81/5d98cc0329bbb911ccecd0b9e19fbf7f3a5de8094b4cda5e71013b2dd77e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3114f4db69ac5a1f32e7e4d1cbbe7c8f9cf8217f78e6e002cedf2d54c2a548ed", size = 377451, upload-time = "2025-10-22T22:23:53.711Z" }, + { url = "https://files.pythonhosted.org/packages/b4/07/4d5bcd49e3dfed2d38e2dcb49ab6615f2ceb9f89f5a372c46dbdebb4e028/rpds_py-0.28.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4b0cb8a906b1a0196b863d460c0222fb8ad0f34041568da5620f9799b83ccf0b", size = 390355, upload-time = "2025-10-22T22:23:55.299Z" }, + { url = "https://files.pythonhosted.org/packages/3f/79/9f14ba9010fee74e4f40bf578735cfcbb91d2e642ffd1abe429bb0b96364/rpds_py-0.28.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf681ac76a60b667106141e11a92a3330890257e6f559ca995fbb5265160b56e", size = 403146, upload-time = "2025-10-22T22:23:56.929Z" }, + { url = "https://files.pythonhosted.org/packages/39/4c/f08283a82ac141331a83a40652830edd3a4a92c34e07e2bbe00baaea2f5f/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1e8ee6413cfc677ce8898d9cde18cc3a60fc2ba756b0dec5b71eb6eb21c49fa1", size = 552656, upload-time = "2025-10-22T22:23:58.62Z" }, + { url = "https://files.pythonhosted.org/packages/61/47/d922fc0666f0dd8e40c33990d055f4cc6ecff6f502c2d01569dbed830f9b/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b3072b16904d0b5572a15eb9d31c1954e0d3227a585fc1351aa9878729099d6c", size = 576782, upload-time = "2025-10-22T22:24:00.312Z" }, + { url = "https://files.pythonhosted.org/packages/d3/0c/5bafdd8ccf6aa9d3bfc630cfece457ff5b581af24f46a9f3590f790e3df2/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b670c30fd87a6aec281c3c9896d3bae4b205fd75d79d06dc87c2503717e46092", size = 544671, upload-time = "2025-10-22T22:24:02.297Z" }, + { url = "https://files.pythonhosted.org/packages/2c/37/dcc5d8397caa924988693519069d0beea077a866128719351a4ad95e82fc/rpds_py-0.28.0-cp314-cp314t-win32.whl", hash = "sha256:8014045a15b4d2b3476f0a287fcc93d4f823472d7d1308d47884ecac9e612be3", size = 205749, upload-time = "2025-10-22T22:24:03.848Z" }, + { url = "https://files.pythonhosted.org/packages/d7/69/64d43b21a10d72b45939a28961216baeb721cc2a430f5f7c3bfa21659a53/rpds_py-0.28.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a4e59c90d9c27c561eb3160323634a9ff50b04e4f7820600a2beb0ac90db578", size = 216233, upload-time = "2025-10-22T22:24:05.471Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +] diff --git a/src/llama-stack-api/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_io.py rename to src/llama_stack_api/vector_io.py diff --git a/src/llama-stack-api/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_stores.py rename to src/llama_stack_api/vector_stores.py diff --git a/src/llama-stack-api/llama_stack_api/version.py b/src/llama_stack_api/version.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/version.py rename to src/llama_stack_api/version.py diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index b9c0ac916..4dc5b7993 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,6 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest + from llama_stack_api import OpenAIFilePurpose diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 61878ac4c..1f19c88c5 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,9 +9,9 @@ from unittest.mock import patch import pytest import requests -from llama_stack_api import OpenAIFilePurpose from llama_stack.core.datatypes import User +from llama_stack_api import OpenAIFilePurpose purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py 
b/tests/integration/inference/test_provider_data_routing.py index d007b57d6..e4a0a24b5 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,6 +15,9 @@ that enables routing based on provider_data alone. from unittest.mock import AsyncMock, patch import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack_api import ( Api, OpenAIAssistantMessageParam, @@ -23,9 +26,6 @@ from llama_stack_api import ( OpenAIChoice, ) -from llama_stack.core.library_client import LlamaStackAsLibraryClient -from llama_stack.core.telemetry.telemetry import MetricEvent - class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion): metrics: list[MetricEvent] | None = None diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index ff6925b58..e6868019a 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,6 +9,8 @@ import time import uuid import pytest + +from llama_stack.log import get_logger from llama_stack_api import ( DataConfig, DatasetFormat, @@ -18,8 +20,6 @@ from llama_stack_api import ( TrainingConfig, ) -from llama_stack.log import get_logger - # Configure logging logger = get_logger(name=__name__, category="post_training") diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 99b4982f0..a554752cd 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,9 +12,9 @@ import warnings from collections.abc import Generator import pytest -from llama_stack_api import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack_api import ViolationLevel # Llama Guard models available for text and vision shields LLAMA_GUARD_TEXT_MODELS = [CoreModelId.llama_guard_4_12b.value] diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6a926f1d5..857ff2f81 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,6 +7,7 @@ import base64 import mimetypes import pytest + from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index b85a23263..dc7b7e1ad 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,6 +9,7 @@ import mimetypes import os import pytest + from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 1b1b6ef28..036a5f018 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,9 +7,9 @@ import re import pytest -from llama_stack_api import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack_api import ToolGroupNotFoundError from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 
c65dfecac..102f3f00c 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,12 +8,12 @@ import time from io import BytesIO import pytest -from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from llama_stack_api import Chunk, ExpiresAfter from ..conftest import vector_provider_wrapper diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index acaa44bcb..29dbd3e56 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import pytest + from llama_stack_api import Chunk from ..conftest import vector_provider_wrapper diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 2f942eb9c..95c54d379 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -8,7 +8,6 @@ import tempfile from pathlib import Path import pytest -from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage from openai.types.conversations.conversation import Conversation as OpenAIConversation from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem from pydantic import TypeAdapter @@ -25,6 +24,7 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage @pytest.fixture diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py index 7e465513e..1b24a59a2 100644 --- a/tests/unit/core/routers/test_safety_router.py +++ b/tests/unit/core/routers/test_safety_router.py @@ -6,10 +6,9 @@ from unittest.mock import AsyncMock -from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield - from llama_stack.core.datatypes import SafetyConfig from llama_stack.core.routers.safety import SafetyRouter +from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield async def test_run_moderation_uses_default_shield_when_model_missing(): diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 03bc1ff5f..a6df0694b 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -7,6 +7,8 @@ from unittest.mock import AsyncMock, Mock import pytest + +from llama_stack.core.routers.vector_io import VectorIORouter from llama_stack_api import ( ModelNotFoundError, ModelType, @@ -14,8 +16,6 @@ from llama_stack_api import ( OpenAICreateVectorStoreRequestWithExtraBody, ) -from llama_stack.core.routers.vector_io import VectorIORouter - async def test_single_provider_auto_selection(): # provider_id automatically selected during vector store create() when only one provider available @@ -127,7 +127,8 @@ async def test_update_vector_store_same_provider_id_succeeds(): async def test_create_vector_store_with_unknown_embedding_model_raises_error(): - """Test that creating a vector store with an unknown embedding model raises 
ModelNotFoundError.""" + """Test that creating a vector store with an unknown embedding model raises + FoundError.""" mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None) diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index acb31e1c9..462a25c8b 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -9,10 +9,10 @@ from unittest.mock import AsyncMock import pytest -from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config +from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield class TestVectorStoresValidation: diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 2405d536e..8fd9d6ec3 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -9,6 +9,14 @@ from unittest.mock import AsyncMock import pytest + +from llama_stack.core.datatypes import RegistryEntrySource +from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable +from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable +from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable +from llama_stack.core.routing_tables.shields import ShieldsRoutingTable +from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable from llama_stack_api import ( URL, Api, @@ -25,14 +33,6 @@ from llama_stack_api import ( URIDataSource, ) -from llama_stack.core.datatypes import RegistryEntrySource -from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable -from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable -from llama_stack.core.routing_tables.models import ModelsRoutingTable -from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable -from llama_stack.core.routing_tables.shields import ShieldsRoutingTable -from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable - class Impl: def __init__(self, api: Api): diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py index f66b57df8..889f063e6 100644 --- a/tests/unit/distribution/test_api_recordings.py +++ b/tests/unit/distribution/test_api_recordings.py @@ -9,6 +9,14 @@ from pathlib import Path from unittest.mock import patch import pytest +from openai import AsyncOpenAI + +from llama_stack.testing.api_recorder import ( + APIRecordingMode, + ResponseStorage, + api_recording, + normalize_inference_request, +) # Import the real Pydantic response types instead of using Mocks from llama_stack_api import ( @@ -19,14 +27,6 @@ from llama_stack_api import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) -from openai import AsyncOpenAI - -from llama_stack.testing.api_recorder import ( - APIRecordingMode, - ResponseStorage, - api_recording, - normalize_inference_request, -) @pytest.fixture diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py 
index a27455e24..b8d6ba55d 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,7 +9,6 @@ from unittest.mock import patch
 
 import pytest
 import yaml
-from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -23,6 +22,7 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
+from llama_stack_api import ProviderSpec
 
 
 class SampleConfig(BaseModel):
@@ -395,9 +395,8 @@ pip_packages:
     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack_api import Api
-
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+        from llama_stack_api import Api
 
         # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
         build_config = BuildConfig(
@@ -457,9 +456,8 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace
 
-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec
 
         fake_spec = ProviderSpec(
             api=Api.inference,
@@ -595,9 +593,8 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace
 
-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec
 
         spec1 = ProviderSpec(
             api=Api.inference,
@@ -644,9 +641,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace
 
-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec
 
         spec1 = ProviderSpec(
             api=Api.inference,
@@ -693,9 +689,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace
 
-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec
 
         # Module returns both inline and remote variants
         spec1 = ProviderSpec(
@@ -833,9 +828,8 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace
 
-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec
 
         inference_spec = ProviderSpec(
             api=Api.inference,
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index 080d1ddbe..793f4edd3 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,7 +6,6 @@
 
 import pytest
-from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError
 
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
@@ -15,6 +14,7 @@ from llama_stack.providers.inline.files.localfs import (
     LocalfsFilesImplConfig,
 )
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError
 
 
 class MockUploadFile:
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 3c93a578d..32d59234d 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,6 +58,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
+
 from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError
 
 
diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index 4cd5d962d..acb7ca01c 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,6 +43,7 @@ Key Behaviors Tested:
 import asyncio
 
 import pytest
+
 from llama_stack_api import ConflictError
 
 
diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index ae63c1a78..de6c92e9c 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,6 +8,7 @@
 from unittest.mock import patch
 
 import pytest
 from botocore.exceptions import ClientError
+
 from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 
diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index 873db4e27..e113611bd 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,10 +7,10 @@
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 
 async def test_listing_hides_other_users_file(s3_provider, sample_text_file):
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index b3eecc558..a20f2860a 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,11 +8,11 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 
 
 def test_adapter_initialization():
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index e2a5455b7..958895cc4 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,6 +9,11 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
+
+from llama_stack.core.routers.inference import InferenceRouter
+from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
+from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
 from llama_stack_api import (
     HealthStatus,
     Model,
@@ -22,11 +27,6 @@ from llama_stack_api import (
     ToolChoice,
 )
 
-from llama_stack.core.routers.inference import InferenceRouter
-from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
-
 # These are unit test for the remote vllm provider
 # implementation. This should only contain tests which are specific to
 # the implementation details of those classes. More general
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index 36d2b86a9..658132340 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,12 +7,12 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api import ToolDef
 
 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
 )
 from llama_stack.providers.inline.agents.meta_reference.responses.types import ChatCompletionContext
+from llama_stack_api import ToolDef
 
 
 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index 0d9f1cc35..36006cc39 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,10 +8,10 @@ import os
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource
 
 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource
 
 
 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index c41379801..783d664bf 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,6 +8,10 @@ import os
 from unittest.mock import MagicMock, patch
 
 import pytest
+
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
+from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
 from llama_stack_api import (
     Benchmark,
     BenchmarkConfig,
@@ -20,10 +24,6 @@ from llama_stack_api import (
     TopPSamplingStrategy,
 )
 
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
-from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
-
 MOCK_DATASET_ID = "default/test-dataset"
 MOCK_BENCHMARK_ID = "test-benchmark"
diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index ba68a7abe..b714fc607 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,6 +9,12 @@ import warnings
 from unittest.mock import patch
 
 import pytest
+
+from llama_stack.core.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.remote.post_training.nvidia.post_training import (
+    NvidiaPostTrainingAdapter,
+    NvidiaPostTrainingConfig,
+)
 from llama_stack_api import (
     DataConfig,
     DatasetFormat,
@@ -19,12 +25,6 @@ from llama_stack_api
import ( TrainingConfig, ) -from llama_stack.core.library_client import convert_pydantic_to_json_value -from llama_stack.providers.remote.post_training.nvidia.post_training import ( - NvidiaPostTrainingAdapter, - NvidiaPostTrainingConfig, -) - class TestNvidiaParameters: @pytest.fixture(autouse=True) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 8b313abcd..ee62910b8 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -8,11 +8,11 @@ from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest -from llama_stack_api import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ModelType class MockResponse: diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index ea6254841..07e04ddea 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -9,6 +9,9 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest + +from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig +from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIUserMessageParam, @@ -18,9 +21,6 @@ from llama_stack_api import ( ViolationLevel, ) -from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig -from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter - class FakeNVIDIASafetyAdapter(NVIDIASafetyAdapter): """Test implementation that provides the required shield_store.""" diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 4d0ce695b..94948da41 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -9,15 +9,6 @@ import warnings from unittest.mock import patch import pytest -from llama_stack_api import ( - DataConfig, - DatasetFormat, - LoraFinetuningConfig, - OptimizerConfig, - OptimizerType, - QATFinetuningConfig, - TrainingConfig, -) from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( @@ -27,6 +18,15 @@ from llama_stack.providers.remote.post_training.nvidia.post_training import ( NvidiaPostTrainingJob, NvidiaPostTrainingJobStatusResponse, ) +from llama_stack_api import ( + DataConfig, + DatasetFormat, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + QATFinetuningConfig, + TrainingConfig, +) @pytest.fixture diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py index df7453712..7126e1b69 100644 --- a/tests/unit/providers/test_bedrock.py +++ b/tests/unit/providers/test_bedrock.py @@ -7,10 +7,9 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, PropertyMock, patch -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody - from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig 
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody def test_can_create_adapter(): diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index b9b59bb79..5b13a75f4 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -10,12 +10,12 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch import pytest -from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam from pydantic import BaseModel, Field from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam class OpenAIMixinImpl(OpenAIMixin): diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py index a7c9289d7..ab5736ac5 100644 --- a/tests/unit/providers/utils/inference/test_prompt_adapter.py +++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam - from llama_stack.models.llama.datatypes import RawTextItem from llama_stack.providers.utils.inference.prompt_adapter import ( convert_openai_message_to_raw_message, ) +from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam class TestConvertOpenAIMessageToRawMessage: diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 00db5795a..f3241ba20 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -7,9 +7,9 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack_api import URL, RAGDocument, TextContentItem from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc +from llama_stack_api import URL, RAGDocument, TextContentItem async def test_content_from_doc_with_url(): diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 4a85cf8b8..1e3efafa1 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -34,9 +34,9 @@ # import pytest -from llama_stack_api import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack_api import Model @pytest.fixture diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 216e9b8ea..6408e25ab 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from 
llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig @@ -19,6 +18,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteV from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore EMBEDDING_DIMENSION = 768 COLLECTION_PREFIX = "test_collection" diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 0d5c1399f..075296cbb 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,13 +9,13 @@ from unittest.mock import MagicMock, patch import numpy as np import pytest -from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( FaissIndex, FaissVectorIOAdapter, ) +from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore # This test is a unit test for the FaissVectorIOAdapter class. This should only contain # tests which are specific to this class. More general (API-level) tests should be placed in diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 17a99ce1c..d1548cf37 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,13 +8,13 @@ import asyncio import numpy as np import pytest -from llama_stack_api import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, ) +from llama_stack_api import Chunk, QueryChunksResponse # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain # tests which are specific to this class. More general (API-level) tests should be placed in diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 7ba40eefb..3797abb2c 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -10,6 +10,8 @@ from unittest.mock import AsyncMock, patch import numpy as np import pytest + +from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX from llama_stack_api import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -21,8 +23,6 @@ from llama_stack_api import ( VectorStoreNotFoundError, ) -from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX - # This test is a unit test for the inline VectorIO providers. This should only contain # tests which are specific to this class. 
More general (API-level) tests should be placed in # tests/integration/vector_io/ @@ -255,10 +255,9 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter): async def test_document_id_with_invalid_type_raises_error(): """Ensure TypeError is raised when document_id is not a string.""" - from llama_stack_api import Chunk - # Integer document_id should raise TypeError from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id + from llama_stack_api import Chunk chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345}) with pytest.raises(TypeError) as exc_info: diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py index 678b76fbd..7f6b4af79 100644 --- a/tests/unit/providers/vector_io/test_vector_utils.py +++ b/tests/unit/providers/vector_io/test_vector_utils.py @@ -4,9 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import Chunk, ChunkMetadata - from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack_api import Chunk, ChunkMetadata # This test is a unit test for the chunk_utils.py helpers. This should only contain # tests which are specific to this file. More general (API-level) tests should be placed in diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index e3f5e46d7..7eb17b74b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -7,9 +7,9 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl +from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig class TestRagQuery: diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 23c12dcab..2562df8d6 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -12,7 +12,6 @@ from unittest.mock import AsyncMock, MagicMock import numpy as np import pytest -from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument from llama_stack.providers.utils.memory.vector_store import ( URL, @@ -22,6 +21,7 @@ from llama_stack.providers.utils.memory.vector_store import ( make_overlapped_chunks, ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf" # Depending on the machine, this can get parsed a couple of ways diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 01f486ab2..1b5032782 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -6,7 +6,6 @@ import pytest -from llama_stack_api import Model, VectorStore from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig @@ -16,6 +15,7 @@ from llama_stack.core.store.registry import ( DiskDistributionRegistry, ) from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends +from llama_stack_api import Model, VectorStore 
@pytest.fixture @@ -303,9 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry): async def test_double_registration_with_cache(cached_disk_dist_registry): """Test double registration behavior with caching enabled.""" - from llama_stack_api import ModelType - from llama_stack.core.datatypes import ModelWithOwner + from llama_stack_api import ModelType model1 = ModelWithOwner( identifier="test_model", diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 2827f60b9..a09d2a30d 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,10 +5,9 @@ # the root directory of this source tree. -from llama_stack_api import ModelType - from llama_stack.core.datatypes import ModelWithOwner, User from llama_stack.core.store.registry import CachedDiskDistributionRegistry +from llama_stack_api import ModelType async def test_registry_cache_with_acl(cached_disk_dist_registry): diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index 1df933d4d..23a9636d5 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -8,12 +8,12 @@ from unittest.mock import MagicMock, Mock, patch import pytest import yaml -from llama_stack_api import Api, ModelType from pydantic import TypeAdapter, ValidationError from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack_api import Api, ModelType class AsyncMock(MagicMock): diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 071178f96..8f8a61ea7 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,7 +9,6 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock -from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec from pydantic import BaseModel, Field from llama_stack.core.datatypes import Api, Provider, StackRunConfig @@ -27,6 +26,7 @@ from llama_stack.core.storage.datatypes import ( ) from llama_stack.providers.utils.kvstore import register_kvstore_backends from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index fdaf9022b..d82743c80 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -9,9 +9,9 @@ import logging # allow-direct-logging from unittest.mock import AsyncMock, MagicMock import pytest -from llama_stack_api import PaginatedResponse from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator +from llama_stack_api import PaginatedResponse @pytest.fixture diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py index 79e0b6e28..623955984 100644 --- a/tests/unit/tools/test_tools_json_schema.py +++ b/tests/unit/tools/test_tools_json_schema.py @@ -9,10 +9,10 @@ Unit tests for JSON Schema-based tool definitions. Tests the new input_schema and output_schema fields. 
""" -from llama_stack_api import ToolDef from pydantic import ValidationError from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition +from llama_stack_api import ToolDef class TestToolDefValidation: diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index 4da20b125..bdcc529ce 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -7,6 +7,10 @@ import time import pytest + +from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig +from llama_stack.providers.utils.inference.inference_store import InferenceStore +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -15,10 +19,6 @@ from llama_stack_api import ( Order, ) -from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig -from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends - @pytest.fixture(autouse=True) def setup_backends(tmp_path): diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 1119a93d8..8c108d9c1 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -9,11 +9,11 @@ from tempfile import TemporaryDirectory from uuid import uuid4 import pytest -from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends +from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order def build_store(db_path: str, policy: list | None = None) -> ResponsesStore: diff --git a/uv.lock b/uv.lock index ddf8c1cd4..0b8b555f6 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -2095,7 +2095,7 @@ requires-dist = [ { name = "httpx" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, - { name = "llama-stack-api", editable = "src/llama-stack-api" }, + { name = "llama-stack-api", editable = "src/llama_stack_api" }, { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" }, { name = "openai", specifier = ">=2.5.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, @@ -2230,8 +2230,8 @@ unit = [ [[package]] name = "llama-stack-api" -version = "0.1.0" -source = { editable = "src/llama-stack-api" } +version = "0.4.0.dev0" +source = { editable = "src/llama_stack_api" } dependencies = [ { name = "jsonschema" }, { name = "opentelemetry-exporter-otlp-proto-http" }, From dc49ad3f890f7091033ff83581d02c6209b2acf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 14 Nov 2025 17:47:37 +0100 Subject: [PATCH 11/12] chore: bump starlette version 
(#4158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Require at least 0.49.1 which fixes a security vulnerability in the parsing logic of the Range header in FileResponse. Release note: https://github.com/Kludex/starlette/releases/tag/0.49.1 Signed-off-by: Sébastien Han --- pyproject.toml | 1 + uv.lock | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34728d6ea..f6d28fd03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "starlette>=0.49.1", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 0b8b555f6..8f45f0564 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -139,6 +139,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200, upload-time = "2024-11-23T23:39:56.4Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -1037,16 +1046,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.119.0" +version = "0.121.2" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/f9/5c5bcce82a7997cc0eb8c47b7800f862f6b56adc40486ed246e5010d443b/fastapi-0.119.0.tar.gz", hash = "sha256:451082403a2c1f0b99c6bd57c09110ed5463856804c8078d38e5a1f1035dbbb7", size = 336756, upload-time = "2025-10-11T17:13:40.53Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/48/f08f264da34cf160db82c62ffb335e838b1fc16cbcc905f474c7d4c815db/fastapi-0.121.2.tar.gz", hash = "sha256:ca8e932b2b823ec1721c641e3669472c855ad9564a2854c9899d904c2848b8b9", size = 342944, upload-time = "2025-11-13T17:05:54.692Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/70/584c4d7cad80f5e833715c0a29962d7c93b4d18eed522a02981a6d1b6ee5/fastapi-0.119.0-py3-none-any.whl", hash = "sha256:90a2e49ed19515320abb864df570dd766be0662c5d577688f1600170f7f73cf2", size = 107095, upload-time = "2025-10-11T17:13:39.048Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/23/dfb161e91db7c92727db505dc72a384ee79681fe0603f706f9f9f52c2901/fastapi-0.121.2-py3-none-any.whl", hash = "sha256:f2d80b49a86a846b70cc3a03eb5ea6ad2939298bf6a7fe377aa9cd3dd079d358", size = 109201, upload-time = "2025-11-13T17:05:52.718Z" }, ] [[package]] @@ -2110,6 +2120,7 @@ requires-dist = [ { name = "rich" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "starlette" }, + { name = "starlette", specifier = ">=0.49.1" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn", specifier = ">=0.34.0" }, @@ -5060,15 +5071,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.47.2" +version = "0.49.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/57/d062573f391d062710d4088fa1369428c38d51460ab6fedff920efef932e/starlette-0.47.2.tar.gz", hash = "sha256:6ae9aa5db235e4846decc1e7b79c4f346adf41e9777aebeb49dfd09bbd7023d8", size = 2583948, upload-time = "2025-07-20T17:31:58.522Z" } +sdist = { url = "https://files.pythonhosted.org/packages/de/1a/608df0b10b53b0beb96a37854ee05864d182ddd4b1156a22f1ad3860425a/starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284", size = 2655031, upload-time = "2025-11-01T15:12:26.13Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/1f/b876b1f83aef204198a42dc101613fefccb32258e5428b5f9259677864b4/starlette-0.47.2-py3-none-any.whl", hash = "sha256:c5847e96134e5c5371ee9fac6fdf1a67336d5815e09eb2a01fdb57a351ef915b", size = 72984, upload-time = "2025-07-20T17:31:56.738Z" }, + { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" }, ] [[package]] From eb545034ab2a7d4273fdf54e841b69cb33e45d6f Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Fri, 14 Nov 2025 08:54:42 -0800 Subject: [PATCH 12/12] fix: MCP authorization parameter implementation (#4052) # What does this PR do? Adding a user-facing `authorization ` parameter to MCP tool definitions that allows users to explicitly configure credentials per MCP server, addressing GitHub Issue #4034 in a secure manner. 
## Test Plan tests/integration/responses/test_mcp_authentication.py --------- Co-authored-by: Omar Abdelwahab Co-authored-by: Ashwin Bharambe --- client-sdks/stainless/openapi.yml | 15 + docs/static/llama-stack-spec.yaml | 15 + docs/static/stainless-llama-stack-spec.yaml | 15 + src/llama_stack/core/routers/tool_runtime.py | 8 +- .../core/routing_tables/toolgroups.py | 12 +- .../responses/openai_responses.py | 13 + .../meta_reference/responses/streaming.py | 4 +- .../meta_reference/responses/tool_executor.py | 4 +- .../inline/tool_runtime/rag/memory.py | 9 +- .../tool_runtime/bing_search/bing_search.py | 9 +- .../tool_runtime/brave_search/brave_search.py | 9 +- .../model_context_protocol/config.py | 10 +- .../model_context_protocol.py | 72 +- .../tavily_search/tavily_search.py | 9 +- .../wolfram_alpha/wolfram_alpha.py | 9 +- src/llama_stack/providers/utils/tools/mcp.py | 84 ++- src/llama_stack/testing/api_recorder.py | 12 +- src/llama_stack_api/openai_responses.py | 2 + src/llama_stack_api/tools.py | 14 +- .../inference/test_tools_with_schemas.py | 9 +- ...30294237eb43063c00efc83b8a1202c1cc20c.json | 614 ++++++++++++++++++ ...4866a73cc04ce93db40346beb070f30fafee1.json | 614 ++++++++++++++++++ ...d0532e8f5b9418b22e5f874afff695601da16.json | 574 ++++++++++++++++ ...7dc01025aeb2ee6203ef478133313e0a0e250.json | 614 ++++++++++++++++++ ...b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json | 574 ++++++++++++++++ ...ea14cd2869c77972c33e66d9b42438e2165cd.json | 574 ++++++++++++++++ ...b610b38555bb86f93c507ede8752af47cda6a.json | 574 ++++++++++++++++ ...9b84bf814950e3c8f11eed7ed9f11d4462237.json | 614 ++++++++++++++++++ .../responses/test_conversation_responses.py | 1 + .../responses/test_mcp_authentication.py | 105 +++ .../responses/test_tool_responses.py | 2 +- tests/integration/tool_runtime/test_mcp.py | 10 +- .../tool_runtime/test_mcp_json_schema.py | 61 +- .../routers/test_routing_tables.py | 2 +- 34 files changed, 5205 insertions(+), 62 deletions(-) create mode 100644 tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json create mode 100644 tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json create mode 100644 tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json create mode 100644 tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json create mode 100644 tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json create mode 100644 tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json create mode 100644 tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json create mode 100644 tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json create mode 100644 tests/integration/responses/test_mcp_authentication.py diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 65a255c17..d0813de4d 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2054,6 +2054,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. 
+ required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -7123,6 +7130,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -9307,6 +9318,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. additionalProperties: false required: - tool_name diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 66eda78c7..759c7501a 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1878,6 +1878,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. + required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -6182,6 +6189,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -8366,6 +8377,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. additionalProperties: false required: - tool_name diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 65a255c17..d0813de4d 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2054,6 +2054,13 @@ paths: required: false schema: $ref: '#/components/schemas/URL' + - name: authorization + in: query + description: >- + (Optional) OAuth access token for authenticating with the MCP server. + required: false + schema: + type: string deprecated: false /v1/toolgroups: get: @@ -7123,6 +7130,10 @@ components: - type: object description: >- (Optional) HTTP headers to include when connecting to the server + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server require_approval: oneOf: - type: string @@ -9307,6 +9318,10 @@ components: - type: object description: >- A dictionary of arguments to pass to the tool. + authorization: + type: string + description: >- + (Optional) OAuth access token for authenticating with the MCP server. 
additionalProperties: false required: - tool_name diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index eccc05732..b387cb657 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -34,16 +34,16 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("ToolRuntimeRouter.shutdown") pass - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any: + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None) -> Any: logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") provider = await self.routing_table.get_provider_impl(tool_name) return await provider.invoke_tool( tool_name=tool_name, kwargs=kwargs, + authorization=authorization, ) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None, authorization: str | None = None ) -> ListToolDefsResponse: - logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}") - return await self.routing_table.list_tools(tool_group_id) + return await self.routing_table.list_tools(tool_group_id, authorization=authorization) diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py index 7e2068608..8676ce35e 100644 --- a/src/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -49,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): routing_key = self.tool_to_toolgroup[routing_key] return await super().get_provider_impl(routing_key, provider_id) - async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse: + async def list_tools( + self, toolgroup_id: str | None = None, authorization: str | None = None + ) -> ListToolDefsResponse: if toolgroup_id: if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id): toolgroup_id = group_id @@ -61,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): for toolgroup in toolgroups: if toolgroup.identifier not in self.toolgroups_to_tools: try: - await self._index_tools(toolgroup) + await self._index_tools(toolgroup, authorization=authorization) except AuthenticationRequiredError: # Send authentication errors back to the client so it knows # that it needs to supply credentials for remote MCP servers. 
@@ -76,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return ListToolDefsResponse(data=all_tools) - async def _index_tools(self, toolgroup: ToolGroup): + async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None): provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id) - tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint) + tooldefs_response = await provider_impl.list_runtime_tools( + toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization + ) tooldefs = tooldefs_response.data for t in tooldefs: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 3f88b1562..cb0fe284e 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -257,6 +257,19 @@ class OpenAIResponsesImpl: stream = bool(stream) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text + # Validate MCP tools: ensure Authorization header is not passed via headers dict + if tools: + from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP + + for tool in tools: + if isinstance(tool, OpenAIResponseInputToolMCP) and tool.headers: + for key in tool.headers.keys(): + if key.lower() == "authorization": + raise ValueError( + "Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] if conversation is not None: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index ea4486b62..c0b62958f 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -1091,10 +1091,12 @@ class StreamingResponseOrchestrator: "server_url": mcp_tool.server_url, "mcp_list_tools_id": list_id, } + # List MCP tools with authorization from tool config async with tracing.span("list_mcp_tools", attributes): tool_defs = await list_mcp_tools( endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, + headers=mcp_tool.headers, + authorization=mcp_tool.authorization, ) # Create the MCP list tools message diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 616ec2477..4f294a979 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -296,12 +296,14 @@ class ToolExecutor: "server_url": mcp_tool.server_url, "tool_name": function_name, } + # Invoke MCP tool with authorization from tool config async with tracing.span("invoke_mcp_tool", attributes): result = await invoke_mcp_tool( endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, tool_name=function_name, kwargs=tool_kwargs, + headers=mcp_tool.headers, + authorization=mcp_tool.authorization, ) elif function_name == "knowledge_search": response_file_search_tool = ( diff --git 
a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py index 895d219bb..afb54a8a9 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -276,7 +276,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): ) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: # Parameters are not listed since these methods are not yet invoked automatically # by the LLM. The method is only implemented so things like /tools can list without @@ -304,7 +307,9 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: vector_store_ids = kwargs.get("vector_store_ids", []) query_config = kwargs.get("query_config") if query_config: diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index a5a53a9eb..77c5a3bf7 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq return provider_data.bing_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() headers = { "Ocp-Apim-Subscription-Key": api_key, diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 4888730e4..1f49671cf 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe return provider_data.brave_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = 
self._get_api_key() url = "https://api.search.brave.com/res/v1/web/search" headers = { diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py index b8c5e77fd..9acabfc34 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py @@ -10,8 +10,14 @@ from pydantic import BaseModel class MCPProviderDataValidator(BaseModel): - # mcp_endpoint => dict of headers to send - mcp_headers: dict[str, dict[str, str]] | None = None + """ + Validator for MCP provider-specific data passed via request headers. + + Phase 1: Support old header-based authentication for backward compatibility. + In Phase 2, this will be deprecated in favor of the authorization parameter. + """ + + mcp_headers: dict[str, dict[str, str]] | None = None # Map of URI -> headers dict class MCPProviderConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 544597a51..649bddecb 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -39,15 +39,29 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime return async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: # this endpoint should be retrieved by getting the tool group right? 
if mcp_endpoint is None: raise ValueError("mcp_endpoint is required") - headers = await self.get_headers_from_request(mcp_endpoint.uri) - return await list_mcp_tools(mcp_endpoint.uri, headers) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + # Phase 1: Support both old header-based auth AND new authorization parameter + # Get headers and auth from provider data (old approach) + provider_headers, provider_auth = await self.get_headers_from_request(mcp_endpoint.uri) + + # New authorization parameter takes precedence over provider data + final_authorization = authorization or provider_auth + + return await list_mcp_tools( + endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=final_authorization + ) + + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: tool = await self.tool_store.get_tool(tool_name) if tool.metadata is None or tool.metadata.get("endpoint") is None: raise ValueError(f"Tool {tool_name} does not have metadata") @@ -55,19 +69,57 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime if urlparse(endpoint).scheme not in ("http", "https"): raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") - headers = await self.get_headers_from_request(endpoint) - return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs) + # Phase 1: Support both old header-based auth AND new authorization parameter + # Get headers and auth from provider data (old approach) + provider_headers, provider_auth = await self.get_headers_from_request(endpoint) + + # New authorization parameter takes precedence over provider data + final_authorization = authorization or provider_auth + + return await invoke_mcp_tool( + endpoint=endpoint, + tool_name=tool_name, + kwargs=kwargs, + headers=provider_headers, + authorization=final_authorization, + ) + + async def get_headers_from_request(self, mcp_endpoint_uri: str) -> tuple[dict[str, str], str | None]: + """ + Extract headers and authorization from request provider data (Phase 1 backward compatibility). + + Phase 1: Temporarily allows Authorization to be passed via mcp_headers for backward compatibility. + Phase 2: Will enforce that Authorization should use the dedicated authorization parameter instead. 
+ + Returns: + Tuple of (headers_dict, authorization_token) + - headers_dict: All headers except Authorization + - authorization_token: Token from Authorization header (with "Bearer " prefix removed), or None + """ - async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: def canonicalize_uri(uri: str) -> str: return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" headers = {} + authorization = None provider_data = self.get_request_provider_data() - if provider_data and provider_data.mcp_headers: + if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers: for uri, values in provider_data.mcp_headers.items(): if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): continue - headers.update(values) - return headers + + # Phase 1: Extract Authorization from mcp_headers for backward compatibility + # (Phase 2 will reject this and require the dedicated authorization parameter) + for key in values.keys(): + if key.lower() == "authorization": + # Extract authorization token and strip "Bearer " prefix if present + auth_value = values[key] + if auth_value.startswith("Bearer "): + authorization = auth_value[7:] # Remove "Bearer " prefix + else: + authorization = auth_value + else: + headers[key] = values[key] + + return headers, authorization diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d86cf5d8e..e12b41885 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.tavily_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() async with httpx.AsyncClient() as client: response = await client.post( diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index f8d806a5c..68f0ebaef 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.wolfram_alpha_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + 
async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() params = { "input": kwargs["query"], diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index fad1bf0f0..9c5e9cd96 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -30,6 +30,40 @@ from llama_stack_api import ( logger = get_logger(__name__, category="tools") + +def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]: + """ + Prepare headers for MCP requests with authorization support. + + Args: + base_headers: Base headers dictionary (can be None) + authorization: OAuth access token (without "Bearer " prefix) + + Returns: + Headers dictionary with Authorization header if token provided + + Raises: + ValueError: If Authorization header is specified in the headers dict (security risk) + """ + headers = dict(base_headers or {}) + + # Security check: reject any Authorization header in the headers dict + # Users must use the authorization parameter instead to avoid security risks + existing_keys_lower = {k.lower() for k in headers.keys()} + if "authorization" in existing_keys_lower: + raise ValueError( + "For security reasons, Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + + # Add Authorization header if token provided + if authorization: + # OAuth access token - add "Bearer " prefix + headers["Authorization"] = f"Bearer {authorization}" + + return headers + + protocol_cache = TTLDict(ttl_seconds=3600) @@ -112,9 +146,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat raise -async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse: +async def list_mcp_tools( + endpoint: str, + headers: dict[str, str] | None = None, + authorization: str | None = None, +) -> ListToolDefsResponse: + """List tools available from an MCP server. + + Args: + endpoint: MCP server endpoint URL + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + List of tool definitions from the MCP server + + Raises: + ValueError: If Authorization is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + tools = [] - async with client_wrapper(endpoint, headers) as session: + async with client_wrapper(endpoint, final_headers) as session: tools_result = await session.list_tools() for tool in tools_result.tools: tools.append( @@ -132,9 +186,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs async def invoke_mcp_tool( - endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any] + endpoint: str, + tool_name: str, + kwargs: dict[str, Any], + headers: dict[str, str] | None = None, + authorization: str | None = None, ) -> ToolInvocationResult: - async with client_wrapper(endpoint, headers) as session: + """Invoke an MCP tool with the given arguments. 
+ + Args: + endpoint: MCP server endpoint URL + tool_name: Name of the tool to invoke + kwargs: Tool invocation arguments + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + Tool invocation result with content and error information + + Raises: + ValueError: If Authorization header is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + + async with client_wrapper(endpoint, final_headers) as session: result = await session.call_tool(tool_name, kwargs) content: list[InterleavedContentItem] = [] diff --git a/src/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py index f46f07458..a7ad582f3 100644 --- a/src/llama_stack/testing/api_recorder.py +++ b/src/llama_stack/testing/api_recorder.py @@ -609,14 +609,14 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) async def _patched_tool_invoke_method( - original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any] + original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None ): """Patched version of tool runtime invoke_tool method for recording/replay.""" global _current_mode, _current_storage if _current_mode == APIRecordingMode.LIVE or _current_storage is None: # Normal operation - return await original_method(self, tool_name, kwargs) + return await original_method(self, tool_name, kwargs, authorization=authorization) request_hash = normalize_tool_request(provider_name, tool_name, kwargs) @@ -634,7 +634,7 @@ async def _patched_tool_invoke_method( if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING): # Make the tool call and record it - result = await original_method(self, tool_name, kwargs) + result = await original_method(self, tool_name, kwargs, authorization=authorization) request_data = { "test_id": get_test_context(), @@ -885,9 +885,11 @@ def patch_inference_clients(): OllamaAsyncClient.list = patched_ollama_list # Create patched methods for tool runtimes - async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]): + async def patched_tavily_invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ): return await _patched_tool_invoke_method( - _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs + _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization ) # Apply tool runtime patches diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index 70139a98a..2dd73e90a 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -490,6 +490,7 @@ class OpenAIResponseInputToolMCP(BaseModel): :param server_label: Label to identify this MCP server :param server_url: URL endpoint of the MCP server :param headers: (Optional) HTTP headers to include when connecting to the server + :param authorization: (Optional) OAuth access token for authenticating with the MCP server :param require_approval: Approval requirement for tool calls ("always", "never", or filter) :param allowed_tools: (Optional) Restriction on which tools can be used from this server """ @@ -498,6 +499,7 @@ class OpenAIResponseInputToolMCP(BaseModel): server_label: str server_url: str headers: dict[str, Any] | None = None + authorization: str | 
None = Field(default=None, exclude=True) require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never" allowed_tools: list[str] | AllowedToolsFilter | None = None diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py index 6571c2047..81c989f88 100644 --- a/src/llama_stack_api/tools.py +++ b/src/llama_stack_api/tools.py @@ -196,22 +196,32 @@ class ToolRuntime(Protocol): # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: """List all tools in the runtime. :param tool_group_id: The ID of the tool group to list tools for. :param mcp_endpoint: The MCP endpoint to use for the tool group. + :param authorization: (Optional) OAuth access token for authenticating with the MCP server. :returns: A ListToolDefsResponse. """ ... @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, + tool_name: str, + kwargs: dict[str, Any], + authorization: str | None = None, + ) -> ToolInvocationResult: """Run a tool with the given arguments. :param tool_name: The name of the tool to invoke. :param kwargs: A dictionary of arguments to pass to the tool. + :param authorization: (Optional) OAuth access token for authenticating with the MCP server. :returns: A ToolInvocationResult. """ ... diff --git a/tests/integration/inference/test_tools_with_schemas.py b/tests/integration/inference/test_tools_with_schemas.py index f30e9ece5..5b6e69ae3 100644 --- a/tests/integration/inference/test_tools_with_schemas.py +++ b/tests/integration/inference/test_tools_with_schemas.py @@ -193,7 +193,14 @@ class TestMCPToolsInChatCompletion: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } diff --git a/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json b/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json new file mode 100644 index 000000000..464de788f --- /dev/null +++ b/tests/integration/responses/recordings/51e3ddbc9d23c614ead9a8fd6ad30294237eb43063c00efc83b8a1202c1cc20c.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
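For reference, the `prepare_mcp_headers` helper added in `mcp.py` above is the counterpart of this protocol change: the token travels as a dedicated parameter and is only turned into a `Bearer` header at the MCP boundary. A short usage sketch (the import path is inferred from the file location under `src/`, and the header values are made up):

```python
# Usage sketch for prepare_mcp_headers as defined earlier in this patch.
# Import path inferred from src/llama_stack/providers/utils/tools/mcp.py.
from llama_stack.providers.utils.tools.mcp import prepare_mcp_headers

# The dedicated token becomes a Bearer header; other headers pass through.
headers = prepare_mcp_headers({"X-Trace-Id": "t-1"}, authorization="abc123")
assert headers == {"X-Trace-Id": "t-1", "Authorization": "Bearer abc123"}

# A pre-set Authorization header is rejected outright, matching the
# security check in the function body.
try:
    prepare_mcp_headers({"Authorization": "Bearer abc123"}, authorization=None)
except ValueError:
    pass  # tokens must go through the authorization parameter instead
```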
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "c5g42LQpiBwmVH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MEmQFjCKEsNDL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "dF3UemYO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ENDOmjG37D" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "6kb5u2d4ILV59" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Y6Dp6rbT9OdBG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "EN0ShAkdxF2jIs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1NHavCOT2fSI63" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VTwbnRFtKY2W" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "VJuNhLeGK43e6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + 
"content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bFgxcYCjU42I" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5KR4mGTP0Rpu0O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KCeY3i4Qo9L1j" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GgtT2kqCUk8jGH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "H3E18AkuuATh3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5kuUoomGw6aPf0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CKIiDxWMV3zzcNj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "9KZoS4rawE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-51e3ddbc9d23", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "iq2ecCxqopvPO" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json b/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json new file mode 100644 index 000000000..66c87e3bb --- /dev/null +++ b/tests/integration/responses/recordings/5236eb1d546e5a1bd0712891d8b4866a73cc04ce93db40346beb070f30fafee1.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "veiGKPHTdRNcOX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "u9RK8eZYDguJs" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "U0L1RjHF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TMS6QVLJfj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5zokjwZ0nBNlD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "CmOp3DQRu0AqZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OlnZU0jlGyE2mD" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "PGCsCfw8zUqRAj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8P65fJ4x3QVF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HVTNGb62o54Ol" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + 
"content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bdRgQioKQZM6" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5djjyePEzwsPID" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "xoN3TaCEum6A9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UmU8LCL6WJIDrf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "FFXxvyme7JKyc" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8BpDPmgFmIBJQQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Mey7rwshfBQbVlP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IXaz4vn8As" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-5236eb1d546e", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9ebnd6bFXcdOY" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json b/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json new file mode 100644 index 000000000..bacefe818 --- /dev/null +++ b/tests/integration/responses/recordings/56ddb450d81590f461113ec5a55d0532e8f5b9418b22e5f874afff695601da16.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_UeAsx9M8mAXo1F1LZj6TsEV9", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "bKe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kxw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cKkF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "md" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "O" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "o" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nRfv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1M8i" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "R2Q" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "MDi" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "7KwE" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-56ddb450d815", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "9IipvPESur5Y7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json b/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json new file mode 100644 index 000000000..7ab319fb8 --- /dev/null +++ b/tests/integration/responses/recordings/59faeeca84b137e9b2c7d310ea47dc01025aeb2ee6203ef478133313e0a0e250.json @@ -0,0 +1,614 @@ +{ + "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_2lYntxgdJV66JFvD6OuICQCB", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_2lYntxgdJV66JFvD6OuICQCB", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "BNpFmbWkpYEjZX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HdnyHcq2CLvjn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "gOMuwgrp" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "OTfqq7Yggw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cwJMhZJyf5PIp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "54NR7IGiuBTw5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "q1x9cVVPTflQti" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vcudLe3yaadkvB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "uql1pBt4elRL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M2kzUEkJctjYp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Waet2ux2zs9P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "KjbjxdGYUZDuiI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Fg8IXJhJv8iAI" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wiAqPLAoinVhQq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vJnb9sE969jph" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5Hgi5CU0aV0sPw" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "RDfKhuQo4E4TLXU" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oN1EYVkDbW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-59faeeca84b1", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "OfhOTT3VdJ2s7" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json b/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json new file mode 100644 index 000000000..c2c8bbd80 --- /dev/null +++ b/tests/integration/responses/recordings/775a161a318a252454fd44f9850b37c6ec15eb17dfaa95f015dcc6f65fa10c94.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_2lYntxgdJV66JFvD6OuICQCB", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "UmB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ejb" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Loxj" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "IQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "G" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "lo9p" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "YWPA" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "vV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + 
"content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "e0t" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "h2F" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "B9QY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-775a161a318a", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "MH88zIptmy2Xs" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json b/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json new file mode 100644 index 000000000..37a29324e --- /dev/null +++ b/tests/integration/responses/recordings/c84e894f47a6d7f4d4556829d24ea14cd2869c77972c33e66d9b42438e2165cd.json @@ -0,0 +1,574 @@ +{ + "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_mitVYvmPaFfoSmKjzKo5xmZp", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "5Y1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "QzQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4NPm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Lh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "r" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "w" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GSVa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "AWZm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "1Bw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Oq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cI8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kKqh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c84e894f47a6", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "etTUytEvlkJ99" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json 
b/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json new file mode 100644 index 000000000..e98f64b93 --- /dev/null +++ b/tests/integration/responses/recordings/c9c723cd01233311d9033f55d6db610b38555bb86f93c507ede8752af47cda6a.json @@ -0,0 +1,574 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "function": { + "arguments": "", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "TC0" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "{\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "hDL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "li", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "4G8Z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ow" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "_name", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "P" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\":\"", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "M" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "my", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "yhAk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "aw", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SdIN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "esom", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2z" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "eli", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "nEC" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "quid", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "2B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": [ + { + "index": 0, + "id": null, + "function": { + "arguments": "\"}", + "name": null + }, + "type": null + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "DoL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "cSRf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-c9c723cd0123", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 22, + "prompt_tokens": 154, + "total_tokens": 176, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + 
"prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "ejlSF0NzXFFso" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json b/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json new file mode 100644 index 000000000..67c78f3ed --- /dev/null +++ b/tests/integration/responses/recordings/db81127157a8364ce8f7a81e10d9b84bf814950e3c8f11eed7ed9f11d4462237.json @@ -0,0 +1,614 @@ +{ + "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=openai/gpt-4o]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the boiling point of myawesomeliquid?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid_name\":\"myawesomeliquid\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_wnbihJuwYAfnI8uxy84Yl48j", + "content": [ + { + "type": "text", + "text": "-100" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "Usdowqbd6beiYB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + 
"usage": null, + "obfuscation": "nVevItSH27TBR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HWyYtVAl" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "kvvcut6Eib" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "E0osAbGBpCPvy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "GmH7m44fmv0Mk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "my", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "oJ4DV7z5GiqJqX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "aw", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8AmNNAYPXMNrEr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "esom", + "function_call": null, + "refusal": null, + "role": 
null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "JEzK8X8AD9hP" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "eli", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8EGj5LyQzpZMt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "quid", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "wQG19uBuvC7j" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "8Wyenb7E997f9E" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "SVXiel7RHA6f3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "ynScunJEjmOWBo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": 
"po2PLlPavc9TN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "mt2jiL22pWkH93" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "32gJJ61zmjmftOn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": null, + "obfuscation": "HszNIiCJ12" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-db81127157a8", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_cbf1785567", + "usage": { + "completion_tokens": 17, + "prompt_tokens": 188, + "total_tokens": 205, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "cAx3IDg7toBDJ" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index bbd861e0d..ce249f6a0 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ b/tests/integration/responses/test_conversation_responses.py @@ -88,6 +88,7 @@ class TestConversationResponses: assert "apple" in response.output_text.lower() + @pytest.mark.timeout(60, method="thread") def test_conversation_error_handling(self, openai_client, text_model_id): """Test error handling for invalid and nonexistent conversations.""" # Invalid conversation ID format diff --git a/tests/integration/responses/test_mcp_authentication.py b/tests/integration/responses/test_mcp_authentication.py new file mode 100644 index 000000000..5c990ff6a --- /dev/null +++ b/tests/integration/responses/test_mcp_authentication.py @@ -0,0 +1,105 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +import pytest + +from tests.common.mcp import make_mcp_server + +from .helpers import setup_mcp_tools + +# MCP authentication tests with recordings +# Tests for bearer token authorization support in MCP tool configurations + + +def test_mcp_authorization_bearer(responses_client, text_model_id): + """Test that bearer authorization is correctly applied to MCP requests.""" + test_token = "test-bearer-token-789" + with make_mcp_server(required_auth_token=test_token) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "auth-mcp", + "server_url": "", + "authorization": test_token, # Just the token, not "Bearer " + } + ], + mcp_server_info, + ) + + # Create response - authorization should be applied + response = responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + # Verify list_tools succeeded (requires auth) + assert len(response.output) >= 3 + assert response.output[0].type == "mcp_list_tools" + assert len(response.output[0].tools) == 2 + + # Verify tool invocation succeeded (requires auth) + assert response.output[1].type == "mcp_call" + assert response.output[1].error is None + + +def test_mcp_authorization_error_when_header_provided(responses_client, text_model_id): + """Test that providing Authorization in headers raises a security error.""" + test_token = "test-token-123" + with make_mcp_server(required_auth_token=test_token) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "header-auth-mcp", + "server_url": "", + "headers": {"Authorization": f"Bearer {test_token}"}, # Security risk - should be rejected + } + ], + mcp_server_info, + ) + + # Create response - should raise BadRequestError for security reasons + with pytest.raises((ValueError, Exception), match="Authorization header cannot be passed via 'headers'"): + responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + +def test_mcp_authorization_backward_compatibility(responses_client, text_model_id): + """Test that MCP tools work without authorization (backward compatibility).""" + # No authorization required + with make_mcp_server(required_auth_token=None) as mcp_server_info: + tools = setup_mcp_tools( + [ + { + "type": "mcp", + "server_label": "noauth-mcp", + "server_url": "", + } + ], + mcp_server_info, + ) + + # Create response without authorization + response = responses_client.responses.create( + model=text_model_id, + input="What is the boiling point of myawesomeliquid?", + tools=tools, + stream=False, + ) + + # Verify operations succeeded without auth + assert len(response.output) >= 3 + assert response.output[0].type == "mcp_list_tools" + assert response.output[1].type == "mcp_call" + assert response.output[1].error is None diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 2c7c7ef34..742d45f8b 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -249,7 +249,7 @@ def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, for tool in tools: if tool["type"] == "mcp": - tool["headers"] = {"Authorization": "Bearer test-token"} + tool["authorization"] = "test-token" response = responses_client.responses.create( model=text_model_id, diff --git a/tests/integration/tool_runtime/test_mcp.py 
b/tests/integration/tool_runtime/test_mcp.py index 9ce0d1c98..1b7f509d2 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -37,6 +37,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): mcp_endpoint=dict(uri=uri), ) + # Use old header-based approach for Phase 1 (backward compatibility) provider_data = { "mcp_headers": { uri: { @@ -53,7 +54,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): tools_list = llama_stack_client.tools.list( toolgroup_id=test_toolgroup_id, - extra_headers=auth_headers, + extra_headers=auth_headers, # Use old header-based approach ) assert len(tools_list) == 2 assert {t.name for t in tools_list} == {"greet_everyone", "get_boiling_point"} @@ -61,7 +62,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): response = llama_stack_client.tool_runtime.invoke_tool( tool_name="greet_everyone", kwargs=dict(url="https://www.google.com"), - extra_headers=auth_headers, + extra_headers=auth_headers, # Use old header-based approach ) content = response.content assert len(content) == 1 @@ -76,9 +77,7 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): "server_label": test_toolgroup_id, "require_approval": "never", "allowed_tools": [tool.name for tool in tools_list], - "headers": { - "Authorization": f"Bearer {AUTH_TOKEN}", - }, + "authorization": AUTH_TOKEN, } ] agent = Agent( @@ -104,7 +103,6 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server): } ], stream=True, - extra_headers=auth_headers, ) ) events = [chunk.event for chunk in chunks] diff --git a/tests/integration/tool_runtime/test_mcp_json_schema.py b/tests/integration/tool_runtime/test_mcp_json_schema.py index def0b27b8..719588c7f 100644 --- a/tests/integration/tool_runtime/test_mcp_json_schema.py +++ b/tests/integration/tool_runtime/test_mcp_json_schema.py @@ -4,8 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -""" -Integration tests for MCP tools with complex JSON Schema support. +"""Integration tests for MCP tools with complex JSON Schema support. Tests $ref, $defs, and other JSON Schema features through MCP integration. 
""" @@ -123,7 +122,14 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -166,7 +172,15 @@ class TestMCPSchemaPreservation: provider_id="model-context-protocol", mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -216,7 +230,14 @@ class TestMCPSchemaPreservation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -263,7 +284,14 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -309,7 +337,14 @@ class TestMCPToolInvocation: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -365,7 +400,14 @@ class TestAgentWithMCPTools: mcp_endpoint=dict(uri=uri), ) - provider_data = {"mcp_headers": {uri: {"Authorization": f"Bearer {AUTH_TOKEN}"}}} + # Use old header-based approach for Phase 1 (backward compatibility) + provider_data = { + "mcp_headers": { + uri: { + "Authorization": f"Bearer {AUTH_TOKEN}", + }, + }, + } auth_headers = { "X-LlamaStack-Provider-Data": json.dumps(provider_data), } @@ -381,6 +423,7 @@ class TestAgentWithMCPTools: "server_label": test_toolgroup_id, "require_approval": "never", "allowed_tools": [tool.name for tool in tools_list], + "authorization": AUTH_TOKEN, } ] @@ -389,7 +432,6 @@ class TestAgentWithMCPTools: model=text_model_id, instructions="You are a helpful assistant that can process orders and book flights.", tools=tool_defs, - extra_headers=auth_headers, ) session_id = agent.create_session("test-session-complex") @@ -411,7 +453,6 @@ class TestAgentWithMCPTools: } ], stream=True, - extra_headers=auth_headers, ) ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 8fd9d6ec3..292ee8384 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -137,7 +137,7 @@ class ToolGroupsImpl(Impl): async def unregister_toolgroup(self, toolgroup_id: str): return toolgroup_id - async def 
list_runtime_tools(self, toolgroup_id, mcp_endpoint): + async def list_runtime_tools(self, toolgroup_id, mcp_endpoint, authorization=None): return ListToolDefsResponse( data=[ ToolDef(